@@ 1,18 1,38 @@
#! /usr/bin/env python
# see https://gist.github.com/palday/1ff12dd110255541df0f
# adapted from
# GitHub Gist https://gist.github.com/tpoisot/7406955
# don't forget to install bibtexparser: http://bibtexparser.readthedocs.org/en/latest/install.html
# or with pip:
# pip install bibtexparser
# Copyright (c) 2014-2021, Phillip Alday
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
import argparse
import sys
import codecs
from bibtexparser.bparser import BibTexParser, logger
import bibtexparser
import logging
from logging import NullHandler
# Attach a do-nothing handler to the logger object imported from
# bibtexparser.bparser.
# NOTE(review): presumably this keeps the parser's own records from being
# reported through a fallback handler - confirm.
logger.addHandler(NullHandler())
# Message format for the root logger: the level name is wrapped in ANSI escape
# sequences (ESC[1m = bold, ESC[33m = yellow, ESC[0m = reset), followed by the
# message text.
logging.basicConfig(format='\033[1m\033[33m%(levelname)s:\033[0m %(message)s')
non_local_fields = ['address',
from bibtexparser.bparser import BibTexParser, logger
from bibtexparser.bibdatabase import BibDatabase
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.customization import convert_to_unicode, homogenize_latex_encoding
# Only records of level ERROR or higher pass the bibtexparser parser logger.
logger.setLevel(logging.ERROR)
# fix missing standard types
# Adds two entry-type names to bibtexparser's module-level STANDARD_TYPES.
# NOTE(review): presumably so that @collection and @periodical entries are
# not treated as non-standard by the parser - confirm against bibtexparser.
bibtexparser.bibdatabase.STANDARD_TYPES.add("collection")
bibtexparser.bibdatabase.STANDARD_TYPES.add("periodical")
NON_LOCAL_FIELDS = ['address',
'annote',
'author',
'booktitle',
@@ 38,59 58,57 @@ non_local_fields = ['address',
'link',
'volume',
'year',
'eprint',
'eprintclass',
'eprinttype',
'date'
]
def dict2bib(ke,di):
# it seems the type field changed between different bibtexparser versions
try:
b = "@"+di['type'].upper()+"{"+ke+",\n"
except KeyError:
b = "@"+di['ENTRYTYPE'].upper()+"{"+ke+",\n"
def prune(entry):
    """
    prune(entry)

    Return a new dict holding only the portable fields of a BibTeX entry.

    A field survives when its name is listed in `NON_LOCAL_FIELDS` or is one
    of 'ID' / 'ENTRYTYPE'. Everything else (things like "date-added" or
    references to document storage) is left out. The filter is a whitelist
    of fields to keep rather than a blacklist of local fields to remove.

    The entry passed in is not modified.
    """
    # fields that bibtexparser itself stores on every entry
    bookkeeping = ['ID', 'ENTRYTYPE']
    allowed = NON_LOCAL_FIELDS + bookkeeping
    pruned = {}
    for name, content in entry.items():
        if name in allowed:
            pruned[name] = content
    return pruned
try:
items = di.iteritems()
except AttributeError:
items = di.items()
# Command-line interface: three positional arguments, each of which argparse
# converts into an already-opened text file object.
argparser = argparse.ArgumentParser(
    description="Extract minimal BibTeX entries from a large bibliography")
# The inputs go through argparse.FileType (as the output already does) instead
# of the bare `open` builtin: a path that cannot be opened is then reported as
# a regular usage error rather than an uncaught OSError traceback, and the
# files are decoded as UTF-8 regardless of the locale, matching the encoding
# used for the output file.
argparser.add_argument('keylist',
                       type=argparse.FileType('r', encoding='UTF-8'),
                       help="Filename of a newline delimited list of BibTeX keys")
argparser.add_argument('bibfile',
                       type=argparse.FileType('r', encoding='UTF-8'),
                       help="BibTeX file to extract entries from")
argparser.add_argument('outfile',
                       type=argparse.FileType('w', encoding='UTF-8'),
                       help="Destination file for extracted keys (will be overwritten)")
# TODO expose additional bibtexparser options, e.g.
# parser = BibTexParser(common_strings=False)
# parser.ignore_nonstandard_types = False
# parser.homogenise_fields = False
# allow for more verbose logging
def main(argv=None):
args = argparser.parse_args(argv)
for (k, v) in sorted(items):
if k.lower().strip() in non_local_fields:
if k == 'link':
k = 'url'
b += '\t' + k + ' = {'+v+'},\n'
b += '}\n'
return b
# One requested key per line of the key-list file, with trailing whitespace
# (including the newline) removed.
# NOTE(review): a blank line yields the empty key '', which would then most
# likely be reported as "not found" below - confirm whether that is intended.
keys = [_.rstrip() for _ in args.keylist.readlines()]
# Parse the full bibliography with a parser created with common_strings=True
# and convert_to_unicode installed as its customization hook.
bibparser = BibTexParser(common_strings=True)
bibparser.customization = convert_to_unicode
# get_entry_dict() gives a mapping that is looked up by key below.
allrefs = bibtexparser.load(args.bibfile, parser=bibparser).get_entry_dict()
# Collect the requested entries, in key-list order, each reduced by prune().
usedrefs = BibDatabase()
usedrefs.entries = [prune(allrefs[key]) for key in keys if key in allrefs]
# Keys that were requested but are absent from the bibliography are only
# logged as a warning; processing continues.
missing = [key for key in keys if key not in allrefs]
if missing:
logging.warning("Following keys not found: {}".format(', '.join(missing)))
# Serialise the reduced database and write it to the already-open output file.
writer = BibTexWriter()
#writer.indent = ' ' * 4
args.outfile.write(writer.write(usedrefs))
# NOTE(review): none of args.keylist, args.bibfile or args.outfile is closed
# explicitly in the lines visible here.
# Script entry point.
# NOTE(review): this guard contains both an inline pipeline driven by
# sys.argv / dict2bib and a trailing sys.exit(main()) call; they look like
# two revisions of the same entry point - confirm which one is intended.
if __name__ == "__main__":
## Check the number of arguments
# sys.argv[0] is the script itself, so exactly three user arguments are
# required.
if len(sys.argv) != 4:
raise ValueError("Wrong number of arguments")
else :
key_list = sys.argv[1]
bib_file = sys.argv[2]
out_file = sys.argv[3]
## The three arguments should be strings
if not isinstance(key_list, str):
raise TypeError("The path to the list of keys should be a string")
if not isinstance(bib_file, str):
raise TypeError("The path to the bibtex library should be a string")
if not isinstance(out_file, str):
raise TypeError("The path to the output bibtex file should be a string")
## Step 1 - read the key list
# One key per line; trailing ':' and newline characters are stripped.
# NOTE(review): the file opened here is never closed explicitly.
keys = [kl.rstrip(":\n") for kl in open(key_list, 'r')]
## Step 2 - read the library file
# The whole file content is handed to BibTexParser as a string.
# NOTE(review): the file opened here is never closed explicitly.
refs = BibTexParser(open(bib_file, 'r').read()).get_entry_dict()
## Step 3 - extract the used entries
# Requested keys that are missing from the library are silently dropped.
used_refs = {key: refs[key] for key in keys if key in refs}
## Step 4 - convert the dicts back into bibtex
# dict.iteritems() exists only on Python 2; fall back to items() otherwise.
try:
used_refs_iter = used_refs.iteritems()
except AttributeError:
used_refs_iter = used_refs.items()
refs_as_bib = [dict2bib(k, v) for (k, v) in used_refs_iter ]
## Step 5 - write the output file
# The 'utf-8-sig' codec writes a UTF-8 byte-order mark at the start of the
# file.
with codecs.open(out_file, 'w', 'utf-8-sig') as of:
of.writelines(refs_as_bib)
# Run main() and hand its return value to sys.exit() as the exit status.
sys.exit(main())
@@ 1,4 1,20 @@
-#! /bin/env python3
+#! /usr/bin/env python3
+"""
+Copyright (c) 2018, Phillip Alday
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+"""
import sys
import argparse
@@ 1,6 1,3 @@
-Futhark
-=========
-
Markdown and DVCS can change the way we collaborate. Even for those pesky journals that require a submission in Word format, you can do the majority of your work in Markdown and then convert to Word via pandoc (and if need be LibreOffice to convert .odt/.docx to .doc). However, a common bibliography is still difficult to pull off. The Makefile and scripts here automagically extract the relevant pandoc references from the specified BibTeX library and create a minimal local BibTeX file that you can include in your repository.
This was inspired by and expands upon the ideas in [this blog post](http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/).
@@ 10,9 7,24 @@ Requirements:
- `make`
- [Mercurial](http://mercurial.selenic.com/) ([git version available on GitHub](https://github.com/palday/futhark))
- [`pandoc`](http://johnmacfarlane.net/pandoc/)
-- [Python](https://www.python.org/)
- - currently using Python 2.x because of
- - [BibTexParser](http://bibtexparser.readthedocs.org/en/latest/install.html)
+- [Python (3)](https://www.python.org/)
+- [BibTexParser](http://bibtexparser.readthedocs.org/en/latest/install.html)
+
+You can install `pandoc` and all the Python tooling via [conda](https://www.anaconda.com/products/individual):
+```bash
+user@host:~/projectdir$ conda env create -f environment.yml
+user@host:~/projectdir$ conda activate futhark
+(futhark) user@host:~/projectdir$ conda activate futhark
+```
-License:
-My contributions are currently GPLv2, but I am building on the work of others, whose licensing conditions aren't yet clear. The LaTeX template is a modification of the standard pandoc template and is thus subject to [the same restrictions](https://github.com/jgm/pandoc-templates).
No newline at end of file
+If you prefer to use your system Python, virtual environments, PyEnv, etc., then the Python packages are available via pip:
+```bash
+user@host:~/projectdir$ python -m pip install -r requirements.txt
+```
+
+The `mdwc` script is useful for word counts of Markdown documents with a YAML header block and is developed [here](https://github.com/palday/mdwc).
+
+## License
+My contributions are GPLv2. Previous versions used code with an unclear license, but that has been removed in current versions.
+
+The LaTeX template is a modification of the standard pandoc template and is thus subject to [the same restrictions](https://github.com/jgm/pandoc-templates).