# HG changeset patch # User Phillip Alday # Date 1613150040 -3600 # Fri Feb 12 18:14:00 2021 +0100 # Node ID 4b673482eaaca6fbdd7d26055b3bce5781e1b9ba # Parent 7827408d23693e0ce8387ad3a3de60ab5fc3a47e recent features from futhark git (#922c76) diff --git a/environment.yml b/environment.yml new file mode 100644 --- /dev/null +++ b/environment.yml @@ -0,0 +1,14 @@ +name: futhark +channels: + - defaults +dependencies: + - pandoc>=2.0 + - pip + - python>=3.5 + - pip: + - bibtexparser==1.2.0 + - pandoc-eqnos==2.5.0 + - pandoc-fignos==2.4.0 + - pandoc-tablenos==2.3.0 + - pandoc-xnos==2.5.0 + - pandocfilters==1.4.3 diff --git a/extractbib.py b/extractbib.py --- a/extractbib.py +++ b/extractbib.py @@ -1,18 +1,38 @@ #! /usr/bin/env python -# s. https://gist.github.com/palday/1ff12dd110255541df0f -# adapted from -# GitHub Gist https://gist.github.com/tpoisot/7406955 -# don't forget to install bibtexparser: http://bibtexparser.readthedocs.org/en/latest/install.html -# or with pip: -# pip install bibtexparser +# Copyright (c) 2014-2021, Phillip Alday +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/> +import argparse import sys -import codecs -from bibtexparser.bparser import BibTexParser, logger +import bibtexparser + +import logging from logging import NullHandler -logger.addHandler(NullHandler()) +logging.basicConfig(format='\033[1m\033[33m%(levelname)s:\033[0m %(message)s') -non_local_fields = ['address', +from bibtexparser.bparser import BibTexParser, logger +from bibtexparser.bibdatabase import BibDatabase +from bibtexparser.bwriter import BibTexWriter +from bibtexparser.customization import convert_to_unicode, homogenize_latex_encoding +logger.setLevel(logging.ERROR) + +# fix missing standard types +bibtexparser.bibdatabase.STANDARD_TYPES.add("collection") +bibtexparser.bibdatabase.STANDARD_TYPES.add("periodical") + +NON_LOCAL_FIELDS = ['address', 'annote', 'author', 'booktitle', @@ -38,59 +58,57 @@ 'link', 'volume', 'year', - 'eprint', - 'eprintclass', - 'eprinttype', - 'date' ] -def dict2bib(ke,di): - # it seems the type field changed between different bibtexparser versions - try: - b = "@"+di['type'].upper()+"{"+ke+",\n" - except KeyError: - b = "@"+di['ENTRYTYPE'].upper()+"{"+ke+",\n" +def prune(entry): + """ + prune(entry) + + Remove local fields from a BibTeX entry. + + Local fields include things like "date-added" and references to document + storage. + + This function uses `NON_LOCAL_FIELDS` as a whitelist, instead of + blacklisting local fields. 
+ """ + keepers = NON_LOCAL_FIELDS + ['ID', 'ENTRYTYPE'] # bibtexparser fields + return {field:value for field, value in entry.items() if field in keepers} - try: - items = di.iteritems() - except AttributeError: - items = di.items() +argparser = argparse.ArgumentParser( + description="Extract minimal BibTeX entries from a large bibliography") +argparser.add_argument('keylist', type=open, + help="Filename of a newline delimited list of BibTeX keys") +argparser.add_argument('bibfile', type=open, + help="BibTeX file to extract entries from") +argparser.add_argument('outfile', type=argparse.FileType('w', encoding='UTF-8'), + help="Destination file for extracted keys (will be overwritten") +# TODO expose additional bibtexparser options, e.g. +# parser = BibTexParser(common_strings=False) +# parser.ignore_nonstandard_types = False +# parser.homogenise_fields = False +# allow for more verbose logging + +def main(argv=None): + args = argparser.parse_args(argv) - for (k, v) in sorted(items): - if k.lower().strip() in non_local_fields: - if k == 'link': - k = 'url' - b += '\t' + k + ' = {'+v+'},\n' - b += '}\n' - return b + keys = [_.rstrip() for _ in args.keylist.readlines()] + + bibparser = BibTexParser(common_strings=True) + bibparser.customization = convert_to_unicode + + allrefs = bibtexparser.load(args.bibfile, parser=bibparser).get_entry_dict() + usedrefs = BibDatabase() + usedrefs.entries = [prune(allrefs[key]) for key in keys if key in allrefs] + + missing = [key for key in keys if key not in allrefs] + + if missing: + logging.warning("Following keys not found: {}".format(', '.join(missing))) + + writer = BibTexWriter() + #writer.indent = ' ' * 4 + args.outfile.write(writer.write(usedrefs)) if __name__ == "__main__": - ## Check the number of arguments - if len(sys.argv) != 4: - raise ValueError("Wrong number of arguments") - else : - key_list = sys.argv[1] - bib_file = sys.argv[2] - out_file = sys.argv[3] - ## The three arguments should be strings - if not 
isinstance(key_list, str): - raise TypeError("The path to the list of keys should be a string") - if not isinstance(bib_file, str): - raise TypeError("The path to the bibtex library should be a string") - if not isinstance(out_file, str): - raise TypeError("The path to the output bibtex file should be a string") - ## Step 1 - read the key list - keys = [kl.rstrip(":\n") for kl in open(key_list, 'r')] - ## Step 2 - read the library file - refs = BibTexParser(open(bib_file, 'r').read()).get_entry_dict() - ## Step 3 - extract the used entries - used_refs = {key: refs[key] for key in keys if key in refs} - ## Step 4 - convert the dicts back into bibtex - try: - used_refs_iter = used_refs.iteritems() - except AttributeError: - used_refs_iter = used_refs.items() - refs_as_bib = [dict2bib(k, v) for (k, v) in used_refs_iter ] - ## Step 5 - write the output file - with codecs.open(out_file, 'w', 'utf-8-sig') as of: - of.writelines(refs_as_bib) + sys.exit(main()) diff --git a/hg-diff-md.py b/hg-diff-md.py --- a/hg-diff-md.py +++ b/hg-diff-md.py @@ -1,4 +1,20 @@ -#! /bin/env python3 +#! /usr/bin/env python3 +""" +Copyright (c) 2018, Phillip Alday + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" import sys import argparse diff --git a/readme.md b/readme.md --- a/readme.md +++ b/readme.md @@ -1,6 +1,3 @@ -Futhark -========= - Markdown and DVCS can change the way we collaborate. 
Even for those pesky journals that require a submission in Word format, you can do the majority of your work in Markdown and then convert to Word via pandoc (and if need be LibreOffice to convert .odt/.docx to .doc). However, a common bibliography is still difficult to pull off. The Makefile and scripts here automagically extract the relevant pandoc references from the specified BibTeX library and create a minimal local BibTeX file that you can include in your repository. This was inspired by and expands upon the ideas in [this blog post](http://timotheepoisot.fr/2013/11/10/shared-bibtex-file-markdown/). @@ -10,9 +7,24 @@ - `make` - [Mercurial](http://mercurial.selenic.com/) ([git version available on GitHub](https://github.com/palday/futhark)) - [`pandoc`](http://johnmacfarlane.net/pandoc/) -- [Python](https://www.python.org/) - - currently using Python 2.x because of - - [BibTexParser](http://bibtexparser.readthedocs.org/en/latest/install.html) +- [Python (3)](https://www.python.org/) +- [BibTexParser](http://bibtexparser.readthedocs.org/en/latest/install.html) + +You can install `pandoc` and all the Python tooling via [conda](https://www.anaconda.com/products/individual): +```bash +user@host:~/projectdir$ conda env create -f environment.yml +user@host:~/projectdir$ conda activate futhark +(futhark) user@host:~/projectdir$ conda activate futhark +``` -License: -My contributions are currently GPLv2, but I am building on the work of others, whose licensing conditions aren't yet clear. The LaTeX template is a modification of the standard pandoc template and is thus subject to [the same restrictions](https://github.com/jgm/pandoc-templates). 
\ No newline at end of file +If you prefer to use your system Python, virtual environments, PyEnv, etc., then the Python packages are available via pip: +```bash +user@host:~/projectdir$ python -m pip install -r requirements.txt +``` + +The `mdwc` is useful for word counts of markdown documents with a YAML header block and is developed [here](https://github.com/palday/mdwc). + +## License +My contributions are GPLv2. Previous versions used code with an unclear license, but that has been removed in current versions. + +The LaTeX template is a modification of the standard pandoc template and is thus subject to [the same restrictions](https://github.com/jgm/pandoc-templates). diff --git a/requirements.txt b/requirements.txt new file mode 100644 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +bibtexparser==1.2.0 +certifi==2020.12.5 +future==0.18.2 +pandoc-eqnos==2.5.0 +pandoc-fignos==2.4.0 +pandoc-tablenos==2.3.0 +pandoc-xnos==2.5.0 +pandocfilters==1.4.3 +psutil==5.8.0 +pyparsing==2.4.7