# HG changeset patch # User Andreas Nolda # Date 1652261059 -7200 # Wed May 11 11:24:19 2022 +0200 # Node ID fb09589557fec4340c822e0296cd4aa6a538ac6e # Parent 64e06afc3be2348d278b24c32e5e837c4e051403 add options "-p" ("--pis") and "-P" ("--comments") diff --git a/ChangeLog.md b/ChangeLog.md --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,8 @@ +2022-05-11: + +* add options `-p` (`--pis`) and `-P` (`--comments`) + + 2020-04-26: * add option `-a` (`--abbreviate`) diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ [-M] [--files-and-matches] [-n] [--line-number] [-N] [--declare-ns] + [-p] [--pis] + [-P] [--comments] [-q] [--quiet] [-r <ns>] [--regex <ns>] @@ -48,7 +50,9 @@ used in the XPath expression for matching regular expressions. The option `-i` indents the matching parts, and the option `-N` includes namespace declarations. Matching parts can be abbreviated to their first line by means of the option -`-a`. The `-C` option preserves color and formatting codes when piping output +`-a`. Processing instructions and comments in the XML files are ignored unless +the `-p` and `-P` options are used. The `-C` option preserves color and +formatting codes when piping output through [GNU less](http://www.gnu.org/software/less/) or similar programs. 
The options `-c`, `-l`, `-L`, `-n`, and `-q` mimic the behaviour of diff --git a/xgrep.py b/xgrep.py --- a/xgrep.py +++ b/xgrep.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 # xgrep.py -- search for elements in XML files, using XPath 1.0 expressions -# Andreas Nolda 2021-06-25 +# Andreas Nolda 2022-05-11 import sys import argparse @@ -35,6 +35,10 @@ help="output line number of match start") parser.add_argument("-N", "--declare-ns", action="store_true", help="declare namespaces in matches") +parser.add_argument("-p", "--pis", action="store_true", + help="preserve processing-instructions in output") +parser.add_argument("-P", "--comments", action="store_true", + help="preserve comments in output") parser.add_argument("-q", "--quiet", action="store_true", help="only return exit status") parser.add_argument("-r", "--regex", metavar="ns", @@ -170,7 +174,9 @@ e = False for file in args.files: try: - xml_parser = etree.XMLParser(remove_blank_text=args.indent) + xml_parser = etree.XMLParser(remove_blank_text=args.indent, + remove_comments=not args.comments, + remove_pis=not args.pis) tree = etree.parse(file, xml_parser) root = tree.getroot() # cf. https://stackoverflow.com/q/4210730: