2 files changed, 67 insertions(+), 51 deletions(-)

M ChangeLog.md
M xgrep.py
M ChangeLog.md +4 -15
@@ 1,3 1,7 @@ 
+2023-11-04:
+
+* reformat code
+
 2022-09-05:
 
 * add default namespaces also in XPath expressions containing `and` or `or`

          
@@ 9,92 13,77 @@ 2022-05-11:
 * add option `-s` (`--spaces`)
 * bump version to 2.10
 
-
 2020-04-26:
 
 * add option `-a` (`--abbreviate`)
 * bump version to 2.9
 
-
 2020-04-25:
 
 * add option `-M` (`--files-and-matches`)
 * bump version to 2.8
 
-
 2020-04-24:
 
 * rename option `-n` (`--declare-ns`) to `-N`
 * add option `-n` (`--line-number`)
 * bump version to 2.7
 
-
 2020-01-14:
 
 * catch keyboard interrupt
 * bump version to 2.6
 
-
 2019-10-07:
 
 * add option `-C` (`--force-color`)
 * bump version to 2.5
 
-
 2019-08-27:
 
 * add option `-r` (`--regex`)
 * bump version to 2.4
 
-
 2019-08-24:
 
 * add option `-q` (`--quiet`)
 * bump version to 2.3
 
-
 2019-07-28:
 
 * convert `README` and `ChangeLog` to Markdown
 
-
 2019-04-19:
 
 * add option `-m` (`--matches`)
 * bump version to 2.2
 
-
 2019-04-13:
 
 * remove leading whitespace if `-i` is not set
 * bump version to 2.1
 
-
 2019-04-12:
 
 * support for namespaces
 * bump version to 2.0
 
-
 2018-11-19:
 
 * fixes
 * bump version to 1.3
 
-
 2018-11-18:
 
 * add option `-L` (`--files-without-match`)
 * bump version to 1.2
 
-
 2018-11-12:
 
 * exit codes as in GNU grep: `2` if an error occurred, `1` if there is no match,
   and `0` otherwise
 * bump version to 1.1
 
-
 2018-11-11:
 
 * initial release

          
M xgrep.py +63 -36
@@ 1,16 1,21 @@ 
 #!/usr/bin/python3
 # xgrep.py -- search for elements in XML files, using XPath 1.0 expressions
-# Andreas Nolda 2022-09-05
+# Andreas Nolda 2023-11-04
 
 import sys
 import argparse
 import re
+
 from blessings import Terminal
+
 from lxml import etree
 
+
 version = "2.11"
 
+
 parser = argparse.ArgumentParser()
+
 parser.add_argument("expr",
                     help="XPath 1.0 expression")
 parser.add_argument("files", metavar="file", nargs="+",

          
@@ 46,77 51,92 @@ parser.add_argument("-r", "--regex", met
 parser.add_argument("-s", "--spaces", action="store_true",
                     help="normalize whitespace to spaces")
 parser.add_argument("-v", "--version", action="version",
-                    version="{0} {1}".format(parser.prog, version))
+                    version=f"{parser.prog} {version}")
+
 args = parser.parse_args()
 
+
 term = Terminal(force_styling=args.force_color)
 
+
 def insert_default_ns(expr, nsmap):
     if nsmap:
-        list = re.split("("
-                        "/|"                                                 # /
-                        "[a-z-]+::|"                                         # axis::
-                        "[A-Za-z_][A-Za-z0-9._-]*:[A-Za-z_][A-Za-z0-9._-]*|" # namespace:element
-                        "\[|"                                                # [
-                        "@[A-Za-z_][A-Za-z0-9._-]*|"                         # @attribute
-                        "=|"                                                 # =
-                        "'[^']*'|"                                           # 'value'
-                        '"[^"]*"|'                                           # "value"
-                        "[a-z-]+\(|"                                         # function(
-                        ",|"                                                 # ,
-                        "\)|"                                                # )
-                        "\s+and\s+|"                                         # and
-                        "\s+or\s+|"                                          # or
-                        "\]"                                                 # ]
-                        ")",
-                        expr)
-        nslist = ["default:" + item if re.fullmatch("[A-Za-z_][A-Za-z0-9._-]*", # element without namespace
-                                                    item)
-                                    else item
-                  for item in list]
-        string = "".join(nslist)
+        subexprs = re.split(r"("
+                            r"/|"                                                  # /
+                            r"[a-z-]+::|"                                          # axis::
+                            r"[A-Za-z_][A-Za-z0-9._-]*:[A-Za-z_][A-Za-z0-9._-]*|"  # namespace:element
+                            r"\[|"                                                 # [
+                            r"@[A-Za-z_][A-Za-z0-9._-]*|"                          # @attribute
+                            r"=|"                                                  # =
+                            r"'[^']*'|"                                            # 'value'
+                            r'"[^"]*"|'                                            # "value"
+                            r"[a-z-]+\(|"                                          # function(
+                            r",|"                                                  # ,
+                            r"\)|"                                                 # )
+                            r"\s+and\s+|"                                          # and
+                            r"\s+or\s+|"                                           # or
+                            r"\]"                                                  # ]
+                            r")",
+                            expr)
+        nssubexprs = ["default:" + subexpr
+                      if re.fullmatch("[A-Za-z_][A-Za-z0-9._-]*", subexpr)  # element without namespace
+                      else subexpr
+                      for subexpr in subexprs]
+        nsexpr = "".join(nssubexprs)
     else:
-        string = expr
-    return string
+        nsexpr = expr
+    return nsexpr
+
 
 def serialize_match(match):
     if isinstance(match, str):
         string = match
     else:
         string = etree.tostring(match, with_tail=False,
-                                encoding="utf-8",
-                                pretty_print=args.indent).decode()
+                                encoding="unicode",
+                                pretty_print=args.indent)
+
     if not args.indent:
-        string = re.sub("\n\s+", "\n", string)
+        string = re.sub(r"\n\s+", r"\n", string)
+
     if args.abbreviate:
         lines = string.splitlines()
         string = lines[0]
         if len(lines) > 1:
             string += " " + term.bright_black("...")
+
     if args.spaces:
-        string = re.sub("\s+", " ", string)
+        string = re.sub(r"\s+", " ", string)
+
     if string.endswith("\n"):
         string = string[:-1]
     return string
 
+
 def remove_ns(match):
-    string = re.sub('\s+xmlns(:[A-Za-z_][A-Za-z0-9._-]*)?="[^"]+"', '', match)
+    string = re.sub(r'\s+xmlns(:[A-Za-z_][A-Za-z0-9._-]*)?="[^"]+"', '', match)
     return string
 
+
 def print_filename(file, end):
     print(term.bold(file), end=end)
 
+
 def print_total(matches, end):
     print(len(matches), end=end)
 
+
 def print_expr(expr, end):
     print(term.bold(expr), end=end)
 
-def print_index(int, end):
-    print(term.bold("[{0}]".format(int + 1)), end=end)
+
+def print_index(i, end):
+    print(term.bold(f"[{i + 1}]"), end=end)
+
 
 def print_line_number(match, end):
-    print(term.bright_black("{0}".format(match.sourceline)), end=end)
+    print(term.bright_black(str(match.sourceline)), end=end)
+
 
 def print_match(match, end):
     if args.declare_ns:

          
@@ 124,6 144,7 @@ def print_match(match, end):
     else:
         print(remove_ns(serialize_match(match)), end=end)
 
+
 def print_matches(matches, file):
     if matches:
         if args.files_with_matches:

          
@@ 167,6 188,7 @@ def print_matches(matches, file):
                         print_line_number(match, ":")
                     print_match(match, "\n")
 
+
 def print_nonmatches(matches, file):
     if not matches:
         if args.count:

          
@@ 175,9 197,11 @@ def print_nonmatches(matches, file):
         else:
             print_filename(file, "\n")
 
+
 def main():
     n = 0
     e = False
+
     for file in args.files:
         try:
             xml_parser = etree.XMLParser(remove_blank_text=args.indent,

          
@@ 187,12 211,13 @@ def main():
             root = tree.getroot()
             # cf. https://stackoverflow.com/q/4210730:
             nsmap = {key if key is not None
-                         else "default":value
-                     for key,value in root.nsmap.items()}
+                         else "default": value
+                     for key, value in root.nsmap.items()}
             nsexpr = insert_default_ns(args.expr, nsmap)
             # add regexp namespace *after* inserting default namespace
             if args.regex:
                 nsmap[args.regex] = "http://exslt.org/regular-expressions"
+
             matches = tree.xpath(nsexpr, namespaces=nsmap)
             if not args.quiet:
                 if args.files_without_match:

          
@@ 214,6 239,7 @@ def main():
             if not args.quiet:
                 print(term.bold_red(file) + ": XPath expression cannot be evaluated", file=sys.stderr)
             e = True
+
     if e:
         exit = 2
     elif n == 0:

          
@@ 222,5 248,6 @@ def main():
         exit = 0
     return exit
 
+
 if __name__ == "__main__":
     sys.exit(main())