Reformat with black
1 file changed, 88 insertions(+), 56 deletions(-)

M htmltextconvert/convert.py
M htmltextconvert/convert.py +88 -56
@@ 26,11 26,39 @@ from .htmlstreamer import HTMLStreamer
 from .htmlstreamer import html_to_stream
 
 
-inline_elements = {'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button',
-                   'cite', 'code', 'dfn', 'em', 'i', 'img', 'input', 'kbd',
-                   'label', 'map', 'object', 'q', 'samp', 'select', 'small',
-                   'span', 'strong', 'sub', 'sup', 'textarea', 'time', 'tt',
-                   'var'}
+inline_elements = {
+    "a",
+    "abbr",
+    "acronym",
+    "b",
+    "bdo",
+    "big",
+    "br",
+    "button",
+    "cite",
+    "code",
+    "dfn",
+    "em",
+    "i",
+    "img",
+    "input",
+    "kbd",
+    "label",
+    "map",
+    "object",
+    "q",
+    "samp",
+    "select",
+    "small",
+    "span",
+    "strong",
+    "sub",
+    "sup",
+    "textarea",
+    "time",
+    "tt",
+    "var",
+}
 
 
 def window(seq, n=2, empty=None):

          
@@ 62,17 90,17 @@ def stream_to_html(stream: Iterable) -> 
     for type_, data in stream:
         if type_ in {HTMLStreamer.START, HTMLStreamer.STARTEND}:
             tag, attrs = data
-            write(f'<{tag}')
-            write(''.join(' {k}="{v}"'.format(k, escape(v)) for k, v in attrs))
+            write(f"<{tag}")
+            write("".join(' {k}="{v}"'.format(k, escape(v)) for k, v in attrs))
             if type_ == HTMLStreamer.STARTEND:
-                write('/>')
+                write("/>")
             else:
-                write('>')
+                write(">")
         elif type_ == HTMLStreamer.END:
-            write(f'</{data}>')
+            write(f"</{data}>")
         elif type_ == HTMLStreamer.TEXT:
             write(escape(data))
-    return ''.join(buf)
+    return "".join(buf)
 
 
 def normalize_tags(stream):

          
@@ 80,9 108,7 @@ def normalize_tags(stream):
     Normalize tag names, mapping deprecated tags to newer names (eg '<b>'
     becomes '<strong>'), and forcing tag names to lower case
     """
-    tagmap = {'b': 'strong',
-              'i': 'em',
-              }
+    tagmap = {"b": "strong", "i": "em"}
     for type_, data in stream:
         if type_ in {HTMLStreamer.START, HTMLStreamer.STARTEND}:
             tag, attrs = data

          
@@ 103,7 129,7 @@ def collapse_whitespace(stream: Iterable
     Remove whitespace except around inline elements
     """
     #: Stack of (tag, text-content) tuples
-    tagstack: List[Tuple[str, str]] = [('', '')]
+    tagstack: List[Tuple[str, str]] = [("", "")]
 
     windowed = window(stream, 2, empty=(None, None))
     next(windowed, None)

          
@@ 115,18 141,20 @@ def collapse_whitespace(stream: Iterable
 
         if type_ == HTMLStreamer.START:
             yield type_, data
-            tagstack.append((data[0], ''))
+            tagstack.append((data[0], ""))
 
         elif type_ == HTMLStreamer.TEXT:
-            if any(t in {'pre', 'code'} for t, _ in tagstack):
+            if any(t in {"pre", "code"} for t, _ in tagstack):
                 yield type_, data
             else:
                 container, last_sibling = tagstack[-1]
                 in_block = is_block(container) and is_block(last_sibling)
                 if in_block:
                     data = data.lstrip()
-                if peek_type in {HTMLStreamer.START, HTMLStreamer.END} and \
-                        is_block(peek_data):
+                if peek_type in {
+                    HTMLStreamer.START,
+                    HTMLStreamer.END,
+                } and is_block(peek_data):
                     data = data.rstrip()
                 if data:
                     yield type_, data

          
@@ 144,7 172,7 @@ def convert_stream(stream: Iterable) -> 
     list_indent_level = -1
 
     tagstack: List[Tuple[str, Dict]] = []
-    output = ['']
+    output = [""]
     write = output.append
 
     def write_append(s):

          
@@ 170,24 198,25 @@ def convert_stream(stream: Iterable) -> 
         level = indent_level + max(0, list_indent_level)
         text = output[-1]
         if level > 0:
-            output[-1] = textwrap.indent(text, '  ')
+            output[-1] = textwrap.indent(text, "  ")
 
     def reformat(format_str):
         def reformat():
             text = output[-1]
             formatted = format_str.format(text=text)
             output[-1] = formatted
+
         return reformat
 
     def format_ahref():
-        link = ''
+        link = ""
         attrs = dict(tagstack[-1][1])
         text = output[-1]
-        if 'href' in attrs:
+        if "href" in attrs:
             link = f' <{attrs["href"]}>'
-        output[-1] = f'{text}{link}'
+        output[-1] = f"{text}{link}"
 
-    def add_separator_before_block(separator='\n'):
+    def add_separator_before_block(separator="\n"):
         def add_separator_before_block():
             if not output:
                 return

          
@@ 196,44 225,47 @@ def convert_stream(stream: Iterable) -> 
             if output[-1].endswith(separator):
                 return
 
-            parents = ((tag, content)
-                       for (tag, _), content in reversed(
-                           list(zip(tagstack, output))))
+            parents = (
+                (tag, content)
+                for (tag, _), content in reversed(list(zip(tagstack, output)))
+            )
             nonempty_ancestors = itertools.dropwhile(
-                lambda item: not item[1] and is_block(item[0][0]),
-                parents
+                lambda item: not item[1] and is_block(item[0][0]), parents
             )
-            last_content = next((c for t, c in nonempty_ancestors), '')
+            last_content = next((c for t, c in nonempty_ancestors), "")
             if last_content and not last_content.endswith(separator):
-                overlap = list(itertools.takewhile(
-                    lambda item: item[0] == item[1],
-                    zip(reversed(last_content), separator)))
-                output[-1] += separator[len(overlap):]
+                overlap = list(
+                    itertools.takewhile(
+                        lambda item: item[0] == item[1],
+                        zip(reversed(last_content), separator),
+                    )
+                )
+                output[-1] += separator[len(overlap) :]
 
         return add_separator_before_block
 
     def drop_content():
-        output[-1] = ''
+        output[-1] = ""
 
     start_actions = {
-        'blockquote': [push_indent, add_separator_before_block()],
-        'ul': [push_list_indent, add_separator_before_block()],
-        'ol': [push_list_indent, add_separator_before_block()],
-        'p': [add_separator_before_block('\n\n')],
-        '*': [add_separator_before_block()],
+        "blockquote": [push_indent, add_separator_before_block()],
+        "ul": [push_list_indent, add_separator_before_block()],
+        "ol": [push_list_indent, add_separator_before_block()],
+        "p": [add_separator_before_block("\n\n")],
+        "*": [add_separator_before_block()],
     }
     end_actions = {
-        'strong': [reformat('*{text}*')],
-        'em': [reformat('_{text}_')],
-        'code': [reformat('`{text}`')],
-        'blockquote': [reformat('{text}\n'), indent, pop_indent],
-        'br': [reformat('{text}\n')],
-        'ul': [reformat('{text}\n\n')],
-        'li': [reformat('* {text}\n')],
-        'a': [format_ahref],
-        'script': [drop_content],
-        'style': [drop_content],
-        'title': [drop_content],
+        "strong": [reformat("*{text}*")],
+        "em": [reformat("_{text}_")],
+        "code": [reformat("`{text}`")],
+        "blockquote": [reformat("{text}\n"), indent, pop_indent],
+        "br": [reformat("{text}\n")],
+        "ul": [reformat("{text}\n\n")],
+        "li": [reformat("* {text}\n")],
+        "a": [format_ahref],
+        "script": [drop_content],
+        "style": [drop_content],
+        "title": [drop_content],
     }
 
     stream = list(stream)

          
@@ 242,10 274,10 @@ def convert_stream(stream: Iterable) -> 
         if type_ == HTMLStreamer.START:
             tag, attrs = data
             tagstack.append((tag, attrs))
-            actions = start_actions.get(tag, start_actions['*'])
+            actions = start_actions.get(tag, start_actions["*"])
             for fn in actions:
                 fn()
-            write('')
+            write("")
 
         elif type_ == HTMLStreamer.STARTEND:
             tag, attrs = data

          
@@ 263,12 295,12 @@ def convert_stream(stream: Iterable) -> 
             tagstack.pop()
             if len(output) > 1:
                 # Squash this tag's output into its parent
-                output[-2:] = [''.join(output[-2:])]
+                output[-2:] = ["".join(output[-2:])]
 
         elif type_ == HTMLStreamer.TEXT:
             write_append(data)
 
-    return ''.join(output).rstrip() + '\n'
+    return "".join(output).rstrip() + "\n"
 
 
 def html_to_text(s: str) -> str: