remove markdown library

includes hack-y parsing of HTML for title, leader, and date; it appears to
work but could be better
3 files changed, 56 insertions(+), 38 deletions(-)

M quiescent/post.py
M quiescent/static.py
M setup.py
M quiescent/post.py +50 -30
@@ 19,29 19,48 @@ from functools import partial
 import urllib.parse
 import os
 import re
-
-from mistune import Markdown
+from html.parser import HTMLParser
 
-@dataclass(repr=False)
-class RawPost:
-    title: str
-    date: str
-    leader: str
-    body: str
+class PostParser(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.tag_stack = []
+        self.title = None
+        self.date = None
+        self.leader = []
+        self.DONE = False
+
+    def feed(self, html):
+        self.body = html
+        super().feed(html)
 
-def parse(raw_text: str) -> RawPost:
-    """
-    first paragraph of a post is a "leader" and is split out to
-    generate an index page. Small idiosyncracy, Python's `re` module
-    won't split on a zero-width match (e.g. `^$`) so we're splitting
-    on the first two newlines
-    """
-    head, body = re.split(r'^\+\+\+$', raw_text, maxsplit=1, flags=re.M)
-    metadata = {k.strip().lower(): v.strip()
-                for k, v in (line.split(':', maxsplit=1)
-                             for line in head.strip().split('\n'))}
-    leader, _ = body.strip().split('\n\n', maxsplit=1)
-    return RawPost(metadata['title'], metadata['date'], leader, body)
+    def handle_starttag(self, tag, attributes):
+        if tag == 'h1':
+            self.tag_stack.append(tag)
+        elif tag == 'span':
+            self.tag_stack.append(tag)
+        elif self.tag_stack and (tag == self.tag_stack[-1]):
+            self.DONE = True
+        elif tag == 'p':
+            self.tag_stack.append(tag)
+
+    def handle_endtag(self, tag):
+        if self.tag_stack and (tag == self.tag_stack[-1]):
+            self.tag_stack.pop()
+        if tag == 'p':
+            self.DONE = True
+
+    def handle_data(self, data):
+        if self.tag_stack:
+            if self.tag_stack[-1] == 'h1' and not self.title:
+                self.title = data
+            elif self.tag_stack[-1] == 'span' and not self.date:
+                self.date = data
+            elif not self.DONE:
+                self.leader.append(data)
+
+    def post_data(self):
+        return (self.title, self.date, "".join(self.leader), self.body)
 
 @dataclass(repr=False)
 class RenderedPost:

          
@@ 52,18 71,19 @@ class RenderedPost:
     html_body: str
     path: str
 
-def process(post: RawPost) -> RenderedPost:
-    md = Markdown()
+def parse(raw_text: str) -> RenderedPost:
+    p = PostParser()
+    p.feed(raw_text)
+    post_title, post_date, post_leader, post_body = p.post_data()
     # UTC is used here so that a valid date format is included in the
     # atom feed, not great
-    date = datetime.strptime(post.date, '%Y-%m-%d').replace(tzinfo=timezone.utc)
-    path = os.path.join(date.strftime("%Y"),
-                        '{slug}.html'.format(slug=slugify(post.title)))
-    return RenderedPost(post.title,
+    date = datetime.strptime(post_date, '%Y-%m-%d').replace(tzinfo=timezone.utc)
+    path = os.path.join(date.strftime("%Y"), f'{slugify(post_title)}.html')
+    return RenderedPost(post_title,
                         date,
-                        post.date,
-                        md(post.leader),
-                        md(post.body),
+                        post_date,
+                        post_leader,
+                        post_body,
                         path)
 
 def slugify(text):

          
M quiescent/static.py +4 -4
@@ 27,7 27,7 @@ import sys
 import os
 import re
 
-from .post import parse, process
+from .post import parse
 from .feed import feed
 from .templite import Templite
 

          
@@ 82,13 82,13 @@ class StaticGenerator:
 
     def collect_posts(self, from_dir):
         '''
-        Walk the directory containing posts and return any with a `.md` suffix as a
+        Walk the directory containing posts and return any with a .post suffix as a
         tuple of (directory, filename)
         '''
         post_files = []
         for root, _, files in os.walk(from_dir):
             for _file in files:
-                if _file.endswith('.md'):
+                if _file.endswith('.post'):
                     post_files.append((root, _file))
         return post_files
 

          
@@ 120,7 120,7 @@ class StaticGenerator:
             with open(file_path, encoding='utf-8') as f:
                 text = f.read()
             try:
-                post = process(parse(text))
+                post = parse(text)
                 self.all_posts.append(post)
             except Exception as e:
                 logger.warning('Failed to create post: {name}\n\t{e}'

          
M setup.py +2 -4
@@ 1,7 1,7 @@ 
 from setuptools import setup
 
 description = """A static site generator with strong opinions about the layout of
-written content. Utilizes a custom template engine and includes few
+written content. Uses a custom template engine and requires zero
 dependencies."""
 
 setup(name="quiescent",

          
@@ 13,9 13,7 @@ setup(name="quiescent",
       author_email='quiescent@nprescott.com',
       license='GPL',
       packages=['quiescent'],
-      install_requires=[
-          'mistune >= 0.7.3',
-      ],
+      install_requires=[],
       test_suite='quiescent.tests',
       entry_points={
       'console_scripts': ['quiescent=quiescent.command_line:main']