# HG changeset patch # User Ludovic Chabant # Date 1723349984 25200 # Sat Aug 10 21:19:44 2024 -0700 # Node ID 2e57d0fd8ceb42dc33dbd6570d80f958a3ad34e8 # Parent 08ee3ffbe508af0eca114988ee1d2fdb0cb43bad Add more options for dynamic feed DOM manipulation diff --git a/silorider/parse.py b/silorider/parse.py --- a/silorider/parse.py +++ b/silorider/parse.py @@ -80,18 +80,30 @@ logger.debug("Modifying HTML doc:") for selector, to_add in class_mods: + # Hack to support colons and equal signs in .ini files (the misnamed '$semicolon$' placeholder expands to ':')... + # Note that Python seems to make all key names lowercase. + selector = selector.replace('$semicolon$', ':') + selector = selector.replace('$equals$', '=') + elems = list(doc.select(selector)) if not elems: logger.warning("No elements matched by rule: %s" % selector) continue + for elem in elems: - logger.debug("Adding %s to %s" % (to_add, elem.name)) - if to_add == 'dt-published': + if to_add == '$MOVE_UP$': + _move_element_up(doc, elem) + elif to_add == 'dt-published': _insert_html_datetime_published(doc, elem) else: - if 'class' not in elem.attrs: - elem['class'] = [] - elem['class'].append(to_add) + _add_class_to_element(doc, elem, to_add) + + +def _move_element_up(doc, elem): + dest = elem.parent.parent + logger.debug("Moving '%s' up to '%s'" % (elem.name, dest.name)) + elem.extract() + dest.append(elem) def _insert_html_datetime_published(doc, elem): @@ -116,6 +128,13 @@ logger.debug("Adding datetime attribute: %s" % dt) +def _add_class_to_element(doc, elem, to_add): + logger.debug("Adding %s to %s" % (to_add, elem.name)) + if 'class' not in elem.attrs: + elem['class'] = [] + elem['class'].append(to_add) + + class InvalidEntryException(Exception): pass @@ -226,6 +245,7 @@ for item in items: item_types = item.get('type', []) if 'h-feed' not in item_types: + logger.debug("Rejecting item of types: %s" % item_types) continue children = item.get('children', [])