@@ 11,29 11,33 @@ SUBSCRIPT = %r%~[-/*+?^_`\w\d].*?~%
UNDERSCORE = %r%_[-/*+?^~`\w\d].*?_%
RAW = %r%`[-/*+?^~_\w\d].*?`%
FENCE = %r%\{\{\{.*?\}\}\}%
-LINE = %r%(?![ \t][*-][ \t])(.*?|#{EMPHASIS}|#{STRONG}|#{CITATION}|#{DELETED}|#{INSERTED}|#{SUPERSCRIPT}|#{SUBSCRIPT}|#{UNDERSCORE}|#{RAW}|#{FENCE})+%
+# INLINE: one or more pieces of inline content -- lazily matched plain text
+# (.*?) or any of the inline markup patterns interpolated below. It carries no
+# anchor and no trailing newline, so it can be embedded inside other patterns.
+INLINE = %r%(?:.*?|#{EMPHASIS}|#{STRONG}|#{CITATION}|#{DELETED}|#{INSERTED}|#{SUPERSCRIPT}|#{SUBSCRIPT}|#{UNDERSCORE}|#{RAW}|#{FENCE})+%
+# OLIST: regex source (a String, not a Regexp) for an ordered-list marker:
+# "#", a run of digits, or a single ASCII letter, followed by a literal ".".
+OLIST = '(?:#|[0-9]+|[A-Za-z])\.'
+# LINE: INLINE content plus its terminating "\n". The negative lookahead
+# rejects a line that begins (after optional spaces/tabs) with a list marker
+# -- "*", "-", an OLIST marker, or "[_]"/"[X]" -- followed by a space or tab.
+LINE = %r%(?:(?![ \t]*(?:[*-]|(?:#{OLIST})|\[[_X]\])[ \t])#{INLINE}\n)%
RULES = %r%(\-+|~+|\++|\^+|=+)%
-PbHeading = %r%\A([ \t]*#{RULES})\n([ \t]*)#{LINE}\n\1[ \t]*\n%m
-HEAD = %r%[ \t]*#{LINE}\n#{RULES}[ \t]*%m
+# LINE ends with its own "\n", so no explicit "\n" follows #{LINE} below.
+# PbHeading: an (optionally indented) rule line, a title line, then the same
+# indentation-plus-rule text repeated (backreference \1) on its own line.
+PbHeading = %r%\A([ \t]*#{RULES})\n([ \t]*)#{LINE}\1[ \t]*\n%m
+# HEAD: a title line immediately followed by a rule (an underlined heading).
+HEAD = %r%[ \t]*#{LINE}#{RULES}[ \t]*%m
NHeading = %r%\A[ \t]*([0-9]+|#)\.#{HEAD}\n%m
Heading = %r%\A#{HEAD}\n%m
PageBreak = %r%\A([ \t]*#{RULES})\n\1[ \t]*\n%m
-BLOCK = %r%([ \t]+)#{LINE}(\n+\1#{LINE})*%m
-UList = %r%\A([ \t]*)[*-]#{LINE}(\n\1[ \t]+#{LINE})*\n%m
-OList = %r%\A[ \t]*([#0-9A-Za-z]+\.)#{BLOCK}\n%m
-Todo = %r%\A[ \t]*\[[_X]\]#{BLOCK}\n%m
+# Build the block-level pattern for one list item introduced by +mark+.
+#
+# mark:: regex source for the item marker (callers in this file pass Strings
+#        such as '[*-]').
+#
+# Returns a Regexp anchored at the start of the input (\A) that matches:
+# optional indentation (capture group 1), the marker, at least one space or
+# tab, one LINE, and then any number of continuation LINEs that repeat group
+# 1's indentation followed by at least one more space or tab.
+#
+# The marker is wrapped in a non-capturing group so that a marker containing a
+# top-level "|" cannot split the surrounding pattern; capture-group numbering
+# is unaffected.
+def mk_list( mark )
+  %r%\A([ \t]*)(?:#{mark})[ \t]+#{LINE}(\1[ \t]+#{LINE})*%m
+end
+# The three list kinds share one pattern shape (mk_list) and differ only in
+# the item marker.
+# UList: marker is "*" or "-".
+UList = mk_list('[*-]')
+# OList: marker is described by OLIST.
+OList = mk_list(OLIST)
+# Todo: marker is "[_]" or "[X]".
+Todo = mk_list('\[[_X]\]')
-PARA = %r%#{LINE}(\n#{LINE})*\n%m
-Block = %r%\A#{PARA}\n%m
+# PARA: one or more consecutive LINEs; each LINE carries its own "\n", so no
+# separator is needed between them.
+PARA = %r%#{LINE}(#{LINE})*%m
+# Block: a PARA anchored at the start of the remaining input.
+Block = %r%\A#{PARA}%m
Fenced = %r%\A\{\{\{(#[\w]+)?\n.*?\n\}\}\}\n%m
ATTRIBUTION = %r%.*?:\n%
-Quote = %r%\A(#{ATTRIBUTION})?>([ \t]+(>[ \t]+)*#{LINE})?\n%
+# Quote: an optional ATTRIBUTION line, then ">" optionally followed by
+# whitespace, any number of nested "> " markers, and a LINE.
+# NOTE(review): when the optional tail is absent (a bare ">"), this pattern
+# consumes no "\n" -- confirm the caller copes with the leftover newline.
+Quote = %r%\A(#{ATTRIBUTION})?>([ \t]+(>[ \t]+)*#{LINE})?%
Comment = %r%\A#[ \t]+.*\n%
-Definition = %r%\A\S#{LINE}\n([ \t]+)#{LINE}(\n\1#{LINE})*\n%m
+# Definition: a LINE that does not start with a space or tab, followed by one
+# or more indented LINEs; the indentation of the first is captured and must be
+# repeated by the rest via the backreference.
+# NOTE(review): \1 is the indentation group only if the patterns interpolated
+# through LINE contain no capture groups of their own -- TODO confirm.
+Definition = %r%\A(?![ \t])#{LINE}([ \t]+)#{LINE}(?:\1#{LINE})*%m
# Justification requires post-processing interpretation
# Sidebar requires post-processing interpretation
@@ 45,8 49,8 @@ Ident = %r%\A:[\w \t]+: .*\n%
URI = %r^[a-zA-Z]{3,}://[a-zA-Z0-9\.]+/*[a-zA-Z0-9/\\%_.]*\?*[a-zA-Z0-9/\\%_.=&]*^
Link = %r%\{\w[\w \t]+([ \t]#{URI})?\}%
-Anchor = %r%\[[:alpha:]#{LINE}\]%
-Footnote = %r%\{(\d|#)+([ \t]+#{LINE})?\}%
+# Anchor: "[" + one alphabetic character + inline content + "]".
+# The POSIX class has to sit inside a bracket expression ([[:alpha:]]); a bare
+# [:alpha:] is parsed as the character set {":", "a", "l", "p", "h"}.
+Anchor = %r%\[[[:alpha:]]#{INLINE}\]%
+# Footnote: "{" + one or more digits or "#" characters, optionally followed by
+# whitespace and inline content, + "}".
+Footnote = %r%\{(\d|#)+([ \t]+#{INLINE})?\}%
LinkRef = %r%\A\.\.[ \t][\w \t]+:[ \t](#{URI}|#{PARA})\n%m
Directive = %r%\A\.\.[ \t]:[\w \t]+:[ \t].*?\n%
@@ 128,8 132,10 @@ def parse( input )
rest = $'
puts "#"*60 + " EMPTY"
#puts "Rest:\n#{rest[0,200]}"
+ when ""
+ break
else
- puts "No match for:\n#{rest[0,200]}"
+ puts "No match for:\n#{rest[0,400].inspect}"
exit
end
end
@@ 9,28 9,33 @@ SUBSCRIPT = %r%~[-/*+?^_`\w\d].*?~%
UNDERSCORE = %r%_[-/*+?^~`\w\d].*?_%
RAW = %r%`[-/*+?^~_\w\d].*?`%
FENCE = %r%\{\{\{.*?\}\}\}%
-LINE = %r%(?![ \t][*-][ \t])(.*?|#{EMPHASIS}|#{STRONG}|#{CITATION}|#{DELETED}|#{INSERTED}|#{SUPERSCRIPT}|#{SUBSCRIPT}|#{UNDERSCORE}|#{RAW}|#{FENCE})+%
+# INLINE: one or more pieces of inline content -- lazily matched plain text
+# (.*?) or any of the inline markup patterns interpolated below. It carries no
+# anchor and no trailing newline, so it can be embedded inside other patterns.
+INLINE = %r%(?:.*?|#{EMPHASIS}|#{STRONG}|#{CITATION}|#{DELETED}|#{INSERTED}|#{SUPERSCRIPT}|#{SUBSCRIPT}|#{UNDERSCORE}|#{RAW}|#{FENCE})+%
+# OLIST: regex source (a String, not a Regexp) for an ordered-list marker:
+# "#", a run of digits, or a single ASCII letter, followed by a literal ".".
+OLIST = '(?:#|[0-9]+|[A-Za-z])\.'
+# LINE: INLINE content plus its terminating "\n". The negative lookahead
+# rejects a line that begins (after optional spaces/tabs) with a list marker
+# -- "*", "-", an OLIST marker, or "[_]"/"[X]" -- followed by a space or tab.
+LINE = %r%(?:(?![ \t]*(?:[*-]|(?:#{OLIST})|\[[_X]\])[ \t])#{INLINE}\n)%
RULES = %r%(\-+|~+|\++|\^+|=+)%
-PbHeading = %r%\A([ \t]*#{RULES})\n([ \t]*)#{LINE}\n\1[ \t]*\n%m
-HEAD = %r%[ \t]*#{LINE}\n#{RULES}[ \t]*%m
+# LINE ends with its own "\n", so no explicit "\n" follows #{LINE} below.
+# PbHeading: an (optionally indented) rule line, a title line, then the same
+# indentation-plus-rule text repeated (backreference \1) on its own line.
+PbHeading = %r%\A([ \t]*#{RULES})\n([ \t]*)#{LINE}\1[ \t]*\n%m
+# HEAD: a title line immediately followed by a rule (an underlined heading).
+HEAD = %r%[ \t]*#{LINE}#{RULES}[ \t]*%m
NHeading = %r%\A[ \t]*([0-9]+|#)\.#{HEAD}\n%m
Heading = %r%\A#{HEAD}\n%m
PageBreak = %r%\A([ \t]*#{RULES})\n\1[ \t]*\n%m
-BLOCK = %r%([ \t]+)#{LINE}(\n+\1#{LINE})*%m
-UList = %r%\A([ \t]*)[*-]#{LINE}(\n\1[ \t]+#{LINE})*\n%m
-OList = %r%\A[ \t]*([#0-9A-Za-z]+\.)#{BLOCK}\n%m
-Todo = %r%\A[ \t]*\[[_X]\]#{BLOCK}\n%m
+# Build the block-level pattern for one list item introduced by +mark+.
+#
+# mark:: regex source for the item marker (callers in this file pass Strings
+#        such as '[*-]').
+#
+# Returns a Regexp anchored at the start of the input (\A) that matches:
+# optional indentation (capture group 1), the marker, at least one space or
+# tab, one LINE, and then any number of continuation LINEs that repeat group
+# 1's indentation followed by at least one more space or tab.
+#
+# The marker is wrapped in a non-capturing group so that a marker containing a
+# top-level "|" cannot split the surrounding pattern; capture-group numbering
+# is unaffected.
+def mk_list( mark )
+  %r%\A([ \t]*)(?:#{mark})[ \t]+#{LINE}(\1[ \t]+#{LINE})*%m
+end
+# The three list kinds share one pattern shape (mk_list) and differ only in
+# the item marker.
+# UList: marker is "*" or "-".
+UList = mk_list('[*-]')
+# OList: marker is described by OLIST.
+OList = mk_list(OLIST)
+# Todo: marker is "[_]" or "[X]".
+Todo = mk_list('\[[_X]\]')
-PARA = %r%#{LINE}(\n#{LINE})*\n%m
-Block = %r%\A#{PARA}\n%m
+# PARA: one or more consecutive LINEs; each LINE carries its own "\n", so no
+# separator is needed between them.
+PARA = %r%#{LINE}(#{LINE})*%m
+# Block: a PARA anchored at the start of the remaining input.
+Block = %r%\A#{PARA}%m
Fenced = %r%\A\{\{\{(#[\w]+)?\n.*?\n\}\}\}\n%m
-Quote = %r%\A>[ \t]+(>[ \t]+#{LINE})*#{LINE}\n%
+# ATTRIBUTION: a lazily matched run of text ending in ":" and a newline.
+ATTRIBUTION = %r%.*?:\n%
+# Quote: an optional ATTRIBUTION line, then ">" optionally followed by
+# whitespace, any number of nested "> " markers, and a LINE.
+# NOTE(review): when the optional tail is absent (a bare ">"), this pattern
+# consumes no "\n" -- confirm the caller copes with the leftover newline.
+Quote = %r%\A(#{ATTRIBUTION})?>([ \t]+(>[ \t]+)*#{LINE})?%
Comment = %r%\A#[ \t]+.*\n%
-Definition = %r%\A\w#{LINE}\n([ \t]+)#{LINE}(\n\1#{LINE})*\n%m
+# Definition: a LINE that does not start with a space or tab, followed by one
+# or more indented LINEs; the indentation of the first is captured and must be
+# repeated by the rest via the backreference.
+# NOTE(review): \1 is the indentation group only if the patterns interpolated
+# through LINE contain no capture groups of their own -- TODO confirm.
+Definition = %r%\A(?![ \t])#{LINE}([ \t]+)#{LINE}(?:\1#{LINE})*%m
# Justification requires post-processing interpretation
# Sidebar requires post-processing interpretation
@@ 42,8 47,8 @@ Ident = %r%\A:[\w \t]+: .*\n%
URI = %r^[a-zA-Z]{3,}://[a-zA-Z0-9\.]+/*[a-zA-Z0-9/\\%_.]*\?*[a-zA-Z0-9/\\%_.=&]*^
Link = %r%\{\w[\w \t]+([ \t]#{URI})?\}%
-Anchor = %r%\[[:alpha:]#{LINE}\]%
-Footnote = %r%\{(\d|#)+([ \t]+#{LINE})?\}%
+# Anchor: "[" + one alphabetic character + inline content + "]".
+# The POSIX class has to sit inside a bracket expression ([[:alpha:]]); a bare
+# [:alpha:] is parsed as the character set {":", "a", "l", "p", "h"}.
+Anchor = %r%\[[[:alpha:]]#{INLINE}\]%
+# Footnote: "{" + one or more digits or "#" characters, optionally followed by
+# whitespace and inline content, + "}".
+Footnote = %r%\{(\d|#)+([ \t]+#{INLINE})?\}%
LinkRef = %r%\A\.\.[ \t][\w \t]+:[ \t](#{URI}|#{PARA})\n%m
Directive = %r%\A\.\.[ \t]:[\w \t]+:[ \t].*?\n%