# HG changeset patch
# User Marcin Cieślak
# Date 1515074102 0
#      Thu Jan 04 13:55:02 2018 +0000
# Node ID 2a87f765a5c251c0ea033d5390323a7f5bbabd4c
# Parent  b2010cf66d2f33a7051b1b255e949f0f0fccd851
Error handling during XML parsing

diff --git a/meta/kursy/tabelac.py b/meta/kursy/tabelac.py
--- a/meta/kursy/tabelac.py
+++ b/meta/kursy/tabelac.py
@@ -8,6 +8,7 @@
 import wikipedia
 import logging
 import logging.config
+from xml.parsers.expat import ExpatError
 
 logging.config.fileConfig("/home/saper/wikipedia/log/bots.conf", disable_existing_loggers=True)
 wikipedia.logger = logging.getLogger('plwiki')
@@ -36,6 +37,17 @@
     root = doc.documentElement
     return root.getAttribute("uid")
 
+class CannotParseItemError(Exception):
+    def __init__(self, e, url, content):
+        self.e = e
+        self.url = url
+        self.content = content
+
+    def __str__(self):
+        return "Cannot parse item fetched from '%s' with an error: \n%s" % (
+            self.url,
+            self.e)
+
 def fetch_table(feedurl, localfile):
     """ Parse feed, compare with cached copy and return (url, pubdate, parseddomtree) tuple """
     p = feedparser.parse(feedurl)
@@ -52,71 +64,79 @@
         old_uid = None
 
     content = urllib.urlopen(url).read()
-    parsed = dom.parseString(content)
+    try:
+        parsed = dom.parseString(content)
 
-    if old_uid != get_uid(parsed):
-        mylogger.info("Kursy walut: uid: %s->%s" % (old_uid, get_uid(parsed)))
-        data_publikacji = parsed.getElementsByTagName("data_publikacji")[0].firstChild.nodeValue
-        if localfile:
-            wr = open(localfile, "w")
-            wr.write(content)
-            wr.close()
-        return (url, data_publikacji, parsed)
-    else:
-        return (None, None, None)
+        if old_uid != get_uid(parsed):
+            mylogger.info("Kursy walut: uid: %s->%s" % (old_uid, get_uid(parsed)))
+            data_publikacji = parsed.getElementsByTagName("data_publikacji")[0].firstChild.nodeValue
+            if localfile:
+                wr = open(localfile, "w")
+                wr.write(content)
+                wr.close()
+            return (url, data_publikacji, parsed)
+        else:
+            return (None, None, None)
+    except ExpatError, e:
+        raise CannotParseItemError(e, url, content)
 
 TABELA_C = (
     ("kod_waluty", lambda a: a),
-	("przelicznik", int),
-	("kurs_kupna", lambda a: Decimal(a.replace(",", "."))),
-	("kurs_sprzedazy", lambda a: Decimal(a.replace(",", "."))))
+    ("przelicznik", int),
+    ("kurs_kupna", lambda a: Decimal(a.replace(",", "."))),
+    ("kurs_sprzedazy", lambda a: Decimal(a.replace(",", "."))))
 
 TABELA_A = (
     ("kod_waluty", lambda a: a),
-	("przelicznik", int),
-	("kurs_sredni", lambda a: Decimal(a.replace(",", "."))))
+    ("przelicznik", int),
+    ("kurs_sredni", lambda a: Decimal(a.replace(",", "."))))
 
 def extract_items(domtree, tabledef):
     return [ tuple([op(pozycja.getElementsByTagName(tag)[0].firstChild.nodeValue) for (tag, op) in tabledef ])
         for pozycja in domtree.getElementsByTagName("pozycja") ]
 
+def main():
-strony_tabeli_a = []
-strony_tabeli_c = []
+    strony_tabeli_a = []
+    strony_tabeli_c = []
+
+    feedurl, localfile = ("http://rss.nbp.pl/kursy/TabelaC.xml",
+        "/home/saper/wikipedia/src/meta/kursy/tabelac.xml")
+    (url1, pubdate, parseddomtree) = fetch_table(feedurl, localfile)
+    if url1:
+        tabelac = (url1, pubdate, extract_items(parseddomtree, TABELA_C))
 
-feedurl, localfile = ("http://rss.nbp.pl/kursy/TabelaC.xml",
-    "/home/saper/wikipedia/src/meta/kursy/tabelac.xml")
-(url1, pubdate, parseddomtree) = fetch_table(feedurl, localfile)
-if url1:
-    tabelac = (url1, pubdate, extract_items(parseddomtree, TABELA_C))
+        # (site, pagename, lastmod, comment, table, pagetext )
+        strony_tabeli_c = [
+            (meta, u"User:KursyWalut/CurrencyTable", True, TABLEACTIONMSG, tabelac,
+                tabelakursow.tabela),
+            (wikinews, u"Szablon:Kursy walut", True, TABLEACTIONMSG, tabelac,
+                tabelakursow.tabelawikinews),
+            (meta, u"Template:PLNConvert", False, TEMPLATEACTIONMSG, tabelac,
+                currencytemplate.tabela),
+            (wikinews, u"Szablon:PLNConvert", False, TEMPLATEACTIONMSG, tabelac,
+                currencytemplate.tabela),
+        ]
 
-    # (site, pagename, lastmod, comment, table, pagetext )
-    strony_tabeli_c = [
-        (meta, u"User:KursyWalut/CurrencyTable", True, TABLEACTIONMSG, tabelac,
-            tabelakursow.tabela),
-        (wikinews, u"Szablon:Kursy walut", True, TABLEACTIONMSG, tabelac,
-            tabelakursow.tabelawikinews),
-        (meta, u"Template:PLNConvert", False, TEMPLATEACTIONMSG, tabelac,
-            currencytemplate.tabela),
-        (wikinews, u"Szablon:PLNConvert", False, TEMPLATEACTIONMSG, tabelac,
-            currencytemplate.tabela),
-    ]
+    feedurl, localfile = ("http://rss.nbp.pl/kursy/TabelaA.xml",
+        "/home/saper/wikipedia/src/meta/kursy/tabelaa.xml")
+    (url2, pubdate, parseddomtree) = fetch_table(feedurl, localfile)
+    if url2:
+        tabelaa = (url2, pubdate, extract_items(parseddomtree, TABELA_A))
+        strony_tabeli_a = [
+            (wikinews, u"Szablon:Średnie kursy walut", True, TABLEACTIONMSG, tabelaa,
+                tabelakursow.tabelaminiwikinews),
+        ]
 
-feedurl, localfile = ("http://rss.nbp.pl/kursy/TabelaA.xml",
-    "/home/saper/wikipedia/src/meta/kursy/tabelaa.xml")
-(url2, pubdate, parseddomtree) = fetch_table(feedurl, localfile)
-if url2:
-    tabelaa = (url2, pubdate, extract_items(parseddomtree, TABELA_A))
-    strony_tabeli_a = [
-        (wikinews, u"Szablon:Średnie kursy walut", True, TABLEACTIONMSG, tabelaa,
-            tabelakursow.tabelaminiwikinews),
-    ]
+    for site, pagename, lastmod, comment, table, pagetext in strony_tabeli_c + strony_tabeli_a:
+        if lastmod:
+            text = pagetext(table[2]) + LASTMODIFIEDMSG[site.language()] % (table[0], table[1])
+        else:
+            text = pagetext(table[2])
 
-for site, pagename, lastmod, comment, table, pagetext in strony_tabeli_c + strony_tabeli_a:
-    if lastmod:
-        text = pagetext(table[2]) + LASTMODIFIEDMSG[site.language()] % (table[0], table[1])
-    else:
-        text = pagetext(table[2])
-
-    text = text + CATEGORY[site.language()]
-    wikipedia.Page(site, pagename).put(text,
-        comment=comment[site.language()] % (table[0],), minorEdit=False)
+        text = text + CATEGORY[site.language()]
+        wikipedia.Page(site, pagename).put(text,
+            comment=comment[site.language()] % (table[0],), minorEdit=False)
+
+
+if __name__ == '__main__':
+    main()