@@ 1,8 1,9 @@
#! /usr/bin/env python
-# -*- coding: utf-8 -*-
-import pagegenerators
import re
-import wikipedia
+import shelve
+import pywikibot
+from pywikibot import pagegenerators
+import wikitextparser
from element import ElementSet
@@ 10,47 11,54 @@ import logging
import logging.config
logging.config.fileConfig("/home/saper/wikipedia/log/bots.conf", disable_existing_loggers=True)
-wikipedia.logger = logging.getLogger('plwiki')
+pywikibot.logger = logging.getLogger('plwiki')
mywiki = logging.getLogger('chemia')
-MAINPAGENAME = u"Związki chemiczne według pierwiastków/Wykaz"
-MAINPAGECAT = u"Kategoria:Wikiprojekt Chemia"
-ERRORPAGENAME = MAINPAGENAME + u"/Błędy"
-ERRORPAGECAT = u"Kategoria:Problemy z hasłami chemicznymi"
-MAINTEMPLATENAME =u"Szablon:Związek chemiczny infobox"
+MAINPAGENAME = "Związki chemiczne według pierwiastków/Wykaz"
+MAINPAGECAT = "Kategoria:Wikiprojekt Chemia"
+ERRORPAGENAME = MAINPAGENAME + "/Błędy"
+ERRORPAGECAT = "Kategoria:Problemy z hasłami chemicznymi"
+MAINTEMPLATENAME = "Związek chemiczny infobox"
+MAINTEMPLATEPAGE = "Szablon:" + MAINTEMPLATENAME
def CompoundGenerator(site, template):
- gen = pagegenerators.ReferringPageGenerator(
- wikipedia.Page(site, template),
- onlyTemplateInclusion=True)
- return pagegenerators.PreloadingGenerator(gen, pageNumber=200)
+ gen = pywikibot.Page(site, template).embeddedin()
+ return pagegenerators.PreloadingGenerator(gen, groupsize=200)
-class IgnoreThis:
+class IgnoreThis(Exception):
pass
-class CannotExtractError:
+class CannotExtractError(Exception):
pass
debug = 0
class CompoundWikiReader:
- def __init__(self, page,
- pattern = re.compile(u"[wW]zór sumaryczny\s*=\s*(?P<comp>.*)", re.M)):
-
- if page.namespace() <> 0:
+ def __init__(self, page):
+ if page.namespace() != 0:
raise IgnoreThis
s = page.get()
- m = pattern.search(s)
- if m:
- if m.group("comp").find("|") == -1:
- title = page.title()
- templatevalue = m.group("comp")
- self.compound = Compound(title, templatevalue)
- else:
- raise CannotExtractError
- else:
+ mywiki.debug("Czytamy '%s'", page.title())
+ self.compound = None
+ for template in wikitextparser.parse(s).templates:
+ mywiki.debug(".. mamy szablon '%s'", template.name.strip())
+ if template.name.strip() == MAINTEMPLATENAME:
+
+ mywiki.debug("... jest, %d", len(template.arguments))
+ for arg in template.arguments:
+ mywiki.debug("param: %s", arg.name)
+ if arg.name.strip().lower() == "Wzór sumaryczny".lower():
+ mywiki.debug(".... mamy wzór sumaryczny: '%s'", (arg.value))
+
+ # self.compound = (page.title(), arg.value)
+ self.compound = Compound(page.title(), arg.value)
+ break
+ break
+
+ if not self.compound:
+ mywiki.debug("'%s' nie ma wzoru sumarycznego", page.title())
raise CannotExtractError
class Compound:
@@ 78,7 86,10 @@ class Compound:
def __init__(self, arg1, arg2=None):
if arg2 is None:
- (self.title, self.templatevalue) = arg1.decode("utf-8").split(u"\t")
+ try:
+ (self.title, self.templatevalue) = arg1.split("\t")
+ except ValueError as cant_unpack:
+ raise ValueError("Can't unpack \"%s\"" % arg1) from cant_unpack
else:
self.title = arg1
self.templatevalue = arg2
@@ 97,7 108,7 @@ class Compound:
return w
def __repr__(self):
- return (u"\t".join([self.title, self.templatevalue])).encode("utf-8")
+ return "\t".join([self.title, self.templatevalue.strip()])
def __str__(self):
return self.compound.strip()
@@ 112,24 123,25 @@ class Compound:
def extractwiki(site, reallybadfile, compgen, elements):
compounds = []
- badlist = [u"==Artykuły o związkach chemicznych, w których robot nie znalazł wzoru sumarycznego==\n"]
- for comp in compgen:
- try:
- c = CompoundWikiReader(comp)
- compounds.append(c.compound)
- except IgnoreThis:
- pass
- except CannotExtractError:
+ badlist = ["==Artykuły o związkach chemicznych, w których robot nie znalazł wzoru sumarycznego==\n"]
+ with shelve.open("/tmp/chemia") as db:
+ for comp in compgen:
try:
- badlist.append(u"\n* [[%s]]" % unicode(comp.title()))
- except UnicodeDecodeError:
- print >>reallybadfile, repr(comp.title)
+ c = CompoundWikiReader(comp)
+ compounds.append(c.compound)
+ except IgnoreThis:
+ pass
+ except CannotExtractError:
+ try:
+ badlist.append("\n* [[%s]]" % str(comp.title()))
+ except UnicodeDecodeError:
+ print(repr(comp.title()), file=reallybadfile)  # log the title itself, not the bound method
return (compounds, badlist)
def formatgroup(group, ch, elementopen, cntcur, cntcursiz):
if group:
if cntcursiz > 5:
- group.insert(0, u"===%s<sub>%d</sub>===" % (ch.symbol, cntcur))
+ group.insert(0, "===%s<sub>%d</sub>===" % (ch.symbol, cntcur))
else:
if elementopen:
group.insert(0, "") # Additional newline
@@ 139,11 151,11 @@ def formatgroup(group, ch, elementopen,
def formatter(elements):
- out = ["__NOEDITSECTION__", u"{{Spis treści}}"]
+ out = ["__NOEDITSECTION__", "{{Spis treści}}"]
group = []
for (ch, ac) in elements.allarticles():
- out.append(u"==%s==" % unicode(ch.wikilink))
- ac.sort()
+ out.append("==%s==" % str(ch.wikilink))
+ ac.sort(key=lambda pair: (pair[0], pair[1].compound.strip()))
group = []
cntcur = 1
cntcursiz = 0
@@ 158,7 170,7 @@ def formatter(elements):
cntcursiz = 0
cntcur = cnt
- group.append(u"%s" % a.wikilink())
+ group.append("%s" % a.wikilink())
cntcursiz += 1
got = formatgroup(group, ch, elementopen, cntcur, cntcursiz)
@@ 168,23 180,22 @@ def formatter(elements):
def processwikilive(elements):
- errpage = wikipedia.Page(site, ERRORPAGENAME)
+ errpage = pywikibot.Page(site, ERRORPAGENAME)
reallybadfile = open("bad", "w")
- (compounds, badlist) = extractwiki(site, reallybadfile, CompoundGenerator(site, MAINTEMPLATENAME), elements)
+ (compounds, badlist) = extractwiki(site, reallybadfile, CompoundGenerator(site, MAINTEMPLATEPAGE), elements)
reallybadfile.close()
compfile = open("compfile", "w")
for comp in compounds:
- print >>compfile, repr(comp)
+ print(repr(comp), file=compfile)
elements.feed(comp, comp.elements)
compfile.close()
if badlist:
badlist.extend(["", "[[%s]]" % ERRORPAGECAT])
- errpage.put(u"".join(badlist),
- comment=u"Robot zapisuje błędy wynikłe podczas tworzenia [[%s]]" % MAINPAGENAME, minorEdit=False, botflag=False)
- mywiki.info(u"Pierwiastki zostały przetworzone")
+ errpage.put("".join(badlist), summary="Robot zapisuje błędy wynikłe podczas tworzenia [[%s]]" % MAINPAGENAME, minor=False, botflag=False)
+ mywiki.info("Pierwiastki zostały przetworzone")
def processcompfile(elements, compfile=None):
@@ 201,7 212,7 @@ def loadelements(site):
def runbot():
import sys
- outpage = wikipedia.Page(site, MAINPAGENAME)
+ outpage = pywikibot.Page(site, MAINPAGENAME)
if len(sys.argv) > 1:
processcompfile(elements, open(sys.argv[1], "r"))
else:
@@ 209,11 220,9 @@ def runbot():
out = formatter(elements)
if out:
out.extend(["", "[[%s]]" % MAINPAGECAT])
- outpage.put(u"\n".join(out), comment=u"Robot tworzy stronę na podstawie [[Specjalna:Linkujące/%s|zawartości infoboksów]]" % MAINTEMPLATENAME,
- minorEdit=False, botflag=False)
+ outpage.put("\n".join(out), summary="Robot tworzy stronę na podstawie [[Specjalna:Linkujące/%s|zawartości infoboksów]]" % MAINTEMPLATENAME, minor=False, botflag=False)
-site = wikipedia.getSite('pl', 'wikipedia')
-wikipedia.setSite(site)
+site = pywikibot.Site('pl', 'wikipedia')
elements = loadelements(site)
if __name__ == '__main__':
runbot()
@@ 1,7 1,6 @@
#! /usr/bin/env python
-# -*- coding: utf-8 -*-
import re
-import wikipedia
+import pywikibot
class CannotExtractNameError:
pass
@@ 29,9 28,9 @@ class Element:
self.article = self.article.capitalize()
if self.name == self.article:
- self.wikilink = u'[[%s]]' % self.name
+ self.wikilink = '[[%s]]' % self.name
else:
- self.wikilink = u'[[%s|%s]]' % (self.article, self.name)
+ self.wikilink = '[[%s|%s]]' % (self.article, self.name)
def __cmp__(a, b):
return cmp(a.name, b.name)
@@ 46,8 45,8 @@ class ElementSet:
def __init__(self,
site,
- page = u"Lista pierwiastków chemicznych",
- pattern = u"^\| (?P<symbol>\w{1,2}) \|\| (?P<wikilink>.*?) \|\|.*$",
+ page = "Lista pierwiastków chemicznych",
+ pattern = r"^\| (?P<symbol>\w{1,2}) \|\| (?P<wikilink>.*?) \|\|.*$",  # raw string: regex escapes
):
self.wikilink = {}
@@ 57,7 56,7 @@ class ElementSet:
self.site = site
for p in re.compile(pattern, re.M).finditer(
- wikipedia.Page(self.site, page).get()):
+ pywikibot.Page(self.site, page).get()):  # read from the site passed to __init__
# self.wikilink[p.group("symbol")]=p.group("wikilink")
@@ 90,9 89,7 @@ class ElementSet:
input = input[:sympos] + input[npos:]
def allarticles(self):
- s = self.articles.keys()
- s.sort()
- for ch in s:
+ for ch in sorted(self.articles.keys(), key=lambda a: a.name):
if self.articles[ch]:
yield(ch, self.articles[ch])
@@ 110,8 107,7 @@ class ElementSet:
out = []
if not ac:
ac = self.articles[el]
- ac.sort()
- for (cnt, a) in ac:
+ for (cnt, a) in sorted(ac, key=lambda pair: (pair[0], pair[1].compound.strip())):
out.append(self.printgroup(el, cnt, a))
return "".join(out)
@@ 123,9 119,9 @@ class ElementSet:
if __name__ == '__main__':
- q = ElementSet(wikipedia.getSite('pl', 'wikipedia'))
+ q = ElementSet(pywikibot.Site('pl', 'wikipedia'))
q.parse("Test", "CaCOAuOOH")
q.parse("Test2", "Pu")
q.parse("Test3", "CaHe")
q.parse("Test4", "ZrCaHe")
- print unicode(q)
+ print(str(q))