9febae666db5 — Marcin Cieślak 6 years ago
chemia-dev unused
2 files changed, 0 insertions(+), 232 deletions(-)

R plwiki/chemia-dev/comp.py => 
R plwiki/chemia-dev/element.py => 
R plwiki/chemia-dev/comp.py =>  +0 -128
@@ 1,128 0,0 @@ 
-#! /usr/bin/env python
-# -*- coding: utf-8  -*-
-import pagegenerators
-import re
-import wikipedia
-
-from element import ElementSet
-
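-# Yield every page that transcludes the given infobox template, with page text
-# preloaded in batches of 200.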
-def CompoundGenerator(site, template):
-	gen = pagegenerators.ReferringPageGenerator(
-		wikipedia.Page(site, template), 
-		onlyTemplateInclusion=True)
-	return pagegenerators.PreloadingGenerator(gen, pageNumber=200)
-
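-# Raised to skip pages outside the main (article) namespace.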
-class IgnoreThis:
-	pass
-
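-# Base class for reportable problems: each subclass keeps a class-level list of
-# offending pages, rendered as wiki list items on the error report page.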
-class ErrorPage:
-	errorlist = []
-	def add(self, page):
-		self.__class__.__dict__["errorlist"].append(u"\n* [[%s]]" %  unicode(page.title()))
-
-class StructuralNameError(ErrorPage):
-	errorlist = [u"\n=Artykuły o związkach chemicznych, w których robot nie znalazł wzoru sumarycznego=\n"]
-
-class InvalidTempError(ErrorPage):
-	errorlist = [u"\n=Artykuły o związkach chemicznych, w których robot znalazł niewłaściwy zapis temperatury=\n"]
-
-debug = 0
-
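-# Wraps one article: extracts the molecular formula ("wzór sumaryczny") from the
-# infobox, strips wiki markup from it, and checks that the temperature parameters
-# are plain numbers.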
-class Compound:
-	LINKS = [
-		re.compile("\[\[.+?\|"),
-		re.compile("\[\["),
-		re.compile("\[.+? +"),
-		re.compile("\]+"),
-		(re.compile("<br>"), " "),
-		(re.compile("<br/>"), " "),
-	]
-	REMOVE = [
-		re.compile("<su[bp]>.*?</su[bp]>"),
-		re.compile("<nowiki>.*?</nowiki>"),
-		re.compile("\(bezwodny\)"),
-		re.compile("\(dihydrat\)"),
-		re.compile("\(aq\)"),
-		re.compile("\("),
-		re.compile("\)"),
-		re.compile("-"),
-		re.compile("\s+"),
-		re.compile("\|\s*SMILES\s*="),
-		re.compile("dlaQ10"),
-		re.compile(","),
-	]
-		
-	def __init__(self, page,
-			summary_compound_pattern = re.compile(u"[wW]zór sumaryczny\s*=\s*(?P<comp>.*)", re.M),
-		temperature_parameter = re.compile(u"Temperatura \w+=\s*(?P<temp>.*)", re.M),
-		temperature_pattern = re.compile(u"^\s*-?\s*[\.\d]+\s*$")):
-	
-		if page.namespace() != 0:
-			raise IgnoreThis
-			
-		s = page.get()
-		m = summary_compound_pattern.search(s)
-		if m:
-			if m.group("comp").find("|") == -1:
-				self.title = page.title()
-				if debug:
-					print "Working on page     :    %s" % self.title.encode("utf-8")
-					print "Found template value:    %s" % m.group("comp")
-				self.compound = self.cleanup(m.group("comp"), self.LINKS) 
-				self.elements = self.cleanup(m.group("comp"), self.LINKS + self.REMOVE) 
-				if debug:
-					print "Just removed all links:: %s" % repr(self.compound)
-					print "Cleanup finished:        %s" % repr(self.elements)
-			else:
-				raise StructuralNameError
-		else:
-			raise StructuralNameError
-		m = temperature_parameter.search(s)
-		if m:
-			t =  m.group("temp")
-			if not temperature_pattern.search(t):
-				raise InvalidTempError
-
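-	# Apply the patterns in order: bare re objects delete their matches,
-	# (pattern, replacement) tuples substitute the given replacement.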
-	def cleanup(self, what, patterns):
-		w = what
-		for replace_pattern in patterns:
-			try:
-				(pattern, replacement) = replace_pattern
-			except TypeError: 	# re object is not iterable
-				(pattern, replacement) = (replace_pattern, "")
-			w = pattern.sub(replacement, w)
-		return w
-
-	def __str__(self):
-		return "[[%s|%s]]" % (self.title, self.compound.strip())
-
-
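-# Entry point: index all infobox articles by chemical element, then write the
-# listing and the error report to the Wikipedysta:Miner subpages on pl.wikipedia.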
-if __name__ == '__main__':
-	site = wikipedia.getSite('pl', 'wikipedia')
-	wikipedia.setSite(site)
-	elements = ElementSet(site)
-	template = u"Szablon:Związek chemiczny infobox"
-	reallybadfile = open("bad", "w")
-	for comp in CompoundGenerator(site, template):
-		try:
-			elements.parse(Compound(comp))
-		except IgnoreThis:
-			pass
-		except InvalidTempError, e:
-			e.add(comp)
-		except StructuralNameError, e:
-			e.add(comp)
-
-	mainpagename = u"Wikipedysta:Miner/Związki chemiczne według pierwiastków"
-	outpage = wikipedia.Page(site, mainpagename)
-	errpage = wikipedia.Page(site, u"Wikipedysta:Miner/Związki chemiczne według pierwiastków/Błędy")
-	out = ["__NOEDITSECTION__", u"{{Spis treści}}"]
-	for (ch, a) in elements.output():
-		out.append(u"===%s===" % unicode(ch.wikilink))
-		out.append(u"<small>%s</small>\n" % ", ".join([unicode(z) for z in a]))
-		
-	reallybadfile.close()
-	outpage.put(u"\n".join(out), comment=u"Robot tworzy stronę na podstawie [[Specjalna:Linkujące/%s|zawartości infoboksów]]" % template)
-	errpage.put(u"".join(StructuralNameError.errorlist + 
-			InvalidTempError.errorlist), 
-		comment=u"Robot zapisuje błędy wynikłe podczas tworzenia [[%s]]" % mainpagename)

R plwiki/chemia-dev/element.py =>  +0 -104
@@ 1,104 0,0 @@ 
-#! /usr/bin/env python
-# -*- coding: utf-8  -*-
-import re
-import wikipedia
-
-class CannotExtractNameError:
-	pass
-
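-# One element parsed from a table row: stores the symbol, the capitalized name and
-# article title taken from a piped or unpiped wikilink, and a normalized wikilink.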
-class Element:
-	WIKILINK1 = re.compile("\s*\[\[(?P<article>.*?)\|(?P<name>.*?)\]\]\s*")
-	WIKILINK2 = re.compile("\s*\[\[(?P<name>.*?)\]\]\s*")
-
-	def __init__(self, symbol, wikilink):
-		self.symbol = symbol
-
-		m1 = self.WIKILINK1.match(wikilink)
-		if m1:
-			self.article = m1.group("article")
-			self.name = m1.group("name")
-		else:
-			m2 = self.WIKILINK2.match(wikilink)
-			if m2:
-				self.name = m2.group("name")
-				self.article = self.name
-			else:
-				raise CannotExtractNameError
-
-		self.name = self.name.capitalize()
-		self.article = self.article.capitalize()
-
-		if self.name == self.article:
-			self.wikilink = u'[[%s]]' % self.name
-		else:
-			self.wikilink = u'[[%s|%s]]' % (self.article, self.name)
-
-	def __cmp__(a, b):
-		return cmp(a.name, b.name)
-
-	def __hash__(self):
-		return hash(self.name)
-
-	def __str__(self):
-		return self.name
-
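-# The full set of elements, read from the wikitable on the page
-# "Pierwiastki chemiczne według symboli"; keeps a list of matching articles per element.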
-class ElementSet:
-	
-	def __init__(self, 
-			site,
-			page = u"Pierwiastki chemiczne według symboli", 
-			pattern = u"^\| (?P<symbol>\w{1,2}) \|\| (?P<wikilink>.*?) \|\|.*$",
-		):
-
-		self.wikilink = {}
-		self.twoletter = []
-		self.singleletter = []
-		self.articles = {}
-		self.site = site
-
-		for p in re.compile(pattern, re.M).finditer(
-			wikipedia.Page(self.site, page).get()):
-
-			# self.wikilink[p.group("symbol")]=p.group("wikilink")
-
-			el = Element(p.group("symbol"), p.group("wikilink"))
-
-			if len(p.group("symbol")) == 2:
-				self.twoletter.append(el)
-			else:
-				self.singleletter.append(el)
-			self.articles[el] = []
-
-	def add(self, ch, a):
-		self.articles[ch].append(a)
-
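-	# Scan the formula for element symbols; two-letter symbols are tried first and
-	# removed from the input so that e.g. "He" is not also counted as "H".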
-	def parse(self, a, input=None):
-		if not input:
-			input = a.elements
-		for ch in self.twoletter + self.singleletter:
-			if input.find(ch.symbol) >= 0:
-				self.add(ch, a)
-				input = input.replace(ch.symbol, "")
-
-	def output(self):
-		s = self.articles.keys()
-		s.sort()
-		for ch in s:
-			if self.articles[ch]:
-				yield(ch, self.articles[ch])
-
-	def __str__(self):
-		out = []
-		for (ch, a) in self.output():
-			out.append(unicode(ch.wikilink))
-			out.append(u"  %s\n" % ", ".join([unicode(z) for z in a]))
-		return "\n".join(out)
-
-
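-# Quick smoke test against live pl.wikipedia data.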
-if __name__ == '__main__':
-	q = ElementSet(wikipedia.getSite('pl', 'wikipedia'))
-	q.parse("Test", "CaCOAuOOH")
-	q.parse("Test2", "Pu")
-	q.parse("Test3", "CaHe")
-	q.parse("Test4", "ZrCaHe")
-	print unicode(q)