-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 If this is also an issue with section detection within pages, you could (if you like) also consider using the code given in 'getSections' [1]...
[1] https://fisheye.toolserver.org/browse/drtrigon/pywikipedia/dtbext/dtbext_wikipedia.py?hb=true Greetings DrTrigon Am 03.09.2011 13:58, schrieb [email protected]: > http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9494 > > Revision: 9494 Author: xqt Date: 2011-09-03 11:58:48 +0000 > (Sat, 03 Sep 2011) Log Message: ----------- reverrevert r3147 due > to bug #2989218; check for italic code in headings.TODO: use a > better regex to find it. > > Modified Paths: -------------- trunk/pywikipedia/wikipedia.py > > Modified: trunk/pywikipedia/wikipedia.py > =================================================================== > > - --- trunk/pywikipedia/wikipedia.py 2011-09-03 11:17:47 UTC (rev 9493) > +++ trunk/pywikipedia/wikipedia.py 2011-09-03 11:58:48 UTC (rev > 9494) @@ -66,7 +66,6 @@ within a non-wiki-markup section of text > decodeEsperantoX: decode Esperanto text using the x convention. > encodeEsperantoX: convert wikitext to the Esperanto x-encoding. - > sectionencode: encode text for use as a section title in > wiki-links. findmarker(text, startwith, append): return a string > which is not part of text expandmarker(text, marker, separator): > return marker string expanded @@ -654,7 +653,7 @@ self._contents = > contents hn = self.section() if hn: - m = > re.search("=+ *%s *=+" % hn, self._contents) + m > = re.search("=+[ ']*%s[ ']*=+" % hn, self._contents) if verbose and > not m: output(u"WARNING: Section does not exist: %s" % > self.aslink(forceInterwiki = True)) # Store any exceptions for > later reference @@ -779,8 +778,8 @@ else: raise > IsRedirectPage(redirtarget) if self.section(): - # TODO: > What the hell is this? Docu please. 
- m = > re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" > % re.escape(self.section()), > sectionencode(pageInfo['revisions'][0]['*'],self.site().encoding())) > > + m = re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()), > + pageInfo['revisions'][0]['*']) if not > m: try: self._getexception @@ -920,8 +919,8 @@ else: raise > IsRedirectPage(redirtarget) if self.section(): - # TODO: > What the hell is this? Docu please. - m = > re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" > % re.escape(self.section()), > sectionencode(text,self.site().encoding())) + m = > re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()), + > text) if not m: try: self._getexception @@ -4140,8 +4139,7 @@ > page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if > section: - m = > re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" > > - - % re.escape(section), sectionencode(text,page2.site().encoding())) > + m = re.search("=+[ ']*%s[ ']*=+" % > re.escape(section), text) if not m: try: page2._getexception @@ > -4302,7 +4300,7 @@ # Use the data loading time. 
page2._startTime = > time.strftime('%Y%m%d%H%M%S', time.gmtime()) if section: - > m = > re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" > % re.escape(section), sectionencode(text,page2.site().encoding())) > + m = re.search("=+[ ']*%s[ ']*=+" % > re.escape(section), text) if not m: try: page2._getexception @@ > -4531,10 +4529,6 @@ break return text > > -def sectionencode(text, encoding): - """Encode text so that it > can be used as a section title in wiki-links.""" - return > urllib.quote(text.replace(" > ","_").encode(encoding)).replace("%",".") - ######## Unicode > library functions ######## > > def UnicodeToAsciiHtml(s): > > > _______________________________________________ Pywikipedia-svn > mailing list [email protected] > https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn > -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.11 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/ iEYEARECAAYFAk5ijR8ACgkQAXWvBxzBrDBNNQCgve2/z/SUa3bUNd625ibUKG/G sEMAn2/LtRfr9kvdV1UX+aVKL9MQZwl8 =9anJ -----END PGP SIGNATURE----- _______________________________________________ Pywikipedia-l mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l
