cosmetic_changes.py

xqt Sun, 03 Mar 2013 09:32:52 -0800

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11174


Revision: 11174
Author:   xqt
Date:     2013-03-03 17:32:47 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
some PEP8 changes

Modified Paths:
--------------
    trunk/pywikipedia/cosmetic_changes.py

Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py       2013-03-03 16:18:14 UTC (rev 11173)
+++ trunk/pywikipedia/cosmetic_changes.py       2013-03-03 17:32:47 UTC (rev 11174)
@@ -59,7 +59,8 @@
 #
 __version__ = '$Id$'
 #
-import sys, re
+import sys
+import re
 import wikipedia as pywikibot
 import isbn
 import pagegenerators
@@ -79,38 +80,39 @@
 # Interwiki message on top of iw links
 # 2nd line is a regex if needed
 msg_interwiki = {
-    'fr' : u'<!-- Autres langues -->',
-    'nn' : (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->',
-            u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
+    'fr': u'<!-- Autres langues -->',
+    'nn': (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->',
+           u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
 }
 
 # This is from interwiki.py;
 # move it to family file and implement global instances
 moved_links = {
-    'ca' : (u'ús de la plantilla', u'/ús'),
-    'cs' : (u'dokumentace',   u'/doc'),
-    'de' : (u'dokumentation', u'/Meta'),
-    'en' : ([u'documentation',
-             u'template documentation',
-             u'template doc',
-             u'doc',
-             u'documentation, template'], u'/doc'),
-    'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'),
-    'fa' : ([u'documentation',u'توضیحات',u'توضیحات الگو',u'doc'], u'/توضیحات'),
-    'fr' : (u'/documentation', u'/Documentation'),
-    'hu' : (u'sablondokumentáció', u'/doc'),
-    'id' : (u'template doc',  u'/doc'),
-    'ja' : (u'documentation', u'/doc'),
-    'ka' : (u'თარგის ინფო',   u'/ინფო'),
-    'ko' : (u'documentation', u'/설명문서'),
-    'ms' : (u'documentation', u'/doc'),
-    'pl' : (u'dokumentacja',  u'/opis'),
-    'pt' : ([u'documentação', u'/doc'],  u'/doc'),
-    'ro' : (u'documentaţie',  u'/doc'),
-    'ru' : (u'doc',           u'/doc'),
-    'sv' : (u'dokumentation', u'/dok'),
-    'vi' : (u'documentation', u'/doc'),
-    'zh' : ([u'documentation', u'doc'], u'/doc'),
+    'ca': (u'ús de la plantilla', u'/ús'),
+    'cs': (u'dokumentace', u'/doc'),
+    'de': (u'dokumentation', u'/Meta'),
+    'en': ([u'documentation',
+            u'template documentation',
+            u'template doc',
+            u'doc',
+            u'documentation, template'], u'/doc'),
+    'es': ([u'documentación', u'documentación de plantilla'], u'/doc'),
+    'fa': ([u'documentation', u'توضیحات', u'توضیحات الگو',
+            u'doc'], u'/توضیحات'),
+    'fr': (u'/documentation', u'/Documentation'),
+    'hu': (u'sablondokumentáció', u'/doc'),
+    'id': (u'template doc', u'/doc'),
+    'ja': (u'documentation', u'/doc'),
+    'ka': (u'თარგის ინფო', u'/ინფო'),
+    'ko': (u'documentation', u'/설명문서'),
+    'ms': (u'documentation', u'/doc'),
+    'pl': (u'dokumentacja', u'/opis'),
+    'pt': ([u'documentação', u'/doc'], u'/doc'),
+    'ro': (u'documentaţie', u'/doc'),
+    'ru': (u'doc', u'/doc'),
+    'sv': (u'dokumentation', u'/dok'),
+    'vi': (u'documentation', u'/doc'),
+    'zh': ([u'documentation', u'doc'], u'/doc'),
 }
 
 # Template which should be replaced or removed.
@@ -158,7 +160,7 @@
         Given a wiki source code text, return the cleaned up version.
         """
         oldText = text
-        if self.site.sitename()== u'commons:commons' and self.namespace == 6:
+        if self.site.sitename() == u'commons:commons' and self.namespace == 6:
             text = self.commonsfiledesc(text)
         text = self.fixSelfInterwiki(text)
         text = self.standardizePageFooter(text)
@@ -195,7 +197,7 @@
         Interwiki links to the site itself are displayed like local links.
         Remove their language code prefix.
         """
-        if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki':
+        if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
             interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
                                     % self.site.lang)
             text = interwikiR.sub(r'[[\1]]', text)
@@ -212,6 +214,7 @@
         3. additional information depending on local site policy
         4. stars templates for featured and good articles
         5. interwiki links
+
         """
         starsList = [
             u'bueno',
@@ -254,9 +257,13 @@
         if not self.template and not '{{Personendaten' in text and \
            not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
            not self.site.lang in ('et', 'it', 'bg', 'ru'):
-            categories = pywikibot.getCategoryLinks(text, site = self.site)
+            try:
+                categories = pywikibot.getCategoryLinks(text, site=self.site)
+            # there are categories like [[categoy:Foo {{#time:Y...}}]]
+            except InvalidTitle:
+                pass
 
-        if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
+        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
             subpage = False
             if self.template:
                 loc = None
@@ -265,13 +272,13 @@
                     del tmpl
                 except KeyError:
                     pass
-                if loc != None and loc in self.title:
+                if loc is not None and loc in self.title:
                     subpage = True
             interwikiLinks = pywikibot.getLanguageLinks(
                 text, insite=self.site, template_subpage=subpage)
 
             # Removing the interwiki
-            text = pywikibot.removeLanguageLinks(text, site = self.site)
+            text = pywikibot.removeLanguageLinks(text, site=self.site)
             # Removing the stars' issue
             starstext = pywikibot.removeDisabledParts(text)
             for star in starsList:
@@ -314,16 +321,16 @@
            ((interwikiLinks or hasCommentLine) and
             self.site.language() == 'nn' or
             (interwikiLinks and hasCommentLine) and
-            self.site.language() == 'fr'):
+                self.site.language() == 'fr'):
             text += '\r\n\r\n' + iw_msg
         # Adding stars templates
         if allstars:
-            text = text.strip()+self.site.family.interwiki_text_separator
+            text = text.strip() + self.site.family.interwiki_text_separator
             allstars.sort()
             for element in allstars:
                 text += '%s\r\n' % element.strip()
                 if pywikibot.verbose:
-                    pywikibot.output(u'%s' %element.strip())
+                    pywikibot.output(u'%s' % element.strip())
         # Adding the interwiki
         if interwikiLinks:
             text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
@@ -370,8 +377,8 @@
             if thisNs and namespaces:
                 text = pywikibot.replaceExcept(
                     text,
-                    r'\[\[\s*(' + '|'.join(namespaces) + \
-                    ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + \
+                    r'\[\[\s*(' + '|'.join(namespaces) +
+                    ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs +
                     ':\g<nameAndLabel>]]', exceptions)
         return text
 
@@ -383,13 +390,15 @@
         # arz uses english stylish codes
         if self.site.lang not in ['arz', 'ru']:
             exceptions = ['nowiki', 'comment', 'math', 'pre']
-            for magicWord in ['img_thumbnail', 'img_left', 'img_center', 'img_right', 'img_none',
-                              'img_framed', 'img_frameless', 'img_border', 'img_upright',]:
+            for magicWord in ['img_thumbnail', 'img_left', 'img_center',
+                              'img_right', 'img_none', 'img_framed',
+                              'img_frameless', 'img_border', 'img_upright', ]:
                 aliases = self.site.siteinfo('magicwords').get(magicWord)
                 if not aliases: continue
-                text = pywikibot.replaceExcept(text, r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +') *(?P<right>(\|.*?)?\]\])',
-                                               r'[[\g<left>' + aliases[0] + '\g<right>',
-                                               exceptions)
+                text = pywikibot.replaceExcept(
+                    text,
+                    r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) + ') *(?P<right>(\|.*?)?\]\])',
+                    r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
         return text
 
     def cleanUpLinks(self, text):
@@ -409,7 +418,7 @@
                 try:
                     page = pywikibot.Page(self.site, titleWithSection)
                 #except pywikibot.InvalidTitle:
-                except: #empty self link occures
+                except:  # empty self link occures
                     return match.group()
                 if page.namespace() == 0:
                     # Replace underlines by spaces, also multiple underlines
@@ -469,7 +478,8 @@
                         newLink = "[[%s]]" % label
                     # Check if we can create a link with trailing characters
                     # instead of a pipelink
-                    elif self.site.sitename() != 'wikipedia:fa' and len(titleWithSection) <= len(label) and \
+                    elif self.site.sitename() != 'wikipedia:fa' and \
+                         len(titleWithSection) <= len(label) and \
                          label[:len(titleWithSection)] == titleWithSection and \
                          re.sub(trailR, '',
                                 label[len(titleWithSection):]) == '':
@@ -509,7 +519,7 @@
     # group <linktrail> is the link trail after ]] which are part of the word.
     # note that the definition of 'letter' varies from language to language.
         linkR = re.compile(
-            r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + \
+            r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' +
             self.site.linktrail() + ')')
 
         text = pywikibot.replaceExcept(text, linkR, handleOneLink,
@@ -526,24 +536,24 @@
 
     def resolveHtmlEntities(self, text):
         ignore = [
-             38,     # Ampersand (&amp;)
-             39,     # Bugzilla 24093
-             60,     # Less than (&lt;)
-             62,     # Great than (&gt;)
-             91,     # Opening bracket - sometimes used intentionally inside links
-             93,     # Closing bracket - sometimes used intentionally inside links
-            124,     # Vertical bar (??) - used intentionally in navigation bar templates on de:
-            160,     # Non-breaking space (&nbsp;) - not supported by Firefox textareas
-            173,     # Soft-hypen (&shy;) - enable editing
-           8206,     # left-to-right mark (&ltr;)
-           8207,     # right-to-left mark (&rtl;)
+            38,     # Ampersand (&amp;)
+            39,     # Bugzilla 24093
+            60,     # Less than (&lt;)
+            62,     # Great than (&gt;)
+            91,     # Opening bracket - sometimes used intentionally inside links
+            93,     # Closing bracket - sometimes used intentionally inside links
+            124,    # Vertical bar (??) - used intentionally in navigation bar templates on de:
+            160,    # Non-breaking space (&nbsp;) - not supported by Firefox textareas
+            173,    # Soft-hypen (&shy;) - enable editing
+            8206,   # left-to-right mark (&ltr;)
+            8207,   # right-to-left mark (&rtl;)
         ]
         # ignore ' see http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=2320801
         #if self.site.lang == 'eo':
         #    ignore += [39]
         if self.template:
             ignore += [58]
-        text = pywikibot.html2unicode(text, ignore = ignore)
+        text = pywikibot.html2unicode(text, ignore=ignore)
         return text
 
     def validXhtml(self, text):
@@ -608,17 +618,21 @@
 
     def replaceDeprecatedTemplates(self, text):
         exceptions = ['comment', 'math', 'nowiki', 'pre']
-        if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]:
+        if self.site.family.name in deprecatedTemplates and \
+           self.site.lang in deprecatedTemplates[self.site.family.name]:
             for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
                 old = template[0]
                 new = template[1]
-                if new == None:
+                if new is None:
                     new = ''
                 else:
-                    new = '{{'+new+'}}'
+                    new = '{{%s}}' % new
                 if not self.site.nocapitalize:
                     old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
-                text = pywikibot.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' + old + '(?P<parameters>\|[^}]+|)}}', new, exceptions)
+                text = pywikibot.replaceExcept(
+                    text,
+                    r'\{\{([mM][sS][gG]:)?%s(?P<parameters>\|[^}]+|)}}' % old,
+                    new, exceptions)
         return text
 
     #from fixes.py
@@ -657,22 +671,31 @@
     def fixHtml(self, text):
         # Everything case-insensitive (?i)
         # Keep in mind that MediaWiki automatically converts <br> to <br />
-        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
-        text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''" , exceptions)
-        text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>', r"'''\1'''" , exceptions)
-        text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''" , exceptions)
-        text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''" , exceptions)
+        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
+                      'startspace']
+        text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''",
+                                       exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
+                                       r"'''\1'''", exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''",
+                                       exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''",
+                                       exceptions)
         # horizontal line without attributes in a single line
-        text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2', exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
+                                       r'\1----\2', exceptions)
         # horizontal line with attributes; can't be done with wiki syntax
         # so we only make it XHTML compliant
-        text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />', exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1 />',
+                                       exceptions)
         # a header where only spaces are in the same line
         for level in range(1, 7):
-            equals = '\\1%s \\2 %s\\3' % ("="*level, "="*level)
-            text = pywikibot.replaceExcept(text,
-                                           r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])'%(level, level),
-                                           r'%s'%equals, exceptions)
+            equals = '\\1%s \\2 %s\\3' % ("=" * level, "=" * level)
+            text = pywikibot.replaceExcept(
+                text,
+                r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])' % (level, level),
+                r'%s' % equals,
+                exceptions)
         # TODO: maybe we can make the bot replace <p> tags with \r\n's.
         return text
 
@@ -683,15 +706,19 @@
         # it should be name = " or name=" NOT name   ="
         text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
         #remove empty <ref/>-tag
-        text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)', r'', exceptions)
-        text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref *>\s*</ref>)',
+                                       r'', exceptions)
+        text = pywikibot.replaceExcept(text, r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>',
+                                       r'<ref \1/>', exceptions)
         return text
 
     def fixStyle(self, text):
         exceptions = ['nowiki', 'comment', 'math', 'pre', 'source', 'startspace']
         # convert prettytable to wikitable class
         if self.site.language in ('de', 'en'):
-           text = pywikibot.replaceExcept(text, ur'(class="[^"]*)prettytable([^"]*")', ur'\1wikitable\2', exceptions)
+            text = pywikibot.replaceExcept(text,
+                                           ur'(class="[^"]*)prettytable([^"]*")',
+                                           ur'\1wikitable\2', exceptions)
         return text
 
     def fixTypo(self, text):
@@ -726,22 +753,23 @@
         ]
         # valid digits
         digits = {
-            'ckb' : u'٠١٢٣٤٥٦٧٨٩',
-            'fa'  : u'۰۱۲۳۴۵۶۷۸۹'
+            'ckb': u'٠١٢٣٤٥٦٧٨٩',
+            'fa': u'۰۱۲۳۴۵۶۷۸۹',
         }
         new = digits.pop(self.site.lang)
         # This only works if there are only two items in digits dict
         old = digits[digits.keys()[0]]
-        faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك'+u'۱۲۳۴۵۶۷۸۹۰'
+        faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
         # do not change inside file links
         namespaces = list(self.site.namespace(6, all=True))
         pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\.\w+? *(\|((\[\[.*?\]\])|.)*)?\]\]',
                              re.UNICODE)
         #not to let bot edits in latin content
-        exceptions.append(re.compile(u"[^"+faChrs+u"] *?\"*? *?, *?[^"+faChrs+u"]"))
+        exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
+                                     % {'fa': faChrs}))
         exceptions.append(pattern)
         text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
-        if self.site.lang=='ckb':
+        if self.site.lang == 'ckb':
             text = pywikibot.replaceExcept(text,
                                            ur'ه([.،_<\]\s])',
                                            ur'ە\1', exceptions)
@@ -760,7 +788,7 @@
         # do not change digits inside html-tags
         pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
         exceptions.append(pattern)
-        exceptions.append('table') #exclude tables for now
+        exceptions.append('table')  # exclude tables for now
         # replace digits
         for i in xrange(0, 10):
             text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)


_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

[Pywikipedia-svn] SVN: [11174] trunk/pywikipedia/cosmetic_changes.py

Reply via email to