http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11174
Revision: 11174
Author: xqt
Date: 2013-03-03 17:32:47 +0000 (Sun, 03 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2013-03-03 16:18:14 UTC (rev 11173)
+++ trunk/pywikipedia/cosmetic_changes.py 2013-03-03 17:32:47 UTC (rev 11174)
@@ -59,7 +59,8 @@
#
__version__ = '$Id$'
#
-import sys, re
+import sys
+import re
import wikipedia as pywikibot
import isbn
import pagegenerators
@@ -79,38 +80,39 @@
# Interwiki message on top of iw links
# 2nd line is a regex if needed
msg_interwiki = {
- 'fr' : u'<!-- Autres langues -->',
- 'nn' : (u'<!--interwiki (no, sv, da first; then other languages
alphabetically by name)-->',
- u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other
languages alphabetically by name\) ?-->)')
+ 'fr': u'<!-- Autres langues -->',
+ 'nn': (u'<!--interwiki (no, sv, da first; then other languages
alphabetically by name)-->',
+ u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other
languages alphabetically by name\) ?-->)')
}
# This is from interwiki.py;
# move it to family file and implement global instances
moved_links = {
- 'ca' : (u'ús de la plantilla', u'/ús'),
- 'cs' : (u'dokumentace', u'/doc'),
- 'de' : (u'dokumentation', u'/Meta'),
- 'en' : ([u'documentation',
- u'template documentation',
- u'template doc',
- u'doc',
- u'documentation, template'], u'/doc'),
- 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'),
- 'fa' : ([u'documentation',u'توضیحات',u'توضیحات الگو',u'doc'], u'/توضیحات'),
- 'fr' : (u'/documentation', u'/Documentation'),
- 'hu' : (u'sablondokumentáció', u'/doc'),
- 'id' : (u'template doc', u'/doc'),
- 'ja' : (u'documentation', u'/doc'),
- 'ka' : (u'თარგის ინფო', u'/ინფო'),
- 'ko' : (u'documentation', u'/설명문서'),
- 'ms' : (u'documentation', u'/doc'),
- 'pl' : (u'dokumentacja', u'/opis'),
- 'pt' : ([u'documentação', u'/doc'], u'/doc'),
- 'ro' : (u'documentaţie', u'/doc'),
- 'ru' : (u'doc', u'/doc'),
- 'sv' : (u'dokumentation', u'/dok'),
- 'vi' : (u'documentation', u'/doc'),
- 'zh' : ([u'documentation', u'doc'], u'/doc'),
+ 'ca': (u'ús de la plantilla', u'/ús'),
+ 'cs': (u'dokumentace', u'/doc'),
+ 'de': (u'dokumentation', u'/Meta'),
+ 'en': ([u'documentation',
+ u'template documentation',
+ u'template doc',
+ u'doc',
+ u'documentation, template'], u'/doc'),
+ 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'),
+ 'fa': ([u'documentation', u'توضیحات', u'توضیحات الگو',
+ u'doc'], u'/توضیحات'),
+ 'fr': (u'/documentation', u'/Documentation'),
+ 'hu': (u'sablondokumentáció', u'/doc'),
+ 'id': (u'template doc', u'/doc'),
+ 'ja': (u'documentation', u'/doc'),
+ 'ka': (u'თარგის ინფო', u'/ინფო'),
+ 'ko': (u'documentation', u'/설명문서'),
+ 'ms': (u'documentation', u'/doc'),
+ 'pl': (u'dokumentacja', u'/opis'),
+ 'pt': ([u'documentação', u'/doc'], u'/doc'),
+ 'ro': (u'documentaţie', u'/doc'),
+ 'ru': (u'doc', u'/doc'),
+ 'sv': (u'dokumentation', u'/dok'),
+ 'vi': (u'documentation', u'/doc'),
+ 'zh': ([u'documentation', u'doc'], u'/doc'),
}
# Template which should be replaced or removed.
@@ -158,7 +160,7 @@
Given a wiki source code text, return the cleaned up version.
"""
oldText = text
- if self.site.sitename()== u'commons:commons' and self.namespace == 6:
+ if self.site.sitename() == u'commons:commons' and self.namespace == 6:
text = self.commonsfiledesc(text)
text = self.fixSelfInterwiki(text)
text = self.standardizePageFooter(text)
@@ -195,7 +197,7 @@
Interwiki links to the site itself are displayed like local links.
Remove their language code prefix.
"""
- if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki':
+ if not self.talkpage and pywikibot.calledModuleName() != 'interwiki':
interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
% self.site.lang)
text = interwikiR.sub(r'[[\1]]', text)
@@ -212,6 +214,7 @@
3. additional information depending on local site policy
4. stars templates for featured and good articles
5. interwiki links
+
"""
starsList = [
u'bueno',
@@ -254,9 +257,13 @@
if not self.template and not '{{Personendaten' in text and \
not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
not self.site.lang in ('et', 'it', 'bg', 'ru'):
- categories = pywikibot.getCategoryLinks(text, site = self.site)
+ try:
+ categories = pywikibot.getCategoryLinks(text, site=self.site)
+ # there are categories like [[categoy:Foo {{#time:Y...}}]]
+ except InvalidTitle:
+ pass
- if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
+ if not self.talkpage: # and pywikibot.calledModuleName() <> 'interwiki':
subpage = False
if self.template:
loc = None
@@ -265,13 +272,13 @@
del tmpl
except KeyError:
pass
- if loc != None and loc in self.title:
+ if loc is not None and loc in self.title:
subpage = True
interwikiLinks = pywikibot.getLanguageLinks(
text, insite=self.site, template_subpage=subpage)
# Removing the interwiki
- text = pywikibot.removeLanguageLinks(text, site = self.site)
+ text = pywikibot.removeLanguageLinks(text, site=self.site)
# Removing the stars' issue
starstext = pywikibot.removeDisabledParts(text)
for star in starsList:
@@ -314,16 +321,16 @@
((interwikiLinks or hasCommentLine) and
self.site.language() == 'nn' or
(interwikiLinks and hasCommentLine) and
- self.site.language() == 'fr'):
+ self.site.language() == 'fr'):
text += '\r\n\r\n' + iw_msg
# Adding stars templates
if allstars:
- text = text.strip()+self.site.family.interwiki_text_separator
+ text = text.strip() + self.site.family.interwiki_text_separator
allstars.sort()
for element in allstars:
text += '%s\r\n' % element.strip()
if pywikibot.verbose:
- pywikibot.output(u'%s' %element.strip())
+ pywikibot.output(u'%s' % element.strip())
# Adding the interwiki
if interwikiLinks:
text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
@@ -370,8 +377,8 @@
if thisNs and namespaces:
text = pywikibot.replaceExcept(
text,
- r'\[\[\s*(' + '|'.join(namespaces) + \
- ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + \
+ r'\[\[\s*(' + '|'.join(namespaces) +
+ ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs +
':\g<nameAndLabel>]]', exceptions)
return text
@@ -383,13 +390,15 @@
# arz uses english stylish codes
if self.site.lang not in ['arz', 'ru']:
exceptions = ['nowiki', 'comment', 'math', 'pre']
- for magicWord in ['img_thumbnail', 'img_left', 'img_center',
'img_right', 'img_none',
- 'img_framed', 'img_frameless', 'img_border',
'img_upright',]:
+ for magicWord in ['img_thumbnail', 'img_left', 'img_center',
+ 'img_right', 'img_none', 'img_framed',
+ 'img_frameless', 'img_border', 'img_upright', ]:
aliases = self.site.siteinfo('magicwords').get(magicWord)
if not aliases: continue
- text = pywikibot.replaceExcept(text,
r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +')
*(?P<right>(\|.*?)?\]\])',
- r'[[\g<left>' + aliases[0] +
'\g<right>',
- exceptions)
+ text = pywikibot.replaceExcept(
+ text,
+ r'\[\[(?P<left>.+?:.+?\..+?\|) *(' + '|'.join(aliases) +
') *(?P<right>(\|.*?)?\]\])',
+ r'[[\g<left>' + aliases[0] + '\g<right>', exceptions)
return text
def cleanUpLinks(self, text):
@@ -409,7 +418,7 @@
try:
page = pywikibot.Page(self.site, titleWithSection)
#except pywikibot.InvalidTitle:
- except: #empty self link occures
+ except: # empty self link occures
return match.group()
if page.namespace() == 0:
# Replace underlines by spaces, also multiple underlines
@@ -469,7 +478,8 @@
newLink = "[[%s]]" % label
# Check if we can create a link with trailing characters
# instead of a pipelink
- elif self.site.sitename() != 'wikipedia:fa' and
len(titleWithSection) <= len(label) and \
+ elif self.site.sitename() != 'wikipedia:fa' and \
+ len(titleWithSection) <= len(label) and \
label[:len(titleWithSection)] == titleWithSection and
\
re.sub(trailR, '',
label[len(titleWithSection):]) == '':
@@ -509,7 +519,7 @@
# group <linktrail> is the link trail after ]] which are part of the word.
# note that the definition of 'letter' varies from language to language.
linkR = re.compile(
- r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + \
+ r'(?P<newline>[\n]*)\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' +
self.site.linktrail() + ')')
text = pywikibot.replaceExcept(text, linkR, handleOneLink,
@@ -526,24 +536,24 @@
def resolveHtmlEntities(self, text):
ignore = [
- 38, # Ampersand (&)
- 39, # Bugzilla 24093
- 60, # Less than (<)
- 62, # Great than (>)
- 91, # Opening bracket - sometimes used intentionally inside
links
- 93, # Closing bracket - sometimes used intentionally inside
links
- 124, # Vertical bar (??) - used intentionally in navigation
bar templates on de:
- 160, # Non-breaking space ( ) - not supported by Firefox
textareas
- 173, # Soft-hypen (­) - enable editing
- 8206, # left-to-right mark (&ltr;)
- 8207, # right-to-left mark (&rtl;)
+ 38, # Ampersand (&)
+ 39, # Bugzilla 24093
+ 60, # Less than (<)
+ 62, # Great than (>)
+ 91, # Opening bracket - sometimes used intentionally inside
links
+ 93, # Closing bracket - sometimes used intentionally inside
links
+ 124, # Vertical bar (??) - used intentionally in navigation bar
templates on de:
+ 160, # Non-breaking space ( ) - not supported by Firefox
textareas
+ 173, # Soft-hypen (­) - enable editing
+ 8206, # left-to-right mark (&ltr;)
+ 8207, # right-to-left mark (&rtl;)
]
# ignore ' see
http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=2320801
#if self.site.lang == 'eo':
# ignore += [39]
if self.template:
ignore += [58]
- text = pywikibot.html2unicode(text, ignore = ignore)
+ text = pywikibot.html2unicode(text, ignore=ignore)
return text
def validXhtml(self, text):
@@ -608,17 +618,21 @@
def replaceDeprecatedTemplates(self, text):
exceptions = ['comment', 'math', 'nowiki', 'pre']
- if self.site.family.name in deprecatedTemplates and self.site.lang in
deprecatedTemplates[self.site.family.name]:
+ if self.site.family.name in deprecatedTemplates and \
+ self.site.lang in deprecatedTemplates[self.site.family.name]:
for template in
deprecatedTemplates[self.site.family.name][self.site.lang]:
old = template[0]
new = template[1]
- if new == None:
+ if new is None:
new = ''
else:
- new = '{{'+new+'}}'
+ new = '{{%s}}' % new
if not self.site.nocapitalize:
old = '[' + old[0].upper() + old[0].lower() + ']' + old[1:]
- text = pywikibot.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' +
old + '(?P<parameters>\|[^}]+|)}}', new, exceptions)
+ text = pywikibot.replaceExcept(
+ text,
+ r'\{\{([mM][sS][gG]:)?%s(?P<parameters>\|[^}]+|)}}' % old,
+ new, exceptions)
return text
#from fixes.py
@@ -657,22 +671,31 @@
def fixHtml(self, text):
# Everything case-insensitive (?i)
# Keep in mind that MediaWiki automatically converts <br> to <br />
- exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
'startspace']
- text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''"
, exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
r"'''\1'''" , exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''" ,
exceptions)
- text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''"
, exceptions)
+ exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
+ 'startspace']
+ text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>', r"'''\1'''",
+ exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
+ r"'''\1'''", exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>', r"''\1''",
+ exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>', r"''\1''",
+ exceptions)
# horizontal line without attributes in a single line
- text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
r'\1----\2', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)([\r\n])<hr[ /]*>([\r\n])',
+ r'\1----\2', exceptions)
# horizontal line with attributes; can't be done with wiki syntax
# so we only make it XHTML compliant
- text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1
/>', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>', r'<hr \1
/>',
+ exceptions)
# a header where only spaces are in the same line
for level in range(1, 7):
- equals = '\\1%s \\2 %s\\3' % ("="*level, "="*level)
- text = pywikibot.replaceExcept(text,
- r'(?i)([\r\n]) *<h%d> *([^<]+?)
*</h%d> *([\r\n])'%(level, level),
- r'%s'%equals, exceptions)
+ equals = '\\1%s \\2 %s\\3' % ("=" * level, "=" * level)
+ text = pywikibot.replaceExcept(
+ text,
+ r'(?i)([\r\n]) *<h%d> *([^<]+?) *</h%d> *([\r\n])' % (level,
level),
+ r'%s' % equals,
+ exceptions)
# TODO: maybe we can make the bot replace <p> tags with \r\n's.
return text
@@ -683,15 +706,19 @@
# it should be name = " or name=" NOT name ="
text = re.sub(r'(?i)<ref +name(= *| *=)"', r'<ref name="', text)
#remove empty <ref/>-tag
- text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref
*>\s*</ref>)', r'', exceptions)
- text = pywikibot.replaceExcept(text,
r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>', r'<ref \1/>', exceptions)
+ text = pywikibot.replaceExcept(text, r'(?i)(<ref\s*/>|<ref
*>\s*</ref>)',
+ r'', exceptions)
+ text = pywikibot.replaceExcept(text,
r'(?i)<ref\s+([^>]+?)\s*>\s*</ref>',
+ r'<ref \1/>', exceptions)
return text
def fixStyle(self, text):
exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
'startspace']
# convert prettytable to wikitable class
if self.site.language in ('de', 'en'):
- text = pywikibot.replaceExcept(text,
ur'(class="[^"]*)prettytable([^"]*")', ur'\1wikitable\2', exceptions)
+ text = pywikibot.replaceExcept(text,
+
ur'(class="[^"]*)prettytable([^"]*")',
+ ur'\1wikitable\2', exceptions)
return text
def fixTypo(self, text):
@@ -726,22 +753,23 @@
]
# valid digits
digits = {
- 'ckb' : u'٠١٢٣٤٥٦٧٨٩',
- 'fa' : u'۰۱۲۳۴۵۶۷۸۹'
+ 'ckb': u'٠١٢٣٤٥٦٧٨٩',
+ 'fa': u'۰۱۲۳۴۵۶۷۸۹',
}
new = digits.pop(self.site.lang)
# This only works if there are only two items in digits dict
old = digits[digits.keys()[0]]
- faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك'+u'۱۲۳۴۵۶۷۸۹۰'
+ faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
# do not change inside file links
namespaces = list(self.site.namespace(6, all=True))
pattern = re.compile(u'\[\[(' + '|'.join(namespaces) + '):.+?\.\w+?
*(\|((\[\[.*?\]\])|.)*)?\]\]',
re.UNICODE)
#not to let bot edits in latin content
- exceptions.append(re.compile(u"[^"+faChrs+u"] *?\"*? *?,
*?[^"+faChrs+u"]"))
+ exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
+ % {'fa': faChrs}))
exceptions.append(pattern)
text = pywikibot.replaceExcept(text, u',', u'،', exceptions)
- if self.site.lang=='ckb':
+ if self.site.lang == 'ckb':
text = pywikibot.replaceExcept(text,
ur'ه([.،_<\]\s])',
ur'ە\1', exceptions)
@@ -760,7 +788,7 @@
# do not change digits inside html-tags
pattern = re.compile(u'<[/]*?[^</]+?[/]*?>', re.UNICODE)
exceptions.append(pattern)
- exceptions.append('table') #exclude tables for now
+ exceptions.append('table') # exclude tables for now
# replace digits
for i in xrange(0, 10):
text = pywikibot.replaceExcept(text, str(i), new[i], exceptions)
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn