[MediaWiki-commits] [Gerrit] []PEP8] changes - change (pywikibot/compat)

Xqt (Code Review) Thu, 28 Nov 2013 09:35:28 -0800

Xqt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/98109



Change subject: []PEP8] changes
......................................................................

[]PEP8] changes

Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
---
M spellcheck.py
1 file changed, 141 insertions(+), 113 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/09/98109/1

diff --git a/spellcheck.py b/spellcheck.py
index 5bb524c..1935422 100644
--- a/spellcheck.py
+++ b/spellcheck.py
@@ -51,15 +51,17 @@
 """
 #
 # (C) Andre Engels, 2005
-# (C) Pywikipedia bot team, 2006-2012
+# (C) Pywikibot team, 2006-2013
 #
 # Distributed under the terms of the MIT license.
 #
 __version__ = '$Id$'
 #
 
-import re, sys
-import string, codecs
+import re
+import sys
+import string
+import codecs
 import wikipedia as pywikibot
 from pywikibot import i18n
 import pagegenerators
@@ -69,7 +71,8 @@
     def __init__(self, text):
         self.style = text
 
-def distance(a,b):
+
+def distance(a, b):
     # Calculates the Levenshtein distance between a and b.
     # That is, the number of edits needed to change one into
     # the other, where one edit is the addition, removal or
@@ -78,23 +81,25 @@
     n, m = len(a), len(b)
     if n > m:
         # Make sure n <= m, to use O(min(n,m)) space
-        a,b = b,a
-        n,m = m,n
-    current = range(n+1)
-    for i in range(1,m+1):
-        previous, current = current, [i]+[0]*m
-        for j in range(1,n+1):
-            add, delete = previous[j]+1, current[j-1]+1
-            change = previous[j-1]
-            if a[j-1] != b[i-1]:
+        a, b = b, a
+        n, m = m, n
+    current = range(n + 1)
+    for i in range(1, m + 1):
+        previous, current = current, [i] + [0] * m
+        for j in range(1, n + 1):
+            add, delete = previous[j] + 1, current[j - 1] + 1
+            change = previous[j - 1]
+            if a[j - 1] != b[i - 1]:
                 change = change + 1
             current[j] = min(add, delete, change)
     return current[n]
 
+
 def getalternatives(string):
     # Find possible correct words for the incorrect word string
     basetext = pywikibot.input(
-        u"Give a text that should occur in the words to be checked.\nYou can choose to give no text, but this will make searching slow:")
+        u"Give a text that should occur in the words to be checked.\n"
+        u"You can choose to give no text, but this will make searching slow:")
     basetext = basetext.lower()
     simwords = {}
     for i in xrange(11):
@@ -102,7 +107,7 @@
     for alt in knownwords.keys():
         dothis = not basetext or basetext in alt.lower()
         if dothis:
-            diff = distance(string,alt)
+            diff = distance(string, alt)
             if diff < 11:
                 if knownwords[alt] == alt:
                     simwords[diff] += [alt]
@@ -114,31 +119,35 @@
             posswords += simwords[i]
     return posswords[:30]
 
+
 def uncap(string):
     # uncapitalize the first word of the string
     if len(string) > 1:
-        return string[0].lower()+string[1:]
+        return string[0].lower() + string[1:]
     else:
         return string.lower()
 
+
 def cap(string):
     # uncapitalize the first word of the string
-    return string[0].upper()+string[1:]
+    return string[0].upper() + string[1:]
+
 
 def askAlternative(word, context=None, title=''):
     correct = None
-    pywikibot.output(u"="*60)
+    pywikibot.output(u"=" * 60)
     pywikibot.output(u"Found unknown word '%s' in '%s'" % (word, title))
     if context:
         pywikibot.output(u"Context:")
-        pywikibot.output(u""+context)
-        pywikibot.output(u"-"*60)
+        pywikibot.output(u"" + context)
+        pywikibot.output(u"-" * 60)
     while not correct:
         for i in xrange(len(Word(word).getAlternatives())):
             pywikibot.output(u"%s: Replace by '%s'"
-                             % (i+1,
-                                Word(word).getAlternatives()[i].replace('_',' ')))
-        pywikibot.output(u"a: Add '%s' as correct"%word)
+                             % (i + 1,
+                                Word(word).getAlternatives()[i].replace('_',
+                                                                        ' ')))
+        pywikibot.output(u"a: Add '%s' as correct" % word)
         if word[0].isupper():
             pywikibot.output(u"c: Add '%s' as correct" % (uncap(word)))
         pywikibot.output(u"i: Ignore once (default)")
@@ -149,7 +158,8 @@
         pywikibot.output(u"*: Edit by hand")
         pywikibot.output(u"x: Do not check the rest of this page")
         answer = pywikibot.input(u":")
-        if answer == "": answer = "i"
+        if answer == "":
+            answer = "i"
         if answer in "aAiIpP":
             correct = word
             if answer in "aA":
@@ -166,9 +176,9 @@
                    correct != uncap(word) and \
                    correct != word:
                     try:
-                        knownwords[word] += [correct.replace(' ','_')]
+                        knownwords[word] += [correct.replace(' ', '_')]
                     except KeyError:
-                        knownwords[word] = [correct.replace(' ','_')]
+                        knownwords[word] = [correct.replace(' ', '_')]
                     newwords.append(word)
                 knownwords[correct] = correct
                 newwords.append(correct)
@@ -181,69 +191,71 @@
             if possible:
                 print "Found alternatives:"
                 for pos in possible:
-                    pywikibot.output("  %s"%pos)
+                    pywikibot.output("  %s" % pos)
             else:
                 print "No similar words found."
-        elif answer=="*":
+        elif answer == "*":
             correct = edit
-        elif answer=="x":
+        elif answer == "x":
             correct = endpage
         else:
             for i in xrange(len(Word(word).getAlternatives())):
-                if answer == str(i+1):
-                    correct = Word(word).getAlternatives()[i].replace('_',' ')
+                if answer == str(i + 1):
+                    correct = Word(word).getAlternatives()[i].replace('_', ' ')
     return correct
+
 
 def removeHTML(page):
     # TODO: Consider removing this; this stuff can be done by
     # cosmetic_changes.py
     result = page
-    result = result.replace('&Auml;',u'Ä')
-    result = result.replace('&auml;',u'ä')
-    result = result.replace('&Euml;',u'Ë')
-    result = result.replace('&euml;',u'ë')
-    result = result.replace('&Iuml;',u'Ï')
-    result = result.replace('&iuml;',u'ï')
-    result = result.replace('&Ouml;',u'Ö')
-    result = result.replace('&ouml;',u'ö')
-    result = result.replace('&Uuml;',u'Ü')
-    result = result.replace('&uuml;',u'ü')
-    result = result.replace('&Aacute;',u'Á')
-    result = result.replace('&aacute;',u'á')
-    result = result.replace('&Eacute;',u'É')
-    result = result.replace('&eacute;',u'é')
-    result = result.replace('&Iacute;',u'Í')
-    result = result.replace('&iacute;',u'í')
-    result = result.replace('&Oacute;',u'Ó')
-    result = result.replace('&oacute;',u'ó')
-    result = result.replace('&Uacute;',u'Ú')
-    result = result.replace('&uacute;',u'ú')
-    result = result.replace('&Agrave;',u'À')
-    result = result.replace('&agrave;',u'à')
-    result = result.replace('&Egrave;',u'È')
-    result = result.replace('&egrave;',u'è')
-    result = result.replace('&Igrave;',u'Ì')
-    result = result.replace('&igrave;',u'ì')
-    result = result.replace('&Ograve;',u'Ò')
-    result = result.replace('&ograve;',u'ò')
-    result = result.replace('&Ugrave;',u'Ù')
-    result = result.replace('&ugrave;',u'ù')
-    result = result.replace('&Acirc;',u'Â')
-    result = result.replace('&acirc;',u'â')
-    result = result.replace('&Ecirc;',u'Ê')
-    result = result.replace('&ecirc;',u'ê')
-    result = result.replace('&Icirc;',u'Î')
-    result = result.replace('&icirc;',u'î')
-    result = result.replace('&Ocirc;',u'Ô')
-    result = result.replace('&ocirc;',u'ô')
-    result = result.replace('&Ucirc;',u'Û')
-    result = result.replace('&ucirc;',u'û')
-    result = result.replace('&Aring;',u'Å')
-    result = result.replace('&aring;',u'å')
-    result = result.replace('&deg;',u'°')
+    result = result.replace('&Auml;', u'Ä')
+    result = result.replace('&auml;', u'ä')
+    result = result.replace('&Euml;', u'Ë')
+    result = result.replace('&euml;', u'ë')
+    result = result.replace('&Iuml;', u'Ï')
+    result = result.replace('&iuml;', u'ï')
+    result = result.replace('&Ouml;', u'Ö')
+    result = result.replace('&ouml;', u'ö')
+    result = result.replace('&Uuml;', u'Ü')
+    result = result.replace('&uuml;', u'ü')
+    result = result.replace('&Aacute;', u'Á')
+    result = result.replace('&aacute;', u'á')
+    result = result.replace('&Eacute;', u'É')
+    result = result.replace('&eacute;', u'é')
+    result = result.replace('&Iacute;', u'Í')
+    result = result.replace('&iacute;', u'í')
+    result = result.replace('&Oacute;', u'Ó')
+    result = result.replace('&oacute;', u'ó')
+    result = result.replace('&Uacute;', u'Ú')
+    result = result.replace('&uacute;', u'ú')
+    result = result.replace('&Agrave;', u'À')
+    result = result.replace('&agrave;', u'à')
+    result = result.replace('&Egrave;', u'È')
+    result = result.replace('&egrave;', u'è')
+    result = result.replace('&Igrave;', u'Ì')
+    result = result.replace('&igrave;', u'ì')
+    result = result.replace('&Ograve;', u'Ò')
+    result = result.replace('&ograve;', u'ò')
+    result = result.replace('&Ugrave;', u'Ù')
+    result = result.replace('&ugrave;', u'ù')
+    result = result.replace('&Acirc;', u'Â')
+    result = result.replace('&acirc;', u'â')
+    result = result.replace('&Ecirc;', u'Ê')
+    result = result.replace('&ecirc;', u'ê')
+    result = result.replace('&Icirc;', u'Î')
+    result = result.replace('&icirc;', u'î')
+    result = result.replace('&Ocirc;', u'Ô')
+    result = result.replace('&ocirc;', u'ô')
+    result = result.replace('&Ucirc;', u'Û')
+    result = result.replace('&ucirc;', u'û')
+    result = result.replace('&Aring;', u'Å')
+    result = result.replace('&aring;', u'å')
+    result = result.replace('&deg;', u'°')
     return result
 
-def spellcheck(page, checknames = True, knownonly = False, title=''):
+
+def spellcheck(page, checknames=True, knownonly=False, title=''):
     pageskip = []
     text = page
     if correct_html_codes:
@@ -251,30 +263,33 @@
     loc = 0
     while True:
         wordsearch = re.compile(r'([\s\=\<\>\_]*)([^\s\=\<\>\_]+)')
-        match = wordsearch.search(text,loc)
+        match = wordsearch.search(text, loc)
         if not match:
             # No more words on this page
             break
         loc += len(match.group(1))
         bigword = Word(match.group(2))
         smallword = bigword.derive()
-        if not Word(smallword).isCorrect(checkalternative = knownonly) and \
+        if not Word(smallword).isCorrect(checkalternative=knownonly) and \
            (checknames or not smallword[0].isupper()):
             replacement = askAlternative(smallword,
-                                         context=text[max(0,loc-40):loc + len(match.group(2))+40],
+                                         context=text[
+                                             max(0, loc - 40):loc + len(
+                                                 match.group(2)) + 40],
                                          title=title)
             if replacement == edit:
                 import editarticle
                 editor = editarticle.TextEditor()
                 # TODO: Don't know to which index to jump
-                newtxt = editor.edit(text, jumpIndex = 0, highlight=smallword)
+                newtxt = editor.edit(text, jumpIndex=0, highlight=smallword)
                 if newtxt:
                     text = newtxt
             elif replacement == endpage:
                 loc = len(text)
             else:
                 replacement = bigword.replace(replacement)
-                text = text[:loc] + replacement + text[loc+len(match.group(2)):]
+                text = text[:loc] + replacement + text[
+                    loc + len(match.group(2)):]
                 loc += len(replacement)
             if knownonly == 'plus' and text != page:
                 knownonly = False
@@ -288,13 +303,14 @@
 
 
 class Word(object):
-    def __init__(self,text):
+
+    def __init__(self, text):
         self.word = text
 
     def __str__(self):
         return self.word
 
-    def __cmp__(self,other):
+    def __cmp__(self, other):
         return self.word.__cmp__(str(other))
 
     def derive(self):
@@ -309,51 +325,56 @@
         # Remove barred links
         if shortword.rfind('|') != -1:
             if -1 < shortword.rfind('[[') < shortword.rfind('|'):
-                shortword = shortword[:shortword.rfind('[[')] + shortword[shortword.rfind('|')+1:]
+                shortword = shortword[:shortword.rfind('[[')] + shortword[
+                    shortword.rfind('|') + 1:]
             else:
-                shortword = shortword[shortword.rfind('|')+1:]
-        shortword = shortword.replace('[','')
-        shortword = shortword.replace(']','')
+                shortword = shortword[shortword.rfind('|') + 1:]
+        shortword = shortword.replace('[', '')
+        shortword = shortword.replace(']', '')
         # Remove non-alphanumerical characters at the start
         try:
             while shortword[0] in string.punctuation:
-                shortword=shortword[1:]
+                shortword = shortword[1:]
         except IndexError:
             return ""
         # Remove non-alphanumerical characters at the end; no need for the
         # try here because if things go wrong here, they should have gone
         # wrong before
         while shortword[-1] in string.punctuation:
-            shortword=shortword[:-1]
+            shortword = shortword[:-1]
         # Do not check URLs
         if shortword.startswith("http://"):
-            shortword=""
+            shortword = ""
         # Do not check 'words' with only numerical characters
         number = True
         for i in xrange(len(shortword)):
-            if not (shortword[i] in string.punctuation or shortword[i] in string.digits):
+            if not (shortword[i] in string.punctuation or
+                    shortword[i] in string.digits):
                 number = False
         if number:
             shortword = ""
         return shortword
 
-    def replace(self,rep):
-        # Replace the short form by 'rep'. Keeping simple for now - if the
-        # short form is part of the long form, replace it. If it is not, ask
-        # the user
+    def replace(self, rep):
+        """Replace the short form by 'rep'. Keeping simple for now - if the
+        short form is part of the long form, replace it. If it is not, ask the
+        user
+
+        """
         if rep == self.derive():
             return self.word
         if self.derive() not in self.word:
             return pywikibot.input(
                 u"Please give the result of replacing %s by %s in %s:"
                 % (self.derive(), rep, self.word))
-        return self.word.replace(self.derive(),rep)
+        return self.word.replace(self.derive(), rep)
 
-    def isCorrect(self,checkalternative = False):
-        # If checkalternative is True, the word will only be found incorrect if
-        # it is on the spelling list as a spelling error. Otherwise it will
-        # be found incorrect if it is not on the list as a correctly spelled
-        # word.
+    def isCorrect(self, checkalternative=False):
+        """If checkalternative is True, the word will only be found incorrect
+        if it is on the spelling list as a spelling error. Otherwise it will be
+        found incorrect if it is not on the list as a correctly spelled word.
+
+        """
         if self.word == "":
             return True
         if self.word in pageskip:
@@ -366,7 +387,8 @@
         except KeyError:
             pass
         if self.word != uncap(self.word):
-            return Word(uncap(self.word)).isCorrect(checkalternative=checkalternative)
+            return Word(uncap(self.word)).isCorrect(
+                checkalternative=checkalternative)
         else:
             if checkalternative:
                 if checklang == 'nl' and self.word.endswith("'s"):
@@ -401,11 +423,12 @@
     def declare_correct(self):
         knownwords[self.word] = self.word
 
-    def declare_alternative(self,alt):
+    def declare_alternative(self, alt):
         if not alt in knownwords[self.word]:
             knownwords[self.word].append(word)
             newwords.append(self.word)
         return self.alternatives
+
 
 def checkPage(page, checknames=True, knownonly=False):
     try:
@@ -413,10 +436,12 @@
     except pywikibot.Error:
         pass
     else:
-        text = spellcheck(text, checknames=checknames, knownonly=knownonly, title=page.title())
+        text = spellcheck(text, checknames=checknames, knownonly=knownonly,
+                          title=page.title())
         if text != page.get():
             summary = i18n.twtranslate(page.site, 'spellcheck-checking')
             page.put(text, summary)
+
 
 try:
     pageskip = []
@@ -460,10 +485,10 @@
     if not checklang:
         checklang = mysite.language()
     filename = pywikibot.config.datafilepath('externals/spelling',
-                                      'spelling-' + checklang + '.txt')
+                                             'spelling-' + checklang + '.txt')
     print "Getting wordlist"
     try:
-        f = codecs.open(filename, 'r', encoding = mysite.encoding())
+        f = codecs.open(filename, 'r', encoding=mysite.encoding())
         for line in f.readlines():
             # remove trailing newlines and carriage returns
             try:
@@ -495,10 +520,13 @@
     raise
 try:
     if newpages:
-        for (page, date, length, loggedIn, user, comment) in pywikibot.getSite().newpages(1000):
+        for (page, date, length, loggedIn, user,
+             comment) in pywikibot.getSite().newpages(1000):
             checkPage(page, checknames, knownonly)
     elif start:
-        for page in pagegenerators.PreloadingGenerator(pagegenerators.AllpagesPageGenerator(start=start,includeredirects=False)):
+        for page in pagegenerators.PreloadingGenerator(
+                pagegenerators.AllpagesPageGenerator(start=start,
+                                                     includeredirects=False)):
             checkPage(page, checknames, knownonly)
 
     if longpages:
@@ -509,7 +537,7 @@
         title = ' '.join(title)
         while title != '':
             try:
-                page = pywikibot.Page(mysite,title)
+                page = pywikibot.Page(mysite, title)
                 text = page.get()
             except pywikibot.NoPage:
                 print "Page does not exist."
@@ -521,14 +549,14 @@
 finally:
     pywikibot.stopme()
     filename = pywikibot.config.datafilepath('externals/spelling',
-                                      'spelling-' + checklang + '.txt')
+                                             'spelling-' + checklang + '.txt')
     if rebuild:
         list = knownwords.keys()
         list.sort()
-        f = codecs.open(filename, 'w', encoding = mysite.encoding())
+        f = codecs.open(filename, 'w', encoding=mysite.encoding())
     else:
         list = newwords
-        f = codecs.open(filename, 'a', encoding = mysite.encoding())
+        f = codecs.open(filename, 'a', encoding=mysite.encoding())
     for word in list:
         if Word(word).isCorrect():
             if word != uncap(word):
@@ -536,7 +564,7 @@
                     # Capitalized form of a word that is in the list
                     # uncapitalized
                     continue
-            f.write("1 %s\n"%word)
+            f.write("1 %s\n" % word)
         else:
-            f.write("0 %s %s\n"%(word," ".join(knownwords[word])))
+            f.write("0 %s %s\n" % (word, " ".join(knownwords[word])))
     f.close()

-- 
To view, visit https://gerrit.wikimedia.org/r/98109
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] []PEP8] changes - change (pywikibot/compat)

Reply via email to