Xqt has uploaded a new change for review.
https://gerrit.wikimedia.org/r/95760
Change subject: [PEP8] changes for archivebot.py
......................................................................
[PEP8] changes for archivebot.py
Change-Id: I31f9063e0bb5ac9c8e3263f83d041fc1a2c9dfed
---
M archivebot.py
1 file changed, 137 insertions(+), 104 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/60/95760/1
diff --git a/archivebot.py b/archivebot.py
index d0f79ee..bb11637 100644
--- a/archivebot.py
+++ b/archivebot.py
@@ -76,21 +76,30 @@
#
__version__ = '$Id$'
#
-import wikipedia as pywikibot
-from pywikibot import i18n
-import pagegenerators, query
-Site = pywikibot.getSite()
-
-import os, re, time, locale, traceback, string, urllib, unicodedata
-
-try: #Get a constructor for the MD5 hash object
+import os
+import re
+import time
+import locale
+import traceback
+import string
+import urllib
+import unicodedata
+try: # Get a constructor for the MD5 hash object
import hashlib
new_hash = hashlib.md5
-except ImportError: #Old python?
+except ImportError: # Old python?
import md5
new_hash = md5.md5
+import wikipedia as pywikibot
+from pywikibot import i18n
+import pagegenerators
+import query
+
+
+Site = pywikibot.getSite()
language = Site.language()
+
def message(key, lang=Site.language()):
return i18n.twtranslate(lang, key)
@@ -102,7 +111,9 @@
class MissingConfigError(pywikibot.Error):
"""The config is missing in the header (either it's in one of the threads
- or transcluded from another page)."""
+ or transcluded from another page).
+
+ """
class AlgorithmError(MalformedConfigError):
@@ -111,20 +122,25 @@
class ArchiveSecurityError(pywikibot.Error):
"""Archive is not a subpage of page being archived and key not specified
- (or incorrect)."""
+ (or incorrect).
+
+ """
def str2time(str):
"""Accepts a string defining a time period:
7d - 7 days
36h - 36 hours
- Returns the corresponding time, measured in seconds."""
+ Returns the corresponding time, measured in seconds.
+
+ """
if str[-1] == 'd':
- return int(str[:-1])*24*3600
+ return int(str[:-1]) * 24 * 3600
elif str[-1] == 'h':
- return int(str[:-1])*3600
+ return int(str[:-1]) * 3600
else:
return int(str)
+
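(Illustration only, not part of the patch: expected str2time() values for the
documented inputs, doctest-style; assumes the function is in scope.)

    >>> str2time('7d')     # 7 * 24 * 3600
    604800
    >>> str2time('36h')    # 36 * 3600
    129600
    >>> str2time('120')    # bare number: already seconds
    120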
def str2size(str):
"""Accepts a string defining a size:
@@ -132,17 +148,20 @@
150K - 150 kilobytes
2M - 2 megabytes
Returns a tuple (size,unit), where size is an integer and unit is
- 'B' (bytes) or 'T' (threads)."""
- if str[-1] in string.digits: #TODO: de-uglify
- return (int(str),'B')
+ 'B' (bytes) or 'T' (threads).
+
+ """
+ if str[-1] in string.digits: # TODO: de-uglify
+ return (int(str), 'B')
elif str[-1] in ['K', 'k']:
- return (int(str[:-1])*1024,'B')
+ return (int(str[:-1]) * 1024, 'B')
elif str[-1] == 'M':
- return (int(str[:-1])*1024*1024,'B')
+ return (int(str[:-1]) * 1024 * 1024, 'B')
elif str[-1] == 'T':
- return (int(str[:-1]),'T')
+ return (int(str[:-1]), 'T')
else:
- return (int(str[:-1])*1024,'B')
+ return (int(str[:-1]) * 1024, 'B')
+
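(Illustration only, not part of the patch: the (size, unit) tuples str2size()
returns for the documented inputs; assumes the function is in scope.)

    >>> str2size('150K')   # 150 * 1024 bytes
    (153600, 'B')
    >>> str2size('2M')     # 2 * 1024 * 1024 bytes
    (2097152, 'B')
    >>> str2size('5T')     # counted in threads, not bytes
    (5, 'T')
    >>> str2size('120')    # bare digits are bytes
    (120, 'B')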
def int2month(num):
"""Returns the locale's full name of month 'num' (1-12)."""
@@ -150,29 +169,36 @@
return locale.nl_langinfo(locale.MON_1+num-1).decode('utf-8')
Months = ['january', 'february', 'march', 'april', 'may_long', 'june',
'july', 'august', 'september', 'october', 'november', 'december']
- return Site.mediawiki_message(Months[num-1])
+ return Site.mediawiki_message(Months[num - 1])
+
def int2month_short(num):
"""Returns the locale's abbreviated name of month 'num' (1-12)."""
if hasattr(locale, 'nl_langinfo'):
#filter out non-alpha characters
- return ''.join([c for c in locale.nl_langinfo(locale.ABMON_1+num-1).decode('utf-8') if c.isalpha()])
+ return ''.join([c for c in
+ locale.nl_langinfo(
+ locale.ABMON_1 + num - 1).decode('utf-8')
+ if c.isalpha()])
Months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
- return Site.mediawiki_message(Months[num-1])
+ return Site.mediawiki_message(Months[num - 1])
+
def txt2timestamp(txt, format):
"""Attempts to convert the timestamp 'txt' according to given 'format'.
- On success, returns the time tuple; on failure, returns None."""
+ On success, returns the time tuple; on failure, returns None.
+
+ """
## print txt, format
try:
- return time.strptime(txt,format)
+ return time.strptime(txt, format)
except ValueError:
try:
- return time.strptime(txt.encode('utf8'),format)
+ return time.strptime(txt.encode('utf8'), format)
except:
pass
- return None
+
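(Illustration only, not part of the patch: txt2timestamp() is a forgiving
wrapper around time.strptime, and whether a format matches can depend on the
active locale, which is why failures return None instead of raising. Under an
English locale, the "%H:%M, %B %d, %Y (%Z)" format tried below behaves like:)

    >>> import time
    >>> time.strptime('13:42, December 8, 2007 (UTC)',
    ...               '%H:%M, %B %d, %Y (%Z)')[:5]
    (2007, 12, 8, 13, 42)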
def generateTransclusions(Site, template, namespaces=[]):
pywikibot.output(u'Fetching template transclusions...')
@@ -186,12 +212,14 @@
class DiscussionThread(object):
- """An object representing a discussion thread on a page, that is something
of the form:
+ """An object representing a discussion thread on a page, that is something
+ of the form:
== Title of thread ==
Thread content here. ~~~~
:Reply, etc. ~~~~
+
"""
def __init__(self, title):
@@ -201,7 +229,7 @@
def __repr__(self):
return '%s("%s",%d bytes)' \
- % (self.__class__.__name__,self.title,len(self.content))
+ % (self.__class__.__name__, self.title, len(self.content))
def feedLine(self, line):
if not self.content and not line:
@@ -216,33 +244,38 @@
# 2007. december 8., 13:42 (CET)
TM = re.search(r'(\d\d):(\d\d), (\d\d?) (\S+) (\d\d\d\d) \(.*?\)',
line)
if not TM:
- TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d\d):(\d\d), (\S+) (\d\d?), (\d\d\d\d) \(.*?\)',
+ line)
if not TM:
- TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d{4})\. (\S+) (\d\d?)\., (\d\d:\d\d) \(.*?\)',
+ line)
# 18. apr 2006 kl.18:39 (UTC)
# 4. nov 2006 kl. 20:46 (CET)
if not TM:
- TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d) \(.*?\)',
+ line)
#3. joulukuuta 2008 kello 16.26 (EET)
if not TM:
- TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d) \(.*?\)',
+ line)
if not TM:
# 14:23, 12. Jan. 2009 (UTC)
pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d) \((?:UTC|CES?T)\)')
TM = pat.search(line)
# ro.wiki: 4 august 2012 13:01 (EEST)
if not TM:
- TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d\d?) (\S+) (\d\d\d\d) (\d\d):(\d\d) \(.*?\)',
+ line)
if TM:
# Strip away all diacritics in the Mn ('Mark, non-spacing') category
- # NFD decomposition splits combined characters (e.g. 'ä', LATIN SMALL
- # LETTER A WITH DIAERESIS) into two entities: LATIN SMALL LETTER A
- # and COMBINING DIAERESIS. The latter falls in the Mn category and is
- # filtered out, resuling in 'a'.
+ # NFD decomposition splits combined characters (e.g. 'ä',
+ # LATIN SMALL LETTER A WITH DIAERESIS) into two entities:
+ # LATIN SMALL LETTER A and COMBINING DIAERESIS. The latter falls
+ # in the Mn category and is filtered out, resulting in 'a'.
_TM = ''.join(c for c in unicodedata.normalize('NFD', TM.group(0))
- if unicodedata.category(c) != 'Mn')
+ if unicodedata.category(c) != 'Mn')
- TIME = txt2timestamp(_TM,"%d. %b %Y kl. %H:%M (%Z)")
+ TIME = txt2timestamp(_TM, "%d. %b %Y kl. %H:%M (%Z)")
if not TIME:
TIME = txt2timestamp(_TM, "%Y. %B %d., %H:%M (%Z)")
if not TIME:
@@ -264,7 +297,7 @@
if not TIME:
TIME = txt2timestamp(_TM, "%H:%M, %B %d, %Y (%Z)")
if not TIME:
- TIME = txt2timestamp(_TM,"%d. %Bta %Y kello %H.%M (%Z)")
+ TIME = txt2timestamp(_TM, "%d. %Bta %Y kello %H.%M (%Z)")
if not TIME:
TIME = txt2timestamp(_TM, "%d %B %Y %H:%M (%Z)")
if not TIME:
@@ -282,9 +315,9 @@
def toText(self):
return "== " + self.title + ' ==\n\n' + self.content
- def shouldBeArchived(self,Archiver):
+ def shouldBeArchived(self, Archiver):
algo = Archiver.get('algo')
- reT = re.search(r'^old\((.*)\)$',algo)
+ reT = re.search(r'^old\((.*)\)$', algo)
if reT:
if not self.timestamp:
return ''
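(Illustration only, not part of the patch: how an 'algo' value drives the age
check above; the regex extracts the period from 'old(...)' and str2time()
turns it into seconds.)

    >>> import re
    >>> re.search(r'^old\((.*)\)$', 'old(24h)').group(1)
    '24h'
    >>> 24 * 3600    # str2time('24h'): threads older than this get archived
    86400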
@@ -298,7 +331,9 @@
class DiscussionPage(pywikibot.Page):
"""A class that represents a single discussion page as well as an archive
- page. Feed threads to it and run an update() afterwards."""
+ page. Feed threads to it and run an update() afterwards.
+
+ """
def __init__(self, title, archiver, vars=None):
pywikibot.Page.__init__(self, Site, title)
@@ -321,12 +356,12 @@
self.archives = {}
self.archivedThreads = 0
lines = self.get().split('\n')
- found = False #Reading header
+ found = False # Reading header
curThread = None
for line in lines:
- threadHeader = re.search('^== *([^=].*?) *== *$',line)
+ threadHeader = re.search('^== *([^=].*?) *== *$', line)
if threadHeader:
- found = True #Reading threads now
+ found = True # Reading threads now
if curThread:
self.threads.append(curThread)
curThread = DiscussionThread(threadHeader.group(1))
@@ -339,7 +374,7 @@
self.threads.append(curThread)
pywikibot.output(u'%d Threads found on %s' % (len(self.threads), self))
- def feedThread(self, thread, maxArchiveSize=(250*1024,'B')):
+ def feedThread(self, thread, maxArchiveSize=(250 * 1024, 'B')):
self.threads.append(thread)
self.archivedThreads += 1
if maxArchiveSize[1] == 'B':
@@ -353,11 +388,11 @@
def size(self):
return len(self.header) + sum([t.size() for t in self.threads])
- def update(self, summary, sortThreads = False):
+ def update(self, summary, sortThreads=False):
if sortThreads:
pywikibot.output(u'Sorting threads...')
- self.threads.sort(key = lambda t: t.timestamp)
- newtext = re.sub('\n*$', '\n\n', self.header) #Fix trailing newlines
+ self.threads.sort(key=lambda t: t.timestamp)
+ newtext = re.sub('\n*$', '\n\n', self.header) # Fix trailing newlines
for t in self.threads:
newtext += t.toText()
if self.full:
@@ -374,25 +409,25 @@
def __init__(self, Page, tpl, salt, force=False):
self.attributes = {
- 'algo' : ['old(24h)',False],
- 'archive' : ['',False],
- 'maxarchivesize' : ['1000M',False],
- 'counter' : ['1',False],
- 'key' : ['',False],
- }
+ 'algo': ['old(24h)', False],
+ 'archive': ['', False],
+ 'maxarchivesize': ['1000M', False],
+ 'counter': ['1', False],
+ 'key': ['', False],
+ }
self.tpl = tpl
self.salt = salt
self.force = force
self.Page = DiscussionPage(Page.title(), self)
self.loadConfig()
self.commentParams = {
- 'from' : self.Page.title(),
- }
+ 'from': self.Page.title(),
+ }
self.archives = {}
self.archivedThreads = 0
def get(self, attr, default=''):
- return self.attributes.get(attr,[default])[0]
+ return self.attributes.get(attr, [default])[0]
def set(self, attr, value, out=True):
if attr == 'archive':
@@ -404,15 +439,15 @@
and a != 'maxage']
def attr2text(self):
- return '{{%s\n%s\n}}' \
- % (self.tpl,
- '\n'.join(['|%s = %s'%(a,self.get(a))
- for a in self.saveables()]))
+ return '{{%s\n%s\n}}' % (self.tpl,
+ '\n'.join(['|%s = %s '
+ % (a, self.get(a))
+ for a in self.saveables()]))
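(Illustration only, not part of the patch: the shape attr2text() renders; the
template name and attribute subset here are made up.)

    >>> print '{{%s\n%s\n}}' % ('User:ArchiveBot/config',
    ...                         '\n'.join(['|algo = old(24h)', '|counter = 1']))
    {{User:ArchiveBot/config
    |algo = old(24h)
    |counter = 1
    }}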
def key_ok(self):
s = new_hash()
s.update(self.salt+'\n')
- s.update(self.Page.title().encode('utf8')+'\n')
+ s.update(self.Page.title().encode('utf8') + '\n')
return self.get('key') == s.hexdigest()
def loadConfig(self):
@@ -439,12 +474,13 @@
if not archive:
return
if not self.force \
- and not self.Page.title()+'/' == archive[:len(self.Page.title())+1] \
+ and not self.Page.title() + '/' == archive[
+ :len(self.Page.title()) + 1] \
and not self.key_ok():
raise ArchiveSecurityError
if not archive in self.archives:
self.archives[archive] = DiscussionPage(archive, self, vars)
- return self.archives[archive].feedThread(thread,maxArchiveSize)
+ return self.archives[archive].feedThread(thread, maxArchiveSize)
def analyzePage(self):
maxArchSize = str2size(self.get('maxarchivesize'))
@@ -456,9 +492,9 @@
pywikibot.output(u'Processing %d threads' % len(oldthreads))
for t in oldthreads:
if len(oldthreads) - self.archivedThreads \
- <= int(self.get('minthreadsleft',5)):
+ <= int(self.get('minthreadsleft', 5)):
self.Page.threads.append(t)
- continue #Because there's too little threads left.
+ continue # Because there's too little threads left.
# TODO: Make an option so that unstamped (unsigned) posts get
# archived.
why = t.shouldBeArchived(self)
@@ -466,17 +502,17 @@
archive = self.get('archive')
TStuple = time.gmtime(t.timestamp)
vars = {
- 'counter' : archCounter,
- 'year' : TStuple[0],
- 'month' : TStuple[1],
- 'monthname' : int2month(TStuple[1]),
- 'monthnameshort' : int2month_short(TStuple[1]),
- 'week' : int(time.strftime('%W',TStuple)),
- }
+ 'counter': archCounter,
+ 'year': TStuple[0],
+ 'month': TStuple[1],
+ 'monthname': int2month(TStuple[1]),
+ 'monthnameshort': int2month_short(TStuple[1]),
+ 'week': int(time.strftime('%W', TStuple)),
+ }
archive = archive % vars
- if self.feedArchive(archive,t,maxArchSize,vars):
+ if self.feedArchive(archive, t, maxArchSize, vars):
archCounter += 1
- self.set('counter',str(archCounter))
+ self.set('counter', str(archCounter))
whys.append(why)
self.archivedThreads += 1
else:
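(Illustration only, not part of the patch: the vars dictionary above feeds
plain %-substitution, so an 'archive' setting can derive the target page name
from the thread's timestamp; the page titles are hypothetical.)

    >>> vars = {'counter': 3, 'year': 2012, 'monthname': 'August'}
    >>> 'Talk:Example/Archive %(counter)d' % vars
    'Talk:Example/Archive 3'
    >>> 'Talk:Example/%(monthname)s %(year)d' % vars
    'Talk:Example/August 2012'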
@@ -487,7 +523,7 @@
if not self.Page.botMayEdit(Site.username):
return
whys = self.analyzePage()
- if self.archivedThreads < int(self.get('minthreadstoarchive',2)):
+ if self.archivedThreads < int(self.get('minthreadstoarchive', 2)):
# We might not want to archive a measly few threads
# (lowers edit frequency)
pywikibot.output(u'There are only %d Threads. Skipping'
@@ -504,11 +540,11 @@
self.archives[a].update(comment)
#Save the page itself
- rx = re.compile('{{'+self.tpl+'\n.*?\n}}',re.DOTALL)
- self.Page.header = rx.sub(self.attr2text(),self.Page.header)
+ rx = re.compile('{{'+self.tpl+'\n.*?\n}}', re.DOTALL)
+ self.Page.header = rx.sub(self.attr2text(), self.Page.header)
self.commentParams['count'] = self.archivedThreads
- self.commentParams['archives'] \
- = ', '.join(['[['+a.title()+']]' for a in self.archives.values()])
+ self.commentParams['archives'] = ', '.join(
+ ['[[%s]]' % a.title() for a in self.archives.values()])
if not self.commentParams['archives']:
self.commentParams['archives'] = '/dev/null'
self.commentParams['why'] = ', '.join(whys)
@@ -523,30 +559,30 @@
from optparse import OptionParser
parser = OptionParser(usage='usage: %prog [options] [LINKPAGE(s)]')
parser.add_option('-f', '--file', dest='filename',
- help='load list of pages from FILE', metavar='FILE')
+ help='load list of pages from FILE', metavar='FILE')
parser.add_option('-p', '--page', dest='pagename',
- help='archive a single PAGE', metavar='PAGE')
+ help='archive a single PAGE', metavar='PAGE')
parser.add_option('-n', '--namespace', dest='namespace', type='int',
- help='only archive pages from a given namespace')
+ help='only archive pages from a given namespace')
parser.add_option('-s', '--salt', dest='salt',
- help='specify salt')
+ help='specify salt')
parser.add_option('-F', '--force', action='store_true', dest='force',
- help='override security options')
+ help='override security options')
parser.add_option('-c', '--calc', dest='calc',
- help='calculate key for PAGE and exit', metavar='PAGE')
+ help='calculate key for PAGE and exit', metavar='PAGE')
parser.add_option('-l', '--locale', dest='locale',
- help='switch to locale LOCALE', metavar='LOCALE')
+ help='switch to locale LOCALE', metavar='LOCALE')
parser.add_option('-L', '--lang', dest='lang',
- help='current language code', metavar='lang')
+ help='current language code', metavar='lang')
parser.add_option('-T', '--timezone', dest='timezone',
- help='switch timezone to TIMEZONE', metavar='TIMEZONE')
+ help='switch timezone to TIMEZONE', metavar='TIMEZONE')
parser.add_option('-S', '--simulate', action='store_true', dest='simulate',
- help='Do not change pages, just simulate')
+ help='Do not change pages, just simulate')
(options, args) = parser.parse_args()
if options.locale:
#Required for english month names
- locale.setlocale(locale.LC_TIME,options.locale)
+ locale.setlocale(locale.LC_TIME, options.locale)
if options.timezone:
os.environ['TZ'] = options.timezone
@@ -558,8 +594,8 @@
if not options.salt:
parser.error('Note: you must specify a salt to calculate a key')
s = new_hash()
- s.update(options.salt+'\n')
- s.update(options.calc+'\n')
+ s.update(options.salt + '\n')
+ s.update(options.calc + '\n')
pywikibot.output(u'key = ' + s.hexdigest())
return
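(Illustration only, not part of the patch: the --calc branch above and
key_ok() compute the same digest; salt and page title here are made up.)

    >>> import hashlib
    >>> s = hashlib.md5()
    >>> s.update('somesalt' + '\n')
    >>> s.update(u'Talk:Example'.encode('utf8') + '\n')
    >>> key = s.hexdigest()    # the value to paste into the 'key' parameter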
@@ -588,23 +624,19 @@
for a in args:
pagelist = []
if not options.filename and not options.pagename:
- #for pg in pywikibot.Page(Site,a).getReferences(follow_redirects=False,onlyTemplateInclusion=True):
- if not options.namespace == None:
+ if options.namespace is not None:
ns = [str(options.namespace)]
else:
ns = []
for pg in generateTransclusions(Site, a, ns):
pagelist.append(pg)
if options.filename:
- for pg in file(options.filename,'r').readlines():
- pagelist.append(pywikibot.Page(Site,pg))
+ for pg in file(options.filename, 'r').readlines():
+ pagelist.append(pywikibot.Page(Site, pg))
if options.pagename:
pagelist.append(pywikibot.Page(Site, options.pagename,
defaultNamespace=3))
-
pagelist = sorted(pagelist)
- #if not options.namespace == None:
- #    pagelist = [pg for pg in pagelist if pg.namespace()==options.namespace]
for pg in iter(pagelist):
pywikibot.output(u'Processing %s' % pg)
# Catching exceptions, so that errors in one page do not bail out
@@ -617,6 +649,7 @@
pywikibot.output(u'Error occured while processing page %s' % pg)
traceback.print_exc()
+
if __name__ == '__main__':
try:
main()
--
To view, visit https://gerrit.wikimedia.org/r/95760
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I31f9063e0bb5ac9c8e3263f83d041fc1a2c9dfed
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits