Revision: 7630
Author: xqt
Date: 2009-11-11 13:20:55 +0000 (Wed, 11 Nov 2009)
Log Message:
-----------
aditional -lang option,
new pattern for timestamp,
ignore tzset if it's not available
Modified Paths:
--------------
trunk/pywikipedia/archivebot.py
Modified: trunk/pywikipedia/archivebot.py
===================================================================
--- trunk/pywikipedia/archivebot.py 2009-11-11 13:03:30 UTC (rev 7629)
+++ trunk/pywikipedia/archivebot.py 2009-11-11 13:20:55 UTC (rev 7630)
@@ -26,6 +26,7 @@
-c PAGE, --calc=PAGE calculate key for PAGE and exit
-l LOCALE, --locale=LOCALE
switch to locale LOCALE
+ -L LANG, --lang=LANG set the language code to work on
"""
#
# (C) Misza13, 2006-2007
@@ -35,8 +36,6 @@
__version__ = '$Id$'
#
import wikipedia, pagegenerators, query
-Site = wikipedia.getSite()
-
import os, re, time, locale, traceback, string, urllib
try: #Get a constructor for the MD5 hash object
@@ -46,8 +45,6 @@
import md5
new_hash = md5.md5
-
-language = Site.language()
messages = {
'_default': {
'ArchiveFull': u'(ARCHIVE FULL)',
@@ -124,7 +121,8 @@
},
}
-def message(key, lang=Site.language()):
+def message(key):
+ lang = wikipedia.getSite().language()
if not lang in messages:
lang = '_default'
return messages[lang][key]
@@ -267,7 +265,11 @@
TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kl\.\W*(\d\d):(\d\d)
\(.*?\)', line)
#3. joulukuuta 2008 kello 16.26 (EET)
if not TM:
- TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello
\W*(\d\d).(\d\d) \(.*?\)', line)
+ TM = re.search(r'(\d\d?)\. (\S+) (\d\d\d\d) kello \W*(\d\d).(\d\d)
\(.*?\)', line)
+ if not TM:
+# 14:23, 12. Jan. 2009 (UTC)
+ pat = re.compile(r'(\d\d):(\d\d), (\d\d?)\. (\S+)\.? (\d\d\d\d)
\(UTC\)')
+ TM = pat.search(line)
if TM:
# wikipedia.output(TM)
TIME = txt2timestamp(TM.group(0),"%d. %b %Y kl. %H:%M (%Z)")
@@ -287,6 +289,8 @@
TIME = txt2timestamp(TM.group(0),"%H:%M, %b %d, %Y (%Z)")
if not TIME:
TIME = txt2timestamp(TM.group(0),"%d. %Bta %Y kello %H.%M
(%Z)")
+ if not TIME:
+ TIME = txt2timestamp(TM.group(0),"%H:%M, %d. %b. %Y (%Z)")
if TIME:
self.timestamp = max(self.timestamp,time.mktime(TIME))
# wikipedia.output(u'Time to be parsed: %s' % TM.group(0))
@@ -318,10 +322,11 @@
Feed threads to it and run an update() afterwards."""
#TODO: Make it a subclass of wikipedia.Page
- def __init__(self, title, archiver, vars=None):
+ def __init__(self, site, title, archiver, vars=None):
+ self.site = site
self.title = title
self.threads = []
- self.Page = wikipedia.Page(Site,self.title)
+ self.Page = wikipedia.Page(self.site, self.title)
self.full = False
self.archiver = archiver
self.vars = vars
@@ -391,7 +396,7 @@
pageSummary = message('PageSummary')
archiveSummary = message('ArchiveSummary')
- def __init__(self, Page, tpl, salt, force=False):
+ def __init__(self, site, Page, tpl, salt, force=False):
self.attributes = {
'algo' : ['old(24h)',False],
'archive' : ['',False],
@@ -402,7 +407,8 @@
self.tpl = tpl
self.salt = salt
self.force = force
- self.Page = DiscussionPage(Page.title(),self)
+ self.site = site
+ self.Page = DiscussionPage(self.site, Page.title(),self)
self.loadConfig()
self.commentParams = {
'from' : self.Page.title,
@@ -430,7 +436,7 @@
def loadConfig(self):
hdrlines = self.Page.header.split('\n')
-# wikipedia.output(u'Looking for: %s' % self.tpl)
+ wikipedia.output(u'Looking for: %s' % self.tpl)
mode = 0
for line in hdrlines:
if mode == 0 and re.search('{{'+self.tpl,line):
@@ -497,7 +503,7 @@
return set(whys)
def run(self):
- if not self.Page.Page.botMayEdit(Site.username):
+ if not self.Page.Page.botMayEdit(wikipedia.getSite().username):
return
whys = self.analyzePage()
if self.archivedThreads < int(self.get('minthreadstoarchive',2)):
@@ -538,17 +544,20 @@
help='calculate key for PAGE and exit', metavar='PAGE')
parser.add_option('-l', '--locale', dest='locale',
help='switch to locale LOCALE', metavar='LOCALE')
+ parser.add_option('-L', '--lang', dest='lang',
+ help='current language code', metavar='lang')
parser.add_option('-T', '--timezone', dest='timezone',
help='switch timezone to TIMEZONE', metavar='TIMEZONE')
(options, args) = parser.parse_args()
-
+ Site = wikipedia.getSite(code=options.lang)
+ wikipedia.handleArgs('-lang:%s' %options.lang)
+ language = Site.language()
if options.locale:
locale.setlocale(locale.LC_TIME,options.locale) #Required for english
month names
- if options.timezone:
+ if hasattr(time, 'tzset') and options.timezone:
os.environ['TZ'] = options.timezone
- #Or use the preset value
- time.tzset()
+ time.tzset()
if options.calc:
if not options.salt:
@@ -591,7 +600,7 @@
for pg in pagelist:
try: #Catching exceptions, so that errors in one page do not bail
out the entire process
- Archiver = PageArchiver(pg, a, salt, force)
+ Archiver = PageArchiver(Site, pg, a, salt, force)
Archiver.run()
time.sleep(10)
except:
@@ -602,5 +611,7 @@
if __name__ == '__main__':
- main()
- wikipedia.stopme()
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn