Revision: 7815
Author: xqt
Date: 2009-12-22 14:39:37 +0000 (Tue, 22 Dec 2009)
Log Message:
-----------
retrieving movelog pages via API
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2009-12-22 10:07:33 UTC (rev 7814)
+++ trunk/pywikipedia/redirect.py 2009-12-22 14:39:37 UTC (rev 7815)
@@ -19,17 +19,15 @@
-xml Retrieve information from a local XML dump
(http://download.wikimedia.org). Argument can also be given as
"-xml:filename.xml". Cannot be used with -api or -moves.
- If neither of -xml -api -moves is given, info will be loaded
- from a special page of the live wiki.
+-moves Use the page move log to find double-redirect candidates. Only
+ works with action "double", does not work with -xml. You may
+ use -api option for retrieving pages via API
+
-api Retrieve information from the wiki via MediaWikis application
- program interface (API). Cannot be used with -xml or -moves.
- If neither of -xml -api -moves is given, info will be loaded
- from a special page of the live wiki.
+ program interface (API). Cannot be used with -xml.
--moves Use the page move log to find double-redirect candidates. Only
- works with action "double", does not work with either -xml, or
- -api. If neither of -xml -api -moves is given, info will be
+ NOTE: If neither of -xml -api -moves is given, info will be
loaded from a special page of the live wiki.
-namespace:n Namespace to process. Works only with an XML dump, or the API
@@ -63,7 +61,7 @@
from __future__ import generators
import wikipedia, config, query
import xmlreader
-import re, sys
+import re, sys, datetime
__version__='$Id$'
@@ -411,7 +409,7 @@
yield key
def retrieve_double_redirects(self):
- if self.use_api:
+ if self.use_api and not self.use_move_log:
count = 0
for (pagetitle, type, target, final) \
in self.get_redirects_via_api(maxlen=2):
@@ -424,7 +422,11 @@
elif self.xmlFilename == None:
if self.use_move_log:
- for redir_page in self.get_moved_pages_redirects():
+ if config.use_api:
+ gen = self.get_moved_pages_redirects_via_api()
+ else:
+ gen = self.get_moved_pages_redirects()
+ for redir_page in gen:
yield redir_page.title()
return
# retrieve information from the live wiki's maintenance page
@@ -454,10 +456,46 @@
wikipedia.output(u'\nChecking redirect %i of %i...'
% (num + 1, len(redict)))
+    def get_moved_pages_redirects_via_api(self):
+        """Generate redirects to recently moved pages, using the API.
+
+        Queries the move log (list=logevents, letype=move) with 'lestart'
+        set to the current UTC time minus self.offset hours.  For every
+        logged title that is itself a redirect page, yields each page
+        returned by getReferences(follow_redirects=True,
+        redirectsOnly=True).
+
+        A non-positive self.offset is replaced by 1 (and stored back on
+        the instance).  Only a single request with 'lelimit' 500 is made;
+        no query continuation is performed, so at most one batch of log
+        entries is processed.
+
+        Raises RuntimeError if the API answer contains a 'warnings' entry.
+        """
+        if self.offset <= 0:
+            self.offset = 1
+        # self.offset is counted in hours; timedelta takes (days, seconds)
+        start = datetime.datetime.utcnow() \
+                - datetime.timedelta(0, self.offset * 3600)
+        offset_time = start.strftime("%Y%m%d%H%M%S")
+        # NOTE(review): the enumeration direction relative to 'lestart'
+        # depends on the API default for 'ledir' -- confirm that this
+        # selects the intended part of the move log.
+        params = {
+            'action': 'query',
+            'list': 'logevents',
+            'letype': 'move',
+            'leprop': 'title|details',
+            'lelimit': '500',
+            'lestart': offset_time,
+        }
+        data = query.GetData(params, encodeTitle=False)
+        if 'warnings' in data:
+            # A bare "raise" has no active exception to re-raise here and
+            # would hide the API message; report the warnings explicitly.
+            raise RuntimeError(
+                'API warning while retrieving the move log: %r'
+                % (data['warnings'],))
+        allmoves = data['query']['logevents']
+        wikipedia.output(u'Retrieving %d moved pages via API...'
+                         % len(allmoves))
+        if wikipedia.verbose:
+            wikipedia.output(u"[%s]" % offset_time)
+        for moved in allmoves:
+            moved_page = wikipedia.Page(self.site, moved['title'])
+            try:
+                if not moved_page.isRedirectPage():
+                    continue
+            except (wikipedia.BadTitle, wikipedia.ServerError):
+                # skip titles that cannot be checked
+                continue
+            try:
+                for page in moved_page.getReferences(follow_redirects=True,
+                                                     redirectsOnly=True):
+                    yield page
+            except wikipedia.NoPage:
+                # original title must have been deleted after move
+                continue
+
def get_moved_pages_redirects(self):
'''generate redirects to recently-moved pages'''
# this will run forever, until user interrupts it
- import datetime
move_regex = re.compile(
r'moved <a href.*?>(.*?)</a> to <a href=.*?>.*?</a>.*?</li>')
@@ -823,7 +861,7 @@
else:
wikipedia.output(u'Unknown argument: %s' % arg)
- if not action or (api and moved_pages) or (xmlFilename and moved_pages)\
+ if not action or (xmlFilename and moved_pages)\
or (api and xmlFilename):
wikipedia.showHelp('redirect')
else:
_______________________________________________
Pywikipedia-svn mailing list
pywikipedia-svn@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn