Revision: 7359
Author: alexsh
Date: 2009-10-03 04:52:39 +0000 (Sat, 03 Oct 2009)
Log Message:
-----------
RedirectGenerator().get_redirects_via_api(): change API Query type to JSON
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2009-10-02 20:34:20 UTC (rev 7358)
+++ trunk/pywikipedia/redirect.py 2009-10-03 04:52:39 UTC (rev 7359)
@@ -287,6 +287,8 @@
params['apfrom'] = start
# print (apiQ)
data = query.GetData(params, self.site)
+ if "limits" in data: # process aplimit = max
+ params['aplimit'] = int(data['limits']['allpages'])
# wikipedia.output(u'===RESULT===\n%s\n' % result)
for x in data['query']['allpages']:
if until and x['title'] == until:
@@ -299,30 +301,28 @@
break
- def _next_redirects_via_api_commandline(self, apiQi, number = 'max', namespaces = [],
- start = None, until = None ):
+ def _next_redirects_via_api_commandline(self, number = 'max', namespaces = [], start = None, until = None ):
"""
yields commands to the api for checking a set op page ids.
"""
# wikipedia.output(u'====> _next_redirects_via_api_commandline(apiQi=%s, number=%s, #ns=%d, start=%s, until=%s)' % (apiQi, number, len(namespaces), start, until))
if namespaces == []:
namespaces = [ 0 ]
- maxurllen = 1018 # accomodate "GET " + apiQ + CR + LF in 1024 bytes.
- apiQ = ''
+ #maxurllen = 1018 # accomodate "GET " + apiQ + CR + LF in 1024 bytes.
+ apiQ = []
+ maxurllen = 900 - len(self.site.hostname() + self.site.api_address() )
+ clen = 0
for pageid in self.get_redirect_pageids_via_api(number, namespaces,
start, until):
- if apiQ:
- tmp = ( '%s|%s' % ( apiQ, pageid ) )
- else:
- tmp = ( '%s%s' % ( apiQi, pageid ) )
- if len(tmp) > maxurllen and apiQ:
+ apiQ.append(pageid)
+ clen += len(str(pageid)) + 1
+ if clen > maxurllen and apiQ:
yield apiQ
- tmp = ''
- apiQ = tmp
+ apiQ = []
+ clen = 0
if apiQ:
yield apiQ
- def get_redirects_via_api(self, number = u'max', namespaces = [], start = None,
- until = None, maxlen = 8 ):
+ def get_redirects_via_api(self, number = u'max', namespaces = [], start = None, until = None, maxlen = 8 ):
"""
Generator which will yield a tuple of data about Pages that are
redirects:
0 - page title of a redirect page
@@ -347,30 +347,26 @@
import urllib
if namespaces == []:
namespaces = [ 0 ]
- apiQ1 = self.site.api_address()
- apiQ1 += 'action=query'
- apiQ1 += '&redirects'
- apiQ1 += '&format=xml'
- apiQ1 += '&pageids='
- redirectRe = re.compile('<r from="(.*?)" to="(.*?)"')
- missingpageRe = re.compile('<page .*? title="(.*?)" missing=""')
- existingpageRe = re.compile('<page pageid=".*?" .*? title="(.*?)"')
- for apiQ in self._next_redirects_via_api_commandline(apiQ1, number = number,
- namespaces = namespaces, start = start, until = until ):
+ params = {
+ 'action':'query',
+ 'redirects':1,
+ #'':'',
+ }
+ for apiQ in self._next_redirects_via_api_commandline(number, namespaces, start, until):
# wikipedia.output (u'===apiQ=%s' % apiQ)
- result = self.site.getUrl(apiQ)
+ params['pageids'] = query.ListToParam(apiQ)
+ data = query.GetData(params, self.site)
# wikipedia.output(u'===RESULT===\n%s\n' % result)
redirects = {}
pages = {}
- for redirect in redirectRe.findall(result):
- # wikipedia.output (u'R: %s => %s' % redirect)
- redirects[redirect[0]] = redirect[1]
- for pagetitle in missingpageRe.findall(result):
+ redirects = dict([[x['from'], x['to']] for x in data['query']['redirects']])
+
+ for pagetitle in data['query']['pages'].values():
# wikipedia.output (u'M: %s' % pagetitle)
- pages[pagetitle] = False
- for pagetitle in existingpageRe.findall(result):
- # wikipedia.output (u'P: %s' % pagetitle)
- pages[pagetitle] = True
+ if 'missing' in pagetitle and 'pageid' not in pagetitle:
+ pages[pagetitle['title']] = False
+ else:
+ pages[pagetitle['title']] = True
for redirect in redirects:
target = redirects[redirect]
result = 0
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn