Revision: 7678
Author: alexsh
Date: 2009-11-22 20:56:47 +0000 (Sun, 22 Nov 2009)
Log Message:
-----------
userlib: add batch dump userinfo data by API
Modified Paths:
--------------
trunk/pywikipedia/userlib.py
Modified: trunk/pywikipedia/userlib.py
===================================================================
--- trunk/pywikipedia/userlib.py 2009-11-22 15:30:28 UTC (rev 7677)
+++ trunk/pywikipedia/userlib.py 2009-11-22 20:56:47 UTC (rev 7678)
@@ -42,7 +42,7 @@
site - a wikipedia.Site object
name - name of the user, without the trailing User:
"""
- if type(site) == str:
+ if type(site) in [str, unicode]:
self._site = wikipedia.getSite(site)
else:
self._site = site
@@ -73,27 +73,8 @@
return self.__str__()
def _load(self):
- data = batchLoadUI(self.name(), self.site()).values()[0]
- if 'missing' in data or 'invalid' in data:
- raise wikipedia.Error('No such user or invaild username')
-
- self._editcount = data['editcount']
-
- if 'groups' in data:
- self._groups = data['groups']
- else:
- self._groups = []
-
- if data['registration']:
- self._registrationTime = wikipedia.parsetime2stamp(data['registration'])
- else:
- self._registrationTime = 0
-
- self._mailable = ("emailable" in data)
-
- self._blocked = ('blockedby' in data)
- #if self._blocked: #Get block ID
-
+ getall(self.site(), [self])
+ return
def registrationTime(self, force = False):
if not hasattr(self, '_registrationTime') or force:
@@ -553,40 +534,70 @@
raise UnblockError, data
return True
-def batchLoadUI(names = [], site = None):
- #
- # batch load users information by API.
- # result info: http://www.mediawiki.org/wiki/API:Query_-_Lists#users_.2F_us
- #
- if not site:
- site = wikipedia.getSite()
- elif type(site) in [str, unicode]:
- site = wikipedia.getSite(site)
-
- result = {}
- params = {
- 'action': 'query',
- 'list': 'users',
- 'usprop': ['blockinfo', 'groups', 'editcount', 'registration', 'emailable', 'gender'],
- 'ususers': names,
- }
- #if site.versionnumber() >= 16:
- # params['ustoken'] = 'userrights'
+def getall(site, users, throttle=True, force=False):
+ """Bulk-retrieve users data from site
+
+ Arguments: site = Site object
+ users = iterable that yields User objects
- result = dict([(sig['name'].lower(), sig) for sig in query.GetData(params, site)['query']['users'] ])
+ """
+ users = list(users) # if pages is an iterator, we need to make it a list
+ if len(users) > 1: wikipedia.output(u'Getting %d users data from %s...' % (len(users), site))
+ _GetAllUI(site, users, throttle, force).run()
+
+class _GetAllUI(object):
+ def __init__(self, site, users, throttle, force):
+ self.site = site
+ self.users = []
+ self.throttle = throttle
+ self.force = force
+ self.sleeptime = 15
-
- return result
+ for user in users:
+ if not hasattr(user, '_editcount') or force:
+ self.users.append(user)
+ elif wikipedia.verbose:
+ wikipedia.output(u"BUGWARNING: %s already done!" % user.name())
+
+ def run(self):
+ if self.users:
+ while True:
+ try:
+ data = self.getData()
+ except Exception, e:
+ # Print the traceback of the caught exception
+ print e
+ raise
+ else:
+ break
+
+ for uj in self.users:
+ x = data[uj.name()]
+ uj._editcount = x['editcount']
+ if 'groups' in x:
+ uj._groups = x['groups']
+ else:
+ uj._groups = []
+ if x['registration']:
+ uj._registrationTime = wikipedia.parsetime2stamp(x['registration'])
+ else:
+ uj._registrationTime = 0
+ uj._mailable = ("emailable" in x)
+ uj._blocked = ('blockedby' in x)
+ #if self._blocked: #Get block ID
+
+ def getData(self):
+ datas = {}
+ params = {
+ 'action': 'query',
+ 'list': 'users',
+ 'usprop': ['blockinfo', 'groups', 'editcount', 'registration', 'emailable', 'gender'],
+ 'ususers': u'|'.join([n.name() for n in self.users]),
+ }
+ for n in query.GetData(params, self.site)['query']['users']:
+ datas[n['name']] = n
+ return datas
-def batchDumpInfo(user):
- totals = batchLoadUI([x.name() for x in user])
- for oj in user:
- data = totals[oj.name().lower()]
- oj._editcount = data['editcount']
- if 'groups' in data:
- oj._groups = data['groups']
- oj._blocked = ('blockedby' in data)
-
if __name__ == '__main__':
"""
Simple testing code for the [[User:Example]] on the English Wikipedia.
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn