Revision: 7678
Author:   alexsh
Date:     2009-11-22 20:56:47 +0000 (Sun, 22 Nov 2009)

Log Message:
-----------
userlib: add batch retrieval of user info data via the API

Modified Paths:
--------------
    trunk/pywikipedia/userlib.py

Modified: trunk/pywikipedia/userlib.py
===================================================================
--- trunk/pywikipedia/userlib.py        2009-11-22 15:30:28 UTC (rev 7677)
+++ trunk/pywikipedia/userlib.py        2009-11-22 20:56:47 UTC (rev 7678)
@@ -42,7 +42,7 @@
         site - a wikipedia.Site object
         name - name of the user, without the trailing User:
         """
-        if type(site) == str:
+        if type(site) in [str, unicode]:
             self._site = wikipedia.getSite(site)
         else:
             self._site = site
@@ -73,27 +73,8 @@
         return self.__str__()
     
     def _load(self):
-        data = batchLoadUI(self.name(), self.site()).values()[0]
-        if 'missing' in data or 'invalid' in data:
-            raise wikipedia.Error('No such user or invaild username')
-        
-        self._editcount = data['editcount']
-        
-        if 'groups' in data:
-            self._groups = data['groups']
-        else:
-            self._groups = []
-        
-        if data['registration']:
-            self._registrationTime = 
wikipedia.parsetime2stamp(data['registration'])
-        else:
-            self._registrationTime = 0
-        
-        self._mailable = ("emailable" in data)
-        
-        self._blocked = ('blockedby' in data)
-        #if self._blocked: #Get block ID
-        
+        getall(self.site(), [self])
+        return
     
     def registrationTime(self, force = False):
         if not hasattr(self, '_registrationTime') or force:
@@ -553,40 +534,70 @@
             raise UnblockError, data
         return True
 
-def batchLoadUI(names = [], site = None):
-    #
-    # batch load users information by API.
-    # result info: http://www.mediawiki.org/wiki/API:Query_-_Lists#users_.2F_us
-    #
-    if not site:
-        site = wikipedia.getSite()
-    elif type(site) in  [str, unicode]:
-        site = wikipedia.getSite(site)
-    
-    result = {}
-    params = {
-        'action': 'query',
-        'list': 'users',
-        'usprop': ['blockinfo', 'groups', 'editcount', 'registration', 
'emailable', 'gender'],
-        'ususers': names,
-    }
-    #if site.versionnumber() >= 16:
-    #    params['ustoken'] = 'userrights'
+def getall(site, users, throttle=True, force=False):
+    """Bulk-retrieve users data from site
+ 
+    Arguments: site = Site object
+               users = iterable that yields User objects
 
-    result = dict([(sig['name'].lower(), sig) for sig in query.GetData(params, 
site)['query']['users'] ])
+    """
+    users = list(users)  # if users is an iterator, we need to make it a list
+    if len(users) > 1: wikipedia.output(u'Getting %d users data from %s...' % 
(len(users), site))
+    _GetAllUI(site, users, throttle, force).run()
+
+class _GetAllUI(object):
+    def __init__(self, site, users, throttle, force):
+        self.site = site
+        self.users = []
+        self.throttle = throttle
+        self.force = force
+        self.sleeptime = 15
     
-    
-    return result
+        for user in users:
+            if not hasattr(user, '_editcount') or force:
+                self.users.append(user)
+            elif wikipedia.verbose:
+                wikipedia.output(u"BUGWARNING: %s already done!" % user.name())
+     
+    def run(self):
+        if self.users:
+            while True:
+                try:
+                    data = self.getData()
+                except Exception, e:
+                    # Print the caught exception (message only), then re-raise it
+                    print e
+                    raise
+                else:
+                    break
+            
+            for uj in self.users:
+                x = data[uj.name()]
+                uj._editcount = x['editcount']
+                if 'groups' in x:
+                    uj._groups = x['groups']
+                else:
+                    uj._groups = []
+                if x['registration']:
+                    uj._registrationTime = 
wikipedia.parsetime2stamp(x['registration'])
+                else:
+                    uj._registrationTime = 0
+                uj._mailable = ("emailable" in x)
+                uj._blocked = ('blockedby' in x)
+                #if self._blocked: #Get block ID
+        
+    def getData(self):
+        datas = {}
+        params = {
+            'action': 'query',
+            'list': 'users',
+            'usprop': ['blockinfo', 'groups', 'editcount', 'registration', 
'emailable', 'gender'],
+            'ususers': u'|'.join([n.name() for n in self.users]),
+        }
+        for n in query.GetData(params, self.site)['query']['users']:
+            datas[n['name']] = n
+        return datas
 
-def batchDumpInfo(user):
-    totals = batchLoadUI([x.name() for x in user])
-    for oj in user:
-        data = totals[oj.name().lower()]
-        oj._editcount = data['editcount']
-        if 'groups' in data:
-            oj._groups = data['groups']
-        oj._blocked = ('blockedby' in data)
-
 if __name__ == '__main__':
     """
     Simple testing code for the [[User:Example]] on the English Wikipedia.



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to