Revision: 7112
Author:   alexsh
Date:     2009-08-05 21:25:39 +0000 (Wed, 05 Aug 2009)

Log Message:
-----------
Site()._load() and Site()._getUserData(): to reduce load time, add an option to 
collect user data through the query API (uses action=query, meta=userinfo).

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py      2009-08-05 06:24:39 UTC (rev 7111)
+++ trunk/pywikipedia/wikipedia.py      2009-08-05 21:25:39 UTC (rev 7112)
@@ -1499,6 +1499,7 @@
             predata['wpWatchthis'] = '1'
         # Give the token, but only if one is supplied.
         if token:
+            ##output(token) # for debug use only
             predata['wpEditToken'] = token
 
         # Sorry, single-site exception...
@@ -1630,6 +1631,7 @@
                 except NoUsername:
                     raise LockedPage()
             if not newToken and "<textarea" in data:
+                ##if "<textarea" in data: # for debug use only, if badtoken 
still happen
                 # We might have been using an outdated token
                 output(u"Changing page has failed. Retrying.")
                 return self._putPage(text, comment, watchArticle, minorEdit, 
newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = 
True, sysop = sysop)
@@ -4519,6 +4521,7 @@
         else:
             self._load(sysop = sysop)
             index = self._userIndex(sysop)
+            ##output('%s' % self._rights[index]) #for debug use
             return right in self._rights[index]
 
     def server_time(self):
@@ -4874,126 +4877,209 @@
         * text - the page text
         * sysop - is the user a sysop?
         """
-        if '<div id="globalWrapper">' not in text:
-            # Not a wiki page
-            return
 
         index = self._userIndex(sysop)
 
-        # Check for blocks - but only if version is 1.11 (userinfo is 
available)
-        # and the user data was not yet loaded
-        if self.versionnumber() >= 11 and (not self._userData[index] or force):
-            blocked = self._getBlock(sysop = sysop)
-            if blocked and not self._isBlocked[index]:
+        if type(text) == dict: #text is dict, query from API
+            # Check for blocks - but only if version is 1.11 (userinfo is 
available)
+            # and the user data was not yet loaded
+            if text.has_key('blockedby') and not self._isBlocked[index]:
                 # Write a warning if not shown earlier
                 if sysop:
                     account = 'Your sysop account'
                 else:
                     account = 'Your account'
                 output(u'WARNING: %s on %s is blocked. Editing using this 
account will stop the run.' % (account, self))
-            self._isBlocked[index] = blocked
+            self._isBlocked[index] = text.has_key('blockedby')
 
-        # Check for new messages
-        if '<div class="usermessage">' in text:
-            if not self._messages[index]:
-                # User has *new* messages
-                if sysop:
-                    output(u'NOTE: You have new messages in your sysop account 
on %s' % self)
-                else:
-                    output(u'NOTE: You have new messages on %s' % self)
-            self._messages[index] = True
-        else:
-            self._messages[index] = False
+            # Check for new messages, show key 'messages' in dict.
+            if text.has_key('messages'):
+                if not self._messages[index]:
+                    # User has *new* messages
+                    if sysop:
+                        output(u'NOTE: You have new messages in your sysop 
account on %s' % self)
+                    else:
+                        output(u'NOTE: You have new messages on %s' % self)
+                self._messages[index] = True
+            else:
+                self._messages[index] = False
 
-        # Don't perform other checks if the data was already loaded
-        if self._userData[index] and not force:
-            return
+            # Don't perform other checks if the data was already loaded
+            if self._userData[index] and not force:
+                return
 
-        # Search for the the user page link at the top.
-        # Note that the link of anonymous users (which doesn't exist at all
-        # in Wikimedia sites) has the ID pt-anonuserpage, and thus won't be
-        # found here.
-        userpageR = re.compile('<li id="pt-userpage"><a 
href=".+?">(?P<username>.+?)</a></li>')
-        m = userpageR.search(text)
-        if m:
-            self._isLoggedIn[index] = True
-            self._userName[index] = m.group('username')
-        else:
-            self._isLoggedIn[index] = False
-            # No idea what is the user name, and it isn't important
-            self._userName[index] = None
+            # Get username.
+            # anonymous mode will show key 'anon'
+            if not text.has_key('anon'): 
+                self._isLoggedIn[index] = True
+                self._userName[index] = text['name']
+            else:
+                self._isLoggedIn[index] = False
+                # No idea what is the user name, and it isn't important
+                self._userName[index] = None
 
-        # Check user groups, if possible (introduced in 1.10)
-        groupsR = re.compile(r'var wgUserGroups = \[\"(.+)\"\];')
-        m = groupsR.search(text)
-        checkLocal = True
-        if default_code in self.family.cross_allowed: # if current languages 
in cross allowed list, check global bot flag.
-            globalgroupsR = re.compile(r'var wgGlobalGroups = \[\"(.+)\"\];')
-            mg = globalgroupsR.search(text)
-            if mg: # the account had global permission
-                globalRights = mg.group(1)
-                globalRights = globalRights.split('","')
-                self._rights[index] = globalRights
+            # Check user groups and rights
+            if text.has_key('groups') and text['groups'] != []:
+                self._rights[index] = text['groups']
+                self._rights[index].extend(text['rights'])
+                # Warnings
+                # Don't show warnings for not logged in users, they will just 
fail to
+                # do any action
                 if self._isLoggedIn[index]:
-                    if 'Global_bot' in globalRights: # This account has the 
global bot flag, no need to check local flags.
-                        checkLocal = False
-                    else:
-                        output(u'Your bot account does not have global the bot 
flag, checking local flag.')
+                    if 'bot' not in self._rights[index] and 
config.notify_unflagged_bot:
+                        # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+                        if sysop:
+                            output(u'Note: Your sysop account on %s does not 
have a bot flag. Its edits will be visible in the recent changes.' % self)
+                        else:
+                            output(u'WARNING: Your account on %s does not have 
a bot flag. Its edits will be visible in the recent changes and it may get 
blocked.' % self)
+                    if sysop and 'sysop' not in self._rights[index]:
+                        output(u'WARNING: Your sysop account on %s does not 
seem to have sysop rights. You may not be able to perform any sysop-restricted 
actions using it.' % self)
+            else:
+                # key groups is not exists, setup a default rights
+                self._rights[index] = []
+                if self._isLoggedIn[index]:
+                    # Logged in user
+                    self._rights[index].append('user')
+                    # Assume bot, and thus autoconfirmed
+                    self._rights[index].extend(['bot', 'autoconfirmed'])
+                    if sysop:
+                        # Assume user reported as a sysop indeed has the sysop 
rights
+                        self._rights[index].append('sysop')
+            # Assume the user has the default rights if API not query back
+            self._rights[index].extend(['read', 'createaccount', 'edit', 
'upload', 'createpage', 'createtalk', 'move', 'upload'])
+            #remove Duplicate rights
+            self._rights[index] = list(set(self._rights[index]))
+
+            # Search for a token
+            if text.has_key('preferencestoken') and 
len(text['preferencestoken']) > 2:
+                # anonymous token is '+\\', check len('+\\') = 2
+                # if preferencestoken > 2, it must be loggedin.
+                self._token[index] = text['preferencestoken']
+                if self._rights[index] is not None:
+                    # In this case, token and rights are loaded - user data is 
now loaded
+                    self._userData[index] = True
+            else:
+                output(u'WARNING: Token not found on %s. You will not be able 
to edit any page.' % self)
         else:
-            if verbose: output(u'Note: this language does not allow global 
bots.')
-        if m and checkLocal:
-            rights = m.group(1)
-            rights = rights.split('", "')
-            if '*' in rights:
-                rights.remove('*')
-            self._rights[index] = rights
-            # Warnings
-            # Don't show warnings for not logged in users, they will just fail 
to
-            # do any action
-            if self._isLoggedIn[index]:
-                if 'bot' not in self._rights[index] and 
config.notify_unflagged_bot:
-                    # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+            #ordinary mode to get data from edit page HTMLs and JavaScripts
+            
+            if '<div id="globalWrapper">' not in text:
+                # Not a wiki page
+                return
+            # Check for blocks - but only if version is 1.11 (userinfo is 
available)
+            # and the user data was not yet loaded
+            if self.versionnumber() >= 11 and (not self._userData[index] or 
force):
+                blocked = self._getBlock(sysop = sysop)
+                if blocked and not self._isBlocked[index]:
+                    # Write a warning if not shown earlier
                     if sysop:
-                        output(u'Note: Your sysop account on %s does not have 
a bot flag. Its edits will be visible in the recent changes.' % self)
+                        account = 'Your sysop account'
                     else:
-                        output(u'WARNING: Your account on %s does not have a 
bot flag. Its edits will be visible in the recent changes and it may get 
blocked.' % self)
-                if sysop and 'sysop' not in self._rights[index]:
-                    output(u'WARNING: Your sysop account on %s does not seem 
to have sysop rights. You may not be able to perform any sysop-restricted 
actions using it.' % self)
-        else:
-            # We don't have wgUserGroups, and can't check the rights
-            self._rights[index] = []
-            if self._isLoggedIn[index]:
-                # Logged in user
-                self._rights[index].append('user')
-                # Assume bot, and thus autoconfirmed
-                self._rights[index].extend(['bot', 'autoconfirmed'])
-                if sysop:
-                    # Assume user reported as a sysop indeed has the sysop 
rights
-                    self._rights[index].append('sysop')
-        # Assume the user has the default rights
-        self._rights[index].extend(['read', 'createaccount', 'edit', 'upload', 
'createpage', 'createtalk', 'move', 'upload'])
-        if 'bot' in self._rights[index] or 'sysop' in self._rights[index]:
-            self._rights[index].append('apihighlimits')
-        if 'sysop' in self._rights[index]:
-            self._rights[index].extend(['delete', 'undelete', 'block', 
'protect', 'import', 'deletedhistory', 'unwatchedpages'])
+                        account = 'Your account'
+                    output(u'WARNING: %s on %s is blocked. Editing using this 
account will stop the run.' % (account, self))
+                self._isBlocked[index] = blocked
 
-        # Search for a token
-        tokenR = re.compile(r"\<input type='hidden' value=\"(.*?)\" 
name=\"wpEditToken\"")
-        tokenloc = tokenR.search(text)
-        if tokenloc:
-            self._token[index] = tokenloc.group(1)
-            if self._rights[index] is not None:
-                # In this case, token and rights are loaded - user data is now 
loaded
-                self._userData[index] = True
-        else:
-            # Token not found
-            # Possible reason for this is the user is blocked, don't show a
-            # warning in this case, otherwise do show a warning
-            # Another possible reason is that the page cannot be edited - 
ensure
-            # there is a textarea and the tab "view source" is not shown
-            if u'<textarea' in text and u'<li id="ca-viewsource"' not in text 
and not self._isBlocked[index]:
+            # Check for new messages
+            if '<div class="usermessage">' in text:
+                if not self._messages[index]:
+                    # User has *new* messages
+                    if sysop:
+                        output(u'NOTE: You have new messages in your sysop 
account on %s' % self)
+                    else:
+                        output(u'NOTE: You have new messages on %s' % self)
+                self._messages[index] = True
+            else:
+                self._messages[index] = False
+
+            # Don't perform other checks if the data was already loaded
+            if self._userData[index] and not force:
+                return
+
+            # Search for the the user page link at the top.
+            # Note that the link of anonymous users (which doesn't exist at all
+            # in Wikimedia sites) has the ID pt-anonuserpage, and thus won't be
+            # found here.
+            userpageR = re.compile('<li id="pt-userpage"><a 
href=".+?">(?P<username>.+?)</a></li>')
+            m = userpageR.search(text)
+            if m:
+                self._isLoggedIn[index] = True
+                self._userName[index] = m.group('username')
+            else:
+                self._isLoggedIn[index] = False
+                # No idea what is the user name, and it isn't important
+                self._userName[index] = None
+
+            # Check user groups, if possible (introduced in 1.10)
+            groupsR = re.compile(r'var wgUserGroups = \[\"(.+)\"\];')
+            m = groupsR.search(text)
+            checkLocal = True
+            if default_code in self.family.cross_allowed: # if current 
languages in cross allowed list, check global bot flag.
+                globalgroupsR = re.compile(r'var wgGlobalGroups = 
\[\"(.+)\"\];')
+                mg = globalgroupsR.search(text)
+                if mg: # the account had global permission
+                    globalRights = mg.group(1)
+                    globalRights = globalRights.split('","')
+                    self._rights[index] = globalRights
+                    if self._isLoggedIn[index]:
+                        if 'Global_bot' in globalRights: # This account has 
the global bot flag, no need to check local flags.
+                            checkLocal = False
+                        else:
+                            output(u'Your bot account does not have global the 
bot flag, checking local flag.')
+            else:
+                if verbose: output(u'Note: this language does not allow global 
bots.')
+            if m and checkLocal:
+                rights = m.group(1)
+                rights = rights.split('", "')
+                if '*' in rights:
+                    rights.remove('*')
+                self._rights[index] = rights
+                # Warnings
+                # Don't show warnings for not logged in users, they will just 
fail to
+                # do any action
+                if self._isLoggedIn[index]:
+                    if 'bot' not in self._rights[index] and 
config.notify_unflagged_bot:
+                        # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+                        if sysop:
+                            output(u'Note: Your sysop account on %s does not 
have a bot flag. Its edits will be visible in the recent changes.' % self)
+                        else:
+                            output(u'WARNING: Your account on %s does not have 
a bot flag. Its edits will be visible in the recent changes and it may get 
blocked.' % self)
+                    if sysop and 'sysop' not in self._rights[index]:
+                        output(u'WARNING: Your sysop account on %s does not 
seem to have sysop rights. You may not be able to perform any sysop-restricted 
actions using it.' % self)
+            else:
+                # We don't have wgUserGroups, and can't check the rights
+                self._rights[index] = []
+                if self._isLoggedIn[index]:
+                    # Logged in user
+                    self._rights[index].append('user')
+                    # Assume bot, and thus autoconfirmed
+                    self._rights[index].extend(['bot', 'autoconfirmed'])
+                    if sysop:
+                        # Assume user reported as a sysop indeed has the sysop 
rights
+                        self._rights[index].append('sysop')
+            # Assume the user has the default rights
+            self._rights[index].extend(['read', 'createaccount', 'edit', 
'upload', 'createpage', 'createtalk', 'move', 'upload'])
+            if 'bot' in self._rights[index] or 'sysop' in self._rights[index]:
+                self._rights[index].append('apihighlimits')
+            if 'sysop' in self._rights[index]:
+                self._rights[index].extend(['delete', 'undelete', 'block', 
'protect', 'import', 'deletedhistory', 'unwatchedpages'])
+
+            # Search for a token
+            tokenR = re.compile(r"\<input type='hidden' value=\"(.*?)\" 
name=\"wpEditToken\"")
+            tokenloc = tokenR.search(text)
+            if tokenloc:
+                self._token[index] = tokenloc.group(1)
+                if self._rights[index] is not None:
+                    # In this case, token and rights are loaded - user data is 
now loaded
+                    self._userData[index] = True
+            else:
                 # Token not found
-                output(u'WARNING: Token not found on %s. You will not be able 
to edit any page.' % self)
+                # Possible reason for this is the user is blocked, don't show a
+                # warning in this case, otherwise do show a warning
+                # Another possible reason is that the page cannot be edited - 
ensure
+                # there is a textarea and the tab "view source" is not shown
+                if u'<textarea' in text and u'<li id="ca-viewsource"' not in 
text and not self._isBlocked[index]:
+                    # Token not found
+                    output(u'WARNING: Token not found on %s. You will not be 
able to edit any page.' % self)
 
     def mediawiki_message(self, key):
         """Return the MediaWiki message text for key "key" """
@@ -5121,10 +5207,26 @@
 
         if verbose:
             output(u'Getting information for site %s' % self)
-
+        
+        try:
+            api_url = self.api_address()
+            del api_url
+        except NotImplementedError:
+            config.use_api = False
+        
         # Get data
-        url = self.edit_address('Non-existing_page')
-        text = self.getUrl(url, sysop = sysop)
+        if config.use_api and self.versionnumber() >= 11:
+            #Query userinfo
+            params = {
+                'action': 'query',
+                'meta': 'userinfo',
+                'uiprop': 
'blockinfo|groups|rights|hasmsg|ratelimits|preferencestoken',
+            }
+            text = query.GetData(params, site = self, useAPI = True, 
sysop=sysop)['query']['userinfo']
+            ##output('%s' % text) # for debug use only
+        else:
+            url = self.edit_address('Non-existing_page')
+            text = self.getUrl(url, sysop = sysop)
 
         # Parse data
         self._getUserData(text, sysop = sysop, force = force)
@@ -5862,7 +5964,7 @@
         if not siteurl.startswith('*.'):
             urlsToRetrieve.append('*.' + siteurl)
         if config.use_api:
-            output(u'Querying API...')
+            output(u'Querying API exturlusage...')
             for url in urlsToRetrieve:
                 params = {
                     'action': 'query',



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to