Revision: 7112
Author: alexsh
Date: 2009-08-05 21:25:39 +0000 (Wed, 05 Aug 2009)
Log Message:
-----------
Site()._load() and Site()._getUserData(): to reduce load time, add a query API
option to collect user data (uses action=query, meta=userinfo).
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-08-05 06:24:39 UTC (rev 7111)
+++ trunk/pywikipedia/wikipedia.py 2009-08-05 21:25:39 UTC (rev 7112)
@@ -1499,6 +1499,7 @@
predata['wpWatchthis'] = '1'
# Give the token, but only if one is supplied.
if token:
+ ##output(token) # for debug use only
predata['wpEditToken'] = token
# Sorry, single-site exception...
@@ -1630,6 +1631,7 @@
except NoUsername:
raise LockedPage()
if not newToken and "<textarea" in data:
+ ##if "<textarea" in data: # for debug use only, if badtoken
still happen
# We might have been using an outdated token
output(u"Changing page has failed. Retrying.")
return self._putPage(text, comment, watchArticle, minorEdit,
newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken =
True, sysop = sysop)
@@ -4519,6 +4521,7 @@
else:
self._load(sysop = sysop)
index = self._userIndex(sysop)
+ ##output('%s' % self._rights[index]) #for debug use
return right in self._rights[index]
def server_time(self):
@@ -4874,126 +4877,209 @@
* text - the page text
* sysop - is the user a sysop?
"""
- if '<div id="globalWrapper">' not in text:
- # Not a wiki page
- return
index = self._userIndex(sysop)
- # Check for blocks - but only if version is 1.11 (userinfo is
available)
- # and the user data was not yet loaded
- if self.versionnumber() >= 11 and (not self._userData[index] or force):
- blocked = self._getBlock(sysop = sysop)
- if blocked and not self._isBlocked[index]:
+ if type(text) == dict: #text is dict, query from API
+ # Check for blocks - but only if version is 1.11 (userinfo is
available)
+ # and the user data was not yet loaded
+ if text.has_key('blockedby') and not self._isBlocked[index]:
# Write a warning if not shown earlier
if sysop:
account = 'Your sysop account'
else:
account = 'Your account'
output(u'WARNING: %s on %s is blocked. Editing using this
account will stop the run.' % (account, self))
- self._isBlocked[index] = blocked
+ self._isBlocked[index] = text.has_key('blockedby')
- # Check for new messages
- if '<div class="usermessage">' in text:
- if not self._messages[index]:
- # User has *new* messages
- if sysop:
- output(u'NOTE: You have new messages in your sysop account
on %s' % self)
- else:
- output(u'NOTE: You have new messages on %s' % self)
- self._messages[index] = True
- else:
- self._messages[index] = False
+ # Check for new messages, show key 'messages' in dict.
+ if text.has_key('messages'):
+ if not self._messages[index]:
+ # User has *new* messages
+ if sysop:
+ output(u'NOTE: You have new messages in your sysop
account on %s' % self)
+ else:
+ output(u'NOTE: You have new messages on %s' % self)
+ self._messages[index] = True
+ else:
+ self._messages[index] = False
- # Don't perform other checks if the data was already loaded
- if self._userData[index] and not force:
- return
+ # Don't perform other checks if the data was already loaded
+ if self._userData[index] and not force:
+ return
- # Search for the the user page link at the top.
- # Note that the link of anonymous users (which doesn't exist at all
- # in Wikimedia sites) has the ID pt-anonuserpage, and thus won't be
- # found here.
- userpageR = re.compile('<li id="pt-userpage"><a
href=".+?">(?P<username>.+?)</a></li>')
- m = userpageR.search(text)
- if m:
- self._isLoggedIn[index] = True
- self._userName[index] = m.group('username')
- else:
- self._isLoggedIn[index] = False
- # No idea what is the user name, and it isn't important
- self._userName[index] = None
+ # Get username.
+ # anonymous mode will show key 'anon'
+ if not text.has_key('anon'):
+ self._isLoggedIn[index] = True
+ self._userName[index] = text['name']
+ else:
+ self._isLoggedIn[index] = False
+ # No idea what is the user name, and it isn't important
+ self._userName[index] = None
- # Check user groups, if possible (introduced in 1.10)
- groupsR = re.compile(r'var wgUserGroups = \[\"(.+)\"\];')
- m = groupsR.search(text)
- checkLocal = True
- if default_code in self.family.cross_allowed: # if current languages
in cross allowed list, check global bot flag.
- globalgroupsR = re.compile(r'var wgGlobalGroups = \[\"(.+)\"\];')
- mg = globalgroupsR.search(text)
- if mg: # the account had global permission
- globalRights = mg.group(1)
- globalRights = globalRights.split('","')
- self._rights[index] = globalRights
+ # Check user groups and rights
+ if text.has_key('groups') and text['groups'] != []:
+ self._rights[index] = text['groups']
+ self._rights[index].extend(text['rights'])
+ # Warnings
+ # Don't show warnings for not logged in users, they will just
fail to
+ # do any action
if self._isLoggedIn[index]:
- if 'Global_bot' in globalRights: # This account has the
global bot flag, no need to check local flags.
- checkLocal = False
- else:
- output(u'Your bot account does not have global the bot
flag, checking local flag.')
+ if 'bot' not in self._rights[index] and
config.notify_unflagged_bot:
+ # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+ if sysop:
+ output(u'Note: Your sysop account on %s does not
have a bot flag. Its edits will be visible in the recent changes.' % self)
+ else:
+ output(u'WARNING: Your account on %s does not have
a bot flag. Its edits will be visible in the recent changes and it may get
blocked.' % self)
+ if sysop and 'sysop' not in self._rights[index]:
+ output(u'WARNING: Your sysop account on %s does not
seem to have sysop rights. You may not be able to perform any sysop-restricted
actions using it.' % self)
+ else:
+ # key groups is not exists, setup a default rights
+ self._rights[index] = []
+ if self._isLoggedIn[index]:
+ # Logged in user
+ self._rights[index].append('user')
+ # Assume bot, and thus autoconfirmed
+ self._rights[index].extend(['bot', 'autoconfirmed'])
+ if sysop:
+ # Assume user reported as a sysop indeed has the sysop
rights
+ self._rights[index].append('sysop')
+ # Assume the user has the default rights if API not query back
+ self._rights[index].extend(['read', 'createaccount', 'edit',
'upload', 'createpage', 'createtalk', 'move', 'upload'])
+ #remove Duplicate rights
+ self._rights[index] = list(set(self._rights[index]))
+
+ # Search for a token
+ if text.has_key('preferencestoken') and
len(text['preferencestoken']) > 2:
+ # anonymous token is '+\\', check len('+\\') = 2
+ # if preferencestoken > 2, it must be loggedin.
+ self._token[index] = text['preferencestoken']
+ if self._rights[index] is not None:
+ # In this case, token and rights are loaded - user data is
now loaded
+ self._userData[index] = True
+ else:
+ output(u'WARNING: Token not found on %s. You will not be able
to edit any page.' % self)
else:
- if verbose: output(u'Note: this language does not allow global
bots.')
- if m and checkLocal:
- rights = m.group(1)
- rights = rights.split('", "')
- if '*' in rights:
- rights.remove('*')
- self._rights[index] = rights
- # Warnings
- # Don't show warnings for not logged in users, they will just fail
to
- # do any action
- if self._isLoggedIn[index]:
- if 'bot' not in self._rights[index] and
config.notify_unflagged_bot:
- # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+ #ordinary mode to get data from edit page HTMLs and JavaScripts
+
+ if '<div id="globalWrapper">' not in text:
+ # Not a wiki page
+ return
+ # Check for blocks - but only if version is 1.11 (userinfo is
available)
+ # and the user data was not yet loaded
+ if self.versionnumber() >= 11 and (not self._userData[index] or
force):
+ blocked = self._getBlock(sysop = sysop)
+ if blocked and not self._isBlocked[index]:
+ # Write a warning if not shown earlier
if sysop:
- output(u'Note: Your sysop account on %s does not have
a bot flag. Its edits will be visible in the recent changes.' % self)
+ account = 'Your sysop account'
else:
- output(u'WARNING: Your account on %s does not have a
bot flag. Its edits will be visible in the recent changes and it may get
blocked.' % self)
- if sysop and 'sysop' not in self._rights[index]:
- output(u'WARNING: Your sysop account on %s does not seem
to have sysop rights. You may not be able to perform any sysop-restricted
actions using it.' % self)
- else:
- # We don't have wgUserGroups, and can't check the rights
- self._rights[index] = []
- if self._isLoggedIn[index]:
- # Logged in user
- self._rights[index].append('user')
- # Assume bot, and thus autoconfirmed
- self._rights[index].extend(['bot', 'autoconfirmed'])
- if sysop:
- # Assume user reported as a sysop indeed has the sysop
rights
- self._rights[index].append('sysop')
- # Assume the user has the default rights
- self._rights[index].extend(['read', 'createaccount', 'edit', 'upload',
'createpage', 'createtalk', 'move', 'upload'])
- if 'bot' in self._rights[index] or 'sysop' in self._rights[index]:
- self._rights[index].append('apihighlimits')
- if 'sysop' in self._rights[index]:
- self._rights[index].extend(['delete', 'undelete', 'block',
'protect', 'import', 'deletedhistory', 'unwatchedpages'])
+ account = 'Your account'
+ output(u'WARNING: %s on %s is blocked. Editing using this
account will stop the run.' % (account, self))
+ self._isBlocked[index] = blocked
- # Search for a token
- tokenR = re.compile(r"\<input type='hidden' value=\"(.*?)\"
name=\"wpEditToken\"")
- tokenloc = tokenR.search(text)
- if tokenloc:
- self._token[index] = tokenloc.group(1)
- if self._rights[index] is not None:
- # In this case, token and rights are loaded - user data is now
loaded
- self._userData[index] = True
- else:
- # Token not found
- # Possible reason for this is the user is blocked, don't show a
- # warning in this case, otherwise do show a warning
- # Another possible reason is that the page cannot be edited -
ensure
- # there is a textarea and the tab "view source" is not shown
- if u'<textarea' in text and u'<li id="ca-viewsource"' not in text
and not self._isBlocked[index]:
+ # Check for new messages
+ if '<div class="usermessage">' in text:
+ if not self._messages[index]:
+ # User has *new* messages
+ if sysop:
+ output(u'NOTE: You have new messages in your sysop
account on %s' % self)
+ else:
+ output(u'NOTE: You have new messages on %s' % self)
+ self._messages[index] = True
+ else:
+ self._messages[index] = False
+
+ # Don't perform other checks if the data was already loaded
+ if self._userData[index] and not force:
+ return
+
+ # Search for the the user page link at the top.
+ # Note that the link of anonymous users (which doesn't exist at all
+ # in Wikimedia sites) has the ID pt-anonuserpage, and thus won't be
+ # found here.
+ userpageR = re.compile('<li id="pt-userpage"><a
href=".+?">(?P<username>.+?)</a></li>')
+ m = userpageR.search(text)
+ if m:
+ self._isLoggedIn[index] = True
+ self._userName[index] = m.group('username')
+ else:
+ self._isLoggedIn[index] = False
+ # No idea what is the user name, and it isn't important
+ self._userName[index] = None
+
+ # Check user groups, if possible (introduced in 1.10)
+ groupsR = re.compile(r'var wgUserGroups = \[\"(.+)\"\];')
+ m = groupsR.search(text)
+ checkLocal = True
+ if default_code in self.family.cross_allowed: # if current
languages in cross allowed list, check global bot flag.
+ globalgroupsR = re.compile(r'var wgGlobalGroups =
\[\"(.+)\"\];')
+ mg = globalgroupsR.search(text)
+ if mg: # the account had global permission
+ globalRights = mg.group(1)
+ globalRights = globalRights.split('","')
+ self._rights[index] = globalRights
+ if self._isLoggedIn[index]:
+ if 'Global_bot' in globalRights: # This account has
the global bot flag, no need to check local flags.
+ checkLocal = False
+ else:
+ output(u'Your bot account does not have global the
bot flag, checking local flag.')
+ else:
+ if verbose: output(u'Note: this language does not allow global
bots.')
+ if m and checkLocal:
+ rights = m.group(1)
+ rights = rights.split('", "')
+ if '*' in rights:
+ rights.remove('*')
+ self._rights[index] = rights
+ # Warnings
+ # Don't show warnings for not logged in users, they will just
fail to
+ # do any action
+ if self._isLoggedIn[index]:
+ if 'bot' not in self._rights[index] and
config.notify_unflagged_bot:
+ # Sysop + bot flag = Sysop flag in MediaWiki < 1.7.1?
+ if sysop:
+ output(u'Note: Your sysop account on %s does not
have a bot flag. Its edits will be visible in the recent changes.' % self)
+ else:
+ output(u'WARNING: Your account on %s does not have
a bot flag. Its edits will be visible in the recent changes and it may get
blocked.' % self)
+ if sysop and 'sysop' not in self._rights[index]:
+ output(u'WARNING: Your sysop account on %s does not
seem to have sysop rights. You may not be able to perform any sysop-restricted
actions using it.' % self)
+ else:
+ # We don't have wgUserGroups, and can't check the rights
+ self._rights[index] = []
+ if self._isLoggedIn[index]:
+ # Logged in user
+ self._rights[index].append('user')
+ # Assume bot, and thus autoconfirmed
+ self._rights[index].extend(['bot', 'autoconfirmed'])
+ if sysop:
+ # Assume user reported as a sysop indeed has the sysop
rights
+ self._rights[index].append('sysop')
+ # Assume the user has the default rights
+ self._rights[index].extend(['read', 'createaccount', 'edit',
'upload', 'createpage', 'createtalk', 'move', 'upload'])
+ if 'bot' in self._rights[index] or 'sysop' in self._rights[index]:
+ self._rights[index].append('apihighlimits')
+ if 'sysop' in self._rights[index]:
+ self._rights[index].extend(['delete', 'undelete', 'block',
'protect', 'import', 'deletedhistory', 'unwatchedpages'])
+
+ # Search for a token
+ tokenR = re.compile(r"\<input type='hidden' value=\"(.*?)\"
name=\"wpEditToken\"")
+ tokenloc = tokenR.search(text)
+ if tokenloc:
+ self._token[index] = tokenloc.group(1)
+ if self._rights[index] is not None:
+ # In this case, token and rights are loaded - user data is
now loaded
+ self._userData[index] = True
+ else:
# Token not found
- output(u'WARNING: Token not found on %s. You will not be able
to edit any page.' % self)
+ # Possible reason for this is the user is blocked, don't show a
+ # warning in this case, otherwise do show a warning
+ # Another possible reason is that the page cannot be edited -
ensure
+ # there is a textarea and the tab "view source" is not shown
+ if u'<textarea' in text and u'<li id="ca-viewsource"' not in
text and not self._isBlocked[index]:
+ # Token not found
+ output(u'WARNING: Token not found on %s. You will not be
able to edit any page.' % self)
def mediawiki_message(self, key):
"""Return the MediaWiki message text for key "key" """
@@ -5121,10 +5207,26 @@
if verbose:
output(u'Getting information for site %s' % self)
-
+
+ try:
+ api_url = self.api_address()
+ del api_url
+ except NotImplementedError:
+ config.use_api = False
+
# Get data
- url = self.edit_address('Non-existing_page')
- text = self.getUrl(url, sysop = sysop)
+ if config.use_api and self.versionnumber() >= 11:
+ #Query userinfo
+ params = {
+ 'action': 'query',
+ 'meta': 'userinfo',
+ 'uiprop':
'blockinfo|groups|rights|hasmsg|ratelimits|preferencestoken',
+ }
+ text = query.GetData(params, site = self, useAPI = True,
sysop=sysop)['query']['userinfo']
+ ##output('%s' % text) # for debug use only
+ else:
+ url = self.edit_address('Non-existing_page')
+ text = self.getUrl(url, sysop = sysop)
# Parse data
self._getUserData(text, sysop = sysop, force = force)
@@ -5862,7 +5964,7 @@
if not siteurl.startswith('*.'):
urlsToRetrieve.append('*.' + siteurl)
if config.use_api:
- output(u'Querying API...')
+ output(u'Querying API exturlusage...')
for url in urlsToRetrieve:
params = {
'action': 'query',
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn