Revision: 7136
Author: alexsh
Date: 2009-08-09 05:18:21 +0000 (Sun, 09 Aug 2009)
Log Message:
-----------
catlib.py
*Category()._parseCategory(): Changed to use the API query
(list=categorymembers); it falls back to the old way (moved to
_oldParseCategory) if the MediaWiki version does not support it or if
NotImplementedError is raised
(tested for 12 hours in [[ja:WP:BOTREQ]])
Modified Paths:
--------------
trunk/pywikipedia/catlib.py
Modified: trunk/pywikipedia/catlib.py
===================================================================
--- trunk/pywikipedia/catlib.py 2009-08-08 15:28:21 UTC (rev 7135)
+++ trunk/pywikipedia/catlib.py 2009-08-09 05:18:21 UTC (rev 7136)
@@ -166,6 +166,69 @@
def _parseCategory(self, purge=False, startFrom=None):
"""
+        Yield all articles and subcategories in this category via the API.
+
+        Set startFrom to a string which is the title of the page to start from.
+        purge is only forwarded to _oldParseCategory() on the fallback path;
+        the API path does not use it.
+
+        Yielded results are tuples in the form (tag, page) where tag is one
+        of the constants ARTICLE and SUBCATEGORY, and page is the Page,
+        ImagePage or Category object.
+
+        Note that results of this method need not be unique.
+
+        This should not be used outside of this module.
+        """
+        try:
+            if self.site().versionnumber() < 11:
+                raise NotImplementedError  # version does not support the API
+            # Called only for its side effect: presumably api_address()
+            # raises NotImplementedError for sites without an API -- confirm.
+            self.site().api_address()
+        except NotImplementedError:
+            # Fall back to the old implementation.
+            for tag, page in self._oldParseCategory(purge, startFrom):
+                yield tag, page
+            return
+
+        currentPageOffset = None
+        while True:
+            params = {
+                'action': 'query',
+                'list': 'categorymembers',
+                'cmtitle': self.title(),
+                'cmprop': 'title',  # other props: ids|sortkey|timestamp
+                #'cmlimit': config.special_page_limit,
+            }
+            if currentPageOffset:
+                params['cmcontinue'] = currentPageOffset
+                # The last character of cmcontinue is '|'; drop it for
+                # display only.
+                wikipedia.output('Getting [[%s]] list from %s by API...'
+                                 % (self.title(), currentPageOffset[:-1]))
+            elif startFrom:
+                # NOTE(review): startFrom is documented as a page title, but
+                # 'cmstart' looks like the API's timestamp parameter; confirm
+                # whether 'cmstartsortkey' is the intended one.
+                params['cmstart'] = startFrom
+                wikipedia.output('Getting [[%s]] list starting at %s by API...'
+                                 % (self.title(), startFrom))
+            else:
+                wikipedia.output('Getting [[%s]] by API...' % self.title())
+
+            wikipedia.get_throttle()
+            data = query.GetData(params, self.site())
+
+            for memb in data['query']['categorymembers']:
+                title = memb['title']
+                # For MediaWiki versions where subcats look like articles
+                if isCatTitle(title, self.site()):
+                    yield SUBCATEGORY, Category(self.site(), title)
+                elif (memb['ns'] == 6
+                      and self.site().image_namespace() in title):
+                    # Namespace 6 members are yielded as ImagePage objects.
+                    yield ARTICLE, wikipedia.ImagePage(self.site(), title)
+                else:
+                    yield ARTICLE, wikipedia.Page(self.site(), title)
+            # try to find a link to the next list page
+            if 'query-continue' in data:
+                continuation = data['query-continue']['categorymembers']
+                currentPageOffset = continuation['cmcontinue']
+            else:
+                break
+
+ def _oldParseCategory(self, purge=False, startFrom=None):
+ """
Yields all articles and subcategories that are in this category.
Set purge to True to instruct MediaWiki not to serve a cached version.
@@ -247,16 +310,14 @@
pass
# For MediaWiki versions where subcats look like articles
elif isCatTitle(title, self.site()):
- ncat = Category(self.site(), title)
- yield SUBCATEGORY, ncat
+ yield SUBCATEGORY, Category(self.site(), title)
else:
yield ARTICLE, wikipedia.Page(self.site(), title)
if Rsubcat:
# For MediaWiki versions where subcats look differently
for titleWithoutNamespace in Rsubcat.findall(txt):
title = 'Category:%s' % titleWithoutNamespace
- ncat = Category(self.site(), title)
- yield SUBCATEGORY, ncat
+ yield SUBCATEGORY, Category(self.site(), title)
if Rimage:
# For MediaWiki versions where images work through galleries
for title in Rimage.findall(txt):
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn