Revision: 6480
Author:   russblau
Date:     2009-03-03 16:50:18 +0000 (Tue, 03 Mar 2009)

Log Message:
-----------
Ported to new framework

Modified Paths:
--------------
    branches/rewrite/pywikibot/scripts/category_redirect.py

Modified: branches/rewrite/pywikibot/scripts/category_redirect.py
===================================================================
--- branches/rewrite/pywikibot/scripts/category_redirect.py     2009-03-03 16:49:18 UTC (rev 6479)
+++ branches/rewrite/pywikibot/scripts/category_redirect.py     2009-03-03 16:50:18 UTC (rev 6480)
@@ -14,8 +14,8 @@
 """
 __version__ = '$Id$'
 
-import wikipedia, catlib
-import pagegenerators
+import pywikibot
+from pywikibot import pagegenerators
 import simplejson
 import cPickle
 import math
@@ -25,27 +25,16 @@
 from datetime import datetime, timedelta
 
 
-class APIError(Exception):
-    """The wiki API returned an error message."""
-
-    def __init__(self, errordict):
-        """Save error dict returned by MW API."""
-        self.errors = errordict
-
-    def __str__(self):
-        return "%(code)s: %(info)s" % self.errors
-
-
 class CategoryRedirectBot(object):
     def __init__(self):
         self.cooldown = 7 # days
-        self.site = wikipedia.getSite()
+        self.site = pywikibot.getSite()
         self.catprefix = self.site.namespace(14)+":"
         self.log_text = []
         self.edit_requests = []
-        self.log_page = wikipedia.Page(self.site,
+        self.log_page = pywikibot.Page(self.site,
                         u"User:%(user)s/category redirect log" %
-                            {'user': self.site.loggedInAs()})
+                            {'user': self.site.user()})
 
         # Localization:
 
@@ -147,7 +136,7 @@
             'no': u"Bot for vedlikehold av kategoriomdirigeringer",
         }
 
-        self.edit_request_text = wikipedia.translate(self.site.lang,
+        self.edit_request_text = pywikibot.translate(self.site.lang,
             {'en': u"""\
 The following protected pages have been detected as requiring updates to \
 category links:
@@ -156,7 +145,7 @@
 """,
             })
 
-        self.edit_request_item = wikipedia.translate(self.site.lang,
+        self.edit_request_item = pywikibot.translate(self.site.lang,
             {'en': u"* %s is in %s, which is a redirect to %s",
             })
 
@@ -166,89 +155,94 @@
         Moves subcategories of oldCat as well. oldCat and newCat should be
         Category objects. If newCat is None, the category will be removed.
 
-        This is a copy of portions of catlib.change_category(), with some
-        changes.
+        This is a copy of portions of [old] catlib.change_category(), with
+        some changes.
 
         """
         oldtext = article.get(get_redirect=True, force=True)
-        newtext = wikipedia.replaceCategoryInPlace(oldtext, oldCat, newCat)
+        newtext = pywikibot.replaceCategoryInPlace(oldtext, oldCat, newCat)
         try:
             # even if no changes, still save the page, in case it needs
             # an update due to changes in a transcluded template
             article.put(newtext, comment)
             if newtext == oldtext:
-                wikipedia.output(
-                    u'No changes in made in page %s.' % article.aslink())
+                pywikibot.output(
+                    u'No changes in made in page %s.'
+                     % article.title(asLink=True)
+                )
                 return False
             return True
-        except wikipedia.EditConflict:
-            wikipedia.output(
-                u'Skipping %s because of edit conflict' % article.aslink())
-        except wikipedia.LockedPage:
-            wikipedia.output(u'Skipping locked page %s' % article.aslink())
-            self.edit_requests.append((article.aslink(),
-                                       oldCat.aslink(textlink=True),
-                                       newCat.aslink(textlink=True)))
-        except wikipedia.SpamfilterError, error:
-            wikipedia.output(
+        except pywikibot.EditConflict:
+            pywikibot.output(
+                u'Skipping %s because of edit conflict'
+                % article.title(asLink=True)
+            )
+        except pywikibot.LockedPage:
+            pywikibot.output(u'Skipping locked page %s'
+                             % article.title(asLink=True)
+            )
+            self.edit_requests.append(
+                    (article.title(asLink=True, textlink=True),
+                     oldCat.title(asLink=True, textlink=True),
+                     newCat.title(asLink=True, textlink=True)
+                    ))
+        except pywikibot.SpamfilterError, error:
+            pywikibot.output(
                 u'Changing page %s blocked by spam filter (URL=%s)'
-                             % (article.aslink(), error.url))
-        except wikipedia.NoUsername:
-            wikipedia.output(
+                             % (article.title(asLink=True), error.url))
+        except pywikibot.NoUsername:
+            pywikibot.output(
                 u"Page %s not saved; sysop privileges required."
-                             % article.aslink())
-            self.edit_requests.append((article.aslink(textlink=True),
-                                       oldCat.aslink(textlink=True),
-                                       newCat.aslink(textlink=True)))
-        except wikipedia.PageNotSaved, error:
-            wikipedia.output(u"Saving page %s failed: %s"
-                             % (article.aslink(), error.message))
+                             % article.title(asLink=True))
+            self.edit_requests.append(
+                    (article.title(asLink=True, textlink=True),
+                     oldCat.title(asLink=True, textlink=True),
+                     newCat.title(asLink=True, textlink=True)
+                   ))
+        except pywikibot.PageNotSaved, error:
+            pywikibot.output(u"Saving page %s failed: %s"
+                             % (article.title(asLink=True), error.message))
         return False
 
     def move_contents(self, oldCatTitle, newCatTitle, editSummary):
         """The worker function that moves pages out of oldCat into newCat"""
         while True:
             try:
-                oldCat = catlib.Category(self.site,
-                                         self.catprefix + oldCatTitle)
-                newCat = catlib.Category(self.site,
-                                         self.catprefix + newCatTitle)
+                oldCat = pywikibot.Category(self.site,
+                                            self.catprefix + oldCatTitle)
+                newCat = pywikibot.Category(self.site,
+                                            self.catprefix + newCatTitle)
 
                 # Move articles
                 found, moved = 0, 0
-                for result in self.query_results(list="categorymembers",
-                                                 cmtitle=oldCat.title(),
-                                                 cmprop="title|sortkey",
-                                                 cmlimit="max"):
-                    found += len(result['categorymembers'])
-                    for item in result['categorymembers']:
-                        article = wikipedia.Page(self.site, item['title'])
-                        changed = self.change_category(article, oldCat, newCat,
-                                                       comment=editSummary)
-                        if changed: moved += 1
+                for article in oldCat.members():
+                    found += 1
+                    changed = self.change_category(article, oldCat, newCat,
+                                                   comment=editSummary)
+                    if changed: moved += 1
 
                 # pass 2: look for template doc pages
-                for result in self.query_results(list="categorymembers",
-                                                 cmtitle=oldCat.title(),
-                                                 cmprop="title|sortkey",
-                                                 cmnamespace="10",
-                                                 cmlimit="max"):
-                    for item in result['categorymembers']:
-                        doc = wikipedia.Page(self.site, item['title']+"/doc")
-                        try:
-                            old_text = doc.get()
-                        except wikipedia.Error:
-                            continue
-                        changed = self.change_category(doc, oldCat, newCat,
-                                                       comment=editSummary)
-                        if changed: moved += 1
+                for item in pywikibot.data.api.ListGenerator(
+                                "categorymembers", cmtitle=oldCat.title(),
+                                cmprop="title|sortkey", cmnamespace="10",
+                                cmlimit="max"):
+                    doc = pywikibot.Page(
+                              pywikibot.Link(item['title']+"/doc", self.site)
+                          )
+                    try:
+                        old_text = doc.get()
+                    except pywikibot.Error:
+                        continue
+                    changed = self.change_category(doc, oldCat, newCat,
+                                                   comment=editSummary)
+                    if changed: moved += 1
 
                 if found:
-                    wikipedia.output(u"%s: %s found, %s moved"
+                    pywikibot.output(u"%s: %s found, %s moved"
                                      % (oldCat.title(), found, moved))
                 return (found, moved)
-            except wikipedia.ServerError:
-                wikipedia.output(u"Server error: retrying in 5 seconds...")
+            except pywikibot.ServerError:
+                pywikibot.output(u"Server error: retrying in 5 seconds...")
                 time.sleep(5)
                 continue
             except KeyboardInterrupt:
@@ -265,83 +259,12 @@
             raise RuntimeError
         return (deadline.strftime(dateformat) > cat.editTime())
 
-    def query_results(self, **data):
-        """Iterate results from API action=query, using data as parameters."""
-        addr = self.site.apipath()
-        querydata = {'action': 'query',
-                     'format': 'json',
-                     'maxlag': str(wikipedia.config.maxlag)}
-        querydata.update(data)
-        if not querydata.has_key("action")\
-                or not querydata['action'] == 'query':
-            raise ValueError(
-                "query_results: 'action' set to value other than 'query'"
-                )
-        waited = 0
-        while True:
-            response, data = self.site.postForm(addr, querydata)
-            if response.status != 200:
-                # WARNING: if the server is down, this could
-                # cause an infinite loop
-                wikipedia.output(u"HTTP error %i received; retrying..."
-                                  % response.status)
-                time.sleep(5)
-                continue
-            if data.startswith(u"unknown_action"):
-                e = {'code': data[:14], 'info': data[16:]}
-                raise APIError(e)
-            try:
-                result = simplejson.loads(data)
-            except ValueError:
-                # if the result isn't valid JSON, there must be a server
-                # problem.  Wait a few seconds and try again
-                # WARNING: if the server is down, this could
-                # cause an infinite loop
-                wikipedia.output(u"Invalid API response received; retrying...")
-                time.sleep(5)
-                continue
-            if type(result) is dict and result.has_key("error"):
-                if result['error']['code'] == "maxlag":
-                    print "Pausing due to server lag.\r",
-                    time.sleep(5)
-                    waited += 5
-                    if waited % 30 == 0:
-                        wikipedia.output(
-                            u"(Waited %i seconds due to server lag.)"
-                             % waited)
-                    continue
-                else:
-                    # raise error
-                    raise APIError(result['error'])
-            waited = 0
-            if type(result) is list:
-                # query returned no results
-                return
-            assert type(result) is dict, \
-                   "Unexpected result of type '%s' received." % type(result)
-            if "query" not in result:
-                # query returned no results
-                return
-            yield result['query']
-            if result.has_key("query-continue"):
-                assert len(result['query-continue'].keys()) == 1, \
-                       "More than one query-continue key returned: %s" \
-                       % result['query-continue'].keys()
-                query_type = result['query-continue'].keys()[0]
-                assert (query_type in querydata.keys()
-                        or query_type in querydata.values()), \
-                       "Site returned unknown query-continue type '%s'"\
-                       % query_type
-                querydata.update(result['query-continue'][query_type])
-            else:
-                return
-
     def get_log_text(self):
         """Rotate log text and return the most recent text."""
         LOG_SIZE = 7  # Number of items to keep in active log
         try:
             log_text = self.log_page.get()
-        except wikipedia.NoPage:
+        except pywikibot.NoPage:
             log_text = u""
         log_items = {}
         header = None
@@ -367,13 +290,15 @@
                 % (self.site.protocol(),
                    self.site.hostname(),
                    self.site.scriptpath(),
-                   self.log_page.urlname(),
+                   self.log_page.title(asUrl=True),
                    rotate_revid))
         return log_text
 
     def run(self):
         """Run the bot"""
-        user = self.site.loggedInAs()
+        global destmap, catlist, catmap
+        
+        user = self.site.user()
         redirect_magicwords = ["redirect"]
         other_words = self.site.redirect()
         if other_words:
@@ -382,9 +307,9 @@
 
         l = time.localtime()
         today = "%04d-%02d-%02d" % l[:3]
-        edit_request_page = wikipedia.Page(self.site,
+        edit_request_page = pywikibot.Page(self.site,
                             u"User:%(user)s/category edit requests" % locals())
-        datafile = wikipedia.config.datafilepath(
+        datafile = pywikibot.config.datafilepath(
                    "%s-catmovebot-data" % self.site.dbName())
         try:
             inp = open(datafile, "rb")
@@ -399,7 +324,7 @@
             template_list = self.redir_templates[self.site.family.name
                                                 ][self.site.lang]
         except KeyError:
-            wikipedia.output(u"No redirect templates defined for %s"
+            pywikibot.output(u"No redirect templates defined for %s"
                               % self.site.sitename())
             return
         # regex to match soft category redirects
@@ -419,96 +344,82 @@
 
         # check for hard-redirected categories that are not already marked
         # with an appropriate template
-        comment = wikipedia.translate(self.site.lang, self.redir_comment)
-        for result in self.query_results(list='allpages',
-                                         apnamespace='14', # Category:
-                                         apfrom='!',
-                                         apfilterredir='redirects',
-                                         aplimit='max'):
-            gen = (wikipedia.Page(self.site, page_item['title'])
-                   for page_item in result['allpages'])
-            # gen yields all hard redirect pages in namespace 14
-            for page in pagegenerators.PreloadingGenerator(gen, 120):
-                if page.isCategoryRedirect():
-                    # this is already a soft-redirect, so skip it (for now)
-                    continue
-                target = page.getRedirectTarget()
-                if target.namespace() == 14:
-                    # this is a hard-redirect to a category page
-                    newtext = (u"{{%(template)s|%(cat)s}}"
-                               % {'cat': target.titleWithoutNamespace(),
-                                  'template': template_list[0]})
-                    try:
-                        page.put(newtext, comment, minorEdit=True)
-                        self.log_text.append(u"* Added {{tl|%s}} to %s"
-                                         % (template_list[0],
-                                            page.aslink(textlink=True)))
-                    except wikipedia.Error, e:
-                        self.log_text.append(
-                            u"* Failed to add {{tl|%s}} to %s (%s)"
-                             % (template_list[0],
-                                page.aslink(textlink=True),
-                                e))
-                else:
-                    problems.append(
-                        u"# %s is a hard redirect to %s"
-                         % (page.aslink(textlink=True),
-                            target.aslink(textlink=True)))
+        comment = pywikibot.translate(self.site.lang, self.redir_comment)
+        for page in pagegenerators.PreloadingGenerator(
+                        self.site.allpages(namespace=14, filterredir=True)
+                    ):
+            # generator yields all hard redirect pages in namespace 14
+            if page.isCategoryRedirect():
+                # this is already a soft-redirect, so skip it (for now)
+                continue
+            target = page.getRedirectTarget()
+            if target.namespace() == 14:
+                # this is a hard-redirect to a category page
+                newtext = (u"{{%(template)s|%(cat)s}}"
+                           % {'cat': target.title(withNamespace=False),
+                              'template': template_list[0]})
+                try:
+                    page.put(newtext, comment, minorEdit=True)
+                    self.log_text.append(u"* Added {{tl|%s}} to %s"
+                                     % (template_list[0],
+                                        page.title(asLink=True, textlink=True)))
+                except pywikibot.Error, e:
+                    self.log_text.append(
+                        u"* Failed to add {{tl|%s}} to %s (%s)"
+                         % (template_list[0],
+                            page.title(asLink=True, textlink=True),
+                            e))
+            else:
+                problems.append(
+                    u"# %s is a hard redirect to %s"
+                     % (page.title(asLink=True, textlink=True),
+                        target.title(asLink=True, textlink=True)))
 
-        wikipedia.output("Done checking hard-redirect category pages.")
+        pywikibot.output("Done checking hard-redirect category pages.")
 
-        comment = wikipedia.translate(self.site.lang, self.move_comment)
-        scan_data = {
-            u'action': 'query',
-            u'list': 'embeddedin',
-            u'einamespace': '14',   # Category:
-            u'eilimit': 'max',
-            u'format': 'json'
-        }
+        comment = pywikibot.translate(self.site.lang, self.move_comment)
         counts, destmap, catmap = {}, {}, {}
-        catlist, catpages, nonemptypages = [], [], []
-        target = self.cat_redirect_cat[self.site.family.name][self.site.lang]
+        catlist, nonemptypages = [], []
+        redircat = pywikibot.Category(
+                       pywikibot.Link(
+                           self.cat_redirect_cat[self.site.family.name]
+                                                [self.site.lang],
+                           self.site)
+                   )
 
         # get a list of all members of the category-redirect category
-        for result in self.query_results(generator=u'categorymembers',
-                                         gcmtitle=target,
-                                         gcmnamespace=u'14', # CATEGORY
-                                         gcmlimit=u'max',
-                                         prop='info|categoryinfo'):
-            for catdata in result['pages'].values():
-                thispage = wikipedia.Page(self.site, catdata['title'])
-                catpages.append(thispage)
-                if 'categoryinfo' in catdata \
-                        and catdata['categoryinfo']['size'] != "0":
-                    # save those categories that have contents
-                    nonemptypages.append(thispage)
+        catpages = list(redircat.subcategories())
 
         # preload the category pages for redirected categories
-        wikipedia.output(u"")
-        wikipedia.output(u"Preloading %s category redirect pages"
+        pywikibot.output(u"")
+        pywikibot.output(u"Preloading %s category redirect pages"
                          % len(catpages))
-        for cat in pagegenerators.PreloadingGenerator(catpages, 120):
-            cat_title = cat.titleWithoutNamespace()
+        for cat in pagegenerators.PreloadingGenerator(catpages):
+            catdata = cat.categoryinfo
+            if "size" in catdata and int(catdata['size']):
+                # save those categories that have contents
+                nonemptypages.append(cat)
+            cat_title = cat.title(withNamespace=False)
             if "category redirect" in cat_title:
                 self.log_text.append(u"* Ignoring %s"
-                                      % cat.aslink(textlink=True))
+                                      % cat.title(asLink=True, textlink=True))
                 continue
             try:
-                text = cat.get(get_redirect=True)
-            except wikipedia.Error:
+                if not cat.isCategoryRedirect():
+                    self.log_text.append(u"* False positive: %s"
+                                          % cat.title(asLink=True,
+                                                      textlink=True))
+                    continue
+            except pywikibot.Error:
                 self.log_text.append(u"* Could not load %s; ignoring"
-                                      % cat.aslink(textlink=True))
+                                      % cat.title(asLink=True, textlink=True))
                 continue
-            if not cat.isCategoryRedirect():
-                self.log_text.append(u"* False positive: %s"
-                                      % cat.aslink(textlink=True))
-                continue
             if cat_title not in record:
                 # make sure every redirect has a record entry
                 record[cat_title] = {today: None}
             catlist.append(cat)
             target = cat.getCategoryRedirectTarget()
-            destination = target.titleWithoutNamespace()
+            destination = target.title(withNamespace=False)
             destmap.setdefault(target, []).append(cat)
             catmap[cat] = destination
 ##            if match.group(1):
@@ -519,26 +430,27 @@
 ##                            u"Robot: fixing category redirect parameter format")
 ##                    self.log_text.append(
 ##                        u"* Removed category prefix from parameter in %s"
-##                         % cat.aslink(textlink=True))
-##                except wikipedia.Error:
+##                         % cat.title(asLink=True, textlink=True))
+##                except pywikibot.Error:
 ##                    self.log_text.append(
 ##                        u"* Unable to save changes to %s"
-##                         % cat.aslink(textlink=True))
+##                         % cat.title(asLink=True, textlink=True))
 
         # delete record entries for non-existent categories
         for cat_name in list(record.keys()):
-            if catlib.Category(self.site,
-                               self.catprefix+cat_name) not in catmap:
+            if pywikibot.Category(
+                    pywikibot.Link(self.catprefix+cat_name, self.site)
+               ) not in catmap:
                 del record[cat_name]
 
-        wikipedia.output(u"")
-        wikipedia.output(u"Checking %s destination categories" % len(destmap))
-        for dest in pagegenerators.PreloadingGenerator(destmap.keys(), 120):
+        pywikibot.output(u"")
+        pywikibot.output(u"Checking %s destination categories" % len(destmap))
+        for dest in pagegenerators.PreloadingGenerator(destmap.keys()):
             if not dest.exists():
                 for d in destmap[dest]:
                     problems.append("# %s redirects to %s"
-                                    % (d.aslink(textlink=True),
-                                       dest.aslink(textlink=True)))
+                                    % (d.title(asLink=True, textlink=True),
+                                       dest.title(asLink=True, textlink=True)))
                     catlist.remove(d)
                     # do a null edit on d to make it appear in the
                     # "needs repair" category (if this wiki has one)
@@ -549,53 +461,59 @@
             if dest in catlist:
                 for d in destmap[dest]:
                     # is catmap[dest] also a redirect?
-                    newcat = catlib.Category(self.site,
-                                             self.catprefix+catmap[dest])
+                    newcat = pywikibot.Category(
+                                 pywikibot.Link(self.catprefix+catmap[dest],
+                                                self.site)
+                             )
                     while newcat in catlist:
                         if newcat == d or newcat == dest:
                             self.log_text.append(u"* Redirect loop from %s"
-                                             % newcat.aslink(textlink=True))
+                                             % newcat.title(asLink=True,
+                                                            textlink=True))
                             break
-                        newcat = catlib.Category(self.site,
-                                                 self.catprefix+catmap[newcat])
+                        newcat = pywikibot.Category(
+                                     pywikibot.Link(
+                                         self.catprefix+catmap[newcat],
+                                         self.site)
+                                 )
                     else:
                         self.log_text.append(
                             u"* Fixed double-redirect: %s -> %s -> %s"
-                                % (d.aslink(textlink=True),
-                                   dest.aslink(textlink=True),
-                                   newcat.aslink(textlink=True)))
+                                % (d.title(asLink=True, textlink=True),
+                                   dest.title(asLink=True, textlink=True),
+                                   newcat.title(asLink=True, textlink=True)))
                         oldtext = d.get(get_redirect=True)
                         # remove the old redirect from the old text,
                         # leaving behind any non-redirect text
                         oldtext = template_regex.sub("", oldtext)
                         newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
                                     % {'redirtemp': template_list[0],
-                                       'ncat': newcat.titleWithoutNamespace()})
+                                       'ncat': newcat.title(withNamespace=False)})
                         newtext = newtext + oldtext.strip()
                         try:
                             d.put(newtext,
-                                  wikipedia.translate(self.site.lang,
+                                  pywikibot.translate(self.site.lang,
                                                       self.dbl_redir_comment),
                                   minorEdit=True)
-                        except wikipedia.Error, e:
+                        except pywikibot.Error, e:
                             self.log_text.append("** Failed: %s" % str(e))
 
         # only scan those pages that have contents (nonemptypages)
         # and that haven't been removed from catlist as broken redirects
         cats_to_empty = set(catlist) & set(nonemptypages)
-        wikipedia.output(u"")
-        wikipedia.output(u"Moving pages out of %s redirected categories."
+        pywikibot.output(u"")
+        pywikibot.output(u"Moving pages out of %s redirected categories."
                          % len(cats_to_empty))
 #        thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
 #        threadpool = ThreadList(limit=1)    # disabling multi-threads
 
         for cat in cats_to_empty:
-            cat_title = cat.titleWithoutNamespace()
+            cat_title = cat.title(withNamespace=False)
             if not self.readyToEdit(cat):
                 counts[cat_title] = None
                 self.log_text.append(
                     u"* Skipping %s; in cooldown period."
-                     % cat.aslink(textlink=True))
+                     % cat.title(asLink=True, textlink=True))
                 continue
             found, moved = self.move_contents(cat_title, catmap[cat],
                                               editSummary=comment)
@@ -612,7 +530,7 @@
 
         cPickle.dump(record, open(datafile, "wb"))
 
-        wikipedia.setAction(wikipedia.translate(self.site.lang,
+        pywikibot.setAction(pywikibot.translate(self.site.lang,
                                                 self.maint_comment))
         self.log_text.sort()
         self.log_page.put(u"\n==%i-%02i-%02iT%02i:%02i:%02iZ==\n"
@@ -629,7 +547,7 @@
 def main(*args):
     global bot
     try:
-        a = wikipedia.handleArgs(*args)
+        a = pywikibot.handleArgs(*args)
         if len(a) == 1:
             raise RuntimeError('Unrecognized argument "%s"' % a[0])
         elif a:
@@ -638,7 +556,7 @@
         bot = CategoryRedirectBot()
         bot.run()
     finally:
-        wikipedia.stopme()
+        pywikibot.stopme()
 
 
 if __name__ == "__main__":



_______________________________________________
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l

Reply via email to