jenkins-bot has submitted this change and it was merged.

Change subject: [WIP] Merge commons_category_redirect.py
......................................................................


[WIP] Merge commons_category_redirect.py

- use wikidata to get localized Non-empty_category_redirects
  category with -tiny option.
- Set cooldown days with -delay option
- CategoryRedirectBot becomes a subclass of pywikibot.Bot
- split code into parts

Change-Id: Iaa36e36ee39689e376c181df36784a189b40bc4f
---
M scripts/category_redirect.py
1 file changed, 140 insertions(+), 104 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/category_redirect.py b/scripts/category_redirect.py
index f6274af..87d8693 100755
--- a/scripts/category_redirect.py
+++ b/scripts/category_redirect.py
@@ -2,8 +2,6 @@
 # -*- coding: utf-8 -*-
 """This bot will move pages out of redirected categories.
 
-Usage: category_redirect.py [options]
-
 The bot will look for categories that are marked with a category redirect
 template, take the first parameter of the template as the target of the
 redirect, and move all pages and subcategories of the category there. It
@@ -11,6 +9,15 @@
 A log is written under <userpage>/category_redirect_log. Only category pages
 that haven't been edited for a certain cooldown period (currently 7 days)
 are taken into account.
+
+-delay:#          Set an amount of days. If the category is edited more recently
+                  than given days, ignore it. Default is 7.
+
+-tiny             Only loops over Category:Non-empty_category_redirects and
+                  moves all images, pages and categories in redirect categories
+                  to the target category.
+
+Usage: category_redirect.py [options]
 
 """
 
@@ -24,7 +31,7 @@
 import sys
 import re
 import time
-from datetime import datetime, timedelta
+from datetime import timedelta
 import pywikibot
 from pywikibot import i18n, pagegenerators, config
 
@@ -34,17 +41,25 @@
     import cPickle
 
 
-class CategoryRedirectBot(object):
+class CategoryRedirectBot(pywikibot.Bot):
 
     """Page category update bot."""
 
-    def __init__(self):
+    def __init__(self, **kwargs):
         """Constructor."""
-        self.cooldown = 7  # days
+        self.availableOptions.update({
+            'tiny': False,  # use Non-empty category redirects only
+            'delay': 7,  # cool down delay in days
+        })
+        super(CategoryRedirectBot, self).__init__(**kwargs)
+        self.cooldown = self.getOption('delay')
         self.site = pywikibot.Site()
         self.catprefix = self.site.namespace(14) + ":"
         self.log_text = []
         self.edit_requests = []
+        self.problems = []
+        self.template_list = []
+        self.cat_title = None
         self.log_page = pywikibot.Page(self.site,
                                        u"User:%(user)s/category redirect log"
                                        % {'user': self.site.username()})
@@ -53,28 +68,27 @@
 
         # Category that contains all redirected category pages
         self.cat_redirect_cat = {
-            'wikipedia': {
-                'ar': u"تصنيف:تحويلات تصنيفات ويكيبيديا",
-                'cs': u"Kategorie:Zastaralé kategorie",
-                'da': "Kategori:Omdirigeringskategorier",
-                'en': "Category:Wikipedia soft redirected categories",
-                'es': "Categoría:Wikipedia:Categorías redirigidas",
-                'fa': u"رده:رده‌های منتقل‌شده",
-                'hu': "Kategória:Kategóriaátirányítások",
-                'ja': "Category:移行中のカテゴリ",
-                'no': "Kategori:Wikipedia omdirigertekategorier",
-                'pl': "Kategoria:Przekierowania kategorii",
-                'pt': "Categoria:!Redirecionamentos de categorias",
-                'ru': "Категория:Википедия:Категории-дубликаты",
-                'simple': "Category:Category redirects",
-                'sh': u"Kategorija:Preusmjerene kategorije Wikipedije",
-                'vi': u"Thể loại:Thể loại đổi hướng",
-                'zh': u"Category:已重定向的分类",
-            },
-            'commons': {
-                'commons': "Category:Category redirects"
-            }
+            'commons': "Category:Category redirects",
+            'ar': u"تصنيف:تحويلات تصنيفات ويكيبيديا",
+            'cs': u"Kategorie:Zastaralé kategorie",
+            'da': "Kategori:Omdirigeringskategorier",
+            'en': "Category:Wikipedia soft redirected categories",
+            'es': "Categoría:Wikipedia:Categorías redirigidas",
+            'fa': u"رده:رده‌های منتقل‌شده",
+            'hu': "Kategória:Kategóriaátirányítások",
+            'ja': "Category:移行中のカテゴリ",
+            'no': "Kategori:Wikipedia omdirigertekategorier",
+            'pl': "Kategoria:Przekierowania kategorii",
+            'pt': "Categoria:!Redirecionamentos de categorias",
+            'ru': "Категория:Википедия:Категории-дубликаты",
+            'simple': "Category:Category redirects",
+            'sh': u"Kategorija:Preusmjerene kategorije Wikipedije",
+            'vi': u"Thể loại:Thể loại đổi hướng",
+            'zh': u"Category:已重定向的分类",
         }
+
+        # Category that contains non-empty redirected category pages
+        self.tiny_cat_redirect_cat = 'Q8099903'
 
         self.move_comment = 'category_redirect-change-category'
         self.redir_comment = 'category_redirect-add-template'
@@ -84,6 +98,21 @@
             self.site.code, 'category_redirect-edit-request') + u'\n~~~~'
         self.edit_request_item = i18n.twtranslate(
             self.site.code, 'category_redirect-edit-request-item')
+
+    def get_cat_title(self):
+        """Specify the category title."""
+        if self.getOption('tiny'):
+            repo = self.site.data_repository()
+            dp = pywikibot.ItemPage(repo, self.tiny_cat_redirect_cat)
+            try:
+                self.cat_title = dp.getSitelink(self.site)
+            except pywikibot.NoPage:
+                self.cat_title = None
+        else:
+            self.cat_title = pywikibot.translate(self.site,
+                                                 self.cat_redirect_cat,
+                                                 fallback=False)
+        return self.cat_title is not None
 
     def move_contents(self, oldCatTitle, newCatTitle, editSummary):
         """The worker function that moves pages out of oldCat into newCat."""
@@ -137,7 +166,7 @@
 
     def readyToEdit(self, cat):
         """Return True if cat not edited during cooldown period, else False."""
-        today = datetime.now()
+        today = pywikibot.Timestamp.now()
         deadline = today + timedelta(days=-self.cooldown)
         if cat.editTime() is None:
             raise RuntimeError
@@ -173,13 +202,72 @@
                                % self.log_page.permalink(oldid=rotate_revid))
         return log_text
 
+    def check_hard_redirect(self):
+        """
+        Check for hard-redirected categories.
+
+        Check categories that are not already marked with an appropriate
+        softredirect template.
+        """
+        pywikibot.output("Checking hard-redirect category pages.")
+        comment = i18n.twtranslate(self.site.code, self.redir_comment)
+
+        # generator yields all hard redirect pages in namespace 14
+        for page in pagegenerators.PreloadingGenerator(
+                self.site.allpages(namespace=14, filterredir=True), step=250):
+            if page.isCategoryRedirect():
+                # this is already a soft-redirect, so skip it (for now)
+                continue
+            try:
+                target = page.getRedirectTarget()
+            except pywikibot.CircularRedirect:
+                target = page
+                self.problems.append(u"# %s is a self-linked redirect"
+                                     % page.title(asLink=True, textlink=True))
+            except RuntimeError:
+                # race condition: someone else removed the redirect while we
+                # were checking for it
+                continue
+            if target.namespace() == 14:
+                # this is a hard-redirect to a category page
+                newtext = (u"{{%(template)s|%(cat)s}}"
+                           % {'cat': target.title(withNamespace=False),
+                              'template': self.template_list[0]})
+                try:
+                    page.text = newtext
+                    page.save(comment)
+                    self.log_text.append(u"* Added {{tl|%s}} to %s"
+                                         % (self.template_list[0],
+                                            page.title(asLink=True,
+                                                       textlink=True)))
+                except pywikibot.Error:
+                    self.log_text.append(u"* Failed to add {{tl|%s}} to %s"
+                                         % (self.template_list[0],
+                                            page.title(asLink=True,
+                                                       textlink=True)))
+            else:
+                self.problems.append(u"# %s is a hard redirect to %s"
+                                     % (page.title(asLink=True, textlink=True),
+                                        target.title(asLink=True, textlink=True)))
+
     def run(self):
         """Run the bot."""
         global destmap, catlist, catmap
 
+        # validate L10N
+        try:
+            self.template_list = self.site.family.category_redirect_templates[
+                self.site.code]
+        except KeyError:
+            pywikibot.warning(u"No redirect templates defined for %s"
+                              % self.site)
+            return
+        if not self.get_cat_title():
+            pywikibot.warning(u"No redirect category found for %s" % self.site)
+            return
+
         # user() invokes login()
         user = self.site.user()
-        problems = []
         newredirs = []
 
         l = time.localtime()
@@ -196,13 +284,6 @@
         if record:
             with open(datafile + ".bak", "wb") as f:
                 cPickle.dump(record, f, protocol=config.pickle_protocol)
-        try:
-            template_list = self.site.family.category_redirect_templates[
-                self.site.code]
-        except KeyError:
-            pywikibot.output(u"No redirect templates defined for %s"
-                             % self.site.sitename())
-            return
         # regex to match soft category redirects
         #  note that any templates containing optional "category:" are
         #  incorrect and will be fixed by the bot
@@ -214,69 +295,22 @@
                      (?:\|[^|}]*)*}}         # optional arguments 2+, ignored
              """ % {'prefix': self.site.namespace(10).lower(),
                     'template': "|".join(item.replace(" ", "[ _]+")
-                                         for item in template_list),
+                                         for item in self.template_list),
                     'catns': self.site.namespace(14)},
             re.I | re.X)
 
-        # check for hard-redirected categories that are not already marked
-        # with an appropriate template
-        comment = i18n.twtranslate(self.site.code, self.redir_comment)
-        for page in pagegenerators.PreloadingGenerator(
-                self.site.allpages(namespace=14, filterredir=True), step=250):
-            # generator yields all hard redirect pages in namespace 14
-            if page.isCategoryRedirect():
-                # this is already a soft-redirect, so skip it (for now)
-                continue
-            try:
-                target = page.getRedirectTarget()
-            except pywikibot.CircularRedirect:
-                target = page
-                problems.append(u"# %s is a self-linked redirect"
-                                % page.title(asLink=True, textlink=True))
-            except RuntimeError:
-                # race condition: someone else removed the redirect while we
-                # were checking for it
-                continue
-            if target.namespace() == 14:
-                # this is a hard-redirect to a category page
-                newtext = (u"{{%(template)s|%(cat)s}}"
-                           % {'cat': target.title(withNamespace=False),
-                              'template': template_list[0]})
-                try:
-                    page.text = newtext
-                    page.save(comment)
-                    self.log_text.append(u"* Added {{tl|%s}} to %s"
-                                         % (template_list[0],
-                                            page.title(asLink=True,
-                                                       textlink=True)))
-                except pywikibot.Error as e:
-                    self.log_text.append(u"* Failed to add {{tl|%s}} to %s"
-                                         % (template_list[0],
-                                            page.title(asLink=True,
-                                                       textlink=True)))
-            else:
-                problems.append(u"# %s is a hard redirect to %s"
-                                % (page.title(asLink=True, textlink=True),
-                                   target.title(asLink=True, textlink=True)))
-
-        pywikibot.output("Done checking hard-redirect category pages.")
+        self.check_hard_redirect()
 
         comment = i18n.twtranslate(self.site.code, self.move_comment)
         counts, destmap, catmap = {}, {}, {}
         catlist, nonemptypages = [], []
-        redircat = pywikibot.Category(
-            pywikibot.Link(self.cat_redirect_cat
-                           [self.site.family.name][self.site.code], self.site))
+        redircat = pywikibot.Category(pywikibot.Link(self.cat_title, self.site))
 
-        # get a list of all members of the category-redirect category
-        catpages = dict((c, None)
-                        for c in redircat.subcategories())
-
-        # check the category pages for redirected categories
-        pywikibot.output(u"")
-        pywikibot.output(u"Checking %s category redirect pages"
-                         % len(catpages))
-        for cat in catpages:
+        pywikibot.output(u"\nChecking %d category redirect pages"
+                         % redircat.categoryinfo['subcats'])
+        catpages = set()
+        for cat in redircat.subcategories():
+            catpages.add(cat)
             cat_title = cat.title(withNamespace=False)
             if "category redirect" in cat_title:
                 self.log_text.append(u"* Ignoring %s"
@@ -333,9 +367,9 @@
                 continue
             dest = cat.getCategoryRedirectTarget()
             if not dest.exists():
-                problems.append("# %s redirects to %s"
-                                % (cat.title(asLink=True, textlink=True),
-                                   dest.title(asLink=True, textlink=True)))
+                self.problems.append("# %s redirects to %s"
+                                     % (cat.title(asLink=True, textlink=True),
+                                        dest.title(asLink=True, textlink=True)))
                 # do a null edit on cat to update any special redirect
                 # categories this wiki might maintain
                 try:
@@ -365,7 +399,7 @@
                     # leaving behind any non-redirect text
                     oldtext = template_regex.sub("", oldtext)
                     newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
-                               % {'redirtemp': template_list[0],
+                               % {'redirtemp': self.template_list[0],
                                   'ncat': double.title(withNamespace=False)})
                     newtext = newtext + oldtext.strip()
                     try:
@@ -399,7 +433,7 @@
             cPickle.dump(record, f, protocol=config.pickle_protocol)
 
         self.log_text.sort()
-        problems.sort()
+        self.problems.sort()
         newredirs.sort()
         comment = i18n.twtranslate(self.site.code, self.maint_comment)
         self.log_page.text = (u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n"
@@ -407,7 +441,7 @@
                               + u"\n".join(self.log_text)
                               + u"\n* New redirects since last report:\n"
                               + u"\n".join(newredirs)
-                              + u"\n" + u"\n".join(problems)
+                              + u"\n" + u"\n".join(self.problems)
                               + u"\n" + self.get_log_text())
         self.log_page.save(comment)
         if self.edit_requests:
@@ -427,13 +461,15 @@
     @param args: command line arguments
     @type args: list of unicode
     """
-    a = pywikibot.handle_args(args)
-    if len(a) == 1:
-        raise RuntimeError('Unrecognized argument "%s"' % a[0])
-    elif a:
-        raise RuntimeError('Unrecognized arguments: ' +
-                           " ".join(('"%s"' % arg) for arg in a))
-    bot = CategoryRedirectBot()
+    options = {}
+    for arg in pywikibot.handle_args(args):
+        if arg.startswith('-delay:'):
+            pos = arg.find(':')
+            options[arg[1:pos]] = int(arg[pos + 1:])
+        else:
+            # generic handling, as all other options are boolean
+            options[arg[1:]] = True
+    bot = CategoryRedirectBot(**options)
     bot.run()
 
 if __name__ == "__main__":

-- 
To view, visit https://gerrit.wikimedia.org/r/175413
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iaa36e36ee39689e376c181df36784a189b40bc4f
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to