jenkins-bot has submitted this change and it was merged.
Change subject: [WIP] Merge commons_category_redirect.py
......................................................................
[WIP] Merge commons_category_redirect.py
- use wikidata to get localized Non-empty_category_redirects
category with -tiny option.
- Set cooldown days with -delay option
- CategoryRedirectBot becomes a subclass of pywikibot.Bot
- split code into parts
Change-Id: Iaa36e36ee39689e376c181df36784a189b40bc4f
---
M scripts/category_redirect.py
1 file changed, 140 insertions(+), 104 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/category_redirect.py b/scripts/category_redirect.py
index f6274af..87d8693 100755
--- a/scripts/category_redirect.py
+++ b/scripts/category_redirect.py
@@ -2,8 +2,6 @@
# -*- coding: utf-8 -*-
"""This bot will move pages out of redirected categories.
-Usage: category_redirect.py [options]
-
The bot will look for categories that are marked with a category redirect
template, take the first parameter of the template as the target of the
redirect, and move all pages and subcategories of the category there. It
@@ -11,6 +9,15 @@
A log is written under <userpage>/category_redirect_log. Only category pages
that haven't been edited for a certain cooldown period (currently 7 days)
are taken into account.
+
+-delay:# Set an amount of days. If the category is edited more recently
+ than given days, ignore it. Default is 7.
+
+-tiny Only loops over Category:Non-empty_category_redirects and
+ moves all images, pages and categories in redirect categories
+ to the target category.
+
+Usage: category_redirect.py [options]
"""
@@ -24,7 +31,7 @@
import sys
import re
import time
-from datetime import datetime, timedelta
+from datetime import timedelta
import pywikibot
from pywikibot import i18n, pagegenerators, config
@@ -34,17 +41,25 @@
import cPickle
-class CategoryRedirectBot(object):
+class CategoryRedirectBot(pywikibot.Bot):
"""Page category update bot."""
- def __init__(self):
+ def __init__(self, **kwargs):
"""Constructor."""
- self.cooldown = 7 # days
+ self.availableOptions.update({
+ 'tiny': False, # use Non-empty category redirects only
+ 'delay': 7, # cool down delay in days
+ })
+ super(CategoryRedirectBot, self).__init__(**kwargs)
+ self.cooldown = self.getOption('delay')
self.site = pywikibot.Site()
self.catprefix = self.site.namespace(14) + ":"
self.log_text = []
self.edit_requests = []
+ self.problems = []
+ self.template_list = []
+ self.cat_title = None
self.log_page = pywikibot.Page(self.site,
u"User:%(user)s/category redirect log"
% {'user': self.site.username()})
@@ -53,28 +68,27 @@
# Category that contains all redirected category pages
self.cat_redirect_cat = {
- 'wikipedia': {
- 'ar': u"تصنيف:تحويلات تصنيفات ويكيبيديا",
- 'cs': u"Kategorie:Zastaralé kategorie",
- 'da': "Kategori:Omdirigeringskategorier",
- 'en': "Category:Wikipedia soft redirected categories",
- 'es': "Categoría:Wikipedia:Categorías redirigidas",
- 'fa': u"رده:ردههای منتقلشده",
- 'hu': "Kategória:Kategóriaátirányítások",
- 'ja': "Category:移行中のカテゴリ",
- 'no': "Kategori:Wikipedia omdirigertekategorier",
- 'pl': "Kategoria:Przekierowania kategorii",
- 'pt': "Categoria:!Redirecionamentos de categorias",
- 'ru': "Категория:Википедия:Категории-дубликаты",
- 'simple': "Category:Category redirects",
- 'sh': u"Kategorija:Preusmjerene kategorije Wikipedije",
- 'vi': u"Thể loại:Thể loại đổi hướng",
- 'zh': u"Category:已重定向的分类",
- },
- 'commons': {
- 'commons': "Category:Category redirects"
- }
+ 'commons': "Category:Category redirects",
+ 'ar': u"تصنيف:تحويلات تصنيفات ويكيبيديا",
+ 'cs': u"Kategorie:Zastaralé kategorie",
+ 'da': "Kategori:Omdirigeringskategorier",
+ 'en': "Category:Wikipedia soft redirected categories",
+ 'es': "Categoría:Wikipedia:Categorías redirigidas",
+ 'fa': u"رده:ردههای منتقلشده",
+ 'hu': "Kategória:Kategóriaátirányítások",
+ 'ja': "Category:移行中のカテゴリ",
+ 'no': "Kategori:Wikipedia omdirigertekategorier",
+ 'pl': "Kategoria:Przekierowania kategorii",
+ 'pt': "Categoria:!Redirecionamentos de categorias",
+ 'ru': "Категория:Википедия:Категории-дубликаты",
+ 'simple': "Category:Category redirects",
+ 'sh': u"Kategorija:Preusmjerene kategorije Wikipedije",
+ 'vi': u"Thể loại:Thể loại đổi hướng",
+ 'zh': u"Category:已重定向的分类",
}
+
+ # Category that contains non-empty redirected category pages
+ self.tiny_cat_redirect_cat = 'Q8099903'
self.move_comment = 'category_redirect-change-category'
self.redir_comment = 'category_redirect-add-template'
@@ -84,6 +98,21 @@
self.site.code, 'category_redirect-edit-request') + u'\n~~~~'
self.edit_request_item = i18n.twtranslate(
self.site.code, 'category_redirect-edit-request-item')
+
+ def get_cat_title(self):
+ """Specify the category title."""
+ if self.getOption('tiny'):
+ repo = self.site.data_repository()
+ dp = pywikibot.ItemPage(repo, self.tiny_cat_redirect_cat)
+ try:
+ self.cat_title = dp.getSitelink(self.site)
+ except pywikibot.NoPage:
+ self.cat_title = None
+ else:
+ self.cat_title = pywikibot.translate(self.site,
+ self.cat_redirect_cat,
+ fallback=False)
+ return self.cat_title is not None
def move_contents(self, oldCatTitle, newCatTitle, editSummary):
"""The worker function that moves pages out of oldCat into newCat."""
@@ -137,7 +166,7 @@
def readyToEdit(self, cat):
"""Return True if cat not edited during cooldown period, else False."""
- today = datetime.now()
+ today = pywikibot.Timestamp.now()
deadline = today + timedelta(days=-self.cooldown)
if cat.editTime() is None:
raise RuntimeError
@@ -173,13 +202,72 @@
% self.log_page.permalink(oldid=rotate_revid))
return log_text
+ def check_hard_redirect(self):
+ """
+ Check for hard-redirected categories.
+
+ Check categories that are not already marked with an appropriate
+ softredirect template.
+ """
+ pywikibot.output("Checking hard-redirect category pages.")
+ comment = i18n.twtranslate(self.site.code, self.redir_comment)
+
+ # generator yields all hard redirect pages in namespace 14
+ for page in pagegenerators.PreloadingGenerator(
+ self.site.allpages(namespace=14, filterredir=True), step=250):
+ if page.isCategoryRedirect():
+ # this is already a soft-redirect, so skip it (for now)
+ continue
+ try:
+ target = page.getRedirectTarget()
+ except pywikibot.CircularRedirect:
+ target = page
+ self.problems.append(u"# %s is a self-linked redirect"
+ % page.title(asLink=True, textlink=True))
+ except RuntimeError:
+ # race condition: someone else removed the redirect while we
+ # were checking for it
+ continue
+ if target.namespace() == 14:
+ # this is a hard-redirect to a category page
+ newtext = (u"{{%(template)s|%(cat)s}}"
+ % {'cat': target.title(withNamespace=False),
+ 'template': self.template_list[0]})
+ try:
+ page.text = newtext
+ page.save(comment)
+ self.log_text.append(u"* Added {{tl|%s}} to %s"
+ % (self.template_list[0],
+ page.title(asLink=True,
+ textlink=True)))
+ except pywikibot.Error:
+ self.log_text.append(u"* Failed to add {{tl|%s}} to %s"
+ % (self.template_list[0],
+ page.title(asLink=True,
+ textlink=True)))
+ else:
+ self.problems.append(u"# %s is a hard redirect to %s"
+ % (page.title(asLink=True, textlink=True),
+ target.title(asLink=True,
textlink=True)))
+
def run(self):
"""Run the bot."""
global destmap, catlist, catmap
+ # validate L10N
+ try:
+ self.template_list = self.site.family.category_redirect_templates[
+ self.site.code]
+ except KeyError:
+ pywikibot.warning(u"No redirect templates defined for %s"
+ % self.site)
+ return
+ if not self.get_cat_title():
+ pywikibot.warning(u"No redirect category found for %s" % self.site)
+ return
+
# user() invokes login()
user = self.site.user()
- problems = []
newredirs = []
l = time.localtime()
@@ -196,13 +284,6 @@
if record:
with open(datafile + ".bak", "wb") as f:
cPickle.dump(record, f, protocol=config.pickle_protocol)
- try:
- template_list = self.site.family.category_redirect_templates[
- self.site.code]
- except KeyError:
- pywikibot.output(u"No redirect templates defined for %s"
- % self.site.sitename())
- return
# regex to match soft category redirects
# note that any templates containing optional "category:" are
# incorrect and will be fixed by the bot
@@ -214,69 +295,22 @@
(?:\|[^|}]*)*}} # optional arguments 2+, ignored
""" % {'prefix': self.site.namespace(10).lower(),
'template': "|".join(item.replace(" ", "[ _]+")
- for item in template_list),
+ for item in self.template_list),
'catns': self.site.namespace(14)},
re.I | re.X)
- # check for hard-redirected categories that are not already marked
- # with an appropriate template
- comment = i18n.twtranslate(self.site.code, self.redir_comment)
- for page in pagegenerators.PreloadingGenerator(
- self.site.allpages(namespace=14, filterredir=True), step=250):
- # generator yields all hard redirect pages in namespace 14
- if page.isCategoryRedirect():
- # this is already a soft-redirect, so skip it (for now)
- continue
- try:
- target = page.getRedirectTarget()
- except pywikibot.CircularRedirect:
- target = page
- problems.append(u"# %s is a self-linked redirect"
- % page.title(asLink=True, textlink=True))
- except RuntimeError:
- # race condition: someone else removed the redirect while we
- # were checking for it
- continue
- if target.namespace() == 14:
- # this is a hard-redirect to a category page
- newtext = (u"{{%(template)s|%(cat)s}}"
- % {'cat': target.title(withNamespace=False),
- 'template': template_list[0]})
- try:
- page.text = newtext
- page.save(comment)
- self.log_text.append(u"* Added {{tl|%s}} to %s"
- % (template_list[0],
- page.title(asLink=True,
- textlink=True)))
- except pywikibot.Error as e:
- self.log_text.append(u"* Failed to add {{tl|%s}} to %s"
- % (template_list[0],
- page.title(asLink=True,
- textlink=True)))
- else:
- problems.append(u"# %s is a hard redirect to %s"
- % (page.title(asLink=True, textlink=True),
- target.title(asLink=True, textlink=True)))
-
- pywikibot.output("Done checking hard-redirect category pages.")
+ self.check_hard_redirect()
comment = i18n.twtranslate(self.site.code, self.move_comment)
counts, destmap, catmap = {}, {}, {}
catlist, nonemptypages = [], []
- redircat = pywikibot.Category(
- pywikibot.Link(self.cat_redirect_cat
- [self.site.family.name][self.site.code], self.site))
+ redircat = pywikibot.Category(pywikibot.Link(self.cat_title,
self.site))
- # get a list of all members of the category-redirect category
- catpages = dict((c, None)
- for c in redircat.subcategories())
-
- # check the category pages for redirected categories
- pywikibot.output(u"")
- pywikibot.output(u"Checking %s category redirect pages"
- % len(catpages))
- for cat in catpages:
+ pywikibot.output(u"\nChecking %d category redirect pages"
+ % redircat.categoryinfo['subcats'])
+ catpages = set()
+ for cat in redircat.subcategories():
+ catpages.add(cat)
cat_title = cat.title(withNamespace=False)
if "category redirect" in cat_title:
self.log_text.append(u"* Ignoring %s"
@@ -333,9 +367,9 @@
continue
dest = cat.getCategoryRedirectTarget()
if not dest.exists():
- problems.append("# %s redirects to %s"
- % (cat.title(asLink=True, textlink=True),
- dest.title(asLink=True, textlink=True)))
+ self.problems.append("# %s redirects to %s"
+ % (cat.title(asLink=True, textlink=True),
+ dest.title(asLink=True,
textlink=True)))
# do a null edit on cat to update any special redirect
# categories this wiki might maintain
try:
@@ -365,7 +399,7 @@
# leaving behind any non-redirect text
oldtext = template_regex.sub("", oldtext)
newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
- % {'redirtemp': template_list[0],
+ % {'redirtemp': self.template_list[0],
'ncat': double.title(withNamespace=False)})
newtext = newtext + oldtext.strip()
try:
@@ -399,7 +433,7 @@
cPickle.dump(record, f, protocol=config.pickle_protocol)
self.log_text.sort()
- problems.sort()
+ self.problems.sort()
newredirs.sort()
comment = i18n.twtranslate(self.site.code, self.maint_comment)
self.log_page.text = (u"\n== %i-%02i-%02iT%02i:%02i:%02iZ ==\n"
@@ -407,7 +441,7 @@
+ u"\n".join(self.log_text)
+ u"\n* New redirects since last report:\n"
+ u"\n".join(newredirs)
- + u"\n" + u"\n".join(problems)
+ + u"\n" + u"\n".join(self.problems)
+ u"\n" + self.get_log_text())
self.log_page.save(comment)
if self.edit_requests:
@@ -427,13 +461,15 @@
@param args: command line arguments
@type args: list of unicode
"""
- a = pywikibot.handle_args(args)
- if len(a) == 1:
- raise RuntimeError('Unrecognized argument "%s"' % a[0])
- elif a:
- raise RuntimeError('Unrecognized arguments: ' +
- " ".join(('"%s"' % arg) for arg in a))
- bot = CategoryRedirectBot()
+ options = {}
+ for arg in pywikibot.handle_args(args):
+ if arg.startswith('-delay:'):
+ pos = arg.find(':')
+ options[arg[1:pos]] = int(arg[pos + 1:])
+ else:
+ # generic handling if we have boolean options
+ options[arg[1:]] = True
+ bot = CategoryRedirectBot(**options)
bot.run()
if __name__ == "__main__":
--
To view, visit https://gerrit.wikimedia.org/r/175413
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iaa36e36ee39689e376c181df36784a189b40bc4f
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits