Prianka has uploaded a new change for review. https://gerrit.wikimedia.org/r/185673
Change subject: Porting overcat_simple_filter.py from compat to core/scripts ...................................................................... Porting overcat_simple_filter.py from compat to core/scripts Added overcat_simple_filter.py in core/scripts/ as part of Pywikibot:Compat to Core Migration. Bug: T66876 Change-Id: I09b5c2b25b6dfc6427f90bf516417e64a63c5e6a --- A scripts/overcat_simple_filter.py 1 file changed, 74 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/73/185673/1 diff --git a/scripts/overcat_simple_filter.py b/scripts/overcat_simple_filter.py new file mode 100644 index 0000000..8cdb463 --- /dev/null +++ b/scripts/overcat_simple_filter.py @@ -0,0 +1,74 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +A bot to do some simple over categorization filtering. + +Now it uses the strategy to loop over all images in all the subcategories. +That might be a very good strategy when the parent category is very full, but +later on it will become very inefficient. + +""" +# +# (C) Pywikibot team, 2013 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' +# + +import sys +import pywikibot +import catlib +from pywikibot import pagegenerators + + +def filterCategory(page): + """ Loop over all subcategories of page and filter them. """ + # FIXME: category = catlib.Category(page) doesn't work + site = page.site() + title = page.title() + category = catlib.Category(site, title) + + for subcat in category.subcategories(): + filterSubCategory(subcat, category) + + +def filterSubCategory(subcat, category): + """ Filter category from all articles and files in subcat. """ + articleGen = pagegenerators.PreloadingGenerator( + pagegenerators.CategorizedPageGenerator(subcat)) + + for article in articleGen: + pywikibot.output(u'Working on %s' % (article.title(),)) + articleCategories = article.categories() + if category in articleCategories: + articleCategories.remove(category) + try: + text = article.get() + except (pywikibot.NoPage, pywikibot.IsRedirectPage): + return + newtext = pywikibot.replaceCategoryLinks(text, articleCategories) + pywikibot.showDiff(text, newtext) + comment = (u'Removing [[%s]]: Is already in the subcategory [[%s]]' + % (category.title(), subcat.title())) + article.put(newtext, comment) + + +def main(*args): + generator = None + genFactory = pagegenerators.GeneratorFactory() + + for arg in pywikibot.handle_args(*args): + genFactory.handleArg(arg) + + generator = genFactory.getCombinedGenerator() + if not generator: + return False + + for page in generator: + if page.exists() and page.namespace() == 14: + filterCategory(page) + + +if __name__ == "__main__": + main(sys.argv[1:]) -- To view, visit https://gerrit.wikimedia.org/r/185673 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I09b5c2b25b6dfc6427f90bf516417e64a63c5e6a Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Prianka <priyankajayaswal...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits