jenkins-bot has submitted this change and it was merged.

Change subject: (bug 64087) fix for indexing set in category.py; code 
improvements
......................................................................


(bug 64087) fix for indexing set in category.py; code improvements

With https://www.mediawiki.org/wiki/Special:Code/pywikipedia/6428
a list was replaced by a set. Now indexing that set fails.

To solve this problem, we replace the indexing by enumerating the
collection, which gives us both the index value and the category.

Some code improvements:
cat.title().split(':', 1)[1] can be replaced by cat.title(withNamespace=False)

Page.title() is now able to return labeled links if the withNamespace
parameter is False, e.g.
'[[:%s|%s]]' % (sc[i].title(), sc[i].title().split(':', 1)[1]) can be replaced
by a single title(asLink=True, textlink=True, withNamespace=False) call.

Adding "noisy dots" to show that the bot is still running while
generating the category tree.

A -depth option was added to set the maximum category depth. The default
is 5, which already takes a long time to compute.

Change-Id: I463b7416bbf67ee8fb90dbeed525fc834aadc7d0
---
M scripts/category.py
1 file changed, 41 insertions(+), 32 deletions(-)

Approvals:
  Merlijn van Deen: Looks good to me, approved
  Pyfisch: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/category.py b/scripts/category.py
index 81211c0..e12fd6c 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -63,6 +63,8 @@
                   and do not remove them
  * -match       - Only work on pages whose titles match the given regex (for
                   move and remove actions).
+ * -depth:      - The max depth limit beyond which no subcategories will be
+                  listed.
 
 For the actions tidy and tree, the bot will store the category structure
 locally in category.dump. This saves time and server load, but if it uses
@@ -747,22 +749,21 @@
 
         pywikibot.output('\n' + full_text[:contextLength] + '\n')
 
-        subcatlist = self.catDB.getSubcats(current_cat)
-        supercatlist = self.catDB.getSupercats(current_cat)
+        # we need list to index the choice
+        subcatlist = list(self.catDB.getSubcats(current_cat))
+        supercatlist = list(self.catDB.getSupercats(current_cat))
 
-        if len(subcatlist) == 0:
+        if not subcatlist:
             pywikibot.output('This category has no subcategories.\n')
-        if len(supercatlist) == 0:
+        if not supercatlist:
             pywikibot.output('This category has no supercategories.\n')
         # show subcategories as possible choices (with numbers)
-        for i in range(len(supercatlist)):
+        for i, supercat in enumerate(supercatlist):
             # layout: we don't expect a cat to have more than 10 supercats
-            pywikibot.output(u'u%d - Move up to %s'
-                             % (i, supercatlist[i].title()))
-        for i in range(len(subcatlist)):
+            pywikibot.output(u'u%d - Move up to %s' % (i, supercat.title()))
+        for i, subcat in enumerate(subcatlist):
             # layout: we don't expect a cat to have more than 100 subcats
-            pywikibot.output(u'%2d - Move down to %s'
-                             % (i, subcatlist[i].title()))
+            pywikibot.output(u'%2d - Move down to %s' % (i, subcat.title()))
         pywikibot.output(' j - Jump to another category')
         pywikibot.output(' s - Skip this article')
         pywikibot.output(' r - Remove this category tag')
@@ -843,8 +844,7 @@
 
 
 class CategoryTreeRobot:
-    '''
-    Robot to create tree overviews of the category structure.
+    """ Robot to create tree overviews of the category structure.
 
     Parameters:
         * catTitle - The category which will be the tree's root.
@@ -854,7 +854,7 @@
                      won't be a problem.
         * filename - The textfile where the tree should be saved; None to print
                      the tree to stdout.
-    '''
+    """
 
     def __init__(self, catTitle, catDB, filename=None, maxDepth=10):
         self.catTitle = catTitle
@@ -862,39 +862,41 @@
         if filename and not os.path.isabs(filename):
             filename = config.datafilepath(filename)
         self.filename = filename
-        # TODO: make maxDepth changeable with a parameter or config file entry
         self.maxDepth = maxDepth
         self.site = pywikibot.Site()
 
     def treeview(self, cat, currentDepth=0, parent=None):
-        '''
-        Returns a multi-line string which contains a tree view of all
+        """ Return a multi-line string which contains a tree view of all
         subcategories of cat, up to level maxDepth. Recursively calls itself.
 
         Parameters:
             * cat - the Category of the node we're currently opening
             * currentDepth - the current level in the tree (for recursion)
             * parent - the Category of the category we're coming from
-        '''
 
-        result = u'#' * currentDepth
-        result += '[[:%s|%s]]' % (cat.title(), cat.title().split(':', 1)[1])
+        """
+
+        result = u'#' * currentDepth + ' '
+        result += cat.title(asLink=True, textlink=True, withNamespace=False)
         result += ' (%d)' % len(self.catDB.getArticles(cat))
-        # We will remove an element of this array, but will need the original
-        # array later, so we create a shallow copy with [:]
-        supercats = self.catDB.getSupercats(cat)[:]
+        # We will remove an element of supercats, but need the original set
+        # later, so we create a list from the catDB.getSupercats(cat) set
+        supercats = list(self.catDB.getSupercats(cat))
         # Find out which other cats are supercats of the current cat
         try:
             supercats.remove(parent)
         except:
             pass
-        if supercats != []:
+        if supercats:
+            if currentDepth < self.maxDepth / 2:
+                # noisy dots
+                pywikibot.output('.', newline=False)
             supercat_names = []
-            for i in range(len(supercats)):
+            for i, cat in enumerate(supercats):
                 # create a list of wiki links to the supercategories
-                supercat_names.append('[[:%s|%s]]'
-                                      % (supercats[i].title(),
-                                         supercats[i].title().split(':', 
1)[1]))
+                supercat_names.append(cat.title(asLink=True,
+                                                textlink=True,
+                                                withNamespace=False))
                 # print this list, separated with commas, using translations
                 # given in also_in_cats
             result += ' ' + i18n.twtranslate(self.site, 'category-also-in',
@@ -906,9 +908,9 @@
                 # recurse into subdirectories
                 result += self.treeview(subcat, currentDepth + 1, parent=cat)
         else:
-            if self.catDB.getSubcats(cat) != []:
+            if self.catDB.getSubcats(cat):
                 # show that there are more categories beyond the depth limit
-                result += '#' * (currentDepth + 1) + '[...]\n'
+                result += '#' * (currentDepth + 1) + ' [...]\n'
         return result
 
     def run(self):
@@ -921,7 +923,9 @@
 
         """
         cat = pywikibot.Category(self.site, self.catTitle)
+        pywikibot.output('Generating tree...', newline=False)
         tree = self.treeview(cat)
+        pywikibot.output(u'')
         if self.filename:
             pywikibot.output(u'Saving results in %s' % self.filename)
             import codecs
@@ -947,6 +951,8 @@
     titleRegex = None
     pagesonly = False
     withHistory = False
+    rebuild = False
+    depth = 5
 
     # This factory is responsible for processing command line arguments
     # that are also used by other scripts and that determine on which pages
@@ -958,7 +964,6 @@
     # If this is set to true then the custom edit summary given for removing
     # categories from articles will also be used as the deletion reason.
     useSummaryForDeletion = True
-    catDB = CategoryDatabase()
     action = None
     sort_by_last_name = False
     create_pages = False
@@ -982,7 +987,7 @@
         elif arg == '-person':
             sort_by_last_name = True
         elif arg == '-rebuild':
-            catDB.rebuild()
+            rebuild = True
         elif arg.startswith('-from:'):
             oldCatTitle = arg[len('-from:'):].replace('_', ' ')
             fromGiven = True
@@ -1019,9 +1024,13 @@
             follow_redirects = True
         elif arg == '-hist':
             withHistory = True
+        elif arg.startswith('-depth:'):
+            depth = int(arg[len('-depth:'):])
         else:
             genFactory.handleArg(arg)
     pywikibot.Site().login()
+
+    catDB = CategoryDatabase(rebuild=rebuild)
     gen = genFactory.getCombinedGenerator()
     if action == 'add':
         if not gen:
@@ -1065,7 +1074,7 @@
         filename = pywikibot.input(
             u'Please enter the name of the file where the tree should be 
saved,'
             u'\nor press enter to simply show the tree:')
-        bot = CategoryTreeRobot(catTitle, catDB, filename)
+        bot = CategoryTreeRobot(catTitle, catDB, filename, depth)
         bot.run()
     elif action == 'listify':
         if not fromGiven:

-- 
To view, visit https://gerrit.wikimedia.org/r/127251
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I463b7416bbf67ee8fb90dbeed525fc834aadc7d0
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Pyfisch <[email protected]>
Gerrit-Reviewer: Ricordisamoa <[email protected]>
Gerrit-Reviewer: Russell Blau <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to