jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458792 )

Change subject: [IMPR] Improvements for MakeCatBot.include()
......................................................................

[IMPR] Improvements for MakeCatBot.include()

- set sortKey directly in workingcat
- improve link checking in checklinks(), which uses chained generators
  and prints a progress dot for every 25th linked page
- introduce print_dot method for that
- simplify changing category by using change_category method
- "checked" container becomes a set
- remove checkforward, which is always True

6th step detached from Id7ca3461d for easier reviewing

Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
---
M scripts/makecat.py
1 file changed, 50 insertions(+), 47 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/makecat.py b/scripts/makecat.py
index 7d72ca5..76c31bf 100755
--- a/scripts/makecat.py
+++ b/scripts/makecat.py
@@ -50,6 +50,7 @@
 # Distributed under the terms of the MIT license.
 #
 import codecs
+from itertools import chain
 from textwrap import fill

 import pywikibot
@@ -77,7 +78,6 @@
         })
         super().__init__(**kwargs)
         self.skipdates = self.opt.nodate
-        self.checkforward = True
         self.checkbackward = not self.opt.forward
         self.checkbroken = not (self.opt.forward
                                 and self.opt.exist)
@@ -107,6 +107,12 @@
                 color_format('\n>>> {lightpurple}{0}{default} <<<',
                              page.title()))

+    @staticmethod
+    def print_dot(condition=True):
+        """Print a single dot if conditon is True."""
+        if condition:
+            pywikibot.output('.', newline=False)
+
     def needcheck(self, page):
         """Verify whether the current page may be processed."""
         global checked
@@ -114,53 +120,50 @@
                     or page in checked
                     or self.skipdates and page.autoFormat()[0] is not None)

-    def change_category(self, page, catlist):
+    def change_category(self, page, categories, summary):
         """Change the category of page."""
-        pass
+        global workingcat, parentcats
+        for category in categories:
+            if self.removeparent and category in parentcats:
+                page.change_category(workingcat, summary=summary)
+                return True
+        return False

-    def include(self, pl, checklinks=True, realinclude=True, linkterm=None,
+    def include(self, page, checklinks=True, realinclude=True, linkterm=None,
                 summary=''):
         """Include the current page to the working category."""
         global workingcat, parentcats
-        global checked, tocheck
-        cl = checklinks
-        mysite = self.site
+        actualworkingcat = workingcat
         if linkterm:
-            actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
-                                                  sort_key=linkterm)
-        else:
-            actualworkingcat = workingcat
-        if realinclude:
-            try:
-                text = pl.get()
-            except pywikibot.NoPage:
-                pass
-            except pywikibot.IsRedirectPage:
-                cl = True
+            actualworkingcat.sortKey = linkterm
+        if realinclude and page.exists():
+            if page.isRedirectPage():
+                checklinks = True
             else:
-                cats = list(pl.categories())
-                if workingcat not in cats:
-                    for c in cats:
-                        if c in parentcats:
-                            if self.removeparent:
-                                pl.change_category(actualworkingcat,
-                                                   summary=summary)
-                                break
-                    else:
-                        pl.put(textlib.replaceCategoryLinks(
-                            text, cats + [actualworkingcat], site=pl.site),
-                            summary=summary)
-        if cl:
-            if self.checkforward:
-                for page2 in pl.linkedPages():
-                    if self.needcheck(page2):
-                        tocheck.append(page2)
-                        checked[page2] = page2
-            if self.checkbackward:
-                for ref_page in pl.getReferences():
-                    if self.needcheck(ref_page):
-                        tocheck.append(ref_page)
-                        checked[ref_page] = ref_page
+                cats = list(page.categories())
+                if workingcat not in cats \
+                   and not self.change_category(page, cats, summary):
+                    newtext = textlib.replaceCategoryLinks(
+                        page.text, cats + [actualworkingcat],
+                        site=page.site)
+                    page.put(newtext, summary=summary)
+
+        if checklinks:
+            self.checklinks(page)
+
+    def checklinks(self, page):
+        """Check whether the page has to be added to the tocheck deque."""
+        global checked, tocheck
+        pywikibot.output('\nChecking links for "{}"...'
+                         .format(page.title()), newline=False)
+        generators = [page.linkedPages()]
+        if self.checkbackward:
+            generators.append(page.getReferences())
+        for i, linked_page in enumerate(chain(*generators)):
+            self.print_dot(not i % 25)
+            if self.needcheck(linked_page):
+                tocheck.append(linked_page)
+                checked.add(linked_page)

     def skip_page(self, page):
         """Check whether the page is to be skipped."""
@@ -177,7 +180,7 @@
             pl2 = pl.getRedirectTarget()
             if self.needcheck(pl2):
                 tocheck.append(pl2)
-                checked[pl2] = pl2
+                checked.add(pl2)
             return
         ctoshow = 500
         pywikibot.output('')
@@ -225,7 +228,7 @@
             elif answer == 'o':
                 pagetitle = pywikibot.input('Specify page to add:')
                 page = pywikibot.Page(pywikibot.Site(), pagetitle)
-                if page not in checked.keys():
+                if page not in checked:
                     self.include(page, summary=summary)
             elif answer == 's':
                 if not pl.exists():
@@ -268,7 +271,7 @@
     global checked, tocheck
     global excludefile

-    checked = {}
+    checked = set()
     tocheck = DequeGenerator()

     workingcatname = ''
@@ -311,7 +314,7 @@
                 if not line:
                     continue
                 pl = pywikibot.Page(mysite, line)
-                checked[pl] = pl
+                checked.add(pl)

         excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
     except IOError:
@@ -331,7 +334,7 @@
         for cat in subcatlist:
             artlist = list(cat.articles())
             for page in artlist:
-                checked[page] = page
+                checked.add(page)

     # Fetch articles in category, and mark as already checked (seen)
     # If category is empty, ask user if they want to look for pages
@@ -349,7 +352,7 @@
         articles = [pl]

     for pl in articles:
-        checked[pl] = pl
+        checked.add(pl)
         bot.include(pl, summary=summary)

     gen = pagegenerators.DequePreloadingGenerator(tocheck)

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458792
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
Gerrit-Change-Number: 458792
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dalba <[email protected]>
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to