jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/511326 )

Change subject: [IMPR] Use ThreadList with weblinkchecker.py
......................................................................

[IMPR] Use ThreadList with weblinkchecker.py

Also simplify the check for ignored URLs.

Change-Id: I714bebfb1221ec73dc2893de5c6592257ec27e78
---
M scripts/weblinkchecker.py
1 file changed, 9 insertions(+), 17 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  Matěj Suchánek: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 8d4fd46..62f2413 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -133,7 +133,7 @@
 from pywikibot.pagegenerators import (
     XMLDumpPageGenerator as _XMLDumpPageGenerator,
 )
-from pywikibot.tools import deprecated
+from pywikibot.tools import deprecated, ThreadList
 from pywikibot.tools.formatter import color_format

 try:
@@ -857,32 +857,24 @@
             self.HTTPignore = HTTPignore
         self.day = day

+        # Limit the number of threads started at the same time
+        self.threads = ThreadList(limit=config.max_external_links,
+                                  wait_time=config.retry_wait)
+
     def treat_page(self):
         """Process one page."""
         page = self.current_page
-        text = page.get()
-        for url in weblinksIn(text):
+        for url in weblinksIn(page.text):
             for ignoreR in ignorelist:
                 if ignoreR.match(url):
                     break
-            else:  # not ignore url
-                # Limit the number of threads started at the same time. Each
-                # thread will check one page, then die.
-                while threading.activeCount() >= config.max_external_links:
-                    pywikibot.sleep(config.retry_wait)
+            else:
+                # Each thread will check one page, then die.
                 thread = LinkCheckThread(page, url, self.history,
                                          self.HTTPignore, self.day)
                 # thread dies when program terminates
                 thread.setDaemon(True)
-                try:
-                    thread.start()
-                except threading.ThreadError:
-                    pywikibot.warning(
-                        "Can't start a new thread.\nPlease decrease "
-                        'max_external_links in your user-config.py or use\n'
-                        "'-max_external_links:' option with a smaller value. "
-                        'Default is 50.')
-                    raise
+                self.threads.append(thread)


 def RepeatPageGenerator():

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/511326
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I714bebfb1221ec73dc2893de5c6592257ec27e78
Gerrit-Change-Number: 511326
Gerrit-PatchSet: 9
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Dalba <[email protected]>
Gerrit-Reviewer: Dvorapa <[email protected]>
Gerrit-Reviewer: Framawiki <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to