jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/511326 )
Change subject: [IMPR] Use ThreadList with weblinkchecker.py
......................................................................
[IMPR] Use ThreadList with weblinkchecker.py
Also simplify ignore url checking
Change-Id: I714bebfb1221ec73dc2893de5c6592257ec27e78
---
M scripts/weblinkchecker.py
1 file changed, 9 insertions(+), 17 deletions(-)
Approvals:
Xqt: Looks good to me, approved
Matěj Suchánek: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 8d4fd46..62f2413 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -133,7 +133,7 @@
from pywikibot.pagegenerators import (
XMLDumpPageGenerator as _XMLDumpPageGenerator,
)
-from pywikibot.tools import deprecated
+from pywikibot.tools import deprecated, ThreadList
from pywikibot.tools.formatter import color_format
try:
@@ -857,32 +857,24 @@
self.HTTPignore = HTTPignore
self.day = day
+ # Limit the number of threads started at the same time
+ self.threads = ThreadList(limit=config.max_external_links,
+ wait_time=config.retry_wait)
+
def treat_page(self):
"""Process one page."""
page = self.current_page
- text = page.get()
- for url in weblinksIn(text):
+ for url in weblinksIn(page.text):
for ignoreR in ignorelist:
if ignoreR.match(url):
break
- else: # not ignore url
- # Limit the number of threads started at the same time. Each
- # thread will check one page, then die.
- while threading.activeCount() >= config.max_external_links:
- pywikibot.sleep(config.retry_wait)
+ else:
+ # Each thread will check one page, then die.
thread = LinkCheckThread(page, url, self.history,
self.HTTPignore, self.day)
# thread dies when program terminates
thread.setDaemon(True)
- try:
- thread.start()
- except threading.ThreadError:
- pywikibot.warning(
- "Can't start a new thread.\nPlease decrease "
- 'max_external_links in your user-config.py or use\n'
- "'-max_external_links:' option with a smaller value. "
- 'Default is 50.')
- raise
+ self.threads.append(thread)
def RepeatPageGenerator():
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/511326
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I714bebfb1221ec73dc2893de5c6592257ec27e78
Gerrit-Change-Number: 511326
Gerrit-PatchSet: 9
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Dalba <[email protected]>
Gerrit-Reviewer: Dvorapa <[email protected]>
Gerrit-Reviewer: Framawiki <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits