jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1182807?usp=email )
Change subject: handle uncommon uri schemes on weblinkchecker ...................................................................... handle uncommon uri schemes on weblinkchecker Bug: T389008 Change-Id: I85013d27954842a6dc733b336176e1966e2953c7 --- M scripts/weblinkchecker.py A tests/weblinkchecker_tests.py 2 files changed, 48 insertions(+), 5 deletions(-) Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index e4bca2a..122e02c 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -169,6 +169,9 @@ # Ignore links containing * in domain name # as they are intentionally fake re.compile(r'https?\:\/\/\*(/.*)?'), + + # properly formatted mailto links: no further checking possible + re.compile(r'mailto:[^@]+@[a-z0-9\.]+(\?.*)?'), ] @@ -251,7 +254,8 @@ hosts: dict[str, float] = {} lock = threading.Lock() - def __init__(self, page, url, history, http_ignores, day) -> None: + def __init__(self, page, url: str, history: History, + http_ignores: list[int], day: int) -> None: """Initializer.""" self.page = page self.url = url @@ -341,7 +345,8 @@ } """ - def __init__(self, report_thread, site=None) -> None: + def __init__(self, report_thread: DeadLinkReportThread | None, + site: pywikibot._BaseSite | None = None) -> None: """Initializer.""" self.report_thread = report_thread if not site: @@ -539,7 +544,8 @@ use_redirects = False - def __init__(self, http_ignores=None, day: int = 7, **kwargs) -> None: + def __init__(self, http_ignores: list[int] | None = None, + day: int = 7, **kwargs) -> None: """Initializer.""" super().__init__(**kwargs) @@ -571,8 +577,9 @@ # thread dies when program terminates thread.daemon = True # use hostname as thread.name - thread.name = removeprefix( - urlparse.urlparse(url).hostname, 'www.') + hostname = urlparse.urlparse(url).hostname + if hostname is not None: + thread.name = removeprefix(hostname, 'www.') self.threads.append(thread) def teardown(self) -> None: diff --git a/tests/weblinkchecker_tests.py b/tests/weblinkchecker_tests.py new file mode 100755 index 0000000..5a1b9a5 --- /dev/null +++ b/tests/weblinkchecker_tests.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +"""Tests for the weblinkchecker script.""" +# +# (C) Pywikibot team, 2025 +# +# Distributed under the terms of the MIT license. +# +from __future__ import annotations + +from contextlib import suppress + +import pywikibot +from scripts.weblinkchecker import WeblinkCheckerRobot +from tests.aspects import TestCase, unittest + + +class TestWeblinkchecker(TestCase): + + """Test cases for weblinkchecker.""" + + family = 'wikipedia' + code = 'test' + + def test_different_uri_schemes(self) -> None: + """Test different uri schemes on test page.""" + site = self.get_site('wikipedia:test') + page = pywikibot.Page(site, 'User:DerIch27/weblink test') + generator = [page] + bot = WeblinkCheckerRobot(site=site, generator=generator) + bot.run() + self.assertEqual(1, bot.counter['read']) + + +if __name__ == '__main__': + with suppress(SystemExit): + unittest.main() -- To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1182807?usp=email To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Change-Id: I85013d27954842a6dc733b336176e1966e2953c7 Gerrit-Change-Number: 1182807 Gerrit-PatchSet: 5 Gerrit-Owner: DerIch27 <lar...@gmx.de> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot
_______________________________________________ Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org