jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1182807?usp=email )

Change subject: handle uncommon uri schemes on weblinkchecker
......................................................................

handle uncommon uri schemes on weblinkchecker

Bug: T389008
Change-Id: I85013d27954842a6dc733b336176e1966e2953c7
---
M scripts/weblinkchecker.py
A tests/weblinkchecker_tests.py
2 files changed, 48 insertions(+), 5 deletions(-)

Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved




diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index e4bca2a..122e02c 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -169,6 +169,9 @@
     # Ignore links containing * in domain name
     # as they are intentionally fake
     re.compile(r'https?\:\/\/\*(/.*)?'),
+
+    # properly formatted mailto links: no further checking possible
+    re.compile(r'mailto:[^@]+@[a-z0-9\.]+(\?.*)?'),
 ]


@@ -251,7 +254,8 @@
     hosts: dict[str, float] = {}
     lock = threading.Lock()

-    def __init__(self, page, url, history, http_ignores, day) -> None:
+    def __init__(self, page, url: str, history: History,
+                 http_ignores: list[int], day: int) -> None:
         """Initializer."""
         self.page = page
         self.url = url
@@ -341,7 +345,8 @@
      }
     """

-    def __init__(self, report_thread, site=None) -> None:
+    def __init__(self, report_thread: DeadLinkReportThread | None,
+                 site: pywikibot._BaseSite | None = None) -> None:
         """Initializer."""
         self.report_thread = report_thread
         if not site:
@@ -539,7 +544,8 @@

     use_redirects = False

-    def __init__(self, http_ignores=None, day: int = 7, **kwargs) -> None:
+    def __init__(self, http_ignores: list[int] | None = None,
+                 day: int = 7, **kwargs) -> None:
         """Initializer."""
         super().__init__(**kwargs)

@@ -571,8 +577,9 @@
                 # thread dies when program terminates
                 thread.daemon = True
                 # use hostname as thread.name
-                thread.name = removeprefix(
-                    urlparse.urlparse(url).hostname, 'www.')
+                hostname = urlparse.urlparse(url).hostname
+                if hostname is not None:
+                    thread.name = removeprefix(hostname, 'www.')
                 self.threads.append(thread)

     def teardown(self) -> None:
diff --git a/tests/weblinkchecker_tests.py b/tests/weblinkchecker_tests.py
new file mode 100755
index 0000000..5a1b9a5
--- /dev/null
+++ b/tests/weblinkchecker_tests.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+"""Tests for the weblinkchecker script."""
+#
+# (C) Pywikibot team, 2025
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import annotations
+
+from contextlib import suppress
+
+import pywikibot
+from scripts.weblinkchecker import WeblinkCheckerRobot
+from tests.aspects import TestCase, unittest
+
+
+class TestWeblinkchecker(TestCase):
+
+    """Test cases for weblinkchecker."""
+
+    family = 'wikipedia'
+    code = 'test'
+
+    def test_different_uri_schemes(self) -> None:
+        """Test different uri schemes on test page."""
+        site = self.get_site('wikipedia:test')
+        page = pywikibot.Page(site, 'User:DerIch27/weblink test')
+        generator = [page]
+        bot = WeblinkCheckerRobot(site=site, generator=generator)
+        bot.run()
+        self.assertEqual(1, bot.counter['read'])
+
+
+if __name__ == '__main__':
+    with suppress(SystemExit):
+        unittest.main()

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1182807?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I85013d27954842a6dc733b336176e1966e2953c7
Gerrit-Change-Number: 1182807
Gerrit-PatchSet: 5
Gerrit-Owner: DerIch27 <lar...@gmx.de>
Gerrit-Reviewer: Xqt <i...@gno.de>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org
To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org

Reply via email to