Xqt has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810472 )

Change subject: [IMPR] Only follow redirects in harvest_template.py if no 
wikibase item exists
......................................................................

[IMPR] Only follow redirects in harvest_template.py if no wikibase item exists

This implements proposal no. 2 of T311883

- Make _template_link_target a public staticmethod
- Test for InvalidPageError with linked_page.exists() call
- Do not follow the redirect in template_link_target if the redirect
  page has a wikibase item
- Resolve the multiple return statements issue
- Add test for this new behaviour

Bug: T311883
Change-Id: I87fe427009f9bbe5db2208d0ed850eb0d48bd505
---
M scripts/harvest_template.py
M tests/__init__.py
A tests/harvest_templates_tests.py
3 files changed, 74 insertions(+), 19 deletions(-)

Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved



diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index c1df570..a225b8d 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -106,7 +106,11 @@
 from pywikibot import textlib
 from pywikibot.backports import List
 from pywikibot.bot import ConfigParserBot, OptionHandler, WikidataBot
-from pywikibot.exceptions import InvalidTitleError, NoPageError
+from pywikibot.exceptions import (
+    InvalidPageError,
+    InvalidTitleError,
+    NoPageError,
+)


 willstop = False
@@ -203,15 +207,23 @@
         titles.append(temp.title(with_ns=False))
         return titles

-    def _template_link_target(self, item, link_text
-                              ) -> Optional[pywikibot.ItemPage]:
+    @staticmethod
+    def template_link_target(item: pywikibot.ItemPage,
+                             link_text: str) -> Optional[pywikibot.ItemPage]:
+        """Find the ItemPage target for a given link text.
+
+        .. versionchanged:: 7.4
+           Only follow the redirect target if redirect page has no
+           wikibase item.
+        """
         link = pywikibot.Link(link_text)
         linked_page = pywikibot.Page(link)
         try:
             exists = linked_page.exists()
-        except InvalidTitleError:
-            pywikibot.error('"{}" is not a valid title so it cannot be linked.'
-                            ' Skipping.'.format(link_text))
+        except (InvalidTitleError, InvalidPageError):
+            pywikibot.error('"{}" is not a valid title or the page itself is '
+                            'invalid so it cannot be linked. Skipping.'
+                            .format(link_text))
             return None

         if not exists:
@@ -219,23 +231,24 @@
                              'Skipping.'.format(linked_page))
             return None

-        if linked_page.isRedirectPage():
-            linked_page = linked_page.getRedirectTarget()
-
-        try:
-            linked_item = pywikibot.ItemPage.fromPage(linked_page)
-        except NoPageError:
-            linked_item = None
+        while True:
+            try:
+                linked_item = pywikibot.ItemPage.fromPage(linked_page)
+            except NoPageError:
+                if linked_page.isRedirectPage():
+                    linked_page = linked_page.getRedirectTarget()
+                    continue
+                linked_item = None
+            break

         if not linked_item or not linked_item.exists():
             pywikibot.output('{} does not have a wikidata item to link with. '
                              'Skipping.'.format(linked_page))
-            return None
-
-        if linked_item.title() == item.title():
+            linked_item = None
+        elif linked_item.title() == item.title():
             pywikibot.output('{} links to itself. Skipping.'
                              .format(linked_page))
-            return None
+            linked_item = None

         return linked_item

@@ -295,7 +308,7 @@
                     for match in pywikibot.link_regex.finditer(value):
                         matched = True
                         link_text = match.group(1)
-                        linked_item = self._template_link_target(
+                        linked_item = self.template_link_target(
                             item, link_text)
                         added = False
                         if linked_item:
@@ -321,7 +334,7 @@
                             .format(claim.getID(), field, value))
                         continue

-                    linked_item = self._template_link_target(item, value)
+                    linked_item = self.template_link_target(item, value)
                     if not linked_item:
                         continue

diff --git a/tests/__init__.py b/tests/__init__.py
index 39ce2b0..fa97e19 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -148,6 +148,7 @@
     'fixing_redirects',
     'generate_family_file',
     'generate_user_files',
+    'harvest_templates',
     'interwikidata',
     'l10n',
     'patrolbot',
diff --git a/tests/harvest_templates_tests.py b/tests/harvest_templates_tests.py
new file mode 100644
index 0000000..4ea7a6e
--- /dev/null
+++ b/tests/harvest_templates_tests.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python3
+"""Tests for scripts/harvest_template.py."""
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+from contextlib import suppress
+
+from pywikibot import ItemPage
+from scripts.harvest_template import HarvestRobot
+
+from tests.aspects import ScriptMainTestCase
+
+
+class TestHarvestRobot(ScriptMainTestCase):
+
+    """Test HarvestRobot."""
+
+    family = 'wikipedia'
+    code = 'cs'
+
+    def test_template_link_target(self):
+        """Test template_link_target static method."""
+        tests = [
+            ('Pes', 'Q144'),
+            ('Imaginární číslo', 'Q9165172'),
+            ('Sequana', 'Q472766'),
+        ]
+        for link, item in tests:
+            with self.subTest(link=link, item=item):
+                dummy_item = ItemPage(self.site.data_repository(), 'Q1')
+                target = HarvestRobot.template_link_target(dummy_item, link)
+                self.assertIsInstance(target, ItemPage)
+                self.assertEqual(target.title(), item)
+
+
+if __name__ == '__main__':  # pragma: no cover
+    with suppress(SystemExit):
+        unittest.main()

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/810472
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I87fe427009f9bbe5db2208d0ed850eb0d48bd505
Gerrit-Change-Number: 810472
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Fomafix <[email protected]>
Gerrit-Reviewer: JAn Dudík <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to