jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1196121?usp=email )

Change subject: IMPR: Refactor replace_magicword in CosmeticChangesToolkit
......................................................................

IMPR: Refactor replace_magicword in CosmeticChangesToolkit

- filename must not be empty in FILE_LINK_REGEX
- use a marker to exclude caption from replacements in replace_magicword
- update cosmetic_changes_tests and textlib_tests

Bug: T396715
Change-Id: Ib6b0229f074856532a45899a4c23723569924420
---
M pywikibot/cosmetic_changes.py
M pywikibot/textlib.py
M tests/cosmetic_changes_tests.py
M tests/textlib_tests.py
4 files changed, 46 insertions(+), 25 deletions(-)

Approvals:
  Matěj Suchánek: Looks good to me, approved
  jenkins-bot: Verified




diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 467734c..725a924 100644
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -59,7 +59,7 @@
 import re
 from contextlib import suppress
 from enum import IntEnum
-from typing import Any
+from typing import Any, cast
 from urllib.parse import urlparse, urlunparse

 import pywikibot
@@ -502,19 +502,30 @@
                 cache[False] = True  # signal there is nothing to replace

         def replace_magicword(match: Match[str]) -> str:
+            """Replace magic words in file link params, leaving captions."""
+            linktext = match.group()
             if cache.get(False):
-                return match.group()
-            split = match.group().split('|')
-            if len(split) == 1:
-                return match.group()
+                return linktext
+
+            params = match.group(2)  # includes pre-leading |
+            if not params:
+                return linktext

             if not cache:
                 init_cache()

-            # push ']]' out and re-add below
-            split[-1] = split[-1][:-2]
-            return '{}|{}]]'.format(
-                split[0], '|'.join(cache.get(x.strip(), x) for x in split[1:]))
+            # do the magic job
+            marker = textlib.findmarker(params)
+            params = textlib.replaceExcept(
+                params, r'\|', marker, ['link', 'template'])
+            parts = params.split(marker)
+            replaced = '|'.join(cache.get(p.strip(), p) for p in parts)
+
+            # extract namespace
+            m = cast(Match[str],
+                     re.match(r'\[\[\s*(?P<namespace>[^:]+)\s*:', linktext))
+
+            return f'[[{m["namespace"]}:{match["filename"]}{replaced}]]'

         cache: dict[bool | str, Any] = {}
         exceptions = ['comment', 'nowiki', 'pre', 'syntaxhighlight']
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index da5187f..97ea55a 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -74,18 +74,25 @@
 (?P<unhandled_depth>{{\s*[^{\|#0-9][^{\|#]*?\s* [^{]* {{ .* }})
 """, re.VERBOSE | re.DOTALL)

-# The following regex supports wikilinks anywhere after the first pipe
-# and correctly matches the end of the file link if the wikilink contains
-# [[ or ]].
-# The namespace names must be substituted into this regex.
-# e.g. FILE_LINK_REGEX % 'File'
-# or FILE_LINK_REGEX % '|'.join(site.namespaces[6])
+# Regex matching file links with optional parameters.
+#
+# Captures the filename and parameters, including nested links
+# within the parameters. The regex safely matches the closing
+# brackets even if inner wikilinks contain [[ or ]].
+# The namespace names must be substituted into the pattern, e.g.:
+#     FILE_LINK_REGEX % 'File'
+# or: FILE_LINK_REGEX % '|'.join(site.namespaces[6])
+#
+# Don't use this regex directly; use `textlib.get_regexes('file', site)`
+# instead.
+#
+# 10.7: Exclude empty filename
 FILE_LINK_REGEX = r"""
     \[\[\s*
     (?:%s)  # namespace aliases
     \s*:
     (?=(?P<filename>
-        [^]|]*
+        [^]|]+
     ))(?P=filename)
     (
         \|
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index 74d8f0e..bd39792 100755
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -431,17 +431,19 @@
         self.assertEqual(
             '[[File:Foo.bar|250px|zentriert|Bar]]',
             self.cct.translateMagicWords('[[File:Foo.bar|250px|center|Bar]]'))
-
-    @unittest.expectedFailure  # T396715
-    def test_translateMagicWords_fail(self) -> None:
-        """Test translateMagicWords method.
-
-        The current implementation doesn't check whether the magic word
-        is inside a template.
-        """
+        # test magic word inside template
         self.assertEqual(
             '[[File:Foo.bar|{{Baz|thumb|foo}}]]',
             self.cct.translateMagicWords('[[File:Foo.bar|{{Baz|thumb|foo}}]]'))
+        # test magic word inside link and template
+        self.assertEqual(
+            '[[File:ABC.jpg|123px|mini|links|[[Foo|left]] {{Bar|thumb}}]]',
+            self.cct.translateMagicWords(
+                '[[File:ABC.jpg|123px|thumb|left|[[Foo|left]] {{Bar|thumb}}]]')
+        )
+        self.assertEqual(
+            '[[File:Foo.bar]]',
+            self.cct.translateMagicWords('[[File:Foo.bar]]'))

     def test_cleanUpLinks_pipes(self) -> None:
         """Test cleanUpLinks method."""
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index baf0c2f..0bb7daa 100755
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1309,11 +1309,12 @@
                 'x', 'y', ['file'], site=self.site),
             '[[NonFile:y]]')

+        # Not treated as a file link if the filename is missing
         self.assertEqual(
             textlib.replaceExcept(
                 '[[File:]]',
                 'File:', 'NonFile:', ['file'], site=self.site),
-            '[[File:]]')
+            '[[NonFile:]]')

         self.assertEqual(
             textlib.replaceExcept(

-- 
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1196121?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib6b0229f074856532a45899a4c23723569924420
Gerrit-Change-Number: 1196121
Gerrit-PatchSet: 11
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to