jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1196121?usp=email )
Change subject: IMPR: Refactor replace_magicword in CosmeticChangesToolkit
......................................................................
IMPR: Refactor replace_magicword in CosmeticChangesToolkit
- filename must not be empty in FILE_LINK_REGEX
- use a marker to exclude caption from replacements in replace_magicword
- update cosmetic_changes_tests and textlib_tests
Bug: T396715
Change-Id: Ib6b0229f074856532a45899a4c23723569924420
---
M pywikibot/cosmetic_changes.py
M pywikibot/textlib.py
M tests/cosmetic_changes_tests.py
M tests/textlib_tests.py
4 files changed, 46 insertions(+), 25 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 467734c..725a924 100644
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -59,7 +59,7 @@
import re
from contextlib import suppress
from enum import IntEnum
-from typing import Any
+from typing import Any, cast
from urllib.parse import urlparse, urlunparse
import pywikibot
@@ -502,19 +502,30 @@
cache[False] = True # signal there is nothing to replace
def replace_magicword(match: Match[str]) -> str:
+ """Replace magic words in file link params, leaving captions."""
+ linktext = match.group()
if cache.get(False):
- return match.group()
- split = match.group().split('|')
- if len(split) == 1:
- return match.group()
+ return linktext
+
+ params = match.group(2) # includes the leading |
+ if not params:
+ return linktext
if not cache:
init_cache()
- # push ']]' out and re-add below
- split[-1] = split[-1][:-2]
- return '{}|{}]]'.format(
- split[0], '|'.join(cache.get(x.strip(), x) for x in split[1:]))
+ # do the magic job
+ marker = textlib.findmarker(params)
+ params = textlib.replaceExcept(
+ params, r'\|', marker, ['link', 'template'])
+ parts = params.split(marker)
+ replaced = '|'.join(cache.get(p.strip(), p) for p in parts)
+
+ # extract namespace
+ m = cast(Match[str],
+ re.match(r'\[\[\s*(?P<namespace>[^:]+)\s*:', linktext))
+
+ return f'[[{m["namespace"]}:{match["filename"]}{replaced}]]'
cache: dict[bool | str, Any] = {}
exceptions = ['comment', 'nowiki', 'pre', 'syntaxhighlight']
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index da5187f..97ea55a 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -74,18 +74,25 @@
(?P<unhandled_depth>{{\s*[^{\|#0-9][^{\|#]*?\s* [^{]* {{ .* }})
""", re.VERBOSE | re.DOTALL)
-# The following regex supports wikilinks anywhere after the first pipe
-# and correctly matches the end of the file link if the wikilink contains
-# [[ or ]].
-# The namespace names must be substituted into this regex.
-# e.g. FILE_LINK_REGEX % 'File'
-# or FILE_LINK_REGEX % '|'.join(site.namespaces[6])
+# Regex matching file links with optional parameters.
+#
+# Captures the filename and parameters, including nested links
+# within the parameters. The regex safely matches the closing
+# brackets even if inner wikilinks contain [[ or ]].
+# The namespace names must be substituted into the pattern, e.g.:
+# FILE_LINK_REGEX % 'File'
+# or: FILE_LINK_REGEX % '|'.join(site.namespaces[6])
+#
+# Don't use this regex directly; use textlib.get_regexes('file', site)
+# instead.
+#
+# 10.7: Exclude empty filename
FILE_LINK_REGEX = r"""
\[\[\s*
(?:%s) # namespace aliases
\s*:
(?=(?P<filename>
- [^]|]*
+ [^]|]+
))(?P=filename)
(
\|
diff --git a/tests/cosmetic_changes_tests.py b/tests/cosmetic_changes_tests.py
index 74d8f0e..bd39792 100755
--- a/tests/cosmetic_changes_tests.py
+++ b/tests/cosmetic_changes_tests.py
@@ -431,17 +431,19 @@
self.assertEqual(
'[[File:Foo.bar|250px|zentriert|Bar]]',
self.cct.translateMagicWords('[[File:Foo.bar|250px|center|Bar]]'))
-
- @unittest.expectedFailure # T396715
- def test_translateMagicWords_fail(self) -> None:
- """Test translateMagicWords method.
-
- The current implementation doesn't check whether the magic word
- is inside a template.
- """
+ # test magic word inside template
self.assertEqual(
'[[File:Foo.bar|{{Baz|thumb|foo}}]]',
self.cct.translateMagicWords('[[File:Foo.bar|{{Baz|thumb|foo}}]]'))
+ # test magic word inside link and template
+ self.assertEqual(
+ '[[File:ABC.jpg|123px|mini|links|[[Foo|left]] {{Bar|thumb}}]]',
+ self.cct.translateMagicWords(
+ '[[File:ABC.jpg|123px|thumb|left|[[Foo|left]] {{Bar|thumb}}]]')
+ )
+ self.assertEqual(
+ '[[File:Foo.bar]]',
+ self.cct.translateMagicWords('[[File:Foo.bar]]'))
def test_cleanUpLinks_pipes(self) -> None:
"""Test cleanUpLinks method."""
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index baf0c2f..0bb7daa 100755
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -1309,11 +1309,12 @@
'x', 'y', ['file'], site=self.site),
'[[NonFile:y]]')
+ # No File if filename is missing
self.assertEqual(
textlib.replaceExcept(
'[[File:]]',
'File:', 'NonFile:', ['file'], site=self.site),
- '[[File:]]')
+ '[[NonFile:]]')
self.assertEqual(
textlib.replaceExcept(
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1196121?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib6b0229f074856532a45899a4c23723569924420
Gerrit-Change-Number: 1196121
Gerrit-PatchSet: 11
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]