jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942668 )

Change subject: [IMPR] use inline re.IGNORECASE flag in textlib.case_escape function
......................................................................

[IMPR] use inline re.IGNORECASE flag in textlib.case_escape function

- use inline re.IGNORECASE flag for the first letter of string argument
- add underscore parameter to detect interchangeable and collapsible
  spaces/underscores in string
- use underscore parameter within scripts

Bug: T308265
Change-Id: I58df8260db97c45cde6e959ada7e5a8acc959d79
---
M pywikibot/textlib.py
M scripts/image.py
M scripts/delinker.py
3 files changed, 31 insertions(+), 15 deletions(-)

Approvals:
  Matěj Suchánek: Looks good to me, approved
  jenkins-bot: Verified




diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 39f8d50..b8ac31f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -163,19 +163,24 @@
     return phrase


-def case_escape(case: str, string: str) -> str:
+def case_escape(case: str, string: str, *, underscore: bool = False) -> str:
     """Return an escaped regex pattern which depends on 'first-letter' case.

     .. versionadded:: 7.0
+    .. versionchanged:: 8.4
+       Added the optional *underscore* parameter.

-    :param case: if `case` is 'first-letter' the regex contains an
-        upper/lower case set for the first letter
+    :param case: if `case` is 'first-letter', the regex contains an
+        inline re.IGNORECASE flag for the first letter
+    :param underscore: if True, expand the regex to detect spaces and
+        underscores which are interchangeable and collapsible
     """
-    first = string[0]
-    if first.isalpha() and case == 'first-letter':
-        pattern = f'[{first.upper()}{first.lower()}]{re.escape(string[1:])}'
+    if case == 'first-letter':
+        pattern = f'(?i:{string[:1]}){re.escape(string[1:])}'
     else:
         pattern = re.escape(string)
+    if underscore:
+        pattern = re.sub(r'_|\\ ', '[_ ]+', pattern)
     return pattern


@@ -1557,9 +1562,7 @@
         return oldtext

     # title might contain regex special characters
-    title = case_escape(site.namespaces[14].case, title)
-    # spaces and underscores in page titles are interchangeable and collapsible
-    title = title.replace(r'\ ', '[ _]+').replace(r'\_', '[ _]+')
+    title = case_escape(site.namespaces[14].case, title, underscore=True)
     categoryR = re.compile(r'\[\[\s*({})\s*:\s*{}[\s\u200e\u200f]*'
                            r'((?:\|[^]]+)?\]\])'
                            .format(catNamespace, title), re.I)
diff --git a/scripts/delinker.py b/scripts/delinker.py
index 6282cd6..4d3d0b4 100755
--- a/scripts/delinker.py
+++ b/scripts/delinker.py
@@ -100,9 +100,9 @@
         """Set page to current page and delink that page."""
         # use image_regex from image.py
         namespace = file_page.site.namespaces[6]
-        escaped = case_escape(namespace.case, file_page.title(with_ns=False))
-        # Be careful, spaces and _ have been converted to '\ ' and '\_'
-        escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
+        escaped = case_escape(namespace.case,
+                              file_page.title(with_ns=False),
+                              underscore=True)
         self.image_regex = re.compile(
             r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
             r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]'
diff --git a/scripts/image.py b/scripts/image.py
index 1b5d05d..167fa78 100755
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -85,10 +85,8 @@
             param)

         namespace = self.site.namespaces[6]
-        escaped = case_escape(namespace.case, self.old_image)
+        escaped = case_escape(namespace.case, self.old_image, underscore=True)

-        # Be careful, spaces and _ have been converted to '\ ' and '\_'
-        escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
         if not self.opt.loose or not self.new_image:
             image_regex = re.compile(
                 r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'

--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942668
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I58df8260db97c45cde6e959ada7e5a8acc959d79
Gerrit-Change-Number: 942668
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Matěj Suchánek <[email protected]>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to