jenkins-bot has submitted this change and it was merged.

Change subject: [FEAT] Improved Site.sametitle
......................................................................


[FEAT] Improved Site.sametitle

This improves the Site.sametitle comparison with the following features:
- It uses (if available) the case-sensitivity option defined by the
  namespace
- It replaces each run of underscores and spaces with a single space, so
  'Fo__ar', 'Fo_ar' and 'Fo ar' are all treated as the same title.
- It works with servers that do not have an empty namespace.

Bug: 69118
Change-Id: I0b57ea6d7014b4ddfd8ceafbd859594b021e92b4
---
M pywikibot/site.py
M tests/site_tests.py
2 files changed, 83 insertions(+), 44 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/site.py b/pywikibot/site.py
index 799d99c..4a139fa 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -376,9 +376,9 @@
 
         # Discard leading colon
         if count >= 2 and parts[0] == '' and parts[1]:
-            return parts[1]
+            return parts[1].strip()
         elif parts[0]:
-            return parts[0]
+            return parts[0].strip()
         return False
 
     @staticmethod
@@ -806,55 +806,42 @@
                           re.IGNORECASE | re.UNICODE | re.DOTALL)
 
     def sametitle(self, title1, title2):
-        """Return True if title1 and title2 identify the same wiki page."""
-        # title1 and title2 may be unequal but still identify the same page,
-        # if they use different aliases for the same namespace
+        """
+        Return True if title1 and title2 identify the same wiki page.
 
-        def valid_namespace(alias, ns):
-            """Determine if a string is a valid alias for a namespace.
-
-            @param alias: namespace alias
-            @type alias: unicode
-            @param ns: namespace
-            @type ns: int
-
-            @return: bool
-            """
-            for text in self.namespace(ns, all=True):
-                if text.lower() == alias.lower():
-                    return True
-            return False
+        title1 and title2 may be unequal but still identify the same page,
+        if they use different aliases for the same namespace.
+        """
+        def ns_split(title):
+            """Separate the namespace from the name."""
+            if ':' not in title:
+                title = ':' + title
+            ns, _, name = title.partition(':')
+            ns = Namespace.lookup_name(ns, self.namespaces) or default_ns
+            return ns, name
 
         if title1 == title2:
             return True
+        # Replace underscores with spaces and multiple combinations of them
+        # with only one space
+        title1 = re.sub(r'[_ ]+', ' ', title1)
+        title2 = re.sub(r'[_ ]+', ' ', title2)
+        if title1 == title2:
+            return True
+        default_ns = self.namespaces[0]
         # determine whether titles contain namespace prefixes
-        if ":" in title1:
-            ns1, name1 = title1.split(":", 1)
-        else:
-            ns1, name1 = 0, title1
-        if ":" in title2:
-            ns2, name2 = title2.split(":", 1)
-        else:
-            ns2, name2 = 0, title2
-        for space in self.namespaces():  # iterate over all valid namespaces
-            if not isinstance(ns1, int) and valid_namespace(ns1, space):
-                ns1 = space
-            if not isinstance(ns2, int) and valid_namespace(ns2, space):
-                ns2 = space
-        if not isinstance(ns1, int):
-            # no valid namespace prefix found, so the string followed by ":"
-            # must be part of the title
-            name1 = ns1 + ":" + name1
-            ns1 = 0
-        if not isinstance(ns2, int):
-            name2 = ns2 + ":" + name2
-            ns2 = 0
-        if ns1 != ns2:
+        ns1_obj, name1 = ns_split(title1)
+        ns2_obj, name2 = ns_split(title2)
+        if ns1_obj != ns2_obj:
             # pages in different namespaces
             return False
-        if self.case() == "first-letter":
-            name1 = name1[:1].upper() + name1[1:]
-            name2 = name2[:1].upper() + name2[1:]
+        name1 = name1.strip()
+        name2 = name2.strip()
+        # If the namespace has a case definition it's overriding the site's
+        # case definition
+        if (ns1_obj.case if hasattr(ns1_obj, 'case') else self.case()) == 'first-letter':
+            name1 = name1[0].upper() + name1[1:]
+            name2 = name2[0].upper() + name2[1:]
         return name1 == name2
 
     # namespace shortcuts for backwards-compatibility
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 408c60b..c2b3140 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -144,6 +144,17 @@
         self.assertFalse(mysite.isInterwikiLink("foo"))
         self.assertIsInstance(mysite.redirectRegex().pattern, basestring)
         self.assertIsInstance(mysite.category_on_one_line(), bool)
+        self.assertTrue(mysite.sametitle("Template:Test", "Template:Test"))
+        self.assertTrue(mysite.sametitle("Template: Test", "Template:   Test"))
+        self.assertTrue(mysite.sametitle('Test name', 'Test name'))
+        self.assertFalse(mysite.sametitle('Test name', 'Test Name'))
+        # User, MediaWiki (both since 1.16) and Special are always
+        # first-letter (== only first non-namespace letter is case insenstive)
+        # See also: https://www.mediawiki.org/wiki/Manual:$wgCapitalLinks
+        self.assertTrue(mysite.sametitle("Special:Always", "Special:always"))
+        if LV(mysite.version()) >= LV('1.16'):
+            self.assertTrue(mysite.sametitle('User:Always', 'User:always'))
+            self.assertTrue(mysite.sametitle('MediaWiki:Always', 'MediaWiki:always'))
 
     def testConstructors(self):
         """Test cases for site constructors."""
@@ -1611,6 +1622,47 @@
         self.assertEqual(item.id, 'Q5296')
 
 
+class TestSameTitleSite(TestCase):
+
+    """Test APISite.sametitle on sites with known behaviour."""
+
+    sites = {
+        'enwp': {
+            'family': 'wikipedia',
+            'code': 'en',
+        },
+        'dewp': {
+            'family': 'wikipedia',
+            'code': 'de',
+        },
+        'enwt': {
+            'family': 'wiktionary',
+            'code': 'en',
+        }
+    }
+
+    def check(self, site, case_sensitive):
+        self.assertEqual(site.sametitle('Foo', 'foo'), not case_sensitive)
+        self.assertTrue(site.sametitle('File:Foo', 'Image:Foo'))
+        self.assertTrue(site.sametitle(':Foo', 'Foo'))
+        self.assertFalse(site.sametitle('User:Foo', 'Foo'))
+
+    def test_enwp(self):
+        self.check(self.get_site('enwp'), False)
+        self.assertFalse(self.get_site('enwp').sametitle(
+            'Template:Test template', 'Template:Test Template'))
+
+    def test_dewp(self):
+        site = self.get_site('dewp')
+        self.check(site, False)
+        self.assertTrue(site.sametitle('Benutzer:Foo', 'User:Foo'))
+        self.assertTrue(site.sametitle('Benutzerin:Foo', 'User:Foo'))
+        self.assertTrue(site.sametitle('Benutzerin:Foo', 'Benutzer:Foo'))
+
+    def test_enwt(self):
+        self.check(self.get_site('enwt'), True)
+
+
 if __name__ == '__main__':
     try:
         unittest.main()

-- 
To view, visit https://gerrit.wikimedia.org/r/151809
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I0b57ea6d7014b4ddfd8ceafbd859594b021e92b4
Gerrit-PatchSet: 12
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Nullzero <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to