jenkins-bot has submitted this change and it was merged.
Change subject: Add Namespace class
......................................................................
Add Namespace class
Replaces the current implicit data structure of
site._namespaces with a class whose attributes have
self-explanatory names and whose methods
simplify using namespaces.
The class also stores other attributes provided by
API siprop=namespaces.
This changeset does not implement new Namespace semantics
in other parts of pywikibot, so as to ensure that existing
usage of site._namespaces is supported by the Namespace
class.
Change-Id: I0cce21e6161031861c0056dc5498f47bc26e1cc8
---
M pywikibot/site.py
M tests/dry_api_tests.py
M tests/dry_site_tests.py
A tests/namespace_tests.py
M tests/site_tests.py
M tests/wikibase_tests.py
6 files changed, 506 insertions(+), 47 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index b70ad2d..060776b 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -20,6 +20,7 @@
import re
import sys
from distutils.version import LooseVersion as LV
+from collections import Iterable
import threading
import time
import urllib
@@ -119,6 +120,276 @@
raise Error("Family %s does not exist" % fam)
_families[fam] = myfamily.Family()
return _families[fam]
+
+
+class Namespace(Iterable):
+
+ """ Namespace site data object.
+
+ This is backwards compatible with the structure of entries
+ in site._namespaces which were a list of
+ [customised namespace,
+ canonical namespace name?,
+ namespace alias*]
+
+ If the canonical_name is not provided for a namespace between -2
+ and 15, the MediaWiki 1.14+ built-in names are used.
+ Enable use_image_name to use built-in names from MediaWiki 1.13
+ and earlier as the details.
+
+ Image and File are aliases of each other by default.
+
+ If only one of canonical_name and custom_name are available, both
+ properties will have the same value.
+ """
+
+ # These are the MediaWiki built-in names for MW 1.14+.
+ # Namespace prefixes are always case-insensitive, but the
+ # canonical forms are capitalized.
+ canonical_namespaces = {
+ -2: u"Media",
+ -1: u"Special",
+ 0: u"",
+ 1: u"Talk",
+ 2: u"User",
+ 3: u"User talk",
+ 4: u"Project",
+ 5: u"Project talk",
+ 6: u"File",
+ 7: u"File talk",
+ 8: u"MediaWiki",
+ 9: u"MediaWiki talk",
+ 10: u"Template",
+ 11: u"Template talk",
+ 12: u"Help",
+ 13: u"Help talk",
+ 14: u"Category",
+ 15: u"Category talk",
+ }
+
+ def __init__(self, id, canonical_name=None, custom_name=None,
+ aliases=None, use_image_name=False, **kwargs):
+ """Constructor.
+
+ @param custom_name: Name defined in server LocalSettings.php
+ @type custom_name: unicode
+ @param canonical_name: Canonical name
+ @type canonical_name: str
+ @param aliases: Aliases
+ @type aliases: list of unicode
+ @param use_image_name: Use 'Image' as default canonical
+ for 'File' namespace
+ @param use_image_name: bool
+
+ """
+ self.id = id
+
+ if aliases is None:
+ self.aliases = list()
+ else:
+ self.aliases = aliases
+
+ if not canonical_name and id in self.canonical_namespaces:
+ if use_image_name:
+ if id == 6:
+ canonical_name = u'Image'
+ elif id == 7:
+ canonical_name = u"Image talk"
+
+ if not canonical_name:
+ canonical_name = self.canonical_namespaces[id]
+
+ assert(custom_name is not None or canonical_name is not None)
+
+ self.custom_name = custom_name if custom_name is not None else canonical_name
+ self.canonical_name = canonical_name if canonical_name is not None else custom_name
+
+ if not aliases:
+ if id in (6, 7):
+ if use_image_name:
+ alias = u'File'
+ else:
+ alias = u'Image'
+ if id == 7:
+ alias += u' talk'
+ self.aliases = [alias]
+ else:
+ self.aliases = list()
+ else:
+ self.aliases = aliases
+
+ self.info = kwargs
+
+ def __getattr__(self, attr):
+ """Look for undefined attributes in info."""
+ if attr in self.info:
+ return self.info[attr]
+ else:
+ raise AttributeError("%s instance has no attribute '%s'"
+ % (self.__class__.__name__, attr))
+
+ def _distinct(self):
+ if self.custom_name == self.canonical_name:
+ return [self.canonical_name] + self.aliases
+ else:
+ return [self.custom_name, self.canonical_name] + self.aliases
+
+ def _contains_lowercase_name(self, name):
+ """Determine a lowercase normalised name is a name of this namespace.
+
+ """
+ return name in [x.lower() for x in self._distinct()]
+
+ def __contains__(self, item):
+ """Determine if item is a name of this namespace.
+
+ The comparison is case insensitive, and item may have a single
+ colon on one or both sides of the name.
+
+ @param item: name to check
+ @type item: basestring
+ """
+ if item == '' and self.id == 0:
+ return True
+
+ name = Namespace.normalize_name(item)
+ if not name:
+ return False
+
+ return self._contains_lowercase_name(name.lower())
+
+ def __len__(self):
+ """Obtain length of the iterable."""
+ if self.custom_name == self.canonical_name:
+ return len(self.aliases) + 1
+ else:
+ return len(self.aliases) + 2
+
+ def __iter__(self):
+ """Return an iterator."""
+ return iter(self._distinct())
+
+ def __getitem__(self, index):
+ """Obtain an item from the iterable."""
+ if self.custom_name != self.canonical_name:
+ if index == 0:
+ return self.custom_name
+ else:
+ index -= 1
+
+ if index == 0:
+ return self.canonical_name
+ else:
+ return self.aliases[index - 1]
+
+ def __str__(self):
+ """Return a string representation."""
+ if self.id == 0:
+ return ':'
+ elif self.id in (6, 14):
+ return ':' + self.canonical_name + ':'
+ else:
+ return self.canonical_name + ':'
+
+ def __unicode__(self):
+ """Return a unicode string representation."""
+ if self.id == 0:
+ return u':'
+ elif self.id in (6, 14):
+ return u':' + self.custom_name + u':'
+ else:
+ return u'' + self.custom_name + u':'
+
+ def __index__(self):
+ return self.id
+
+ def __eq__(self, other):
+ """Compare whether two namespace objects are equal."""
+ if isinstance(other, int):
+ return self.id == other
+ elif isinstance(other, Namespace):
+ return self.id == other.id
+ elif isinstance(other, basestring):
+ return other in self
+ elif other is None:
+ return self.id == 0
+
+ def __ne__(self, other):
+ """Compare whether two namespace objects are not equal."""
+ if self.id == other.id:
+ return False
+ else:
+ return True
+
+ def __cmp__(self, other):
+ """Compare two namespace ids."""
+ if self.id == other.id:
+ return 0
+ elif self.id > other.id:
+ return 1
+ else:
+ return -1
+
+ def __repr__(self):
+ """Return a reconstructable representation."""
+ return '%s(id=%d, custom_name=%r, canonical_name=%r, aliases=%r, ' \
+ 'kwargs=%r)' \
+ % (self.__class__.__name__, self.id, self.custom_name,
+ self.canonical_name, self.aliases, self.info)
+
+ @staticmethod
+ def builtin_namespaces(use_image_name=False):
+ """Return a dict of the builtin namespaces."""
+ return dict([(i, Namespace(i, use_image_name=use_image_name))
+ for i in range(-2, 16)])
+
+ @staticmethod
+ def normalize_name(name):
+ """Remove an optional colon before and after name.
+
+ TODO: reject illegal characters.
+ """
+ if name == '':
+ return ''
+
+ parts = name.split(':', 4)
+ count = len(parts)
+ if count > 3:
+ return False
+ elif count == 3:
+ if parts[2] != '':
+ return False
+
+ # Discard leading colon
+ if count >= 2 and parts[0] == '' and parts[1]:
+ return parts[1]
+ elif parts[0]:
+ return parts[0]
+ return False
+
+ @staticmethod
+ def lookup_name(name, namespaces=None):
+ """Find the namespace for a name.
+
+ @param name: Name of the namespace.
+ @param namespaces: namespaces to search
+ default: builtins only
+ @type namespaces: dict of Namespace
+ @return: Namespace or None
+ """
+ if not namespaces:
+ namespaces = Namespace.builtin_namespaces()
+
+ name = Namespace.normalize_name(name)
+ if name is False:
+ return None
+ name = name.lower()
+
+ for namespace in namespaces.values():
+ if namespace._contains_lowercase_name(name):
+ return namespace
+
+ return None
class BaseSite(object):
@@ -293,6 +564,9 @@
def namespaces(self):
"""Return dict of valid namespaces on this wiki."""
+ if not hasattr(self, '_namespaces'):
+ use_image_name = LV(self.version()) < LV("1.14")
+ self._namespaces = Namespace.builtin_namespaces(use_image_name)
return self._namespaces
def ns_normalize(self, value):
@@ -638,33 +912,6 @@
def __init__(self, code, fam=None, user=None, sysop=None):
""" Constructor. """
BaseSite.__init__(self, code, fam, user, sysop)
- self._namespaces = {
- # These are the MediaWiki built-in names, which always work.
- # Localized names are loaded later upon accessing the wiki.
- # Namespace prefixes are always case-insensitive, but the
- # canonical forms are capitalized
- -2: [u"Media"],
- -1: [u"Special"],
- 0: [u""],
- 1: [u"Talk"],
- 2: [u"User"],
- 3: [u"User talk"],
- 4: [u"Project"],
- 5: [u"Project talk"],
- 6: [u"Image"],
- 7: [u"Image talk"],
- 8: [u"MediaWiki"],
- 9: [u"MediaWiki talk"],
- 10: [u"Template"],
- 11: [u"Template talk"],
- 12: [u"Help"],
- 13: [u"Help talk"],
- 14: [u"Category"],
- 15: [u"Category talk"],
- }
- if LV(self.version()) >= LV("1.14"):
- self._namespaces[6] = [u"File"]
- self._namespaces[7] = [u"File talk"]
self._msgcache = {}
self._loginstatus = LoginStatus.NOT_ATTEMPTED
return
@@ -1219,31 +1466,39 @@
self._siteinfo = sidata['general']
nsdata = sidata['namespaces']
+
+ self._namespaces = {}
+
+ # In MW 1.14, API siprop 'namespaces' added 'canonical',
+ # and Image became File with Image as an alias.
+ # For versions lower than 1.14, APISite needs to override
+ # the defaults defined in Namespace.
+ is_mw114 = LV(self.version()) >= LV('1.14')
+
for nskey in nsdata:
ns = int(nskey)
- # this is the preferred form so it goes at front of list
- self._namespaces.setdefault(ns, []).insert(0, nsdata[nskey]["*"])
+ custom_name = None
+ canonical_name = None
+ if ns == 0:
+ canonical_name = nsdata[nskey].pop('*')
+ custom_name = canonical_name
+ else:
+ custom_name = nsdata[nskey].pop('*')
+ if is_mw114:
+ canonical_name = nsdata[nskey].pop('canonical')
- if LV(self.version()) >= LV("1.14"):
- # nsdata["0"] has no canonical key.
- # canonical ns -2 to 15 are hard coded in self._namespaces
- # do not get them from API result to avoid canonical duplicates
- if -2 <= ns <= 15:
- continue
- if 'canonical' not in nsdata[nskey]:
- pywikibot.warning(
- u'namespace %s without a canonical name. Misconfigured?'
- % self._namespaces[ns][0])
- continue
- self._namespaces.setdefault(ns, []).append(nsdata[nskey]["canonical"])
+ # Remove the 'id' from nsdata
+ nsdata[nskey].pop('id')
+ namespace = Namespace(ns, canonical_name, custom_name,
+ use_image_name=is_mw114, **nsdata[nskey])
+
+ self._namespaces[ns] = namespace
if 'namespacealiases' in sidata:
aliasdata = sidata['namespacealiases']
for item in aliasdata:
- if item["*"] in self._namespaces[int(item['id'])]:
- continue
- # this is a less preferred form so it goes at the end
- self._namespaces[int(item['id'])].append(item["*"])
+ ns = int(item['id'])
+ self._namespaces[ns].aliases.append(item['*'])
if 'extensions' in sidata:
self._extensions = sidata['extensions']
diff --git a/tests/dry_api_tests.py b/tests/dry_api_tests.py
index 9bc44d2..c6cddf8 100644
--- a/tests/dry_api_tests.py
+++ b/tests/dry_api_tests.py
@@ -69,6 +69,9 @@
self._user = 'anon'
pywikibot.site.BaseSite.__init__(self, 'mock', MockFamily())
+ def version(self):
+ return '1.13' # pre 1.14
+
def languages(self):
return ['mock']
diff --git a/tests/dry_site_tests.py b/tests/dry_site_tests.py
index 16f247e..709d569 100644
--- a/tests/dry_site_tests.py
+++ b/tests/dry_site_tests.py
@@ -53,6 +53,7 @@
self._logged_in_as = None
self.obsolete = False
super(TestMustBe, self).setUp()
+ self.version = lambda: '1.13' # pre 1.14
def login(self, sysop):
# mock call
diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py
new file mode 100644
index 0000000..b6fe44f
--- /dev/null
+++ b/tests/namespace_tests.py
@@ -0,0 +1,190 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for the Namespace class.
+"""
+#
+# (C) Pywikibot team, 2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+
+from collections import Iterable
+from pywikibot.site import Namespace
+from tests.utils import PywikibotTestCase, unittest
+
+import sys
+if sys.version_info[0] > 2:
+ basestring = (str, )
+
+
+class TestNamespaceObject(PywikibotTestCase):
+ """Test cases for Namespace class."""
+
+ # These should work in any MW wiki
+ builtin_ids = {
+ 'Media': -2,
+ 'Special': -1,
+ '': 0,
+ 'Talk': 1,
+ 'User': 2,
+ 'User talk': 3,
+ 'Project': 4,
+ 'Project talk': 5,
+ 'File': 6,
+ 'File talk': 7,
+ 'MediaWiki': 8,
+ 'MediaWiki talk': 9,
+ 'Template': 10,
+ 'Template talk': 11,
+ 'Help': 12,
+ 'Help talk': 13,
+ 'Category': 14,
+ 'Category talk': 15,
+ }
+
+ old_builtin_ids = {
+ 'Image': 6,
+ 'Image talk': 7,
+ }
+
+ all_builtin_ids = dict(builtin_ids.items() + old_builtin_ids.items())
+
+ def testNamespaceTypes(self):
+ """Test cases for methods manipulating namespace names"""
+
+ ns = Namespace.builtin_namespaces(use_image_name=False)
+
+ self.assertType(ns, dict)
+ self.assertTrue(all(x in ns for x in range(0, 16)))
+
+ self.assertTrue(all(isinstance(key, int)
+ for key in ns))
+ self.assertTrue(all(isinstance(val, Iterable)
+ for val in ns.values()))
+ self.assertTrue(all(isinstance(name, basestring)
+ for val in ns.values()
+ for name in val))
+
+ self.assertTrue(all(isinstance(Namespace.lookup_name(b, ns), Namespace)
+ for b in self.builtin_ids))
+
+ self.assertTrue(all(Namespace.lookup_name(b, ns).id == self.all_builtin_ids[b]
+ for b in self.all_builtin_ids))
+
+ ns = Namespace.builtin_namespaces(use_image_name=True)
+
+ self.assertTrue(all(isinstance(Namespace.lookup_name(b, ns), Namespace)
+ for b in self.builtin_ids))
+
+ self.assertTrue(all(Namespace.lookup_name(b, ns).id == self.all_builtin_ids[b]
+ for b in self.all_builtin_ids))
+
+ def testNamespaceConstructor(self):
+ kwargs = {u'case': u'first-letter'}
+ y = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'], **kwargs)
+
+ self.assertEquals(y.id, 6)
+ self.assertEquals(y.custom_name, u'dummy')
+ self.assertEquals(y.canonical_name, u'File')
+
+ self.assertNotEquals(y.custom_name, u'Dummy')
+ self.assertNotEquals(y.canonical_name, u'file')
+
+ self.assertIn(u'Image', y.aliases)
+ self.assertIn(u'Immagine', y.aliases)
+
+ self.assertEquals(len(y), 4)
+ self.assertEquals(list(y), ['dummy', u'File', u'Image', u'Immagine'])
+ self.assertEquals(y.case, u'first-letter')
+
+ def testNamespaceNameCase(self):
+ """Namespace names are always case-insensitive."""
+ kwargs = {u'case': u'first-letter'}
+ y = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'], **kwargs)
+ self.assertIn(u'dummy', y)
+ self.assertIn(u'Dummy', y)
+ self.assertIn(u'file', y)
+ self.assertIn(u'File', y)
+ self.assertIn(u'image', y)
+ self.assertIn(u'Image', y)
+ self.assertIn(u'immagine', y)
+ self.assertIn(u'Immagine', y)
+
+ def testNamespaceToString(self):
+ ns = Namespace.builtin_namespaces(use_image_name=False)
+
+ self.assertEquals(str(ns[0]), ':')
+ self.assertEquals(str(ns[1]), 'Talk:')
+ self.assertEquals(str(ns[6]), ':File:')
+
+ self.assertEquals(unicode(ns[0]), u':')
+ self.assertEquals(unicode(ns[1]), u'Talk:')
+ self.assertEquals(unicode(ns[6]), u':File:')
+
+ kwargs = {u'case': u'first-letter'}
+ y = Namespace(id=6, custom_name=u'ملف', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'], **kwargs)
+
+ self.assertEquals(str(y), ':File:')
+ self.assertEquals(unicode(y), u':ملف:')
+
+ def testNamespaceCompare(self):
+ a = Namespace(id=0, canonical_name=u'')
+
+ self.assertEquals(a, 0)
+ self.assertEquals(a, '')
+ self.assertEquals(a, None)
+
+ x = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'])
+ y = Namespace(id=6, custom_name=u'ملف', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'])
+ z = Namespace(id=7, custom_name=u'dummy', canonical_name=u'File',
+ aliases=[u'Image', u'Immagine'])
+
+ self.assertEquals(x, x)
+ self.assertEquals(x, y)
+ self.assertNotEquals(x, a)
+ self.assertNotEquals(x, z)
+
+ self.assertEquals(x, 6)
+ self.assertEquals(x, u'dummy')
+ self.assertEquals(x, u'Dummy')
+ self.assertEquals(x, u'file')
+ self.assertEquals(x, u'File')
+ self.assertEquals(x, u':File')
+ self.assertEquals(x, u':File:')
+ self.assertEquals(x, u'File:')
+ self.assertEquals(x, u'image')
+ self.assertEquals(x, u'Image')
+
+ self.assertEquals(y, u'ملف')
+
+ self.assertTrue(a < x)
+ self.assertTrue(x > a)
+ self.assertTrue(z > x)
+
+ def testNamespaceNormalizeName(self):
+ self.assertEquals(Namespace.normalize_name(u'File'), u'File')
+ self.assertEquals(Namespace.normalize_name(u':File'), u'File')
+ self.assertEquals(Namespace.normalize_name(u'File:'), u'File')
+ self.assertEquals(Namespace.normalize_name(u':File:'), u'File')
+
+ self.assertEquals(Namespace.normalize_name(u''), u'')
+
+ self.assertEquals(Namespace.normalize_name(u':'), False)
+ self.assertEquals(Namespace.normalize_name(u'::'), False)
+ self.assertEquals(Namespace.normalize_name(u':::'), False)
+ self.assertEquals(Namespace.normalize_name(u':File::'), False)
+ self.assertEquals(Namespace.normalize_name(u'::File:'), False)
+ self.assertEquals(Namespace.normalize_name(u'::File::'), False)
+
+
+if __name__ == '__main__':
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 4550618..6b9dd33 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -11,6 +11,7 @@
from distutils.version import LooseVersion as LV
+from collections import Iterable
import pywikibot
from tests.utils import PywikibotTestCase, unittest
@@ -115,19 +116,26 @@
self.assertType(mysite.ns_normalize("project"), basestring)
self.assertTrue(all(isinstance(key, int)
for key in ns))
- self.assertTrue(all(isinstance(val, list)
+ self.assertTrue(all(isinstance(val, Iterable)
for val in ns.values()))
self.assertTrue(all(isinstance(name, basestring)
for val in ns.values()
for name in val))
self.assertTrue(all(isinstance(mysite.namespace(key), basestring)
for key in ns))
- self.assertTrue(all(isinstance(mysite.namespace(key, True), list)
+ self.assertTrue(all(isinstance(mysite.namespace(key, True), Iterable)
for key in ns))
self.assertTrue(all(isinstance(item, basestring)
for key in ns
for item in mysite.namespace(key, True)))
+ def testNamespaceCase(self):
+ site = pywikibot.Site('en', 'wiktionary')
+ main_namespace = site.namespaces()[0]
+ self.assertEquals(main_namespace.case, 'case-sensitive')
+ user_namespace = site.namespaces()[2]
+ self.assertEquals(user_namespace.case, 'first-letter')
+
def testApiMethods(self):
"""Test generic ApiSite methods"""
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py
index 9ef249e..f30ded4 100644
--- a/tests/wikibase_tests.py
+++ b/tests/wikibase_tests.py
@@ -40,6 +40,8 @@
if not site.has_transcluded_data:
return
repo = site.data_repository()
+ item_namespace = repo.namespaces()[0]
+ self.assertEqual(item_namespace.defaultcontentmodel, 'wikibase-item')
item = pywikibot.ItemPage.fromPage(mainpage)
self.assertType(item, pywikibot.ItemPage)
self.assertEqual(item.getID(), 'Q5296')
--
To view, visit https://gerrit.wikimedia.org/r/151617
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0cce21e6161031861c0056dc5498f47bc26e1cc8
Gerrit-PatchSet: 10
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits