John Vandenberg has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/179627

Change subject: Allow pagegen filtering using namespace names
......................................................................

Allow pagegen filtering using namespace names

The ability to filter using namespace names was removed from the
isbn script in 31c07b2, but it still exists in other scripts.
Namespace names are more user-friendly because users regularly see
them in page titles, whereas namespace numbers are less well known,
especially for namespaces that are not frequently used.

Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M scripts/isbn.py
M tests/pagegenerators_tests.py
4 files changed, 86 insertions(+), 28 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/27/179627/1

diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index a460313..54a4c86 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1397,7 +1397,8 @@
         except TypeError:
             namespaces = [namespaces]
 
-        namespaces = [str(namespace) for namespace in namespaces]
+        # Namespace objects need to be cast to int first
+        namespaces = [str(int(namespace)) for namespace in namespaces]
         if 'multi' not in param and len(namespaces) != 1:
             raise pywikibot.Error(u'{0} module does not support multiple '
                                   'namespaces.'.format(self.limited_module))
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index e5c0e31..56073f0 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -29,6 +29,8 @@
 import time
 
 import pywikibot
+import pywikibot.data.wikidataquery as wdquery
+
 from pywikibot import date, config, i18n
 from pywikibot.tools import (
     deprecated,
@@ -37,7 +39,7 @@
     intersect_generators,
 )
 from pywikibot.comms import http
-import pywikibot.data.wikidataquery as wdquery
+from pywikibot.site import Namespace
 
 if sys.version_info[0] > 2:
     basestring = (str, )
@@ -102,7 +104,7 @@
 
 -namespaces       Filter the page generator to only yield pages in the
 -namespace        specified namespaces. Separate multiple namespace
--ns               numbers with commas. Example "-ns:0,2,4"
+-ns               numbers with commas. Example "-ns:0,2,4" or "-ns:Help".
                   If used with -newpages, -namepace/ns must be provided
                   before -newpages.
                   If used with -recentchanges, efficiency is improved if
@@ -254,7 +256,7 @@
         @type site: L{pywikibot.site.BaseSite}
         """
         self.gens = []
-        self.namespaces = []
+        self._namespaces = []
         self.step = None
         self.limit = None
         self.articlefilter_list = []
@@ -272,6 +274,26 @@
         if not self._site:
             self._site = pywikibot.Site()
         return self._site
+
+    @property
+    def namespaces(self):
+        # Convert int or string namespaces to Namespace objects
+        # and change the storage to immutable since it has been
+        # accessed.
+        if isinstance(self._namespaces, list):
+            _namespaces = frozenset(
+                [self.site.namespaces[ns] if isinstance(ns, int) else
+                 Namespace.lookup_name(ns, self.site.namespaces)
+                 for ns in self._namespaces])
+
+            # Namespace.lookup_name returns None if the name is not recognised
+            if None in _namespaces:
+                pywikibot.warning('Namespaces not all recognised: %r'
+                                  % _namespaces)
+
+            self._namespaces = _namespaces - frozenset([None])
+
+        return self._namespaces
 
     def getCombinedGenerator(self, gen=None):
         """Return the combination of all accumulated generators.
@@ -430,6 +452,11 @@
                     u'Please enter the local file name:')
             gen = TextfilePageGenerator(textfilename, site=self.site)
         elif arg.startswith('-namespace') or arg.startswith('-ns'):
+            if isinstance(self._namespaces, frozenset):
+                pywikibot.warning('Cannot handle arg %s as namespaces can not '
+                                  'be altered after a generator is created.'
+                                  % arg)
+                return True
             value = None
             if arg.startswith('-ns:'):
                 value = arg[len('-ns:'):]
@@ -441,12 +468,11 @@
                 value = pywikibot.input(
                     u'What namespace are you filtering on?')
             try:
-                self.namespaces.extend(
+                self._namespaces.extend(
                     [int(ns) for ns in value.split(",")]
                 )
             except ValueError:
-                pywikibot.output(u'Invalid namespaces argument: %s' % value)
-                return False
+                self._namespaces += value.split(",")
             return True
         elif arg.startswith('-step'):
             if len(arg) == len('-step'):
@@ -1016,25 +1042,23 @@
         namespaces contains namespace names.
     @type site: L{pywikibot.site.BaseSite}
     """
+    if not site:
+        site = pywikibot.Site()
     if isinstance(namespaces, (int, basestring)):
         namespaces = [namespaces]
     # convert namespace names to namespace numbers
-    for i in range(len(namespaces)):
-        ns = namespaces[i]
-        if isinstance(ns, basestring):
-            try:
-                # namespace might be given as str representation of int
-                index = int(ns)
-            except ValueError:
-                # FIXME: deprecate providing strings as namespaces
-                if site is None:
-                    site = pywikibot.Site()
-                index = site.getNamespaceIndex(ns)
-                if index is None:
-                    raise ValueError(u'Unknown namespace: %s' % ns)
-            namespaces[i] = index
+    _namespaces = [Namespace.lookup_name(ns, site.namespaces)
+                   if isinstance(ns, basestring) else ns
+                   for ns in namespaces]
+
+    # Namespace.lookup_name returns None if the name is not recognised
+    if None in _namespaces:
+        raise ValueError('Namespaces not all recognised: %r' % namespaces)
+
+    _namespaces = [int(ns) for ns in namespaces]
+
     for page in generator:
-        if page.namespace() in namespaces:
+        if int(page.namespace()) in _namespaces:
             yield page
 
 
diff --git a/scripts/isbn.py b/scripts/isbn.py
index 50663ac..fa7f472 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -11,12 +11,6 @@
 
 &params;
 
--namespace:n      Number or name of namespace to process. The parameter can be
-                  used multiple times. It works in combination with all other
-                  parameters, except for the -start parameter. If you e.g.
-                  want to iterate over all categories starting at M, use
-                  -start:Category:M.
-
 Furthermore, the following command line parameters are supported:
 
 -to13             Converts all ISBN-10 codes to ISBN-13.
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 32f3fda..ba06644 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -345,6 +345,45 @@
         self.assertTrue(all(isinstance(item, pywikibot.ItemPage) for item in gen))
 
 
+class DryFactoryGeneratorTest(TestCase):
+
+    """Dry tests for pagegenerators.GeneratorFactory."""
+
+    family = 'wikipedia'
+    code = 'en'
+
+    dry = True
+
+    def test_one_namespace(self):
+        gf = pagegenerators.GeneratorFactory(site=self.get_site())
+        gf.handleArg('-ns:2')
+        self.assertEqual(gf.namespaces, set([2]))
+
+    def test_two_namespaces(self):
+        gf = pagegenerators.GeneratorFactory(site=self.get_site())
+        gf.handleArg('-ns:2')
+        gf.handleArg('-ns:Talk')
+        self.assertEqual(gf.namespaces, set([2, 1]))
+
+    def test_two_named_namespaces(self):
+        gf = pagegenerators.GeneratorFactory(site=self.get_site())
+        gf.handleArg('-ns:Talk,File')
+        self.assertEqual(gf.namespaces, set([1, 6]))
+
+    def test_two_numeric_namespaces(self):
+        gf = pagegenerators.GeneratorFactory(site=self.get_site())
+        gf.handleArg('-ns:1,6')
+        self.assertEqual(gf.namespaces, set([1, 6]))
+
+    def test_immutable_namespaces_on_read(self):
+        gf = pagegenerators.GeneratorFactory(site=self.get_site())
+        gf.handleArg('-ns:1,6')
+        self.assertEqual(gf.namespaces, set([1, 6]))
+        self.assertIsInstance(gf.namespaces, frozenset)
+        gf.handleArg('-ns:0')
+        self.assertEqual(gf.namespaces, set([1, 6]))
+
+
 class TestFactoryGenerator(DefaultSiteTestCase):
 
     """Test pagegenerators.GeneratorFactory."""

-- 
To view, visit https://gerrit.wikimedia.org/r/179627
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to