John Vandenberg has uploaded a new change for review.
https://gerrit.wikimedia.org/r/179627
Change subject: Allow pagegen filtering using namespace names
......................................................................
Allow pagegen filtering using namespace names
The ability to filter using namespace names was removed from the
isbn script in 31c07b2, but still exists in other scripts.
Namespace names are more user-friendly, as users interact with
these names regularly in titles, while the namespace numbers are
less well known, especially for namespaces that are not frequently used.
Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M scripts/isbn.py
M tests/pagegenerators_tests.py
4 files changed, 86 insertions(+), 28 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/27/179627/1
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index a460313..54a4c86 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1397,7 +1397,8 @@
except TypeError:
namespaces = [namespaces]
- namespaces = [str(namespace) for namespace in namespaces]
+ # Namespace objects need to be cast to int first
+ namespaces = [str(int(namespace)) for namespace in namespaces]
if 'multi' not in param and len(namespaces) != 1:
raise pywikibot.Error(u'{0} module does not support multiple '
'namespaces.'.format(self.limited_module))
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index e5c0e31..56073f0 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -29,6 +29,8 @@
import time
import pywikibot
+import pywikibot.data.wikidataquery as wdquery
+
from pywikibot import date, config, i18n
from pywikibot.tools import (
deprecated,
@@ -37,7 +39,7 @@
intersect_generators,
)
from pywikibot.comms import http
-import pywikibot.data.wikidataquery as wdquery
+from pywikibot.site import Namespace
if sys.version_info[0] > 2:
basestring = (str, )
@@ -102,7 +104,7 @@
-namespaces Filter the page generator to only yield pages in the
-namespace specified namespaces. Separate multiple namespace
--ns numbers with commas. Example "-ns:0,2,4"
+-ns numbers with commas. Example "-ns:0,2,4" or "-ns:Help".
If used with -newpages, -namepace/ns must be provided
before -newpages.
If used with -recentchanges, efficiency is improved if
@@ -254,7 +256,7 @@
@type site: L{pywikibot.site.BaseSite}
"""
self.gens = []
- self.namespaces = []
+ self._namespaces = []
self.step = None
self.limit = None
self.articlefilter_list = []
@@ -272,6 +274,26 @@
if not self._site:
self._site = pywikibot.Site()
return self._site
+
+ @property
+ def namespaces(self):
+ # Convert int or string namespaces to Namespace objects
+ # and change the storage to immutable since it has been
+ # accessed.
+ if isinstance(self._namespaces, list):
+ _namespaces = frozenset(
+ [self.site.namespaces[ns] if isinstance(ns, int) else
+ Namespace.lookup_name(ns, self.site.namespaces)
+ for ns in self._namespaces])
+
+ # Namespace.lookup_name return None if the name is not recognised
+ if None in _namespaces:
+ pywikibot.warning('Namespaces not all recognised: %r'
+ % _namespaces)
+
+ self._namespaces = _namespaces - frozenset([None])
+
+ return self._namespaces
def getCombinedGenerator(self, gen=None):
"""Return the combination of all accumulated generators.
@@ -430,6 +452,11 @@
u'Please enter the local file name:')
gen = TextfilePageGenerator(textfilename, site=self.site)
elif arg.startswith('-namespace') or arg.startswith('-ns'):
+ if isinstance(self._namespaces, frozenset):
+ pywikibot.warning('Cannot handle arg %s as namespaces can not '
+ 'be altered after a generator is created.'
+ % arg)
+ return True
value = None
if arg.startswith('-ns:'):
value = arg[len('-ns:'):]
@@ -441,12 +468,11 @@
value = pywikibot.input(
u'What namespace are you filtering on?')
try:
- self.namespaces.extend(
+ self._namespaces.extend(
[int(ns) for ns in value.split(",")]
)
except ValueError:
- pywikibot.output(u'Invalid namespaces argument: %s' % value)
- return False
+ self._namespaces += value.split(",")
return True
elif arg.startswith('-step'):
if len(arg) == len('-step'):
@@ -1016,25 +1042,23 @@
namespaces contains namespace names.
@type site: L{pywikibot.site.BaseSite}
"""
+ if not site:
+ site = pywikibot.Site()
if isinstance(namespaces, (int, basestring)):
namespaces = [namespaces]
# convert namespace names to namespace numbers
- for i in range(len(namespaces)):
- ns = namespaces[i]
- if isinstance(ns, basestring):
- try:
- # namespace might be given as str representation of int
- index = int(ns)
- except ValueError:
- # FIXME: deprecate providing strings as namespaces
- if site is None:
- site = pywikibot.Site()
- index = site.getNamespaceIndex(ns)
- if index is None:
- raise ValueError(u'Unknown namespace: %s' % ns)
- namespaces[i] = index
+ _namespaces = [Namespace.lookup_name(ns, site.namespaces)
+ if isinstance(ns, basestring) else ns
+ for ns in namespaces]
+
+ # Namespace.lookup_name returns None if the name is not recognised
+ if None in _namespaces:
+ raise ValueError('Namespaces not all recognised: %r' % namespaces)
+
+ _namespaces = [int(ns) for ns in namespaces]
+
for page in generator:
- if page.namespace() in namespaces:
+ if int(page.namespace()) in _namespaces:
yield page
diff --git a/scripts/isbn.py b/scripts/isbn.py
index 50663ac..fa7f472 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -11,12 +11,6 @@
&params;
--namespace:n Number or name of namespace to process. The parameter can be
- used multiple times. It works in combination with all other
- parameters, except for the -start parameter. If you e.g.
- want to iterate over all categories starting at M, use
- -start:Category:M.
-
Furthermore, the following command line parameters are supported:
-to13 Converts all ISBN-10 codes to ISBN-13.
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 32f3fda..ba06644 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -345,6 +345,45 @@
self.assertTrue(all(isinstance(item, pywikibot.ItemPage) for item in
gen))
+class DryFactoryGeneratorTest(TestCase):
+
+ """Dry tests for pagegenerators.GeneratorFactory."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ def test_one_namespace(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:2')
+ self.assertEqual(gf.namespaces, set([2]))
+
+ def test_two_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:2')
+ gf.handleArg('-ns:Talk')
+ self.assertEqual(gf.namespaces, set([2, 1]))
+
+ def test_two_named_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:Talk,File')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+ def test_two_numeric_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:1,6')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+ def test_immutable_namespaces_on_read(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:1,6')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+ self.assertIsInstance(gf.namespaces, frozenset)
+ gf.handleArg('-ns:0')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+
class TestFactoryGenerator(DefaultSiteTestCase):
"""Test pagegenerators.GeneratorFactory."""
--
To view, visit https://gerrit.wikimedia.org/r/179627
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits