Sn1per has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/258422

Change subject: Subpage filter generator
......................................................................

Subpage filter generator

Add a page generator filter that excludes subpages whose depth,
i.e. the number of parent pages, exceeds a given maximum.

Bug: T120587
Change-Id: Ia53580cf8ad7387c14d6ca3bf4fcf5b35f53edd4
---
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
2 files changed, 76 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/22/258422/1

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 9198dca..e1913ac 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -190,6 +190,10 @@
                   of pages, only retrieve n pages at a time from the wiki
                   server.
 
+-subpage:n        Filters pages to only those that have a depth of at most n,
+                  i.e. a depth of 0 filters out all pages that are subpages,
+                  a depth of 1 filters out all pages that are subpages of subpages
+
 -titleregex       A regular expression that needs to match the article title
                   otherwise the page won't be returned.
                   Multiple -titleregex:regexpr can be provided and the page will
@@ -339,6 +343,7 @@
         self.titlefilter_list = []
         self.claimfilter_list = []
         self.intersect = False
+        self.subpage_max_depth = None
         self._site = site
 
     @property
@@ -406,9 +411,9 @@
                 if self.limit:
                     self.gens[i] = itertools.islice(self.gens[i], self.limit)
         if len(self.gens) == 0:
-            if self.titlefilter_list or self.articlefilter_list:
+            if self.titlefilter_list or self.articlefilter_list or self.subpage_max_depth is not None:
                 pywikibot.warning(
-                    'grep/titleregex filters specified but no generators.')
+                    'grep/titleregex or subpage filters specified but no generators.')
             return None
         elif len(self.gens) == 1:
             gensList = self.gens[0]
@@ -440,7 +445,12 @@
             dupfiltergen = RegexBodyFilterPageGenerator(
                 PreloadingGenerator(dupfiltergen), self.articlefilter_list)
 
-        return dupfiltergen
+        # Add on subpage filter generator
+        if self.subpage_max_depth is not None:
+            dupfiltergen = SubpageFilterGenerator(
+                dupfiltergen, self.subpage_max_depth)
+
+            return dupfiltergen
 
     def getCategoryGen(self, arg, recurse=False, content=False,
                        gen_func=None):
@@ -799,6 +809,13 @@
             gen = MySQLPageGenerator(query, site=self.site)
         elif arg.startswith('-intersect'):
             self.intersect = True
+            return True
+        elif arg.startswith('-subpage'):
+            max_depth = arg[len('-subpage:'):]
+            if not max_depth:
+                max_depth = pywikibot.input(
+                    u'Maximum subpage depth:')
+            self.subpage_max_depth = int(max_depth)
             return True
         elif arg.startswith('-logevents:'):
             gen = self._parse_log_events(*arg[len('-logevents:'):].split(','))
@@ -1376,6 +1393,38 @@
 ItemClaimFilterPageGenerator = ItemClaimFilter.filter
 
 
+def SubpageFilterGenerator(generator, max_depth=0, show_filtered=False):
+    """
+    Generator which filters out subpages that are nested too deeply.
+
+    The depth of a page is the number of forward slashes ('/') in its title.
+    It is only checked when the namespace of the page has subpages enabled;
+    pages in any other namespace are always yielded.
+
+    @param generator: A generator object
+    @type generator: any generator or iterator
+    @param max_depth: Max depth of subpages to yield, at least zero
+    @type max_depth: int
+    @param show_filtered: Output a message for each page not yielded
+    @type show_filtered: bool
+    """
+    for page in generator:
+        # An empty-string 'subpages' attribute marks a namespace with
+        # subpages enabled; the attribute may be missing otherwise.
+        # NOTE(review): confirm '' is how Namespace reports this flag.
+        # getattr avoids wrapping the yield in a try/except block.
+        has_subpages = getattr(page.namespace(), 'subpages', None) == ''
+
+        # Without subpages a '/' is plain title text, so the page is kept.
+        if not has_subpages or page.title().count('/') <= max_depth:
+            yield page
+        elif show_filtered:
+            # Only report when asked to, as the parameter documents.
+            pywikibot.output(
+                u'Page %s is a subpage that is too deep. Skipping.'
+                % page)
+
+
 class RegexFilter(object):
 
     """Regex filter."""
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 5c5e207..8378e8c 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -233,6 +233,30 @@
         self.assertEqual(len(list(gen)), 0)
 
 
+class SubpageFilterGeneratorTestCase(TestCase):
+
+    """Test SubpageFilterGenerator."""
+
+    family = 'test'
+    code = 'test'
+
+    def test_subpage_filter(self):
+        """Check which category members pass with max depth 0 and 3."""
+        site = self.get_site()
+        test_cat = pywikibot.Category(site, 'Subpage testing')
+        gen = CategorizedPageGenerator(test_cat)
+        gen = pagegenerators.SubpageFilterGenerator(gen, 0)
+        # Trailing comma: a bare (u'...') is a string, not a tuple of titles.
+        expect_0 = (u'/home/lol',)
+        self.assertPagelistTitles(gen, titles=expect_0, site=site)
+
+        gen = CategorizedPageGenerator(test_cat)
+        gen = pagegenerators.SubpageFilterGenerator(gen, 3)
+        expect_3 = (u'/home/lol', u'Sn1per/ProtectTest1/test',
+                    u'Sn1per/ProtectTest1/test/test', u'Sn1per/sandbox')
+        self.assertPagelistTitles(gen, titles=expect_3, site=site)
+
+
 class TestRepeatingGenerator(RecentChangesTestCase):
 
     """Test RepeatingGenerator."""

-- 
To view, visit https://gerrit.wikimedia.org/r/258422
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia53580cf8ad7387c14d6ca3bf4fcf5b35f53edd4
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Sn1per <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to