Ian Kelling has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/315899

Change subject: add -titleregexnot page generator
......................................................................

add -titleregexnot page generator

- Add -titleregexnot page generator, completing -titleregex option
- Add corresponding tests, following -titleregex test example
- Add documentation on how -titleregexnot is useful to weblinkchecker.py

Change-Id: If580dd9734717fe74c91e1a373217c4046372606
---
M pywikibot/pagegenerators.py
M scripts/weblinkchecker.py
M tests/aspects.py
M tests/pagegenerators_tests.py
4 files changed, 68 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/99/315899/1

diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index e60c98f..65e45b9 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -226,6 +226,9 @@
                   Case insensitive regular expressions will be used and
                   dot matches any character.
 
+-titleregexnot    Like -titleregex, but yield a page only if the regular
+                  expression does not match its title.
+
 -transcludes      Work on all pages that use a certain template.
                   Argument can also be given as "-transcludes:Title".
 
@@ -384,6 +387,7 @@
         self.qualityfilter_list = []
         self.articlefilter_list = []
         self.titlefilter_list = []
+        self.titlenotfilter_list = []
         self.claimfilter_list = []
         self.catfilter_list = []
         self.intersect = False
@@ -456,6 +460,7 @@
                     self.gens[i] = itertools.islice(self.gens[i], self.limit)
         if len(self.gens) == 0:
             if (self.titlefilter_list or
+                self.titlenotfilter_list or
                 self.articlefilter_list or
                 self.claimfilter_list or
                 self.catfilter_list or
@@ -498,6 +503,10 @@
         if self.titlefilter_list:
             dupfiltergen = RegexFilterPageGenerator(
                 dupfiltergen, self.titlefilter_list)
+
+        if self.titlenotfilter_list:
+            dupfiltergen = RegexFilterPageGenerator(
+                dupfiltergen, self.titlenotfilter_list, 'none')
 
         if self.articlefilter_list:
             dupfiltergen = RegexBodyFilterPageGenerator(
@@ -855,6 +864,12 @@
                     'What page names are you looking for?')
             self.titlefilter_list.append(value)
             return True
+        elif arg == '-titleregexnot':
+            if not value:
+                value = pywikibot.input(
+                    'All pages except which ones?')
+            self.titlenotfilter_list.append(value)
+            return True
         elif arg == '-grep':
             if not value:
                 value = pywikibot.input('Which pattern do you want to grep?')
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 44e3296..499e595 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -15,6 +15,10 @@
 two times, with a time lag of at least one week. Such links will be logged to a
 .txt file in the deadlinks subdirectory.
 
+The .txt file uses wiki markup, so it may be useful to post it on the
+wiki and then exclude that page from subsequent runs. For example, if the
+page is named Broken Links, exclude it with '-titleregexnot:^Broken_Links$'.
+
 After running the bot and waiting for at least one week, you can re-check those
 pages where dead links were found, using the -repeat parameter.
 
diff --git a/tests/aspects.py b/tests/aspects.py
index e9a8788..4d30d14 100644
--- a/tests/aspects.py
+++ b/tests/aspects.py
@@ -85,6 +85,15 @@
             """
             return self.assertRegexpMatches(*args, **kwargs)
 
+    if not hasattr(unittest.TestCase, 'assertNotRegex'):
+        def assertNotRegex(self, *args, **kwargs):
+            """
+            Wrapper of unittest.assertNotRegexpMatches for Python 2 unittest.
+
+            assertNotRegexpMatches is deprecated in Python 3.
+            """
+            return self.assertNotRegexpMatches(*args, **kwargs)
+
     if not hasattr(unittest.TestCase, 'assertCountEqual'):
 
         def assertCountEqual(self, *args, **kwargs):
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 06380ef..098ad3c 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -786,6 +786,46 @@
         self.assertLessEqual(len(pages), 10)
         self.assertPagesInNamespaces(pages, 1)
 
+    def test_regexfilternot_default(self):
+        """Test allpages generator with titleregexnot filter."""
+        gf = pagegenerators.GeneratorFactory()
+        self.assertTrue(gf.handleArg('-start'))
+        # the regex matches titles of 11 or more characters, so the
+        # -titleregexnot filter keeps only titles shorter than 11 characters
+        self.assertTrue(gf.handleArg('-titleregexnot:.{11,}'))
+        gf.handleArg('-limit:10')
+        gen = gf.getCombinedGenerator()
+        self.assertIsNotNone(gen)
+        pages = list(gen)
+        self.assertLessEqual(len(pages), 10)
+        for page in pages:
+            self.assertIsInstance(page, pywikibot.Page)
+            self.assertNotRegex(page.title().lower(), '.{11,}')
+
+    def test_regexfilternot_ns_after(self):
+        """Test allpages generator with titleregexnot and namespace filter."""
+        gf = pagegenerators.GeneratorFactory()
+        self.assertTrue(gf.handleArg('-start'))
+        self.assertTrue(gf.handleArg('-titleregexnot:zzzz'))
+        gf.handleArg('-ns:1')
+        gf.handleArg('-limit:10')
+        gen = gf.getCombinedGenerator()
+        pages = list(gen)
+        self.assertLessEqual(len(pages), 10)
+        self.assertPagesInNamespaces(pages, 1)
+
+    def test_regexfilternot_ns_before(self):
+        """Test allpages generator with namespace and titleregexnot filter."""
+        gf = pagegenerators.GeneratorFactory()
+        self.assertTrue(gf.handleArg('-start'))
+        gf.handleArg('-ns:1')
+        self.assertTrue(gf.handleArg('-titleregexnot:zzzz'))
+        gf.handleArg('-limit:10')
+        gen = gf.getCombinedGenerator()
+        self.assertIsNotNone(gen)
+        pages = list(gen)
+        self.assertLessEqual(len(pages), 10)
+        self.assertPagesInNamespaces(pages, 1)
+
     def test_allpages_with_two_ns(self):
         """Test that allpages fails with two ns as parameter."""
         gf = pagegenerators.GeneratorFactory()

-- 
To view, visit https://gerrit.wikimedia.org/r/315899
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If580dd9734717fe74c91e1a373217c4046372606
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ian Kelling <i...@iankelling.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to