Mpaa has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/250221

Change subject: pagegenerators.py: allow filtering by quality level
......................................................................

pagegenerators.py: allow filtering by quality level

Allow filtering by quality when available (content_model shall equal
'proofread-page').

Add Page.quality_level property for pages and related tests.

Change-Id: I93ff113f0fa3701b830a8589a955e0f35814b2a2
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
M tests/page_tests.py
3 files changed, 71 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/21/250221/1

diff --git a/pywikibot/page.py b/pywikibot/page.py
index d1ea5bc..14d6388 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -186,6 +186,19 @@
             self.site.loadpageinfo(self)
         return self._contentmodel
 
+    @property
+    def quality_level(self):
+        """Return the quality level of this page when it is retrieved from API.
+
+        This is only applicable if contentmodel equals 'proofread-page'.
+        None is returned otherwise.
+        # TODO: align this value with ProofreadPage.ql
+
+        """
+        if self.content_model == 'proofread-page':
+            return self._quality
+        return None
+
     @deprecated_args(decode=None, savetitle="asUrl")
     def title(self, underscore=False, withNamespace=True,
               withSection=True, asUrl=False, asLink=False,
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index ecffb9a..f1e9fc4 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -274,6 +274,10 @@
                   Case insensitive regular expressions will be used and
                   dot matches any character, including a newline.
 
+-ql               Filter pages based on page quality.
+                  This is only applicable if contentmodel equals
+                  'proofread-page', otherwise it has no effect.
+
 -onlyif           A claim the page needs to contain, otherwise the item won't
                   be returned.
                   The format is property=value,qualifier=value. Multiple (or
@@ -335,6 +339,7 @@
         self._namespaces = []
         self.step = None
         self.limit = None
+        self.qualityfilter_list = []
         self.articlefilter_list = []
         self.titlefilter_list = []
         self.claimfilter_list = []
@@ -431,6 +436,10 @@
                 dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
                                                             claim[0], claim[1],
                                                             claim[2], claim[3])
+
+        if self.qualityfilter_list:
+            dupfiltergen = QualityFilterPageGenerator(
+                dupfiltergen, self.qualityfilter_list)
 
         if self.titlefilter_list:
             dupfiltergen = RegexFilterPageGenerator(
@@ -766,6 +775,12 @@
                     u'Which pattern do you want to grep?'))
             else:
                 self.articlefilter_list.append(arg[6:])
+            return True
+        elif arg.startswith('-ql:'):
+            value = map(int, arg[4:].split(','))
+            if min(value) < 0 and max(value) > 4:
+                pywikibot.warning('Acceptable values for -ql are in 0-4')
+            self.qualityfilter_list = value
             return True
         elif arg.startswith('-onlyif') or arg.startswith('-onlyifnot'):
             ifnot = arg.startswith('-onlyifnot')
@@ -1457,6 +1472,23 @@
         return (page for page in generator
                 if cls.__filter_match(reg, page.text, quantifier))
 
+
+def QualityFilterPageGenerator(generator, quality):
+    """
+    Wrap a generator to filter pages according to quality levels.
+
+    This is possible only for pages with content_model 'proofread-page'.
+    In all the other cases, no filter is applied.
+
+    @param generator: A generator object
+    @param quality: proofread-page quality levels (valid range 0-4)
+    @type quality: list of int
+
+    """
+    for page in generator:
+        if page.quality_level is None or page.quality_level in quality:
+            yield page
+
 # name the generator methods
 RegexFilterPageGenerator = RegexFilter.titlefilter
 RegexBodyFilterPageGenerator = RegexFilter.contentfilter
diff --git a/tests/page_tests.py b/tests/page_tests.py
index cf25ff0..d268ea6 100644
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -938,6 +938,32 @@
         self.assertEqual(p1.protection(), {})
 
 
+class TestPageQuality(TestCase):
+
+    """Test page quality."""
+
+    family = 'wikisource'
+    code = 'en'
+
+    cached = True
+
+    def test_applicable_quality_level(self):
+        """Test Page.quality_level when applicable."""
+        site = self.get_site()
+        title = 'Page:Popular Science Monthly Volume 49.djvu/1'
+        page = pywikibot.Page(site, title)
+        self.assertEqual(page.content_model, 'proofread-page')
+        self.assertEqual(page.quality_level, 0)
+
+    def test_not_applicable_quality_level(self):
+        """Test Page.quality_level when not applicable."""
+        site = self.get_site()
+        title = 'Main Page'
+        page = pywikibot.Page(site, title)
+        self.assertNotEqual(page.content_model, 'proofread-page')
+        self.assertEqual(page.quality_level, None)
+
+
 class HtmlEntity(TestCase):
 
     """Test that HTML entities are correctly decoded."""

-- 
To view, visit https://gerrit.wikimedia.org/r/250221
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I93ff113f0fa3701b830a8589a955e0f35814b2a2
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to