Mpaa has uploaded a new change for review.
https://gerrit.wikimedia.org/r/250221
Change subject: pagegenerators.py: allow filtering by quality level
......................................................................
pagegenerators.py: allow filtering by quality level
Allow filtering by quality when available (content_model shall equal
'proofread-page').
Add Page.quality_level property for pages and related tests.
Change-Id: I93ff113f0fa3701b830a8589a955e0f35814b2a2
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
M tests/page_tests.py
3 files changed, 71 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/21/250221/1
diff --git a/pywikibot/page.py b/pywikibot/page.py
index d1ea5bc..14d6388 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -186,6 +186,19 @@
self.site.loadpageinfo(self)
return self._contentmodel
+ @property
+ def quality_level(self):
+ """Return the quality level of this page when it is retrieved from API.
+
+ This is only applicable if contentmodel equals 'proofread-page'.
+ None is returned otherwise.
+ # TODO: align this value with ProofreadPage.ql
+
+ """
+ if self.content_model == 'proofread-page':
+ return self._quality
+ return None
+
@deprecated_args(decode=None, savetitle="asUrl")
def title(self, underscore=False, withNamespace=True,
withSection=True, asUrl=False, asLink=False,
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index ecffb9a..f1e9fc4 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -274,6 +274,10 @@
Case insensitive regular expressions will be used and
dot matches any character, including a newline.
+-ql Filter pages based on page quality.
+ This is only applicable if contentmodel equals
+ 'proofread-page'; otherwise it has no effect.
+
-onlyif A claim the page needs to contain, otherwise the item won't
be returned.
The format is property=value,qualifier=value. Multiple (or
@@ -335,6 +339,7 @@
self._namespaces = []
self.step = None
self.limit = None
+ self.qualityfilter_list = []
self.articlefilter_list = []
self.titlefilter_list = []
self.claimfilter_list = []
@@ -431,6 +436,10 @@
dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
claim[0], claim[1],
claim[2], claim[3])
+
+ if self.qualityfilter_list:
+ dupfiltergen = QualityFilterPageGenerator(
+ dupfiltergen, self.qualityfilter_list)
if self.titlefilter_list:
dupfiltergen = RegexFilterPageGenerator(
@@ -766,6 +775,12 @@
u'Which pattern do you want to grep?'))
else:
self.articlefilter_list.append(arg[6:])
+ return True
+ elif arg.startswith('-ql:'):
+ value = map(int, arg[4:].split(','))
+ if min(value) < 0 and max(value) > 4:
+ pywikibot.warning('Acceptable values for -ql are in 0-4')
+ self.qualityfilter_list = value
return True
elif arg.startswith('-onlyif') or arg.startswith('-onlyifnot'):
ifnot = arg.startswith('-onlyifnot')
@@ -1457,6 +1472,23 @@
return (page for page in generator
if cls.__filter_match(reg, page.text, quantifier))
+
+def QualityFilterPageGenerator(generator, quality):
+ """
+ Wrap a generator to filter pages according to quality levels.
+
+ This is possible only for pages with content_model 'proofread-page'.
+ In all other cases, no filter is applied.
+
+ @param generator: A generator object
+ @param quality: proofread-page quality levels (valid range 0-4)
+ @type quality: list of int
+
+ """
+ for page in generator:
+ if page.quality_level is None or page.quality_level in quality:
+ yield page
+
# name the generator methods
RegexFilterPageGenerator = RegexFilter.titlefilter
RegexBodyFilterPageGenerator = RegexFilter.contentfilter
diff --git a/tests/page_tests.py b/tests/page_tests.py
index cf25ff0..d268ea6 100644
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -938,6 +938,32 @@
self.assertEqual(p1.protection(), {})
+class TestPageQuality(TestCase):
+
+ """Test page quality."""
+
+ family = 'wikisource'
+ code = 'en'
+
+ cached = True
+
+ def test_applicable_quality_level(self):
+ """Test Page.quality_level when applicable."""
+ site = self.get_site()
+ title = 'Page:Popular Science Monthly Volume 49.djvu/1'
+ page = pywikibot.Page(site, title)
+ self.assertEqual(page.content_model, 'proofread-page')
+ self.assertEqual(page.quality_level, 0)
+
+ def test_not_applicable_quality_level(self):
+ """Test Page.quality_level when not applicable."""
+ site = self.get_site()
+ title = 'Main Page'
+ page = pywikibot.Page(site, title)
+ self.assertNotEqual(page.content_model, 'proofread-page')
+ self.assertEqual(page.quality_level, None)
+
+
class HtmlEntity(TestCase):
"""Test that HTML entities are correctly decoded."""
--
To view, visit https://gerrit.wikimedia.org/r/250221
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I93ff113f0fa3701b830a8589a955e0f35814b2a2
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits