jenkins-bot has submitted this change and it was merged.

Change subject: Add ItemClaimFilterPageGenerator
......................................................................


Add ItemClaimFilterPageGenerator

The generator filters ItemPages that do or do not contain
a specified claim.
Can be used via the -onlyif and -onlyifnot command line options.

Bug: T69568
Bug: T57005
Bug: T76547
Change-Id: I850f1063016fd0c8845c9509634f85b1830724ef
---
M pywikibot/page.py
M pywikibot/pagegenerators.py
M tests/pagegenerators_tests.py
3 files changed, 254 insertions(+), 0 deletions(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/pywikibot/page.py b/pywikibot/page.py
index 45e41c3..a10291f 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -3931,6 +3931,72 @@
         else:
             self.qualifiers[qualifier.getID()] = [qualifier]
 
+    def target_equals(self, value):
+        """
+        Check whether the Claim's target is equal to specified value.
+
+        The function checks for:
+        - ItemPage ID equality
+        - WbTime year equality
+        - Coordinate equality, regarding precision
+        - direct equality
+
+        @param value: the value to compare with
+        @return: true if the Claim's target is equal to the value provided,
+            false otherwise
+        @rtype: bool
+        """
+        if (isinstance(self.target, pywikibot.ItemPage) and
+                isinstance(value, str) and
+                self.target.id == value):
+            return True
+
+        if (isinstance(self.target, pywikibot.WbTime) and
+                not isinstance(value, pywikibot.WbTime) and
+                self.target.year == int(value)):
+            return True
+
+        if (isinstance(self.target, pywikibot.Coordinate) and
+                isinstance(value, str)):
+            coord_args = [float(x) for x in value.split(',')]
+            if len(coord_args) >= 3:
+                precision = coord_args[2]
+            else:
+                precision = 0.0001  # Default value (~10 m at equator)
+            try:
+                if self.target.precision is not None:
+                    precision = max(precision, self.target.precision)
+            except TypeError:
+                pass
+
+            if (abs(self.target.lat - coord_args[0]) <= precision and
+                    abs(self.target.lon - coord_args[1]) <= precision):
+                return True
+
+        if self.target == value:
+            return True
+
+        return False
+
+    def has_qualifier(self, qualifier_id, target):
+        """
+        Check whether Claim contains specified qualifier.
+
+        @param qualifier_id: id of the qualifier
+        @type qualifier_id: str
+        @param target: qualifier target to check presence of
+        @return: true if the qualifier was found, false otherwise
+        @rtype: bool
+        """
+        if self.isQualifier or self.isReference:
+            raise ValueError(u'Qualifiers and references cannot have '
+                             u'qualifiers.')
+
+        for qualifier in self.qualifiers.get(qualifier_id, []):
+            if qualifier.target_equals(target):
+                return True
+        return False
+
     def _formatValue(self):
         """
         Format the target into the proper JSON value that Wikibase wants.
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index aeb2d15..6f01605 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -235,6 +235,24 @@
                   Case insensitive regular expressions will be used and
                   dot matches any character, including a newline.
 
+-onlyif           A claim the page needs to contain, otherwise the item won't
+                  be returned.
+                  The format is property=value,qualifier=value. Multiple (or
+                  none) qualifiers can be passed, separated by commas.
+                  Examples: P1=Q2 (property P1 must contain value Q2),
+                  P3=Q4,P5=Q6,P6=Q7 (property P3 with value Q4 and
+                  qualifiers: P5 with value Q6 and P6 with value Q7).
+                  Value can be page ID, coordinate in format:
+                  latitude,longitude[,precision] (all values are in decimal
+                  degrees), year, or plain string.
+                  The argument can be provided multiple times and the item
+                  page will be returned only if all of the claims are present.
+                  Argument can be also given as "-onlyif:expression".
+
+-onlyifnot        A claim the page must not contain, otherwise the item won't
+                  be returned.
+                  For usage and examples, see -onlyif above.
+
 -intersect        Work on the intersection of all the provided generators.
 """
 
@@ -270,6 +288,7 @@
         self.step = None
         self.limit = None
         self.articlefilter_list = []
+        self.claimfilter_list = []
         self.intersect = False
         self._site = site
 
@@ -353,6 +372,13 @@
             else:
                 gensList = CombinedPageGenerator(self.gens)
                 dupfiltergen = DuplicateFilterPageGenerator(gensList)
+
+        if self.claimfilter_list:
+            dupfiltergen = PreloadingItemGenerator(dupfiltergen)
+            for claim in self.claimfilter_list:
+                dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen,
+                                                            claim[0], claim[1],
+                                                            claim[2], claim[3])
 
         if self.articlefilter_list:
             return RegexBodyFilterPageGenerator(
@@ -664,6 +690,20 @@
                     u'Which pattern do you want to grep?'))
             else:
                 self.articlefilter_list.append(arg[6:])
+            return True
+        elif arg.startswith('-onlyif') or arg.startswith('-onlyifnot'):
+            ifnot = arg.startswith('-onlyifnot')
+            if (len(arg) == 7 and not ifnot) or (len(arg) == 10 and ifnot):
+                claim = pywikibot.input(u'Which claim do you want to filter?')
+            else:
+                claim = arg[11 if ifnot else 8:]
+
+            p = re.compile(r'(?<!\\),')  # Match "," only if there no "\" before
+            temp = []  # Array to store split argument
+            for arg in p.split(claim):
+                temp.append(arg.replace('\,', ',').split('='))
+            self.claimfilter_list.append((temp[0][0], temp[0][1],
+                                          dict(temp[1:]), ifnot))
             return True
         elif arg.startswith('-yahoo'):
             gen = YahooSearchPageGenerator(arg[7:], site=self.site)
@@ -1184,6 +1224,56 @@
             yield page
 
 
+class ItemClaimFilter(object):
+
+    """Item claim filter."""
+
+    @classmethod
+    def __filter_match(cls, page, prop, claim, qualifiers=None):
+        """
+        Return true if the page contains the claim given.
+
+        @param page: the page to check
+        @return: true if page contains the claim, false otherwise
+        @rtype: bool
+        """
+        if not isinstance(page, pywikibot.ItemPage):
+            pywikibot.output(u'%s is not an ItemPage. Skipping.' % page)
+            return False
+        for page_claim in page.get()['claims'][prop]:
+            if page_claim.target_equals(claim):
+                if not qualifiers:
+                    return True
+
+                for prop, val in qualifiers.items():
+                    if not page_claim.has_qualifier(prop, val):
+                        return False
+                return True
+
+    @classmethod
+    def filter(cls, generator, prop, claim, qualifiers=None, negate=False):
+        """
+        Yield all ItemPages which does contain certain claim in a property.
+
+        @param prop: property id to check
+        @type prop: str
+        @param claim: value of the property to check. Can be exact value (for
+            instance, ItemPage instance) or ItemPage ID string (e.g. 'Q37470').
+        @param qualifiers: dict of qualifiers that must be present, or None if
+            qualifiers are irrelevant
+        @type qualifiers: dict or None
+        @param negate: true if pages that does *not* contain specified claim
+            should be yielded, false otherwise
+        @type negate: bool
+        """
+        for page in generator:
+            if cls.__filter_match(page, prop, claim, qualifiers) and not negate:
+                yield page
+
+# name the generator methods
+ItemClaimFilterPageGenerator = ItemClaimFilter.filter
+
+
 class RegexFilter(object):
 
     """Regex filter."""
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 1bbecab..ae7e2d8 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -437,6 +437,73 @@
         self.assertEqual(gf.namespaces, set([1, 6]))
 
 
+class TestItemClaimFilterPageGenerator(WikidataTestCase):
+
+    """Test item claim filter page generator generator."""
+
+    def _simple_claim_test(self, prop, claim, qualifiers, valid):
+        """
+        Test given claim on sample (India) page.
+
+        @param prop: the property to check
+        @type prop: str
+        @param claim: the claim the property should contain
+        @param qualifiers: qualifiers to check or None
+        @type qualifiers: dict or None
+        @param valid: true if the page should be yielded by the generator,
+            false otherwise
+        @type valid: bool
+        """
+        item = pywikibot.ItemPage(self.get_repo(), 'Q668')
+        gen = pagegenerators.ItemClaimFilterPageGenerator([item], prop,
+                                                          claim, qualifiers)
+        pages = set(gen)
+        self.assertEqual(len(pages), 1 if valid else 0)
+
+    def _get_council_page(self):
+        """Return United Nations Security Council Wikidata page."""
+        site = self.get_site()
+        return pywikibot.Page(site, 'Q37470')
+
+    def test_valid_qualifiers(self):
+        """Test ItemClaimFilterPageGenerator on sample page using valid qualifiers."""
+        qualifiers = {
+            'P580': pywikibot.WbTime(1950, 1, 1, precision=9,
+                                     site=self.get_site()),
+            'P582': '1951',
+        }
+        self._simple_claim_test('P463', self._get_council_page(), qualifiers,
+                                True)
+
+    def test_invalid_qualifiers(self):
+        """Test ItemClaimFilterPageGenerator on sample page using invalid qualifiers."""
+        qualifiers = {
+            'P580': 1950,
+            'P582': pywikibot.WbTime(1960, 1, 1, precision=9,
+                                     site=self.site),
+        }
+        self._simple_claim_test('P463', self._get_council_page(), qualifiers,
+                                False)
+
+    def test_nonexisting_qualifiers(self):
+        """Test ItemClaimFilterPageGenerator on sample page using qualifiers the page doesn't have."""
+        qualifiers = {
+            'P370': pywikibot.WbTime(1950, 1, 1, precision=9,
+                                     site=self.get_site()),
+            'P232': pywikibot.WbTime(1960, 1, 1, precision=9,
+                                     site=self.get_site()),
+        }
+        self._simple_claim_test('P463', self._get_council_page(), qualifiers,
+                                False)
+
+    def test_no_qualifiers(self):
+        """Test ItemClaimFilterPageGenerator on sample page without qualifiers."""
+        self._simple_claim_test('P474', '+91', None, True)
+        self._simple_claim_test('P463', 'Q37470', None, True)
+        self._simple_claim_test('P625', '21,78', None, True)
+        self._simple_claim_test('P625', '21,78.05,0.01', None, False)
+
+
 class TestFactoryGenerator(DefaultSiteTestCase):
 
     """Test pagegenerators.GeneratorFactory."""
@@ -560,6 +627,37 @@
         self.assertPagesInNamespaces(gen, set([1, 3]))
 
 
+class TestFactoryGeneratorWikibase(WikidataTestCase):
+
+    """Test pagegenerators.GeneratorFactory on Wikibase site."""
+
+    def test_onlyif(self):
+        """Test -onlyif without qualifiers."""
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-page:Q15745378')
+        gf.handleArg('-onlyif:P357=International Journal of Minerals\, '
+                     'Metallurgy\, and Materials')
+        gen = gf.getCombinedGenerator()
+        self.assertEqual(len(set(gen)), 1)
+
+    def test_onlyifnot(self):
+        """Test -onlyifnot without qualifiers."""
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-page:Q15745378')
+        gf.handleArg('-onlyifnot:P357=International Journal of Minerals\, '
+                     'Metallurgy\, and Materials')
+        gen = gf.getCombinedGenerator()
+        self.assertEqual(len(set(gen)), 0)
+
+    def test_onlyif_qualifiers(self):
+        """Test -onlyif with qualifiers."""
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-page:Q668')
+        gf.handleArg('-onlyif:P47=Q837,P805=Q3088768')
+        gen = gf.getCombinedGenerator()
+        self.assertEqual(len(set(gen)), 1)
+
+
 class TestLogeventsFactoryGenerator(DefaultSiteTestCase):
 
     """Test GeneratorFactory with pagegenerators.LogeventsPageGenerator."""

-- 
To view, visit https://gerrit.wikimedia.org/r/179158
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I850f1063016fd0c8845c9509634f85b1830724ef
Gerrit-PatchSet: 16
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: M4tx <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: M4tx <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Multichill <[email protected]>
Gerrit-Reviewer: Ricordisamoa <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to