jenkins-bot has submitted this change and it was merged.

Change subject: bot: handle ambiguous names in search_entity
bot: handle ambiguous names in search_entity In some cases there is no other way but to hardcode the item id because the labels are ambiguous and there is no sure way to figure out which one is relevant in the context of the FLOSS project. When there are two items and one of them is a disambiguation page, just ignore it. Change-Id: I14b895a6a67dcb7da6246c489d10f1b77e1d5777 Signed-off-by: Loic Dachary <l...@dachary.org> --- M FLOSSbot/bot.py M tests/test_bot.py 2 files changed, 87 insertions(+), 6 deletions(-) Approvals: Dachary: Looks good to me, approved jenkins-bot: Verified diff --git a/FLOSSbot/bot.py b/FLOSSbot/bot.py index 6381027..66eff53 100644 --- a/FLOSSbot/bot.py +++ b/FLOSSbot/bot.py @@ -95,16 +95,56 @@ self.entities[type][name] = found return found + # + # Hardcode the desired wikidata item when there are + # multiple items with the same english label and no + # trivial way to disambiguate them. + # + authoritative = { + 'wikidata': { + 'git': 'Q186055', + 'Fossil': 'Q1439431', + }, + 'test': { + }, + } + def search_entity(self, site, name, **kwargs): - found = None + if name in Bot.authoritative[site.code]: + candidate = pywikibot.ItemPage( + site, Bot.authoritative[site.code][name], 0) + if candidate.get()['labels']['en'] == name: + return candidate + candidates = [] for p in site.search_entities(name, 'en', **kwargs): - if p['label'] == name: + # log.debug("looking for entity " + name + ", found " + str(p)) + if p.get('label') == name: if kwargs['type'] == 'property': - found = p + candidates.append(p) else: - found = pywikibot.ItemPage(site, p['id'], 0) - break - return found + candidates.append(pywikibot.ItemPage(site, p['id'], 0)) + if len(candidates) == 0: + return None + elif len(candidates) > 1 and kwargs['type'] == 'item': + found = [] + for candidate in candidates: + item = candidate.get() + ok = True + for instance_of in item['claims'].get(self.P_instance_of, []): + if (instance_of.getTarget() == + self.Q_Wikimedia_disambiguation_page): + 
log.debug("ignore disambiguation page " + + candidate.getID() + " for " + name) + ok = False + break + if ok: + found.append(candidate) + if len(found) != 1: + raise ValueError("found multiple items for " + name + + " " + str(found)) + return found[0] + else: + return candidates[0] lookup_item = lookup_entity diff --git a/tests/test_bot.py b/tests/test_bot.py index a9dbba9..d28537e 100644 --- a/tests/test_bot.py +++ b/tests/test_bot.py @@ -19,6 +19,7 @@ import logging from datetime import date +import pytest import pywikibot from FLOSSbot.bot import Bot @@ -105,3 +106,43 @@ bot.set_retrieved(item, claim, date(1965, 11, 2)) assert bot.need_verification(claim) is True bot.clear_entity_label(item.getID()) + + def test_search_entity(self): + bot = Bot(argparse.Namespace( + test=True, + user='FLOSSbotCI', + )) + name = TestWikidata.random_name() + entity = { + "labels": { + "en": { + "language": "en", + "value": name, + } + }, + } + first = bot.site.editEntity({'new': 'item'}, entity) + first = pywikibot.ItemPage(bot.site, first['entity']['id'], 0) + second = bot.site.editEntity({'new': 'item'}, entity) + second = pywikibot.ItemPage(bot.site, second['entity']['id'], 0) + + with pytest.raises(ValueError) as e: + bot.search_entity(bot.site, name, type='item') + assert "found multiple items" in str(e.value) + + claim = pywikibot.Claim(bot.site, bot.P_instance_of, 0) + claim.setTarget(bot.Q_Wikimedia_disambiguation_page) + first.addClaim(claim) + + found = bot.search_entity(bot.site, name, type='item') + assert found.getID() == second.getID() + + bot.site.editEntity({'new': 'item'}, entity) + + with pytest.raises(ValueError) as e: + bot.search_entity(bot.site, name, type='item') + assert "found multiple items" in str(e.value) + + Bot.authoritative['test'][name] = second.getID() + found = bot.search_entity(bot.site, name, type='item') + assert found.getID() == second.getID() -- To view, visit https://gerrit.wikimedia.org/r/311841 To unsubscribe, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I14b895a6a67dcb7da6246c489d10f1b77e1d5777
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/bots/FLOSSbot
Gerrit-Branch: master
Gerrit-Owner: Dachary <l...@dachary.org>
Gerrit-Reviewer: Dachary <l...@dachary.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits