jenkins-bot has submitted this change and it was merged. Change subject: plugin: get template field values for all sitelinks ......................................................................
plugin: get template field values for all sitelinks For a given item, extract the templates found in the interlink pages. Extract the value of a given field and return a map that looks like: { 'fr': 'License Publique Générale GNU', 'en': 'GNU General Public License', } Only consider the templates matching the pattern specified in lang2pattern (for instance Infobox). This limits the chances of conflicting values should another template have the same field. The value is extracted from the field named after the 'en' entry of lang2field. For instance if lang2field['en'] = 'License', the license field will be extracted. If lang2field['fr'] does not exist, the French translation as returned by the translate_title method will be used. If lang2field['zh'] = 'license' exists, it is used and no attempt is made to translate the English word. It is not uncommon for some Wikipedias to use field names that are not in their native language. Change-Id: I43178b93a3e2e4445c2b73742bef6f072b65d3f2 Signed-off-by: Loic Dachary <l...@dachary.org> --- M FLOSSbot/plugin.py M tests/test_plugin.py 2 files changed, 45 insertions(+), 0 deletions(-) Approvals: Dachary: Looks good to me, approved jenkins-bot: Verified diff --git a/FLOSSbot/plugin.py b/FLOSSbot/plugin.py index cba787e..7eefa2b 100644 --- a/FLOSSbot/plugin.py +++ b/FLOSSbot/plugin.py @@ -278,6 +278,37 @@ log.debug("GET failed with " + str(e)) return None + def get_template_field(self, item, lang2field, lang2pattern): + lang2value = {} + for dbname in item.sitelinks.keys(): + site = pywikibot.site.APISite.fromDBName(dbname) + pattern = lang2pattern.get(site.code, lang2pattern['*']) + p = pywikibot.Page(site, item.sitelinks[dbname]) + for (template, pairs) in p.templatesWithParams(): + self.debug(item, site.code + " template " + template.title()) + if pattern in template.title(): + for pair in pairs: + found = pair.split('=', 1) + if len(found) == 1: + continue + (name, value) = found + if site.code in lang2field: + 
translated = lang2field[site.code] + elif 'en' in lang2field: + translated = self.translate_title(lang2field['en'], + site.code) + else: + translated = None + self.debug(item, site.code + " compare " + + str(translated).lower() + " and " + + name.lower()) + if (value and + translated and + name.lower() == translated.lower()): + lang2value[site.code] = value + self.debug(item, 'get_template_field ' + str(lang2value)) + return lang2value + def translate_title(self, title, lang): if title not in self.title_translation: site = pywikibot.site.APISite.fromDBName('enwiki') diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 1edd2fa..8fe185a 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -151,6 +151,20 @@ found = plugin.search_entity(plugin.bot.site, name, type='item') assert found.getID() == second.getID() + def test_get_template_field(self): + bot = Bot.factory(['--verbose']) + plugin = Plugin(bot, bot.args) + item = plugin.Q_GNU_Emacs + expected = { + 'fr': 'licence', + 'en': 'license', + } + item.get() + lang2field = {'en': 'License'} + lang2pattern = {'*': 'Infobox'} + actual = plugin.get_template_field(item, lang2field, lang2pattern) + assert actual.keys() == expected.keys() + def test_translate_title(self): bot = Bot.factory(['--verbose']) plugin = Plugin(bot, bot.args) -- To view, visit https://gerrit.wikimedia.org/r/316057 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I43178b93a3e2e4445c2b73742bef6f072b65d3f2 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/bots/FLOSSbot Gerrit-Branch: master Gerrit-Owner: Dachary <l...@dachary.org> Gerrit-Reviewer: Dachary <l...@dachary.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits