Dachary has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/316057

Change subject: plugin: get template field values for all sitelinks
......................................................................

plugin: get template field values for all sitelinks

For a given item, extract the templates found in the pages its
sitelinks point to. Extract the value of a given field from those
templates and return a map, keyed by language code, that looks like:

   {
     'fr': 'License Publique Générale GNU',
     'en': 'GNU General Public License',
   }

Only consider the templates whose title matches the pattern specified
in lang2pattern (for instance, Infobox). This limits the chances of
picking up conflicting values should another template on the same page
use the same field.

The value is extracted from the field named after the 'en' entry of
lang2field. For instance, if lang2field['en'] = 'License', the license
field will be extracted. If lang2field['fr'] does not exist, the French
translation returned by the translate_title method will be used. If
lang2field['zh'] = 'license' exists, it is used and no attempt is made
to translate the English word. It is not uncommon for some Wikipedias
to use field names that are not in the native language.

Change-Id: I43178b93a3e2e4445c2b73742bef6f072b65d3f2
Signed-off-by: Loic Dachary <l...@dachary.org>
---
M FLOSSbot/plugin.py
M tests/test_plugin.py
2 files changed, 45 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/bots/FLOSSbot refs/changes/57/316057/1

diff --git a/FLOSSbot/plugin.py b/FLOSSbot/plugin.py
index cba787e..7eefa2b 100644
--- a/FLOSSbot/plugin.py
+++ b/FLOSSbot/plugin.py
@@ -278,6 +278,37 @@
             log.debug("GET failed with " + str(e))
             return None
 
+    def get_template_field(self, item, lang2field, lang2pattern):
+        lang2value = {}
+        for dbname in item.sitelinks.keys():
+            site = pywikibot.site.APISite.fromDBName(dbname)
+            pattern = lang2pattern.get(site.code, lang2pattern['*'])
+            p = pywikibot.Page(site, item.sitelinks[dbname])
+            for (template, pairs) in p.templatesWithParams():
+                self.debug(item, site.code + " template " + template.title())
+                if pattern in template.title():
+                    for pair in pairs:
+                        found = pair.split('=', 1)
+                        if len(found) == 1:
+                            continue
+                        (name, value) = found
+                        if site.code in lang2field:
+                            translated = lang2field[site.code]
+                        elif 'en' in lang2field:
+                            translated = self.translate_title(lang2field['en'],
+                                                              site.code)
+                        else:
+                            translated = None
+                        self.debug(item, site.code + " compare " +
+                                   str(translated).lower() + " and " +
+                                   name.lower())
+                        if (value and
+                                translated and
+                                name.lower() == translated.lower()):
+                            lang2value[site.code] = value
+        self.debug(item, 'get_template_field ' + str(lang2value))
+        return lang2value
+
     def translate_title(self, title, lang):
         if title not in self.title_translation:
             site = pywikibot.site.APISite.fromDBName('enwiki')
diff --git a/tests/test_plugin.py b/tests/test_plugin.py
index 1edd2fa..8fe185a 100644
--- a/tests/test_plugin.py
+++ b/tests/test_plugin.py
@@ -151,6 +151,20 @@
         found = plugin.search_entity(plugin.bot.site, name, type='item')
         assert found.getID() == second.getID()
 
+    def test_get_template_field(self):
+        bot = Bot.factory(['--verbose'])
+        plugin = Plugin(bot, bot.args)
+        item = plugin.Q_GNU_Emacs
+        expected = {
+            'fr': 'licence',
+            'en': 'license',
+        }
+        item.get()
+        lang2field = {'en': 'License'}
+        lang2pattern = {'*': 'Infobox'}
+        actual = plugin.get_template_field(item, lang2field, lang2pattern)
+        assert actual.keys() == expected.keys()
+
     def test_translate_title(self):
         bot = Bot.factory(['--verbose'])
         plugin = Plugin(bot, bot.args)

-- 
To view, visit https://gerrit.wikimedia.org/r/316057
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I43178b93a3e2e4445c2b73742bef6f072b65d3f2
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/bots/FLOSSbot
Gerrit-Branch: master
Gerrit-Owner: Dachary <l...@dachary.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to