jenkins-bot has submitted this change and it was merged.

Change subject: Extract CommonsCat templates data to JSON file
......................................................................


Extract CommonsCat templates data to JSON file

- Add new JSON file containing the CommonsCat templates data
- Read it from getCommonscatTemplates via
  _load_wikipedia_commonscat_templates()

Also delete the ignoreTemplates data as it is unused.

Change-Id: Ib25b02eeaee9074548ac3708988d6814da1d63ca
---
M erfgoedbot/categorize_images.py
A erfgoedbot/data/wikipedia_commonscat_templates.json
2 files changed, 394 insertions(+), 144 deletions(-)

Approvals:
  Jean-Frédéric: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/erfgoedbot/categorize_images.py b/erfgoedbot/categorize_images.py
index d30bffb..734dd02 100644
--- a/erfgoedbot/categorize_images.py
+++ b/erfgoedbot/categorize_images.py
@@ -16,6 +16,8 @@
 python categorize_images.py -countrycode:ee -lang:et
 
 '''
+import json
+import os
 import re
 
 import pywikibot
@@ -41,150 +43,11 @@
 class NoCategoryToAddException(Exception):
     pass
 
-# Contains the commonscat templates for most Wikipedia's (taken from ex-commonscat.py)
-wikipedia_commonscat_templates = {
-    '_default': (u'Commonscat', []),
-    'af': (u'CommonsKategorie', [u'commonscat']),
-    'an': (u'Commonscat', [u'Commons cat']),
-    'ar': (u'تصنيف كومنز',
-           [u'Commonscat', u'تصنيف كومونز', u'Commons cat', u'CommonsCat']),
-    'arz': (u'Commons cat', [u'Commoncat']),
-    'az': (u'CommonsKat', [u'Commonscat']),
-    'bn': (u'কমন্সক্যাট', [u'Commonscat']),
-    'ca': (u'Commonscat', [u'Commons cat', u'Commons category']),
-    'crh': (u'CommonsKat', [u'Commonscat']),
-    'cs': (u'Commonscat', [u'Commons cat']),
-    'da': (u'Commonscat',
-           [u'Commons cat', u'Commons category', u'Commonscat left',
-            u'Commonscat2']),
-    'en': (u'Commons category',
-           [u'Commoncat', u'Commonscat', u'Commons cat', u'Commons+cat',
-            u'Commonscategory', u'Commons and category', u'Commonscat-inline',
-            u'Commons category-inline', u'Commons2', u'Commons category multi',
-            u'Cms-catlist-up', u'Catlst commons', u'Commonscat show2',
-            u'Sister project links']),
-    'es': (u'Commonscat',
-           [u'Ccat', u'Commons cat', u'Categoría Commons',
-            u'Commonscat-inline']),
-    'et': (u'Commonsi kategooria',
-           [u'Commonscat', u'Commonskat', u'Commons cat', u'Commons category']),
-    'eu': (u'Commonskat', [u'Commonscat']),
-    'fa': (u'ویکی‌انبار-رده',
-           [u'Commonscat', u'Commons cat', u'انبار رده', u'Commons category',
-            u'انبار-رده', u'جعبه پیوند به پروژه‌های خواهر',
-            u'در پروژه‌های خواهر', u'پروژه‌های خواهر']),
-    'fr': (u'Commonscat', [u'CommonsCat', u'Commons cat', u'Commons category']),
-    'frp': (u'Commonscat', [u'CommonsCat']),
-    'ga': (u'Catcómhaoin', [u'Commonscat']),
-    'he': (u'ויקישיתוף בשורה', []),
-    'hi': (u'Commonscat', [u'Commons2', u'Commons cat', u'Commons category']),
-    'hu': (u'Commonskat', [u'Közvagyonkat']),
-    'hy': (u'Վիքիպահեստ կատեգորիա',
-           [u'Commonscat', u'Commons cat', u'Commons category']),
-    'id': (u'Commonscat',
-           [u'Commons cat', u'Commons2', u'CommonsCat', u'Commons category']),
-    'is': (u'CommonsCat', [u'Commonscat']),
-    'ja': (u'Commonscat', [u'Commons cat', u'Commons category']),
-    'jv': (u'Commonscat', [u'Commons cat']),
-    'kaa': (u'Commons cat', [u'Commonscat']),
-    'kk': (u'Commonscat', [u'Commons2']),
-    'ko': (u'Commonscat', [u'Commons cat', u'공용분류']),
-    'la': (u'CommuniaCat', []),
-    'mk': (u'Ризница-врска',
-           [u'Commonscat', u'Commons cat', u'CommonsCat', u'Commons2',
-            u'Commons category']),
-    'ml': (u'Commonscat', [u'Commons cat', u'Commons2']),
-    'ms': (u'Kategori Commons', [u'Commonscat', u'Commons category']),
-    'nn': (u'Commonscat', [u'Commons cat']),
-    'os': (u'Commonscat', [u'Commons cat']),
-    'pt': (u'Commonscat', [u'Commons cat']),
-    'ro': (u'Commonscat', [u'Commons cat']),
-    'ru': (u'Commonscat', [u'Викисклад-кат', u'Commons category']),
-    'simple': (u'Commonscat',
-               [u'Commons cat', u'Commons cat multi', u'Commons category',
-                u'Commons category multi', u'CommonsCompact',
-                u'Commons-inline']),
-    'sh': (u'Commonscat', [u'Commons cat']),
-    'sl': (u'Kategorija v Zbirki',
-           [u'Commonscat', u'Kategorija v zbirki', u'Commons cat',
-            u'Katzbirke']),
-    'sq': (u'Commonscat', [u'Commonskat', u'Commonsart', u'CommonsCat']),
-    'sv': (u'Commonscat',
-           [u'Commonscat-rad', u'Commonskat', u'Commons cat', u'Commonscatbox',
-            u'Commonscat-box']),
-    'sw': (u'Commonscat', [u'Commons2', u'Commons cat']),
-    'te': (u'Commonscat', [u'Commons cat']),
-    'tr': (u'Commons kategori',
-           [u'CommonsKat', u'Commonscat', u'Commons cat']),
-    'uk': (u'Commonscat', [u'Commons cat', u'Category', u'Commonscat-inline']),
-    'vi': (u'Commonscat',
-           [u'Commons2', u'Commons cat', u'Commons category', u'Commons+cat']),
-    'zh': (u'Commonscat', [u'Commons cat', u'Commons category']),
-    'zh-classical': (u'共享類', [u'Commonscat']),
-    'zh-yue': (u'同享類',
-               [u'Commonscat', u'共享類 ', u'Commons cat', u'Commons category']),
-}
 
-ignoreTemplates = {
-    'af': [u'commons'],
-    'ar': [u'تحويلة تصنيف', u'كومنز', u'كومونز', u'Commons'],
-    'be-tarask': [u'Commons', u'Commons category'],
-    'cs': [u'Commons', u'Sestřičky', u'Sisterlinks'],
-    'da': [u'Commons', u'Commons left', u'Commons2', u'Commonsbilleder',
-           u'Commonskat', u'Commonscat2', u'GalleriCommons', u'Søsterlinks'],
-    'de': [u'Commons', u'ZhSZV', u'Bauwerk-stil-kategorien',
-           u'Bauwerk-funktion-kategorien', u'KsPuB',
-           u'Kategoriesystem Augsburg-Infoleiste',
-           u'Kategorie Ge', u'Kategorie v. Chr. Ge',
-           u'Kategorie Geboren nach Jh. v. Chr.', u'Kategorie Geboren nach Jh.',
-           u'!Kategorie Gestorben nach Jh. v. Chr.',
-           u'!Kategorie Gestorben nach Jh.',
-           u'Kategorie Jahr', u'Kategorie Jahr v. Chr.',
-           u'Kategorie Jahrzehnt', u'Kategorie Jahrzehnt v. Chr.',
-           u'Kategorie Jahrhundert', u'Kategorie Jahrhundert v. Chr.',
-           u'Kategorie Jahrtausend', u'Kategorie Jahrtausend v. Chr.'],
-    'en': [u'Category redirect', u'Commons', u'Commonscat1A', u'Commoncats',
-           u'Commonscat4Ra',
-           u'Sisterlinks', u'Sisterlinkswp', u'Sister project links',
-           u'Tracking category', u'Template category', u'Wikipedia category'],
-    'eo': [u'Commons',
-           (u'Projekto/box', 'commons='),
-           (u'Projekto', 'commons='),
-           (u'Projektoj', 'commons='),
-           (u'Projektoj', 'commonscat=')],
-    'es': [u'Commons', u'IprCommonscat'],
-    'eu': [u'Commons'],
-    'fa': [u'Commons', u'ویکی‌انبار', u'Category redirect', u'رده بهتر',
-           u'جعبه پیوند به پروژه‌های خواهر', u'در پروژه‌های خواهر',
-           u'پروژه‌های خواهر'],
-    'fi': [u'Commonscat-rivi', u'Commons-rivi', u'Commons'],
-    'fr': [u'Commons', u'Commons-inline', (u'Autres projets', 'commons=')],
-    'fy': [u'Commons', u'CommonsLyts'],
-    'he': [u'מיזמים'],
-    'hr': [u'Commons', (u'WProjekti', 'commonscat=')],
-    'is': [u'Systurverkefni', u'Commons'],
-    'it': [(u'Ip', 'commons='), (u'Interprogetto', 'commons=')],
-    'ja': [u'CommonscatS', u'SisterlinksN', u'Interwikicat'],
-    'ms': [u'Commons', u'Sisterlinks', u'Commons cat show2'],
-    'nds-nl': [u'Commons'],
-    'nl': [u'Commons', u'Commonsklein', u'Commonscatklein', u'Catbeg',
-           u'Catsjab', u'Catwiki'],
-    'om': [u'Commons'],
-    'pt': [u'Correlatos',
-           u'Commons',
-           u'Commons cat multi',
-           u'Commons1',
-           u'Commons2'],
-    'simple': [u'Sisterlinks'],
-    'ru': [u'Навигация', u'Навигация для категорий', u'КПР', u'КБР',
-           u'Годы в России', u'commonscat-inline'],
-    'tt': [u'Навигация'],
-    'zh': [u'Category redirect', u'cr', u'Commons',
-           u'Sisterlinks', u'Sisterlinkswp',
-           u'Tracking category', u'Trackingcatu',
-           u'Template category', u'Wikipedia category'
-           u'分类重定向', u'追蹤分類', u'共享資源', u'追蹤分類'],
-}
+def _load_wikipedia_commonscat_templates():
+    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
+    json_file = os.path.join(data_dir, 'wikipedia_commonscat_templates.json')
+    return json.load(open(json_file, 'r'))
 
 
 def categorizeImage(countrycode, lang, commonsTemplateName, commonsCategoryBase, commonsCatTemplates, page, conn, cursor):
@@ -597,10 +460,12 @@
     Get the template name in a language on a project.
 
     Expects the language code and project.
-    Return as tuple containing the primary template and it's alternatives
+    Return as list containing the primary template and its alternatives
     """
     project = project or u'wikipedia'  # default to wikipedia
 
+    wikipedia_commonscat_templates = _load_wikipedia_commonscat_templates()
+
     result = []
     if project == u'wikipedia' and lang in wikipedia_commonscat_templates:
         (prim, backups) = wikipedia_commonscat_templates[lang]
diff --git a/erfgoedbot/data/wikipedia_commonscat_templates.json b/erfgoedbot/data/wikipedia_commonscat_templates.json
new file mode 100644
index 0000000..6f47140
--- /dev/null
+++ b/erfgoedbot/data/wikipedia_commonscat_templates.json
@@ -0,0 +1,385 @@
+{
+    "_default": [
+        "Commonscat",
+        []
+    ],
+    "af": [
+        "CommonsKategorie",
+        [
+            "commonscat"
+        ]
+    ],
+    "an": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "ar": [
+        "تصنيف كومنز",
+        [
+            "Commonscat",
+            "تصنيف كومونز",
+            "Commons cat",
+            "CommonsCat"
+        ]
+    ],
+    "arz": [
+        "Commons cat",
+        [
+            "Commoncat"
+        ]
+    ],
+    "az": [
+        "CommonsKat",
+        [
+            "Commonscat"
+        ]
+    ],
+    "bn": [
+        "কমন্সক্যাট",
+        [
+            "Commonscat"
+        ]
+    ],
+    "ca": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "crh": [
+        "CommonsKat",
+        [
+            "Commonscat"
+        ]
+    ],
+    "cs": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "da": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons category",
+            "Commonscat left",
+            "Commonscat2"
+        ]
+    ],
+    "en": [
+        "Commons category",
+        [
+            "Commoncat",
+            "Commonscat",
+            "Commons cat",
+            "Commons+cat",
+            "Commonscategory",
+            "Commons and category",
+            "Commonscat-inline",
+            "Commons category-inline",
+            "Commons2",
+            "Commons category multi",
+            "Cms-catlist-up",
+            "Catlst commons",
+            "Commonscat show2",
+            "Sister project links"
+        ]
+    ],
+    "es": [
+        "Commonscat",
+        [
+            "Ccat",
+            "Commons cat",
+            "Categoría Commons",
+            "Commonscat-inline"
+        ]
+    ],
+    "et": [
+        "Commonsi kategooria",
+        [
+            "Commonscat",
+            "Commonskat",
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "eu": [
+        "Commonskat",
+        [
+            "Commonscat"
+        ]
+    ],
+    "fa": [
+        "ویکی‌انبار-رده",
+        [
+            "Commonscat",
+            "Commons cat",
+            "انبار رده",
+            "Commons category",
+            "انبار-رده",
+            "جعبه پیوند به پروژه‌های خواهر",
+            "در پروژه‌های خواهر",
+            "پروژه‌های خواهر"
+        ]
+    ],
+    "fr": [
+        "Commonscat",
+        [
+            "CommonsCat",
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "frp": [
+        "Commonscat",
+        [
+            "CommonsCat"
+        ]
+    ],
+    "ga": [
+        "Catcómhaoin",
+        [
+            "Commonscat"
+        ]
+    ],
+    "he": [
+        "ויקישיתוף בשורה",
+        []
+    ],
+    "hi": [
+        "Commonscat",
+        [
+            "Commons2",
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "hu": [
+        "Commonskat",
+        [
+            "Közvagyonkat"
+        ]
+    ],
+    "hy": [
+        "Վիքիպահեստ կատեգորիա",
+        [
+            "Commonscat",
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "id": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons2",
+            "CommonsCat",
+            "Commons category"
+        ]
+    ],
+    "is": [
+        "CommonsCat",
+        [
+            "Commonscat"
+        ]
+    ],
+    "ja": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "jv": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "kaa": [
+        "Commons cat",
+        [
+            "Commonscat"
+        ]
+    ],
+    "kk": [
+        "Commonscat",
+        [
+            "Commons2"
+        ]
+    ],
+    "ko": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "공용분류"
+        ]
+    ],
+    "la": [
+        "CommuniaCat",
+        []
+    ],
+    "mk": [
+        "Ризница-врска",
+        [
+            "Commonscat",
+            "Commons cat",
+            "CommonsCat",
+            "Commons2",
+            "Commons category"
+        ]
+    ],
+    "ml": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons2"
+        ]
+    ],
+    "ms": [
+        "Kategori Commons",
+        [
+            "Commonscat",
+            "Commons category"
+        ]
+    ],
+    "nn": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "os": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "pt": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "ro": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "ru": [
+        "Commonscat",
+        [
+            "Викисклад-кат",
+            "Commons category"
+        ]
+    ],
+    "sh": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "simple": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons cat multi",
+            "Commons category",
+            "Commons category multi",
+            "CommonsCompact",
+            "Commons-inline"
+        ]
+    ],
+    "sl": [
+        "Kategorija v Zbirki",
+        [
+            "Commonscat",
+            "Kategorija v zbirki",
+            "Commons cat",
+            "Katzbirke"
+        ]
+    ],
+    "sq": [
+        "Commonscat",
+        [
+            "Commonskat",
+            "Commonsart",
+            "CommonsCat"
+        ]
+    ],
+    "sv": [
+        "Commonscat",
+        [
+            "Commonscat-rad",
+            "Commonskat",
+            "Commons cat",
+            "Commonscatbox",
+            "Commonscat-box"
+        ]
+    ],
+    "sw": [
+        "Commonscat",
+        [
+            "Commons2",
+            "Commons cat"
+        ]
+    ],
+    "te": [
+        "Commonscat",
+        [
+            "Commons cat"
+        ]
+    ],
+    "tr": [
+        "Commons kategori",
+        [
+            "CommonsKat",
+            "Commonscat",
+            "Commons cat"
+        ]
+    ],
+    "uk": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Category",
+            "Commonscat-inline"
+        ]
+    ],
+    "vi": [
+        "Commonscat",
+        [
+            "Commons2",
+            "Commons cat",
+            "Commons category",
+            "Commons+cat"
+        ]
+    ],
+    "zh": [
+        "Commonscat",
+        [
+            "Commons cat",
+            "Commons category"
+        ]
+    ],
+    "zh-classical": [
+        "共享類",
+        [
+            "Commonscat"
+        ]
+    ],
+    "zh-yue": [
+        "同享類",
+        [
+            "Commonscat",
+            "共享類 ",
+            "Commons cat",
+            "Commons category"
+        ]
+    ]
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/309842
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib25b02eeaee9074548ac3708988d6814da1d63ca
Gerrit-PatchSet: 2
Gerrit-Project: labs/tools/heritage
Gerrit-Branch: master
Gerrit-Owner: Jean-Frédéric <jeanfrederic.w...@gmail.com>
Gerrit-Reviewer: Jean-Frédéric <jeanfrederic.w...@gmail.com>
Gerrit-Reviewer: Lokal Profil <lokal.pro...@gmail.com>
Gerrit-Reviewer: Multichill <maar...@mdammers.nl>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to