jenkins-bot has submitted this change and it was merged. Change subject: Clean up scripts/generate_wiki_languages.py ......................................................................
Clean up scripts/generate_wiki_languages.py Change-Id: I9d26f658bdedc9a1fdbc103fe5499b24b4ef751a --- M scripts/generate_wiki_languages.py 1 file changed, 46 insertions(+), 29 deletions(-) Approvals: BearND: Looks good to me, approved Niedzielski: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/scripts/generate_wiki_languages.py b/scripts/generate_wiki_languages.py index 425fd38..f2081e2 100755 --- a/scripts/generate_wiki_languages.py +++ b/scripts/generate_wiki_languages.py @@ -1,17 +1,21 @@ #!/usr/bin/env python # coding=utf-8 -from urllib2 import urlopen -import unicodecsv as csv -from itertools import islice +import itertools +import urllib +import urllib2 import json +import lxml import lxml.builder as lb -from lxml import etree +import unicodecsv + # Returns CSV of all wikipedias, ordered by number of 'good' articles -URL = "https://wikistats.wmflabs.org/api.php?action=dump&table=wikipedias&format=csv&s=good" +QUERY_API_URL = ('https://' 'wikistats.wmflabs.org' '/' 'api.php' '?') +QUERY_PARAMS = [('action', 'dump'), ('table', 'wikipedias'), + ('format', 'csv'), ('s', 'good')] +RESULT_COLUMN = {'english_name': 1, 'language_code': 2, 'local_name': 10} -data = csv.reader(urlopen(URL)) lang_keys = [] lang_local_names = [] @@ -23,20 +27,34 @@ lang_local_names.append(local_name) lang_eng_names.append(eng_name) -for row in islice(data, 1, None): - if row[2] == 'got': + +def escape(s): + return s.replace("'", "\\'") + + +QUERY_URL = QUERY_API_URL + urllib.urlencode(QUERY_PARAMS) +response_file = urllib2.urlopen(QUERY_URL) +csv_data = unicodecsv.reader(response_file) + +start_at_row = 1 +end_at_row = None +for row in itertools.islice(csv_data, start_at_row, end_at_row): + language_code = row[RESULT_COLUMN['language_code']] + if language_code == 'got': # 'got' is Gothic Runes, which lie outside the Basic Multilingual Plane - # < https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane > # Android segfaults on these. So let's ignore those. # What's good for Android is also good for iOS :P - pass - elif row[2] == 'zh': - add_lang(key='zh-hans', local_name=u'简体', eng_name='Simplified Chinese') - add_lang(key='zh-hant', local_name=u'繁體', eng_name='Traditional Chinese') - else: - add_lang(key=row[2].replace("'", "\\'"), - local_name=row[10].replace("'", "\\'"), - eng_name=row[1].replace("'", "\\'")) + continue + if language_code == 'zh': + add_lang(key='zh-hans', local_name=u'简体', + eng_name='Simplified Chinese') + add_lang(key='zh-hant', local_name=u'繁體', + eng_name='Traditional Chinese') + continue + local_name = row[RESULT_COLUMN['local_name']] + english_name = row[RESULT_COLUMN['english_name']] + add_lang(key=escape(language_code), local_name=escape(local_name), + eng_name=escape(english_name)) add_lang(key='test', local_name='Test', eng_name='Test') add_lang(key='', local_name='None', eng_name='None (development)') @@ -51,25 +69,24 @@ eng_names = [x.item(k) for k in lang_eng_names] resources = x.resources( - getattr(x, 'string-array')(*keys, name="preference_language_keys"), - getattr(x, 'string-array')(*local_names, name="preference_language_local_names"), - getattr(x, 'string-array')(*eng_names, name="preference_language_canonical_names") -) + getattr(x, 'string-array')(*keys, name='preference_language_keys'), + getattr(x, 'string-array')(*local_names, + name='preference_language_local_names'), + getattr(x, 'string-array')(*eng_names, + name='preference_language_canonical_names')) resources.set(TOOLS + 'ignore', 'MissingTranslation') -open("languages_list.xml", "w").write( - etree.tostring(resources, pretty_print=True, xml_declaration=True, encoding='utf-8') -) +with open('languages_list.xml', 'w') as f: + f.write(lxml.etree.tostring(resources, pretty_print=True, + xml_declaration=True, encoding='utf-8')) # Generate the JSON, for iOS langs_json = [] # Start from 1, to skip the headers for i in xrange(1, len(lang_keys)): - langs_json.append({ - "code": lang_keys[i], - "name": lang_local_names[i], - "canonical_name": lang_eng_names[i] - }) + langs_json.append({'code': lang_keys[i], + 'name': lang_local_names[i], + 'canonical_name': lang_eng_names[i]}) open("languages_list.json", "w").write(json.dumps(langs_json, indent=4)) -- To view, visit https://gerrit.wikimedia.org/r/277173 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I9d26f658bdedc9a1fdbc103fe5499b24b4ef751a Gerrit-PatchSet: 3 Gerrit-Project: apps/android/wikipedia Gerrit-Branch: master Gerrit-Owner: Maroloccio <[email protected]> Gerrit-Reviewer: BearND <[email protected]> Gerrit-Reviewer: Brion VIBBER <[email protected]> Gerrit-Reviewer: Dbrant <[email protected]> Gerrit-Reviewer: Maroloccio <[email protected]> Gerrit-Reviewer: Mholloway <[email protected]> Gerrit-Reviewer: Niedzielski <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
