Maroloccio has uploaded a new change for review. https://gerrit.wikimedia.org/r/277173
Change subject: Clean up scripts/generate_wiki_languages.py ...................................................................... Clean up scripts/generate_wiki_languages.py Change-Id: I9d26f658bdedc9a1fdbc103fe5499b24b4ef751a --- M scripts/generate_wiki_languages.py 1 file changed, 57 insertions(+), 31 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/apps/android/wikipedia refs/changes/73/277173/1 diff --git a/scripts/generate_wiki_languages.py b/scripts/generate_wiki_languages.py index 425fd38..3a04409 100755 --- a/scripts/generate_wiki_languages.py +++ b/scripts/generate_wiki_languages.py @@ -1,21 +1,27 @@ #!/usr/bin/env python # coding=utf-8 -from urllib2 import urlopen -import unicodecsv as csv -from itertools import islice +import urllib +import urllib2 +import unicodecsv +import itertools import json +import lxml import lxml.builder as lb -from lxml import etree +import sys + # Returns CSV of all wikipedias, ordered by number of 'good' articles -URL = "https://wikistats.wmflabs.org/api.php?action=dump&table=wikipedias&format=csv&s=good" - -data = csv.reader(urlopen(URL)) - -lang_keys = [] -lang_local_names = [] -lang_eng_names = [] +URL_BASE= ('https://' + 'wikistats.wmflabs.org' '/' + 'api.php' '?') +QUERY_PARAMS = [('action', 'dump'), + ('table', 'wikipedias'), + ('format', 'csv'), + ('s', 'good')] +COLUMNS = {'language_code': 2, + 'local_name': 10, + 'english_name': 1} def add_lang(key, local_name, eng_name): @@ -23,20 +29,38 @@ lang_local_names.append(local_name) lang_eng_names.append(eng_name) -for row in islice(data, 1, None): - if row[2] == 'got': + +def escape(s): + return s.replace("'", "\\'") + + +REQUEST_URL = URL_BASE + urllib.urlencode(QUERY_PARAMS) +url_file = urllib2.urlopen(REQUEST_URL) +csv_data = unicodecsv.reader(url_file) + +lang_keys = [] +lang_local_names = [] +lang_eng_names = [] + +start_at_row = 1 +end_at_row = None +for row in itertools.islice(csv_data, start_at_row, end_at_row): + language_code = row[COLUMNS['language_code']] + if language_code == 'got': # 'got' is Gothic Runes, which lie outside the Basic Multilingual Plane - # < https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane > # Android segfaults on these. So let's ignore those. # What's good for Android is also good for iOS :P - pass - elif row[2] == 'zh': - add_lang(key='zh-hans', local_name=u'简体', eng_name='Simplified Chinese') - add_lang(key='zh-hant', local_name=u'繁體', eng_name='Traditional Chinese') - else: - add_lang(key=row[2].replace("'", "\\'"), - local_name=row[10].replace("'", "\\'"), - eng_name=row[1].replace("'", "\\'")) + continue + if language_code == 'zh': + add_lang(key='zh-hans', local_name=u'简体', + eng_name='Simplified Chinese') + add_lang(key='zh-hant', local_name=u'繁體', + eng_name='Traditional Chinese') + continue + local_name = row[COLUMNS['local_name']] + english_name = row[COLUMNS['english_name']] + add_lang(key=escape(language_code), local_name=escape(local_name), + eng_name=escape(english_name)) add_lang(key='test', local_name='Test', eng_name='Test') add_lang(key='', local_name='None', eng_name='None (development)') @@ -51,15 +75,17 @@ eng_names = [x.item(k) for k in lang_eng_names] resources = x.resources( - getattr(x, 'string-array')(*keys, name="preference_language_keys"), - getattr(x, 'string-array')(*local_names, name="preference_language_local_names"), - getattr(x, 'string-array')(*eng_names, name="preference_language_canonical_names") + getattr(x, 'string-array')(*keys, name='preference_language_keys'), + getattr(x, 'string-array')(*local_names, + name='preference_language_local_names'), + getattr(x, 'string-array')(*eng_names, + name='preference_language_canonical_names') ) resources.set(TOOLS + 'ignore', 'MissingTranslation') -open("languages_list.xml", "w").write( - etree.tostring(resources, pretty_print=True, xml_declaration=True, encoding='utf-8') -) +with open('languages_list.xml', 'w') as f: + f.write(lxml.etree.tostring(resources, pretty_print=True, + xml_declaration=True, encoding='utf-8')) # Generate the JSON, for iOS langs_json = [] @@ -67,9 +93,9 @@ # Start from 1, to skip the headers for i in xrange(1, len(lang_keys)): langs_json.append({ - "code": lang_keys[i], - "name": lang_local_names[i], - "canonical_name": lang_eng_names[i] + 'code': lang_keys[i], + 'name': lang_local_names[i], + 'canonical_name': lang_eng_names[i] }) open("languages_list.json", "w").write(json.dumps(langs_json, indent=4)) -- To view, visit https://gerrit.wikimedia.org/r/277173 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I9d26f658bdedc9a1fdbc103fe5499b24b4ef751a Gerrit-PatchSet: 1 Gerrit-Project: apps/android/wikipedia Gerrit-Branch: master Gerrit-Owner: Maroloccio <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
