Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/62672


Change subject: Various python scripts to help with maintenance
......................................................................

Various python scripts to help with maintenance

* The scripts help with loading configurations
* comparing configurations between different sites - api vs prod
* comparing messages in prod vs what they are supposed to be on meta
* optimizing IPs & CIDR blocks

Change-Id: Ib15a1e9d5694629201efd4434745b66ca2a3891c
---
A maintenance/cidr.py
A maintenance/i18n.py
A maintenance/test.py
A maintenance/utils.py
4 files changed, 721 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ZeroRatedMobileAccess refs/changes/72/62672/1

diff --git a/maintenance/cidr.py b/maintenance/cidr.py
new file mode 100644
index 0000000..5c3f6e3
--- /dev/null
+++ b/maintenance/cidr.py
@@ -0,0 +1,41 @@
+from utils import *
+from netaddr import *
+
+def optimize(confs):
+    """Merge each carrier's IP list into the smallest set of CIDR blocks.
+
+    confs maps a carrier id to a config dict whose 'ips' entry is a list of
+    IP address / CIDR strings.  Returns a dict mapping every carrier id to
+    the list of strings produced by merging its entries with cidr_merge().
+
+    A single IPv4 host is rendered without its '/32' suffix.  The suffix is
+    dropped based on the parsed network, not by string replacement, so an
+    IPv6 block such as '2001:db8::/32' keeps its prefix length.
+    """
+    res = {}
+    for xcs, conf in confs.items():
+        merged = cidr_merge([IPNetwork(ip) for ip in conf['ips']])
+        res[xcs] = [str(net.ip) if net.version == 4 and net.prefixlen == 32
+                    else str(net) for net in merged]
+    return res
+
+def printOptimization(confs):
+    """Print a diff for every carrier whose IP list could be optimized.
+
+    Carriers whose 'ips' list already equals the result of optimize() are
+    skipped; for the others a blank line and a diff prefixed with the
+    carrier id are printed.
+    """
+    optimized = optimize(confs)
+    for xcs, conf in confs.items():
+        current = conf['ips']
+        merged = optimized[xcs]
+        if current == merged:
+            continue
+        print()
+        printDiff(current, merged, xcs + ':   ')
+
+def printConflicts(confs):
+    """Print the addresses that are claimed by more than one carrier.
+
+    Every ordered pair of distinct carriers is examined, so an overlap
+    between two carriers is reported once in each direction.
+    """
+    sets = {xcs: IPSet(conf['ips']) for xcs, conf in confs.items()}
+    for xcs, ips in sets.items():
+        for xcs2, ips2 in sets.items():
+            if xcs == xcs2:
+                continue
+            overlap = ips & ips2
+            if overlap.size == 0:
+                continue
+            print('{0} conflicts with {1}:'.format(xcs,xcs2))
+            # List the shared addresses first under this carrier, then
+            # under the other one.
+            for label, members in ((xcs, ips), (xcs2, ips2)):
+                for ip in members:
+                    if ip in overlap:
+                        print('{0}: {1}'.format(label, ip))
+
+
+def run(site = TEST):
+    """Load all configs from *site* and print conflicts and optimizations."""
+    allConfs = getAllConfigs(site)
+    # Conflict report first, a separating blank line, then the diffs.
+    printConflicts(allConfs)
+    print()
+    printOptimization(allConfs)
+
diff --git a/maintenance/i18n.py b/maintenance/i18n.py
new file mode 100644
index 0000000..974aabf
--- /dev/null
+++ b/maintenance/i18n.py
@@ -0,0 +1,490 @@
+# -*- coding: utf-8  -*-
+""" List taken from pywikibot framework
+"""
+#
+# (C) Pywikipedia bot team, 2004-2012
+#
+# Distributed under the terms of the MIT license.
+#
+def _altlang(code):
+    """Define fallback languages for particular languages.
+
+    If no translation is available to a specified language, translate() will
+    try each of the specified fallback languages, in order, until it finds
+    one with a translation, with 'en' and '_default' as a last resort.
+
+    For example, if for language 'xx', you want the preference of languages
+    to be: xx > fr > ru > en, you let altlang return ['fr', 'ru'].
+
+    code is a language code string.  The return value is a new list of
+    language code strings; it is empty when no fallback is defined.
+    """
+    #Akan
+    if code in ['ak', 'tw']:
+        return ['ak', 'tw']
+    #Amharic
+    if code in ['aa', 'ti']:
+        return ['am']
+    #Arab
+    if code in ['arc', 'arz', 'fa', 'so']:
+        return ['ar']
+    if code == 'kab':
+        return ['ar', 'fr']
+    #Bulgarian
+    if code in ['cu', 'mk']:
+        return ['bg', 'sr', 'sh']
+    #Czech
+    if code in ['cs', 'sk']:
+        return ['cs', 'sk']
+    #German
+    if code in ['bar', 'frr', 'ksh', 'pdc', 'pfl']:
+        return ['de']
+    if code == 'lb':
+        return ['de', 'fr']
+    if code == 'als':
+        return ['gsw', 'de']
+    if code == 'nds':
+        return ['nds-nl', 'de']
+    if code in ['dsb', 'hsb']:
+        return ['hsb', 'dsb', 'de']
+    if code == 'sli':
+        return ['de', 'pl']
+    if code == 'rm':
+        return ['de', 'it']
+    if code == 'stq':
+        return ['nds', 'de']
+    #Greek
+    if code in ['grc', 'pnt']:
+        return ['el']
+    #Esperanto
+    if code in ['io', 'nov']:
+        return ['eo']
+    #Spanish
+    if code in ['an', 'arn', 'ast', 'ay', 'ca', 'ext', 'lad', 'nah', 'nv',
+                'qu', 'yua']:
+        return ['es']
+    if code in ['gl', 'gn']:
+        return ['es', 'pt']
+    if code == 'eu':
+        return ['es', 'fr']
+    if code == 'cbk-zam':
+        return ['es', 'tl']
+    #Estonian
+    if code == 'fiu-vro':
+        return ['et']
+    if code == 'liv':
+        return ['et', 'lv']
+    #Persian (Farsi)
+    if code == 'ps':
+        return ['fa']
+    if code in ['glk', 'mzn']:
+        return ['glk', 'mzn', 'fa', 'ar']
+    #Finnish
+    if code == 'vep':
+        return ['fi', 'ru']
+    if code == 'fit':
+        return ['fi', 'sv']
+    #French
+    if code in ['bm', 'br', 'ht', 'kg', 'ln', 'mg', 'nrm', 'pcd',
+                'rw', 'sg', 'ty', 'wa']:
+        return ['fr']
+    if code == 'oc':
+        return ['fr', 'ca', 'es']
+    if code in ['co', 'frp']:
+        return ['fr', 'it']
+    #Hindi
+    if code in ['sa']:
+        return ['hi']
+    if code in ['ne', 'new']:
+        return ['ne', 'new', 'hi']
+    #Indonesian and Malay
+    if code in ['ace', 'bug', 'bjn', 'id', 'jv', 'ms', 'su']:
+        return ['id', 'ms', 'jv']
+    if code == 'map-bms':
+        return ['jv', 'id', 'ms']
+    #Inuit languages
+    if code in ['ik', 'iu']:
+        return ['iu', 'kl']
+    if code == 'kl':
+        return ['da', 'iu', 'no']
+    #Italian
+    if code in ['eml', 'fur', 'lij', 'lmo', 'nap', 'pms', 'roa-tara', 'sc',
+                'scn', 'vec']:
+        return ['it']
+    #Lithuanian
+    if code in ['bat-smg']:
+        return ['lt']
+    #Latvian
+    if code == 'ltg':
+        return ['lv']
+    #Dutch
+    if code in ['af', 'fy', 'li', 'pap', 'srn', 'vls', 'zea']:
+        return ['nl']
+    # Compare against the string itself: a str never equals a list, so
+    # "code == ['nds-nl']" could never match.
+    if code == 'nds-nl':
+        return ['nds', 'nl']
+    #Polish
+    if code in ['csb', 'szl']:
+        return ['pl']
+    #Portuguese
+    if code in ['fab', 'mwl', 'tet']:
+        return ['pt']
+    #Romanian
+    if code in ['mo', 'roa-rup']:
+        return ['ro']
+    #Russian and Belarusian
+    if code in ['ab', 'av', 'ba', 'bxr', 'ce', 'cv', 'inh', 'kk', 'koi',
+                'krc', 'kv', 'ky', 'lbe', 'lez', 'mdf', 'mhr', 'mn', 'mrj',
+                'myv', 'os', 'sah', 'tg', 'udm', 'uk', 'xal']:
+        return ['ru']
+    if code in ['kbd', 'ady']:
+        return ['kbd', 'ady', 'ru']
+    if code == 'tt':
+        return ['tt-cyrl', 'ru']
+    if code in ['be', 'be-x-old']:
+        return ['be', 'be-x-old', 'ru']
+    if code == 'kaa':
+        return ['uz', 'ru']
+    #Serbocroatian
+    if code in ['bs', 'hr', 'sh',]:
+        return ['sh', 'hr', 'bs', 'sr', 'sr-el']
+    if code == 'sr':
+        return ['sr-el', 'sh', 'hr', 'bs']
+    #Tagalog
+    if code in ['bcl', 'ceb', 'ilo', 'pag', 'pam', 'war']:
+        return ['tl']
+    #Turkish and Kurdish
+    if code in ['diq', 'ku']:
+        return ['ku', 'ku-latn', 'tr']
+    if code == 'gag':
+        return ['tr']
+    if code == 'ckb':
+        return ['ku', 'fa']
+    #Ukrainian
+    if code in ['crh', 'rue']:
+        return ['uk', 'ru']
+    #Chinese
+    if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw',
+                'zh-hans', 'zh-hant']:
+        return ['zh', 'zh-tw', 'zh-cn', 'zh-classical']
+    if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo',
+                'zh-classical', 'zh-cn', 'zh-yue']:
+        return ['zh', 'zh-cn', 'zh-tw', 'zh-classical']
+    #Scandinavian languages
+    if code in ['da', 'sv']:
+        return ['da', 'no', 'nb', 'sv', 'nn']
+    if code in ['fo', 'is']:
+        return ['da', 'no', 'nb', 'nn', 'sv']
+    if code == 'nn':
+        return ['no', 'nb', 'sv', 'da']
+    if code in ['nb', 'no']:
+        return ['no', 'nb', 'da', 'nn', 'sv']
+    if code == 'se':
+        return ['sv', 'no', 'nb', 'nn', 'fi']
+    #Other languages
+    if code in ['bi', 'tpi']:
+        return ['bi', 'tpi']
+    if code == 'yi':
+        return ['he', 'de']
+    if code in ['ia', 'ie']:
+        return ['ia', 'la', 'it', 'fr', 'es']
+    if code == 'xmf':
+        return ['ka']
+    if code in ['nso', 'st']:
+        return ['st', 'nso']
+    if code in ['kj', 'ng']:
+        return ['kj', 'ng']
+    if code in ['meu', 'hmo']:
+        return ['meu', 'hmo']
+    # Same str-vs-list pitfall as for 'nds-nl' above.
+    if code == 'as':
+        return ['bn']
+    #Default value
+    return []
+
+def getWikiNames(lang):
+    """Return all names of the Wikipedia project known for *lang*.
+
+    The result is always a new list, so callers may modify it without
+    touching _wikipediaNames.  'Wikipedia' is appended when the entry does
+    not already contain it, so the list never holds it twice.
+
+    Raises KeyError if *lang* has no entry in _wikipediaNames.
+    """
+    wp = _wikipediaNames[lang]
+    # An entry is either a single name or a list of alternative names.
+    names = list(wp) if isinstance(wp, list) else [wp]
+    if 'Wikipedia' not in names:
+        names.append('Wikipedia')
+    return names
+
+def getWikiName(lang):
+    """Return the primary name of the Wikipedia project for *lang*.
+
+    When the entry in _wikipediaNames is a list, its first element is
+    returned; otherwise the entry itself is returned.
+
+    Raises KeyError if *lang* has no entry in _wikipediaNames.
+    """
+    wp = _wikipediaNames[lang]
+    if isinstance(wp, list):
+        return wp[0]
+    return wp
+
+_wikipediaNames = {
+    'ab': [u'Авикипедиа', u'Wikipedia'],
+    'ace': u'Wikipedia',
+    'af': u'Wikipedia',
+    'ak': u'Wikipedia',
+    'als': u'Wikipedia',
+    'am': u'ውክፔዲያ',
+    'an': u'Wikipedia',
+    'ang': u'Wikipedia',
+    'ar': [u'ويكيبيديا', u'وب'],
+    'arc': [u'ܘܝܩܝܦܕܝܐ', u'Wikipedia'],
+    'arz': u'ويكيبيديا',
+    'as': [u'ৱিকিপিডিয়া', u'Wikipedia', u'প্ৰকল্প', u'WP'],
+    'ast': u'Uiquipedia',
+    'av': u'Wikipedia',
+    'ay': u'Wikipidiya',
+    'az': u'Vikipediya',
+    'ba': [u'Википедия', u'Wikipedia'],
+    'bar': u'Wikipedia',
+    'bat-smg': u'Vikipedėjė',
+    'bcl': u'Wikipedia',
+    'be': [u'Вікіпедыя', u'ВП'],
+    'be-x-old': [u'Вікіпэдыя', u'ВП'],
+    'bg': u'Уикипедия',
+    'bh': u'विकिपीडिया',
+    'bi': u'Wikipedia',
+    'bjn': u'Wikipidia',
+    'bm': u'Wikipedia',
+    'bn': [u'উইকিপিডিয়া', u'Wikipedia', u'WP'],
+    'bo': u'Wikipedia',
+    'bpy': u'উইকিপিডিয়া',
+    'br': u'Wikipedia',
+    'bs': u'Wikipedia',
+    'bug': u'Wikipedia',
+    'bxr': u'Википеэди',
+    'ca': u'Viquipèdia',
+    'cbk-zam': u'Wikipedia',
+    'cdo': u'Wikipedia',
+    'ce': [u'Википедийа', u'Wikipedia'],
+    'ceb': u'Wikipedia',
+    'ch': u'Wikipedia',
+    'chr': u'Wikipedia',
+    'chy': u'Wikipedia',
+    'ckb': u'ویکیپیدیا',
+    'co': u'Wikipedia',
+    'cr': u'Wikipedia',
+    'crh': u'Vikipediya',
+    'cs': [u'Wikipedie', u'WP'],
+    'csb': u'Wiki',
+    'cu': [u'Википєдїꙗ', u'Википє́дїꙗ'],
+    'cv': u'Википеди',
+    'cy': u'Wicipedia',
+    'da': [u'Wikipedia', u'WP'],
+    'de': [u'Wikipedia', u'WP'],
+    'diq': u'Wikipedia',
+    'dsb': u'Wikipedija',
+    'dv': u'Wikipedia',
+    'dz': u'Wikipedia',
+    'ee': u'Wikipedia',
+    'el': u'Βικιπαίδεια',
+    'eml': u'Wikipedia',
+    'en': [u'Wikipedia', u'WP'],
+    'eo': [u'Vikipedio', u'VP'],
+    'es': u'Wikipedia',
+    'et': u'Vikipeedia',
+    'eu': u'Wikipedia',
+    'ext': u'Güiquipeya',
+    'fa': [u'ویکی‌پدیا', u'وپ'],
+    'ff': u'Wikipedia',
+    'fi': [u'Wikipedia', u'WP'],
+    'fiu-vro': u'Wikipedia',
+    'fj': u'Wikipedia',
+    'fo': u'Wikipedia',
+    'fr': [u'Wikipédia', u'Wikipedia', u'WP'],
+    'frp': u'Vouiquipèdia',
+    'frr': u'Wikipedia',
+    'fur': u'Vichipedie',
+    'fy': u'Wikipedy',
+    'ga': u'Vicipéid',
+    'gag': u'Vikipediya',
+    'gan': u'Wikipedia',
+    'gd': [u'Uicipeid', u'Wikipedia'],
+    'gl': u'Wikipedia',
+    'glk': u'Wikipedia',
+    'gn': u'Vikipetã',
+    'got': u'Wikipedia',
+    'gu': u'વિકિપીડિયા',
+    'gv': u'Wikipedia',
+    'ha': u'Wikipedia',
+    'hak': u'Wikipedia',
+    'haw': u'Wikipedia',
+    'he': u'ויקיפדיה',
+    'hi': [u'विकिपीडिया', u'वि', u'Wikipedia', u'WP'],
+    'hif': u'Wikipedia',
+    'hr': u'Wikipedija',
+    'hsb': u'Wikipedija',
+    'ht': [u'Wikipedya', u'Wikipedia'],
+    'hu': u'Wikipédia',
+    'hy': u'Վիքիպեդիա',
+    'ia': u'Wikipedia',
+    'id': u'Wikipedia',
+    'ie': u'Wikipedia',
+    'ig': u'Wikipedia',
+    'ik': u'Wikipedia',
+    'ilo': [u'Wikipedia', u'WP'],
+    'io': [u'Wikipedio', u'Wikipedia'],
+    'is': u'Wikipedia',
+    'it': [u'Wikipedia', u'WP'],
+    'iu': [u'ᐅᐃᑭᐱᑎᐊ', u'Wikipedia'],
+    'ja': u'Wikipedia',
+    'jbo': u'Wikipedia',
+    'jv': u'Wikipedia',
+    'ka': u'ვიკიპედია',
+    'kaa': u'Wikipedia',
+    'kab': u'Wikipedia',
+    'kbd': u'Уикипедиэ',
+    'kg': u'Wikipedia',
+    'ki': u'Wikipedia',
+    'kk': u'Уикипедия',
+    'kl': u'Wikipedia',
+    'km': u'វិគីភីឌា',
+    'kn': [u'ವಿಕಿಪೀಡಿಯ', u'Wikipedia'],
+    'ko': [u'위키백과', u'백'],
+    'koi': u'Википедия',
+    'krc': u'Википедия',
+    'ks': u'Wikipedia',
+    'ksh': u'Wikipedia',
+    'ku': u'Wîkîpediya',
+    'kv': [u'Википедия', u'Wikipedia'],
+    'kw': u'Wikipedia',
+    'ky': u'Wikipedia',
+    'la': u'Vicipaedia',
+    'lad': u'Vikipedya',
+    'lb': u'Wikipedia',
+    'lbe': u'Википедия',
+    'lez': u'Википедия',
+    'lg': u'Wikipedia',
+    'li': u'Wikipedia',
+    'lij': u'Wikipedia',
+    'lmo': u'Wikipedia',
+    'ln': u'Wikipedia',
+    'lo': u'ວິກິພີເດຍ',
+    'lt': [u'Vikipedija', u'Wikipedia'],
+    'ltg': u'Vikipedeja',
+    'lv': [u'Vikipēdija', u'Wikipedia'],
+    'map-bms': u'Wikipedia',
+    'mdf': u'Википедиесь',
+    'mg': u'Wikipedia',
+    'mhr': u'Википедий',
+    'mi': u'Wikipedia',
+    'min': u'Wikipedia',
+    'mk': u'Википедија',
+    'ml': [u'വിക്കിപീഡിയ', u'വിക്കി', u'Wikipedia', u'WP'],
+    'mn': u'Wikipedia',
+    'mr': [u'विकिपीडिया', u'Wikipedia', u'विपी'],
+    'mrj': u'Википеди',
+    'ms': u'Wikipedia',
+    'mt': u'Wikipedija',
+    'mwl': [u'Biquipédia', u'Wikipedia'],
+    'my': u'Wikipedia',
+    'myv': u'Википедиясь',
+    'mzn': [u'ویکی‌پدیا', u'وپ', u'Wikipedia'],
+    'na': u'Wikipedia',
+    'nah': [u'Huiquipedia', u'Wikipedia'],
+    'nap': u'Wikipedia',
+    'nds': [u'Wikipedia', u'WP'],
+    'nds-nl': [u'Wikipedie', u'Wikipedia'],
+    'ne': [u'विकिपीडिया', u'Wikipedia'],
+    'new': u'विकिपिडिया',
+    'nl': [u'Wikipedia', u'WP'],
+    'nn': [u'Wikipedia', u'WP'],
+    'no': [u'Wikipedia', u'WP'],
+    'nov': u'Wikipedia',
+    'nrm': u'Wikipedia',
+    'nso': u'Wikipedia',
+    'nv': u'Wikiibíídiiya',
+    'ny': u'Wikipedia',
+    'oc': u'Wikipèdia',
+    'om': u'Wikipedia',
+    'or': [u'ଉଇକିପିଡ଼ିଆ', u'Wikipedia', u'WP'],
+    'os': u'Википеди',
+    'pa': u'ਵਿਕੀਪੀਡੀਆ',
+    'pag': u'Wikipedia',
+    'pam': u'Wikipedia',
+    'pap': u'Wikipedia',
+    'pcd': u'Wikipedia',
+    'pdc': u'Wikipedia',
+    'pfl': u'Wikipedia',
+    'pi': u'Wikipedia',
+    'pih': u'Wikipedia',
+    'pl': [u'Wikipedia', u'WP'],
+    'pms': u'Wikipedia',
+    'pnb': u'Wikipedia',
+    'pnt': u'Βικιπαίδεια',
+    'ps': u'ويکيپېډيا',
+    'pt': [u'Wikipédia', u'WP', u'Wikipedia'],
+    'qu': u'Wikipedia',
+    'rm': u'Wikipedia',
+    'rmy': u'Vikipidiya',
+    'rn': u'Wikipedia',
+    'ro': u'Wikipedia',
+    'roa-rup': u'Wikipedia',
+    'roa-tara': u'Wikipedia',
+    'ru': [u'Википедия', u'ВП'],
+    'rue': u'Вікіпедія',
+    'rw': u'Wikipedia',
+    'sa': [u'विकिपीडिया', u'WP', u'Wikipedia'],
+    'sah': u'Бикипиэдьийэ',
+    'sc': u'Wikipedia',
+    'scn': u'Wikipedia',
+    'sco': u'Wikipedia',
+    'sd': u'Wikipedia',
+    'se': [u'Wikipedia', u'WP'],
+    'sg': u'Wikipedia',
+    'sh': u'Wikipedia',
+    'si': [u'විකිපීඩියා', u'Wikipedia'],
+    'simple': [u'Wikipedia', u'WP'],
+    'sk': u'Wikipédia',
+    'sl': u'Wikipedija',
+    'sm': u'Wikipedia',
+    'sn': u'Wikipedia',
+    'so': u'Wikipedia',
+    'sq': u'Wikipedia',
+    'sr': [u'Википедија', u'Vikipedija'],
+    'srn': u'Wikipedia',
+    'ss': u'Wikipedia',
+    'st': u'Wikipedia',
+    'stq': u'Wikipedia',
+    'su': u'Wikipedia',
+    'sv': [u'Wikipedia', u'WP'],
+    'sw': u'Wikipedia',
+    'szl': u'Wikipedyjo',
+    'ta': [u'விக்கிப்பீடியா', u'Wikipedia', u'விக்கிபீடியா'],
+    'te': [u'వికీపీడియా', u'Wikipedia'],
+    'tet': u'Wikipedia',
+    'tg': u'Википедиа',
+    'th': u'วิกิพีเดีย',
+    'ti': u'Wikipedia',
+    'tk': u'Wikipediýa',
+    'tl': u'Wikipedia',
+    'tn': u'Wikipedia',
+    'to': u'Wikipedia',
+    'tpi': u'Wikipedia',
+    'tr': u'Vikipedi',
+    'ts': u'Wikipedia',
+    'tt': [u'Википедия', u'WP', u'ВП', u'Wikipedia'],
+    'tum': u'Wikipedia',
+    'tw': u'Wikipedia',
+    'ty': u'Wikipedia',
+    'udm': u'Wikipedia',
+    'ug': u'Wikipedia',
+    'uk': [u'Вікіпедія', u'ВП'],
+    'ur': u'منصوبہ',
+    'uz': u'Vikipediya',
+    've': u'Wikipedia',
+    'vec': [u'Wikipedia', u'WP'],
+    'vep': u'Vikipedii',
+    'vi': u'Wikipedia',
+    'vls': u'Wikipedia',
+    'vo': [u'Vükiped', u'Wikipedia'],
+    'wa': u'Wikipedia',
+    'war': u'Wikipedia',
+    'wo': u'Wikipedia',
+    'wuu': u'Wikipedia',
+    'xal': u'Wikipedia',
+    'xh': u'Wikipedia',
+    'xmf': u'ვიკიპედია',
+    'yi': [u'װיקיפּעדיע', u'וויקיפעדיע'],
+    'yo': u'Wikipedia',
+    'za': u'Wikipedia',
+    'zea': u'Wikipedia',
+    'zh': [u'Wikipedia', u'维基百科', u'維基百科', u'WP'],
+    'zh-classical': [u'維基大典', u'Wikipedia', u'Wikipedia talk'],
+    'zh-min-nan': u'Wikipedia',
+    'zh-yue': [u'Wikipedia', u'WP'],
+    'zu': u'Wikipedia',
+}
diff --git a/maintenance/test.py b/maintenance/test.py
new file mode 100644
index 0000000..ff5b04d
--- /dev/null
+++ b/maintenance/test.py
@@ -0,0 +1,98 @@
+from utils import *
+from netaddr import *
+
+def orderIps(conf):
+    """Sort the 'ips' list of every config entry in place."""
+    for entry in conf.values():
+        entry['ips'].sort()
+
+def confCompare(src1 = TEST, src2 = META):
+    """Compare the configs stored on two sites and print the differences.
+
+    IP lists are sorted first so that their order does not count as a
+    difference.  Entries that are equal on both sites are dropped before
+    the diff is printed.
+    """
+    first = getAllConfigs(src1)
+    second = getAllConfigs(src2)
+    orderIps(first)
+    orderIps(second)
+    if first == second:
+        print('Configurations are identical')
+        return
+    for key in set(first) | set(second):
+        if key in first and key in second and first[key] == second[key]:
+            del first[key]
+            del second[key]
+    printDiff(first, second)
+
+
+def cleanBanner(html, lang):
+    """Remove the variable parts of a banner, modifying *html* in place.
+
+    The 'title' attribute of the 'notify-close' button is deleted, the
+    title parameter in the link's href is replaced by a placeholder, and
+    the wiki names known for *lang* are removed from the link text.
+    """
+    closeButton = html.find('button', class_='notify-close')
+    del closeButton['title']
+
+    link = html.a
+    link['href'] = re.sub(r'title=.*&', 'title=(REMOVED)&', link['href'])
+
+    names = i18n.getWikiNames(lang)
+    # First pass: drop exact occurrences of every known name.
+    for name in names:
+        link.string = link.text.replace(name, '')
+
+    # Second pass: for longer names also drop words that merely start with
+    # the name minus its last two characters.
+    for name in names:
+        if len(name) <= 5:
+            continue
+        stem = name[:-2]
+        link.string = re.sub(r'\b'+re.escape(stem)+r'\w*\b', '', link.text)
+
+def checkBanner(expected, xcs, conf, site = TEST, lang='en', title = 'Test1'):
+    """Fetch a page as carrier *xcs* and compare its banner with *expected*.
+
+    expected maps language codes to banner HTML; the entry used is chosen
+    with translate().  The page *title* is downloaded from *site* through
+    wikiZ() with uselang=*lang*, both banners are passed through
+    cleanBanner(), and their string forms are compared.  *conf* is accepted
+    but not used here.
+
+    On a mismatch the original link texts are appended to
+    'linkdiff_exp.txt' and 'linkdiff_rcv.txt'.  Any failure, including a
+    mismatch, is reported by printing the name of an 'err_<xcs>_<lang>.html'
+    file and writing the error details into it; nothing is raised to the
+    caller.  Only Exception subclasses are handled this way, so
+    KeyboardInterrupt and SystemExit still stop a long run.
+    """
+    try:
+        exp = Soup(translate(lang, expected))
+        original = wikiZ(site, title, xcs, {'uselang':lang})
+        rcvd = Soup(original)
+        rcvd = rcvd.find('div', id='zero-rated-banner')
+
+        origExp = exp.a.text
+        origRcvd = rcvd.a.text
+
+        cleanBanner(exp, lang)
+        cleanBanner(rcvd, lang)
+
+        if str(exp) != str(rcvd):
+            e = ('Expected != Received:\n' +
+                str(exp) + '\n' + str(rcvd) + '\n\n\n' + original)
+            try:
+                with open('linkdiff_exp.txt', 'a+', encoding='utf-8') as o:
+                    o.write('{0} {1}\n{2}\n'.format(xcs,lang,origExp))
+                with open('linkdiff_rcv.txt', 'a+', encoding='utf-8') as o:
+                    o.write('{0} {1}\n{2}\n'.format(xcs,lang,origRcvd))
+            except Exception:
+                # Best effort only: the mismatch itself is still reported.
+                print('*** Unable to compare links')
+            raise Exception(e)
+
+    except Exception as exc:
+        # '*' may appear in carrier ids; replace it for the file name.
+        xcs = xcs.replace('*','#')
+        errFile = 'err_{0}_{1}.html'.format(xcs, lang)
+        print('Error in ' + errFile)
+        with open(errFile, "w", encoding='utf-8') as err:
+            err.write('\n'.join([str(s) for s in exc.args]))
+
+def checkProdBanners(configSite = TEST, title = 'A', site = PROD):
+    """Check the banner of every carrier in every language it uses.
+
+    Configs and expected banners are read from *configSite*.  The page
+    *title* is then requested from *site*, with the language code filled
+    in via format(), once per language.  When a config has no
+    'whitelistedLangs', the union of the keys/items of its 'banner',
+    'name' and 'showLangs' entries is used instead.
+    """
+    # unknown languages
+    skipped = ['zh-hans', 'zh-hant', 'be-tarask']
+    for xcs, conf in getAllConfigs(configSite).items():
+        expected = getZeroBannersFromConfig(xcs, configSite)
+        langs = conf['whitelistedLangs']
+        if len(langs) == 0:
+            langs = set(list(conf['banner']) + list(conf['name']) +
+                        list(conf['showLangs']))
+        for lang in langs:
+            if lang not in skipped:
+                checkBanner(expected, xcs, conf, site.format(lang), lang, title)
+
+def getZeroBannersFromConfig(xcs, site = TEST):
+    """Return the banners rendered on the 'Zero:<xcs>' page of *site*.
+
+    The first section of the page is fetched through the 'mobileview' API
+    action.  Each 'div' with class 'mw-mf-banner' becomes one entry: the
+    key is the text of the 'th' found under the div's grandparent, the
+    value is the div's HTML as a string.
+    """
+    data = api(site, action='mobileview', sections=0, page='Zero:'+xcs)
+    section = data['sections'][0]['text']
+    banners = {}
+    for div in Soup(section).find_all('div', class_='mw-mf-banner'):
+        key = div.parent.parent.th.text
+        banners[key] = str(div)
+    return banners
+
+
+# checkProdBanners()
diff --git a/maintenance/utils.py b/maintenance/utils.py
new file mode 100644
index 0000000..4b53e6e
--- /dev/null
+++ b/maintenance/utils.py
@@ -0,0 +1,92 @@
+from bs4 import BeautifulSoup as Soup
+import cgi
+import difflib
+import i18n
+import json
+import pprint
+import re
+import requests
+import sys
+import urllib.parse
+
+# Base URLs of the sites the maintenance scripts talk to.  api() and wiki()
+# append 'w/api.php' and 'wiki/<title>' directly, hence the trailing slash.
+META = 'http://meta.wikimedia.org/'
+TEST = 'http://api.beta.wmflabs.org/'
+# Template: checkProdBanners() fills {0} with a language code via format().
+PROD = 'http://{0}.zero.wikipedia.org/'
+
+# Maps a carrier id (the value sent as 'X-CS') to the value that wikiZ()
+# additionally sends as the 'X-Carrier' header for that carrier.
+# NOTE(review): the keys look like MCC-MNC codes - confirm.
+oldIds = {
+    '250-99': 'Vimpelcom Beeline',
+    '297-01': 'Telenor Montenegro',
+    '405-0*': 'TATA',
+    '410-01': 'Vimpelcom Mobilink Pakistan',
+    '413-02': 'Dialog Sri Lanka',
+    '420-01': 'Saudi Telecom',
+    '456-02': 'Hello Cambodia',
+    '470-01': 'Grameenphone Bangladesh',
+    '502-13': 'Celcom Malaysia',
+    '502-16': 'DIGI',
+    '510-11': 'XL Axiata',
+    '520-18': 'DTAC',
+    '604-00': 'Orange Meditel Morocco',
+    '605-01': 'Orange Tunisia',
+    '612-03': 'Orange Ivory Coast',
+    '614-04': 'Orange Niger',
+    '623-01': 'Orange Central African Republic',
+    '624-02': 'Orange Cameroon',
+    '630-86': 'Orange Congo',
+    '639-07': 'Orange Kenya',
+    '641-14': 'Orange Uganda',
+    '652-02': 'Orange Botswana',
+    'WMF': 'wikimedia',
+}
+
+def download(url, params=None, headers=None):
+    """GET *url* and return the requests response object.
+
+    The final request URL is printed, preceded by the headers when any
+    were passed.
+    """
+    response = requests.get(url, params=params, headers=headers)
+    prefix = '' if headers is None else 'Hdrs: ' + str(headers) + ' '
+    print(prefix + response.request.url)
+    return response
+
+def api(site, **kwargs):
+    """Call the MediaWiki API of *site* and return the result of the action.
+
+    The keyword arguments are sent as request parameters with
+    format='json' forced.  Raises Exception carrying the JSON-encoded
+    error when the response contains an 'error' member; otherwise returns
+    the response member named after the 'action' argument.
+    """
+    query = dict(kwargs, format='json')
+    data = download(site + 'w/api.php', query).json()
+    if 'error' in data:
+        raise Exception(json.dumps(data['error']))
+    return data[query['action']]
+
+def getAllConfigs(site):
+    """Return the parsed configs of all pages in namespace 480 of *site*.
+
+    The result maps each page title, with 'Zero:' removed, to the
+    JSON-decoded content of the page's first returned revision.
+    """
+    data = api(site, action='query', generator='allpages', gaplimit='max',
+               gapnamespace='480', prop='revisions', rvprop='content')
+    configs = {}
+    for page in data['pages'].values():
+        xcs = page['title'].replace('Zero:','')
+        configs[xcs] = json.loads(page['revisions'][0]['*'])
+    return configs
+
+def wiki(site, title, params=None, headers=None):
+    """Download the page *title* from *site* and return the response text."""
+    pageUrl = site + 'wiki/' + title
+    response = download(pageUrl, params, headers)
+    return response.text
+
+def translate(lang, xdict):
+    """Pick the entry of *xdict* that best matches *lang*.
+
+    The entry for *lang* itself wins; otherwise the fallback languages
+    from i18n._altlang() are tried in order, and finally 'en'.  Raises
+    KeyError when none of them, including 'en', is present.
+    """
+    if lang not in xdict:
+        for fallback in i18n._altlang(lang):
+            if fallback in xdict:
+                return xdict[fallback]
+        return xdict['en']
+    return xdict[lang]
+
+def wikiZ(site, title, xcs, params=None):
+    """Download a page while identifying as carrier *xcs*.
+
+    The request carries 'X-Device: android' and 'X-CS: <xcs>'.  For
+    carriers listed in oldIds the matching 'X-Carrier' header is sent
+    too.  Returns the response text.
+    """
+    # A 'User-Agent' of 'android ZeroConfig-test/1.0' is deliberately not
+    # sent at the moment.
+    headers = {'X-Device':'android', 'X-CS':xcs}
+    if xcs in oldIds:
+        headers['X-Carrier'] = oldIds[xcs]
+    return wiki(site, title, params, headers)
+
+def printDiff(val1, val2, prefix = ''):
+    """Print the lines that differ between two pretty-printed values.
+
+    Both values are formatted with pprint and compared line by line with
+    difflib.Differ.  Every output line that does not mark a common line
+    is printed with *prefix* in front and trailing whitespace removed.
+    """
+    before = pprint.pformat(val1).splitlines(True)
+    after = pprint.pformat(val2).splitlines(True)
+    for line in difflib.Differ().compare(before, after):
+        if not line.startswith(' '):
+            print(prefix + line.rstrip())

-- 
To view, visit https://gerrit.wikimedia.org/r/62672
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib15a1e9d5694629201efd4434745b66ca2a3891c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ZeroRatedMobileAccess
Gerrit-Branch: master
Gerrit-Owner: Yurik <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to