[MediaWiki-commits] [Gerrit] AllEnabled data aggregation, filter by wikipedia only - change (analytics/zero-sms)
Yurik has uploaded a new change for review. https://gerrit.wikimedia.org/r/163220 Change subject: AllEnabled data aggregation, filter by wikipedia only .. AllEnabled data aggregation, filter by wikipedia only Change-Id: I1298032af723f2f9e1aa78dc955f604695a09e3d --- M scripts/weblogs.py 1 file changed, 46 insertions(+), 26 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/zero-sms refs/changes/20/163220/1 diff --git a/scripts/weblogs.py b/scripts/weblogs.py index 8a0e0cd..5f98aa7 100644 --- a/scripts/weblogs.py +++ b/scripts/weblogs.py @@ -20,8 +20,8 @@ stats[key] = 1 -columnHeaders10 = u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',') -columnHeaders11 = u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,count'.split(',') +columnHdrCache = u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',') +columnHdrResult = u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,ison,count'.split(',') validSubDomains = {'m', 'zero', 'mobile', 'wap'} validHttpCode = {'200', '304'} @@ -210,7 +210,7 @@ # Valid request! addStat(stats, dt, 'DATA', xcs, via, ipset, https, lang, subdomain, site) -writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], columnHeaders10) +writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], columnHdrCache) def combineStats(self): safePrint('Combine stat files') @@ -221,7 +221,7 @@ for f in os.listdir(self.pathCache): if not self.statFileRe.match(f): continue -for vals in readData(os.path.join(self.pathCache, f), columnHeaders10): +for vals in readData(os.path.join(self.pathCache, f), columnHdrCache): # 0 12 3 4 56 78 9 # 2014-07-25 DATA 250-99 DIRECT default http ru zero wikipedia 1000 if len(vals) != 10: @@ -240,30 +240,38 @@ if xcs == '404-01b': vals[2] = xcs = '404-01' vals[4] = ipset = 'b' + +isZero = '' +isOn = '' if typ == 'DATA': dt = datetime.strptime(dt, '%Y-%m-%d') site2 = subdomain + '.' 
+ site isZero = False +isEnabled = False for conf in configs[xcs]: langs = conf.languages sites = conf.sites -if conf['from'] <= dt < conf.before and \ -(conf.https or https == u'http') and \ -(True == langs or lang in langs) and \ -(True == sites or site2 in sites) and \ -(dt < ignoreViaBefore or via in conf.via) and \ -(ipset in conf.ipsets): -isZero = True -break -vals[9] = u'yes' if isZero else u'no' -else: -vals[9] = '' +if conf['from'] <= dt < conf.before: +if 'enabled' not in conf or conf.enabled: +isEnabled = True +if (conf.https or https == u'http') and \ +(True == langs or lang in langs) and \ +(True == sites or site2 in sites) and \ +(dt < ignoreViaBefore or via in conf.via) and \ +(ipset in conf.ipsets): +isZero = True +break +isZero = u'yes' if isZero else u'no' +isOn = u'on' if isEnabled else u'off' + +vals[9] = isZero +vals[10] = isOn else: # X-CS does not exist, ignore it error = 'xcs' if error: -vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', '', error, '', '') +vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', '', error, '', '', '') key = tuple(vals) stats[key] += int(count) @@ -271,28 +279,40 @@ # convert {a|b|c:count,...} into [[a,b,c,count],...] stats = [list(k) + [v] for k, v in stats.iteritems()] -writeData(self.combinedFile, stats, columnHeaders11) +writeData(self.combinedFile, stats, columnHdrResult) return stats def generateGraphData(self, stats=None):
[MediaWiki-commits] [Gerrit] AllEnabled data aggregation, filter by wikipedia only - change (analytics/zero-sms)
Yurik has submitted this change and it was merged. Change subject: AllEnabled data aggregation, filter by wikipedia only .. AllEnabled data aggregation, filter by wikipedia only Change-Id: I1298032af723f2f9e1aa78dc955f604695a09e3d --- M scripts/weblogs.py 1 file changed, 46 insertions(+), 26 deletions(-) Approvals: Yurik: Verified; Looks good to me, approved diff --git a/scripts/weblogs.py b/scripts/weblogs.py index 8a0e0cd..5f98aa7 100644 --- a/scripts/weblogs.py +++ b/scripts/weblogs.py @@ -20,8 +20,8 @@ stats[key] = 1 -columnHeaders10 = u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',') -columnHeaders11 = u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,count'.split(',') +columnHdrCache = u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',') +columnHdrResult = u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,ison,count'.split(',') validSubDomains = {'m', 'zero', 'mobile', 'wap'} validHttpCode = {'200', '304'} @@ -210,7 +210,7 @@ # Valid request! addStat(stats, dt, 'DATA', xcs, via, ipset, https, lang, subdomain, site) -writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], columnHeaders10) +writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], columnHdrCache) def combineStats(self): safePrint('Combine stat files') @@ -221,7 +221,7 @@ for f in os.listdir(self.pathCache): if not self.statFileRe.match(f): continue -for vals in readData(os.path.join(self.pathCache, f), columnHeaders10): +for vals in readData(os.path.join(self.pathCache, f), columnHdrCache): # 0 12 3 4 56 78 9 # 2014-07-25 DATA 250-99 DIRECT default http ru zero wikipedia 1000 if len(vals) != 10: @@ -240,30 +240,38 @@ if xcs == '404-01b': vals[2] = xcs = '404-01' vals[4] = ipset = 'b' + +isZero = '' +isOn = '' if typ == 'DATA': dt = datetime.strptime(dt, '%Y-%m-%d') site2 = subdomain + '.' 
+ site isZero = False +isEnabled = False for conf in configs[xcs]: langs = conf.languages sites = conf.sites -if conf['from'] <= dt < conf.before and \ -(conf.https or https == u'http') and \ -(True == langs or lang in langs) and \ -(True == sites or site2 in sites) and \ -(dt < ignoreViaBefore or via in conf.via) and \ -(ipset in conf.ipsets): -isZero = True -break -vals[9] = u'yes' if isZero else u'no' -else: -vals[9] = '' +if conf['from'] <= dt < conf.before: +if 'enabled' not in conf or conf.enabled: +isEnabled = True +if (conf.https or https == u'http') and \ +(True == langs or lang in langs) and \ +(True == sites or site2 in sites) and \ +(dt < ignoreViaBefore or via in conf.via) and \ +(ipset in conf.ipsets): +isZero = True +break +isZero = u'yes' if isZero else u'no' +isOn = u'on' if isEnabled else u'off' + +vals[9] = isZero +vals[10] = isOn else: # X-CS does not exist, ignore it error = 'xcs' if error: -vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', '', error, '', '') +vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', '', error, '', '', '') key = tuple(vals) stats[key] += int(count) @@ -271,28 +279,40 @@ # convert {a|b|c:count,...} into [[a,b,c,count],...] stats = [list(k) + [v] for k, v in stats.iteritems()] -writeData(self.combinedFile, stats, columnHeaders11) +writeData(self.combinedFile, stats, columnHdrResult) return stats def generateGraphData(self, stats=None): safePrint('Generating data files to %s' % self.pathGraphs) +wiki =