[MediaWiki-commits] [Gerrit] AllEnabled data aggregation, filter by wikipedia only - change (analytics/zero-sms)

2014-09-26 Thread Yurik (Code Review)
Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/163220

Change subject: AllEnabled data aggregation, filter by wikipedia only
..

AllEnabled data aggregation, filter by wikipedia only

Change-Id: I1298032af723f2f9e1aa78dc955f604695a09e3d
---
M scripts/weblogs.py
1 file changed, 46 insertions(+), 26 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/zero-sms 
refs/changes/20/163220/1

diff --git a/scripts/weblogs.py b/scripts/weblogs.py
index 8a0e0cd..5f98aa7 100644
--- a/scripts/weblogs.py
+++ b/scripts/weblogs.py
@@ -20,8 +20,8 @@
 stats[key] = 1
 
 
-columnHeaders10 = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',')
-columnHeaders11 = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,count'.split(',')
+columnHdrCache = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',')
+columnHdrResult = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,ison,count'.split(',')
 validSubDomains = {'m', 'zero', 'mobile', 'wap'}
 validHttpCode = {'200', '304'}
 
@@ -210,7 +210,7 @@
 # Valid request!
 addStat(stats, dt, 'DATA', xcs, via, ipset, https, lang, 
subdomain, site)
 
-writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], 
columnHeaders10)
+writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], 
columnHdrCache)
 
 def combineStats(self):
 safePrint('Combine stat files')
@@ -221,7 +221,7 @@
 for f in os.listdir(self.pathCache):
 if not self.statFileRe.match(f):
 continue
-for vals in readData(os.path.join(self.pathCache, f), 
columnHeaders10):
+for vals in readData(os.path.join(self.pathCache, f), 
columnHdrCache):
 # 0          1    2      3      4       5    6  7    8         9
 # 2014-07-25 DATA 250-99 DIRECT default http ru zero wikipedia 1000
 if len(vals) != 10:
@@ -240,30 +240,38 @@
 if xcs == '404-01b':
 vals[2] = xcs = '404-01'
 vals[4] = ipset = 'b'
+
+isZero = ''
+isOn = ''
 if typ == 'DATA':
 dt = datetime.strptime(dt, '%Y-%m-%d')
 site2 = subdomain + '.' + site
 isZero = False
+isEnabled = False
 for conf in configs[xcs]:
 langs = conf.languages
 sites = conf.sites
-if conf['from'] <= dt < conf.before and \
-(conf.https or https == u'http') and \
-(True == langs or lang in langs) and \
-(True == sites or site2 in sites) and \
-(dt < ignoreViaBefore or via in conf.via) and \
-(ipset in conf.ipsets):
-isZero = True
-break
-vals[9] = u'yes' if isZero else u'no'
-else:
-vals[9] = ''
+if conf['from'] <= dt < conf.before:
+if 'enabled' not in conf or conf.enabled:
+isEnabled = True
+if (conf.https or https == u'http') and \
+(True == langs or lang in langs) and \
+(True == sites or site2 in sites) and \
+(dt < ignoreViaBefore or via in conf.via) and \
+(ipset in conf.ipsets):
+isZero = True
+break
+isZero = u'yes' if isZero else u'no'
+isOn = u'on' if isEnabled else u'off'
+
+vals[9] = isZero
+vals[10] = isOn
 else:
 # X-CS does not exist, ignore it
 error = 'xcs'
 
 if error:
-vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', 
'', error, '', '')
+vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', 
'', error, '', '', '')
 
 key = tuple(vals)
 stats[key] += int(count)
@@ -271,28 +279,40 @@
 # convert {a|b|c:count,...}  into [[a,b,c,count],...]
 
 stats = [list(k) + [v] for k, v in stats.iteritems()]
-writeData(self.combinedFile, stats, columnHeaders11)
+writeData(self.combinedFile, stats, columnHdrResult)
 return stats
 
 def generateGraphData(self, stats=None):
 

[MediaWiki-commits] [Gerrit] AllEnabled data aggregation, filter by wikipedia only - change (analytics/zero-sms)

2014-09-26 Thread Yurik (Code Review)
Yurik has submitted this change and it was merged.

Change subject: AllEnabled data aggregation, filter by wikipedia only
..


AllEnabled data aggregation, filter by wikipedia only

Change-Id: I1298032af723f2f9e1aa78dc955f604695a09e3d
---
M scripts/weblogs.py
1 file changed, 46 insertions(+), 26 deletions(-)

Approvals:
  Yurik: Verified; Looks good to me, approved



diff --git a/scripts/weblogs.py b/scripts/weblogs.py
index 8a0e0cd..5f98aa7 100644
--- a/scripts/weblogs.py
+++ b/scripts/weblogs.py
@@ -20,8 +20,8 @@
 stats[key] = 1
 
 
-columnHeaders10 = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',')
-columnHeaders11 = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,count'.split(',')
+columnHdrCache = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,count'.split(',')
+columnHdrResult = 
u'date,type,xcs,via,ipset,https,lang,subdomain,site,iszero,ison,count'.split(',')
 validSubDomains = {'m', 'zero', 'mobile', 'wap'}
 validHttpCode = {'200', '304'}
 
@@ -210,7 +210,7 @@
 # Valid request!
 addStat(stats, dt, 'DATA', xcs, via, ipset, https, lang, 
subdomain, site)
 
-writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], 
columnHeaders10)
+writeData(statFile, [list(k) + [v] for k, v in stats.iteritems()], 
columnHdrCache)
 
 def combineStats(self):
 safePrint('Combine stat files')
@@ -221,7 +221,7 @@
 for f in os.listdir(self.pathCache):
 if not self.statFileRe.match(f):
 continue
-for vals in readData(os.path.join(self.pathCache, f), 
columnHeaders10):
+for vals in readData(os.path.join(self.pathCache, f), 
columnHdrCache):
 # 0          1    2      3      4       5    6  7    8         9
 # 2014-07-25 DATA 250-99 DIRECT default http ru zero wikipedia 1000
 if len(vals) != 10:
@@ -240,30 +240,38 @@
 if xcs == '404-01b':
 vals[2] = xcs = '404-01'
 vals[4] = ipset = 'b'
+
+isZero = ''
+isOn = ''
 if typ == 'DATA':
 dt = datetime.strptime(dt, '%Y-%m-%d')
 site2 = subdomain + '.' + site
 isZero = False
+isEnabled = False
 for conf in configs[xcs]:
 langs = conf.languages
 sites = conf.sites
-if conf['from'] <= dt < conf.before and \
-(conf.https or https == u'http') and \
-(True == langs or lang in langs) and \
-(True == sites or site2 in sites) and \
-(dt < ignoreViaBefore or via in conf.via) and \
-(ipset in conf.ipsets):
-isZero = True
-break
-vals[9] = u'yes' if isZero else u'no'
-else:
-vals[9] = ''
+if conf['from'] <= dt < conf.before:
+if 'enabled' not in conf or conf.enabled:
+isEnabled = True
+if (conf.https or https == u'http') and \
+(True == langs or lang in langs) and \
+(True == sites or site2 in sites) and \
+(dt < ignoreViaBefore or via in conf.via) and \
+(ipset in conf.ipsets):
+isZero = True
+break
+isZero = u'yes' if isZero else u'no'
+isOn = u'on' if isEnabled else u'off'
+
+vals[9] = isZero
+vals[10] = isOn
 else:
 # X-CS does not exist, ignore it
 error = 'xcs'
 
 if error:
-vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', 
'', error, '', '')
+vals = (vals[0], 'ERR', '000-00', 'ERR', 'ERR', 'http', 
'', error, '', '', '')
 
 key = tuple(vals)
 stats[key] += int(count)
@@ -271,28 +279,40 @@
 # convert {a|b|c:count,...}  into [[a,b,c,count],...]
 
 stats = [list(k) + [v] for k, v in stats.iteritems()]
-writeData(self.combinedFile, stats, columnHeaders11)
+writeData(self.combinedFile, stats, columnHdrResult)
 return stats
 
 def generateGraphData(self, stats=None):
 safePrint('Generating data files to %s' % self.pathGraphs)
 
+wiki =