[MediaWiki-commits] [Gerrit] Updated scripts - change (analytics/zero-sms)

2014-11-07 Thread Yurik (Code Review)
Yurik has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/171810

Change subject: Updated scripts
..

Updated scripts

Change-Id: I101466924217d19b4def63a4a4ae8b5e3c915a7a
---
M scripts/api.py
M scripts/log2dfs.py
M scripts/logprocessor.py
M scripts/run-hivezero.sh
M scripts/smslogs.py
M scripts/weblogs.py
M scripts/weblogs2.py
M scripts/zero-counts.hql
8 files changed, 141 insertions(+), 134 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/zero-sms refs/changes/10/171810/1

diff --git a/scripts/api.py b/scripts/api.py
index 4037aeb..ed0bf6c 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -86,6 +86,7 @@
 
 
 def __init__(self, url, headers=None, session=None, log=None):
+self._loginOnDemand = False
 self.session = session if session else requests.session()
 self.log = log if log else ConsoleLog()
 self.url = url
@@ -143,6 +144,9 @@
 else:
 request_kw['params'] = kwargs
 
+if self._loginOnDemand and action != 'login':
+self.login(self._loginOnDemand[0], self._loginOnDemand[1])
+
 data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw))
 
 # Handle success and failure
@@ -152,13 +156,23 @@
 self.log(2, data['warnings'])
 return data
 
-def login(self, user, password):
+def login(self, user, password, onDemand=False):
+
+:param user:
+:param password:
+:param onDemand: if True, will postpone login until an actual API request is made
+:return:
+
+if onDemand:
+self._loginOnDemand = (user, password)
+return
 self.tokens = {}
 res = self('login', lgname=user, lgpassword=password)['login']
 if res['result'] == 'NeedToken':
 res = self('login', lgname=user, lgpassword=password, 
lgtoken=res['token'])['login']
 if res['result'] != 'Success':
 raise ApiError('Login failed', res)
+self._loginOnDemand = False
 
 def query(self, **kwargs):
 
diff --git a/scripts/log2dfs.py b/scripts/log2dfs.py
index 30aa8d5..9f888ec 100644
--- a/scripts/log2dfs.py
+++ b/scripts/log2dfs.py
@@ -221,7 +221,7 @@
 safePrint(u'URL parsing failed: %s\n%s' % (uri, 
line))
 continue
 if m.group(1).lower() == u'https' and u'https=' not in 
x_analytics:
-x_analytics += u'https=1'
+x_analytics += u';https=1'
 uri_host = m.group(2)
 if uri_host.endswith(':80'):
 uri_host = uri_host[:-3]
diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py
index 0e425b1..f74f128 100644
--- a/scripts/logprocessor.py
+++ b/scripts/logprocessor.py
@@ -188,7 +188,7 @@
 self._wiki = api.wikimedia('zero', 'wikimedia', 'https')
 if self.proxy:
 self._wiki.session.proxies = {'http': 'http://%s:%d' % 
(self.proxy, self.proxyPort)}
-self._wiki.login(self.settings.apiUsername, 
self.settings.apiPassword)
+self._wiki.login(self.settings.apiUsername, 
self.settings.apiPassword, onDemand=True)
 return self._wiki
 
 # noinspection PyMethodMayBeStatic
@@ -217,12 +217,11 @@
 def __init__(self, settingsFile, pathSuffix):
 super(LogProcessor, self).__init__(settingsFile, pathSuffix)
 
-if not self.settings.pathLogs or not self.settings.pathCache or not 
self.settings.pathGraphs:
+if not self.settings.pathLogs or not self.settings.pathCache:
 raise ValueError('One of the paths is not set, check %s' % 
settingsFile)
 
 self.pathLogs = self.normalizePath(self.settings.pathLogs)
 self.pathCache = self.normalizePath(self.settings.pathCache)
-self.pathGraphs = self.normalizePath(self.settings.pathGraphs)
 
 def defaultSettings(self, suffix):
 s = super(LogProcessor, self).defaultSettings(suffix)
@@ -231,5 +230,4 @@
 suffix = os.sep + suffix if suffix else ''
 s.pathLogs = 'logs' + suffix
 s.pathCache = 'cache' + suffix
-s.pathGraphs = 'graphs' + suffix
 return s
diff --git a/scripts/run-hivezero.sh b/scripts/run-hivezero.sh
index d1f799b..2c8f95c 100755
--- a/scripts/run-hivezero.sh
+++ b/scripts/run-hivezero.sh
@@ -1,16 +1,63 @@
 #!/bin/bash
 
-#$1 $2 $3   $4 $5 %6 $7
-# ./run-clone.sh wmf_raw.webrequest 515-05 2014 10 11 0  23
+#   $1 $2   $3 $4 %5 $6
+# ./run-hivezero.sh wmf_raw.webrequest 2014 10 1  31
+# ./run-hivezero.sh webreq_archive 2014 10 1  31 overwrite
 
-if [[ -z $7 ]]; then
-   last=$6
+set -e
+
+if [[ -z $5 ]]; then
+   last=$4
 else
-   last=$7
+   last=$5
 fi
 
-for ((hour = $6; hour <= $last; hour++)); do
-

[MediaWiki-commits] [Gerrit] Updated scripts - change (analytics/zero-sms)

2014-11-07 Thread Yurik (Code Review)
Yurik has submitted this change and it was merged.

Change subject: Updated scripts
..


Updated scripts

Change-Id: I101466924217d19b4def63a4a4ae8b5e3c915a7a
---
M scripts/api.py
M scripts/log2dfs.py
M scripts/logprocessor.py
M scripts/run-hivezero.sh
M scripts/smslogs.py
M scripts/weblogs.py
M scripts/weblogs2.py
M scripts/zero-counts.hql
8 files changed, 141 insertions(+), 134 deletions(-)

Approvals:
  Yurik: Verified; Looks good to me, approved



diff --git a/scripts/api.py b/scripts/api.py
index 4037aeb..ed0bf6c 100644
--- a/scripts/api.py
+++ b/scripts/api.py
@@ -86,6 +86,7 @@
 
 
 def __init__(self, url, headers=None, session=None, log=None):
+self._loginOnDemand = False
 self.session = session if session else requests.session()
 self.log = log if log else ConsoleLog()
 self.url = url
@@ -143,6 +144,9 @@
 else:
 request_kw['params'] = kwargs
 
+if self._loginOnDemand and action != 'login':
+self.login(self._loginOnDemand[0], self._loginOnDemand[1])
+
 data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw))
 
 # Handle success and failure
@@ -152,13 +156,23 @@
 self.log(2, data['warnings'])
 return data
 
-def login(self, user, password):
+def login(self, user, password, onDemand=False):
+
+:param user:
+:param password:
+:param onDemand: if True, will postpone login until an actual API request is made
+:return:
+
+if onDemand:
+self._loginOnDemand = (user, password)
+return
 self.tokens = {}
 res = self('login', lgname=user, lgpassword=password)['login']
 if res['result'] == 'NeedToken':
 res = self('login', lgname=user, lgpassword=password, 
lgtoken=res['token'])['login']
 if res['result'] != 'Success':
 raise ApiError('Login failed', res)
+self._loginOnDemand = False
 
 def query(self, **kwargs):
 
diff --git a/scripts/log2dfs.py b/scripts/log2dfs.py
index 30aa8d5..9f888ec 100644
--- a/scripts/log2dfs.py
+++ b/scripts/log2dfs.py
@@ -221,7 +221,7 @@
 safePrint(u'URL parsing failed: %s\n%s' % (uri, 
line))
 continue
 if m.group(1).lower() == u'https' and u'https=' not in 
x_analytics:
-x_analytics += u'https=1'
+x_analytics += u';https=1'
 uri_host = m.group(2)
 if uri_host.endswith(':80'):
 uri_host = uri_host[:-3]
diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py
index 0e425b1..f74f128 100644
--- a/scripts/logprocessor.py
+++ b/scripts/logprocessor.py
@@ -188,7 +188,7 @@
 self._wiki = api.wikimedia('zero', 'wikimedia', 'https')
 if self.proxy:
 self._wiki.session.proxies = {'http': 'http://%s:%d' % 
(self.proxy, self.proxyPort)}
-self._wiki.login(self.settings.apiUsername, 
self.settings.apiPassword)
+self._wiki.login(self.settings.apiUsername, 
self.settings.apiPassword, onDemand=True)
 return self._wiki
 
 # noinspection PyMethodMayBeStatic
@@ -217,12 +217,11 @@
 def __init__(self, settingsFile, pathSuffix):
 super(LogProcessor, self).__init__(settingsFile, pathSuffix)
 
-if not self.settings.pathLogs or not self.settings.pathCache or not 
self.settings.pathGraphs:
+if not self.settings.pathLogs or not self.settings.pathCache:
 raise ValueError('One of the paths is not set, check %s' % 
settingsFile)
 
 self.pathLogs = self.normalizePath(self.settings.pathLogs)
 self.pathCache = self.normalizePath(self.settings.pathCache)
-self.pathGraphs = self.normalizePath(self.settings.pathGraphs)
 
 def defaultSettings(self, suffix):
 s = super(LogProcessor, self).defaultSettings(suffix)
@@ -231,5 +230,4 @@
 suffix = os.sep + suffix if suffix else ''
 s.pathLogs = 'logs' + suffix
 s.pathCache = 'cache' + suffix
-s.pathGraphs = 'graphs' + suffix
 return s
diff --git a/scripts/run-hivezero.sh b/scripts/run-hivezero.sh
index d1f799b..2c8f95c 100755
--- a/scripts/run-hivezero.sh
+++ b/scripts/run-hivezero.sh
@@ -1,16 +1,63 @@
 #!/bin/bash
 
-#$1 $2 $3   $4 $5 %6 $7
-# ./run-clone.sh wmf_raw.webrequest 515-05 2014 10 11 0  23
+#   $1 $2   $3 $4 %5 $6
+# ./run-hivezero.sh wmf_raw.webrequest 2014 10 1  31
+# ./run-hivezero.sh webreq_archive 2014 10 1  31 overwrite
 
-if [[ -z $7 ]]; then
-   last=$6
+set -e
+
+if [[ -z $5 ]]; then
+   last=$4
 else
-   last=$7
+   last=$5
 fi
 
-for ((hour = $6; hour <= $last; hour++)); do
-   printf -v t tmp_%04d_%02d_%02d_%02d $3 $4 $5 $hour
-