[MediaWiki-commits] [Gerrit] Updated scripts - change (analytics/zero-sms)
Yurik has uploaded a new change for review. https://gerrit.wikimedia.org/r/171810 Change subject: Updated scripts .. Updated scripts Change-Id: I101466924217d19b4def63a4a4ae8b5e3c915a7a --- M scripts/api.py M scripts/log2dfs.py M scripts/logprocessor.py M scripts/run-hivezero.sh M scripts/smslogs.py M scripts/weblogs.py M scripts/weblogs2.py M scripts/zero-counts.hql 8 files changed, 141 insertions(+), 134 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/zero-sms refs/changes/10/171810/1 diff --git a/scripts/api.py b/scripts/api.py index 4037aeb..ed0bf6c 100644 --- a/scripts/api.py +++ b/scripts/api.py @@ -86,6 +86,7 @@ def __init__(self, url, headers=None, session=None, log=None): +self._loginOnDemand = False self.session = session if session else requests.session() self.log = log if log else ConsoleLog() self.url = url @@ -143,6 +144,9 @@ else: request_kw['params'] = kwargs +if self._loginOnDemand and action != 'login': +self.login(self._loginOnDemand[0], self._loginOnDemand[1]) + data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw)) # Handle success and failure @@ -152,13 +156,23 @@ self.log(2, data['warnings']) return data -def login(self, user, password): +def login(self, user, password, onDemand=False): + +:param user: +:param password: +:param onDemand: if True, will postpone login until an actual API request is made +:return: + +if onDemand: +self._loginOnDemand = (user, password) +return self.tokens = {} res = self('login', lgname=user, lgpassword=password)['login'] if res['result'] == 'NeedToken': res = self('login', lgname=user, lgpassword=password, lgtoken=res['token'])['login'] if res['result'] != 'Success': raise ApiError('Login failed', res) +self._loginOnDemand = False def query(self, **kwargs): diff --git a/scripts/log2dfs.py b/scripts/log2dfs.py index 30aa8d5..9f888ec 100644 --- a/scripts/log2dfs.py +++ b/scripts/log2dfs.py @@ -221,7 +221,7 @@ safePrint(u'URL parsing failed: %s\n%s' % (uri, line)) continue if 
m.group(1).lower() == u'https' and u'https=' not in x_analytics: -x_analytics += u'https=1' +x_analytics += u';https=1' uri_host = m.group(2) if uri_host.endswith(':80'): uri_host = uri_host[:-3] diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py index 0e425b1..f74f128 100644 --- a/scripts/logprocessor.py +++ b/scripts/logprocessor.py @@ -188,7 +188,7 @@ self._wiki = api.wikimedia('zero', 'wikimedia', 'https') if self.proxy: self._wiki.session.proxies = {'http': 'http://%s:%d' % (self.proxy, self.proxyPort)} -self._wiki.login(self.settings.apiUsername, self.settings.apiPassword) +self._wiki.login(self.settings.apiUsername, self.settings.apiPassword, onDemand=True) return self._wiki # noinspection PyMethodMayBeStatic @@ -217,12 +217,11 @@ def __init__(self, settingsFile, pathSuffix): super(LogProcessor, self).__init__(settingsFile, pathSuffix) -if not self.settings.pathLogs or not self.settings.pathCache or not self.settings.pathGraphs: +if not self.settings.pathLogs or not self.settings.pathCache: raise ValueError('One of the paths is not set, check %s' % settingsFile) self.pathLogs = self.normalizePath(self.settings.pathLogs) self.pathCache = self.normalizePath(self.settings.pathCache) -self.pathGraphs = self.normalizePath(self.settings.pathGraphs) def defaultSettings(self, suffix): s = super(LogProcessor, self).defaultSettings(suffix) @@ -231,5 +230,4 @@ suffix = os.sep + suffix if suffix else '' s.pathLogs = 'logs' + suffix s.pathCache = 'cache' + suffix -s.pathGraphs = 'graphs' + suffix return s diff --git a/scripts/run-hivezero.sh b/scripts/run-hivezero.sh index d1f799b..2c8f95c 100755 --- a/scripts/run-hivezero.sh +++ b/scripts/run-hivezero.sh @@ -1,16 +1,63 @@ #!/bin/bash -#$1 $2 $3 $4 $5 %6 $7 -# ./run-clone.sh wmf_raw.webrequest 515-05 2014 10 11 0 23 +# $1 $2 $3 $4 %5 $6 +# ./run-hivezero.sh wmf_raw.webrequest 2014 10 1 31 +# ./run-hivezero.sh webreq_archive 2014 10 1 31 overwrite -if [[ -z $7 ]]; then - last=$6 +set -e + +if [[ -z $5 ]]; then 
+ last=$4 else - last=$7 + last=$5 fi -for ((hour = $6; hour <= $last; hour++)); do -
[MediaWiki-commits] [Gerrit] Updated scripts - change (analytics/zero-sms)
Yurik has submitted this change and it was merged. Change subject: Updated scripts .. Updated scripts Change-Id: I101466924217d19b4def63a4a4ae8b5e3c915a7a --- M scripts/api.py M scripts/log2dfs.py M scripts/logprocessor.py M scripts/run-hivezero.sh M scripts/smslogs.py M scripts/weblogs.py M scripts/weblogs2.py M scripts/zero-counts.hql 8 files changed, 141 insertions(+), 134 deletions(-) Approvals: Yurik: Verified; Looks good to me, approved diff --git a/scripts/api.py b/scripts/api.py index 4037aeb..ed0bf6c 100644 --- a/scripts/api.py +++ b/scripts/api.py @@ -86,6 +86,7 @@ def __init__(self, url, headers=None, session=None, log=None): +self._loginOnDemand = False self.session = session if session else requests.session() self.log = log if log else ConsoleLog() self.url = url @@ -143,6 +144,9 @@ else: request_kw['params'] = kwargs +if self._loginOnDemand and action != 'login': +self.login(self._loginOnDemand[0], self._loginOnDemand[1]) + data = parseJson(self.request(method, forceSSL=forceSSL, **request_kw)) # Handle success and failure @@ -152,13 +156,23 @@ self.log(2, data['warnings']) return data -def login(self, user, password): +def login(self, user, password, onDemand=False): + +:param user: +:param password: +:param onDemand: if True, will postpone login until an actual API request is made +:return: + +if onDemand: +self._loginOnDemand = (user, password) +return self.tokens = {} res = self('login', lgname=user, lgpassword=password)['login'] if res['result'] == 'NeedToken': res = self('login', lgname=user, lgpassword=password, lgtoken=res['token'])['login'] if res['result'] != 'Success': raise ApiError('Login failed', res) +self._loginOnDemand = False def query(self, **kwargs): diff --git a/scripts/log2dfs.py b/scripts/log2dfs.py index 30aa8d5..9f888ec 100644 --- a/scripts/log2dfs.py +++ b/scripts/log2dfs.py @@ -221,7 +221,7 @@ safePrint(u'URL parsing failed: %s\n%s' % (uri, line)) continue if m.group(1).lower() == u'https' and u'https=' not in x_analytics: 
-x_analytics += u'https=1' +x_analytics += u';https=1' uri_host = m.group(2) if uri_host.endswith(':80'): uri_host = uri_host[:-3] diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py index 0e425b1..f74f128 100644 --- a/scripts/logprocessor.py +++ b/scripts/logprocessor.py @@ -188,7 +188,7 @@ self._wiki = api.wikimedia('zero', 'wikimedia', 'https') if self.proxy: self._wiki.session.proxies = {'http': 'http://%s:%d' % (self.proxy, self.proxyPort)} -self._wiki.login(self.settings.apiUsername, self.settings.apiPassword) +self._wiki.login(self.settings.apiUsername, self.settings.apiPassword, onDemand=True) return self._wiki # noinspection PyMethodMayBeStatic @@ -217,12 +217,11 @@ def __init__(self, settingsFile, pathSuffix): super(LogProcessor, self).__init__(settingsFile, pathSuffix) -if not self.settings.pathLogs or not self.settings.pathCache or not self.settings.pathGraphs: +if not self.settings.pathLogs or not self.settings.pathCache: raise ValueError('One of the paths is not set, check %s' % settingsFile) self.pathLogs = self.normalizePath(self.settings.pathLogs) self.pathCache = self.normalizePath(self.settings.pathCache) -self.pathGraphs = self.normalizePath(self.settings.pathGraphs) def defaultSettings(self, suffix): s = super(LogProcessor, self).defaultSettings(suffix) @@ -231,5 +230,4 @@ suffix = os.sep + suffix if suffix else '' s.pathLogs = 'logs' + suffix s.pathCache = 'cache' + suffix -s.pathGraphs = 'graphs' + suffix return s diff --git a/scripts/run-hivezero.sh b/scripts/run-hivezero.sh index d1f799b..2c8f95c 100755 --- a/scripts/run-hivezero.sh +++ b/scripts/run-hivezero.sh @@ -1,16 +1,63 @@ #!/bin/bash -#$1 $2 $3 $4 $5 %6 $7 -# ./run-clone.sh wmf_raw.webrequest 515-05 2014 10 11 0 23 +# $1 $2 $3 $4 %5 $6 +# ./run-hivezero.sh wmf_raw.webrequest 2014 10 1 31 +# ./run-hivezero.sh webreq_archive 2014 10 1 31 overwrite -if [[ -z $7 ]]; then - last=$6 +set -e + +if [[ -z $5 ]]; then + last=$4 else - last=$7 + last=$5 fi -for ((hour = $6; hour = 
$last; hour++)); do - printf -v t tmp_%04d_%02d_%02d_%02d $3 $4 $5 $hour -