Yurik has submitted this change and it was merged.

Change subject: Proxy support for S3 & API
......................................................................
Proxy support for S3 & API Change-Id: Iba6069cff72ffbcec3df8132c2496e45c5470c6d --- M scripts/logprocessor.py M scripts/smslogs.py M scripts/weblogs.py 3 files changed, 45 insertions(+), 33 deletions(-) Approvals: Yurik: Verified; Looks good to me, approved diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py index a11a18c..5a0dc69 100644 --- a/scripts/logprocessor.py +++ b/scripts/logprocessor.py @@ -98,6 +98,13 @@ self.pathCache = self.normalizePath(self.settings.pathCache) self.pathGraphs = self.normalizePath(self.settings.pathGraphs) + self.proxy = self.settings.proxy + self.proxyPort = self.settings.proxyPort + if not self.proxy or not self.proxyPort: + if self.proxy or self.proxyPort: + safePrint(u'\nIgnoring proxy settings - both proxy and proxyPort need to be set') + self.proxy = self.proxyPort = None + def saveSettings(self): self.onSavingSettings() try: @@ -165,6 +172,9 @@ s.pathCache = 'cache' + suffix s.pathGraphs = 'graphs' + suffix + s.proxy = False + s.proxyPort = 0 + return s def onSavingSettings(self): diff --git a/scripts/smslogs.py b/scripts/smslogs.py index e01d85a..0b42ee2 100644 --- a/scripts/smslogs.py +++ b/scripts/smslogs.py @@ -5,7 +5,6 @@ import locale from datetime import timedelta import re -import traceback import itertools from boto.s3.connection import S3Connection @@ -23,8 +22,34 @@ return ''.join(random.choice(chars) for _ in range(size)) +def writeLine(dst, line): + if not line: + return + line = line.replace(u'\0', u'\\0') + parts = line.split('\t') + if parts[1][0] == u'+': + return + parts = [p[2:-1] + if (p.startswith(u"u'") and p.endswith(u"'")) or (p.startswith(u'u"') and p.endswith(u'"')) + else p for p in parts] + tmp = parts[0] + parts[0] = parts[1] + parts[1] = tmp \ + .replace(u' [VumiRedis,client]', u'') \ + .replace(u' [HTTP11ClientProtocol,client]', u'') \ + .replace(u' WIKI', u'') \ + .replace(u'+0000', u'') + + if len(parts) > 5 and parts[5].startswith(u'content='): + parts[5] = u'content=' + 
str(len(parts[5]) - 10) + + if len(parts) > 6: + parts[6] = parts[6].replace(u'\0', u'\\0') + + dst.write(u'\t'.join(parts) + u'\n') + + class SmsLogProcessor(LogProcessor): - dateFormat = '%Y-%m-%d' def __init__(self, settingsFile='settings/smslogs.json'): super(SmsLogProcessor, self).__init__(settingsFile, 'web') @@ -83,7 +108,7 @@ def download(self): safePrint(u'\nDownloading files') - cn = S3Connection(self.settings.awsKeyId, self.settings.awsSecret) + cn = S3Connection(self.settings.awsKeyId, self.settings.awsSecret, proxy=self.proxy, proxy_port=self.proxyPort) bucket = cn.get_bucket(self.settings.awsBucket) files = bucket.list(self.settings.awsPrefix) @@ -167,15 +192,15 @@ l = line.strip(u'\n\r') l = manualLogRe.sub('', l, 1) if u' WIKI\t' in l: - self.writeLine(dst, last) + writeLine(dst, last) last = l elif len(l) > 2 and l[0] == u'2' and l[1] == u'0': - self.writeLine(dst, last) + writeLine(dst, last) last = False elif isinstance(last, basestring): last = last + '\t' + l - self.writeLine(dst, last) + writeLine(dst, last) if fileDate and (not self.settings.lastProcessedTs or self.settings.lastProcessedTs < fileDate): self.settings.lastProcessedTs = fileDate @@ -210,35 +235,9 @@ os.remove(appendingDataFile) - def writeLine(self, dst, line): - if not line: - return - line = line.replace(u'\0', u'\\0') - parts = line.split('\t') - if parts[1][0] == u'+': - return - parts = [p[2:-1] - if (p.startswith(u"u'") and p.endswith(u"'")) or (p.startswith(u'u"') and p.endswith(u'"')) - else p for p in parts] - tmp = parts[0] - parts[0] = parts[1] - parts[1] = tmp \ - .replace(u' [VumiRedis,client]', u'') \ - .replace(u' [HTTP11ClientProtocol,client]', u'') \ - .replace(u' WIKI', u'') \ - .replace(u'+0000', u'') - - if len(parts) > 5 and parts[5].startswith(u'content='): - parts[5] = u'content=' + str(len(parts[5]) - 10) - - if len(parts) > 6: - parts[6] = parts[6].replace(u'\0', u'\\0') - - dst.write(u'\t'.join(parts) + u'\n') - def generateGraphData(self, 
skipParsing=False): stats = smsgraphs.Stats(self.combinedFilePath, self.pathGraphs, self.statsFilePath, self.settings.partnerMap, - self.settings.partnerDirMap, self.settings.salt) + self.settings.partnerDirMap, self.settings.salt) if not skipParsing: safePrint(u'\nParsing data') stats.process() diff --git a/scripts/weblogs.py b/scripts/weblogs.py index 0b8ebd5..4abb5b0 100644 --- a/scripts/weblogs.py +++ b/scripts/weblogs.py @@ -47,6 +47,9 @@ import api site = api.wikimedia('zero', 'wikimedia', 'https') + if self.proxy: + site.session.proxies = {"http": "http://%s:%d" % (self.proxy, self.proxyPort)} + site.login(self.settings.apiUsername, self.settings.apiPassword) # https://zero.wikimedia.org/w/api.php?action=zeroportal&type=analyticsconfig&format=jsonfm configs = site('zeroportal', type='analyticsconfig').zeroportal -- To view, visit https://gerrit.wikimedia.org/r/154232 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Iba6069cff72ffbcec3df8132c2496e45c5470c6d Gerrit-PatchSet: 2 Gerrit-Project: analytics/zero-sms Gerrit-Branch: master Gerrit-Owner: Yurik <yu...@wikimedia.org> Gerrit-Reviewer: Yurik <yu...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits