Yurik has submitted this change and it was merged.

Change subject: Proxy support for S3 & API
......................................................................


Proxy support for S3 & API

Change-Id: Iba6069cff72ffbcec3df8132c2496e45c5470c6d
---
M scripts/logprocessor.py
M scripts/smslogs.py
M scripts/weblogs.py
3 files changed, 45 insertions(+), 33 deletions(-)

Approvals:
  Yurik: Verified; Looks good to me, approved



diff --git a/scripts/logprocessor.py b/scripts/logprocessor.py
index a11a18c..5a0dc69 100644
--- a/scripts/logprocessor.py
+++ b/scripts/logprocessor.py
@@ -98,6 +98,13 @@
         self.pathCache = self.normalizePath(self.settings.pathCache)
         self.pathGraphs = self.normalizePath(self.settings.pathGraphs)
 
+        self.proxy = self.settings.proxy
+        self.proxyPort = self.settings.proxyPort
+        if not self.proxy or not self.proxyPort:
+            if self.proxy or self.proxyPort:
+                safePrint(u'\nIgnoring proxy settings - both proxy and proxyPort need to be set')
+            self.proxy = self.proxyPort = None
+
     def saveSettings(self):
         self.onSavingSettings()
         try:
@@ -165,6 +172,9 @@
         s.pathCache = 'cache' + suffix
         s.pathGraphs = 'graphs' + suffix
 
+        s.proxy = False
+        s.proxyPort = 0
+
         return s
 
     def onSavingSettings(self):
diff --git a/scripts/smslogs.py b/scripts/smslogs.py
index e01d85a..0b42ee2 100644
--- a/scripts/smslogs.py
+++ b/scripts/smslogs.py
@@ -5,7 +5,6 @@
 import locale
 from datetime import timedelta
 import re
-import traceback
 import itertools
 
 from boto.s3.connection import S3Connection
@@ -23,8 +22,34 @@
     return ''.join(random.choice(chars) for _ in range(size))
 
 
+def writeLine(dst, line):
+    if not line:
+        return
+    line = line.replace(u'\0', u'\\0')
+    parts = line.split('\t')
+    if parts[1][0] == u'+':
+        return
+    parts = [p[2:-1]
+             if (p.startswith(u"u'") and p.endswith(u"'")) or (p.startswith(u'u"') and p.endswith(u'"'))
+             else p for p in parts]
+    tmp = parts[0]
+    parts[0] = parts[1]
+    parts[1] = tmp \
+        .replace(u' [VumiRedis,client]', u'') \
+        .replace(u' [HTTP11ClientProtocol,client]', u'') \
+        .replace(u' WIKI', u'') \
+        .replace(u'+0000', u'')
+
+    if len(parts) > 5 and parts[5].startswith(u'content='):
+        parts[5] = u'content=' + str(len(parts[5]) - 10)
+
+    if len(parts) > 6:
+        parts[6] = parts[6].replace(u'\0', u'\\0')
+
+    dst.write(u'\t'.join(parts) + u'\n')
+
+
 class SmsLogProcessor(LogProcessor):
-    dateFormat = '%Y-%m-%d'
 
     def __init__(self, settingsFile='settings/smslogs.json'):
         super(SmsLogProcessor, self).__init__(settingsFile, 'web')
@@ -83,7 +108,7 @@
     def download(self):
         safePrint(u'\nDownloading files')
 
-        cn = S3Connection(self.settings.awsKeyId, self.settings.awsSecret)
+        cn = S3Connection(self.settings.awsKeyId, self.settings.awsSecret, proxy=self.proxy, proxy_port=self.proxyPort)
 
         bucket = cn.get_bucket(self.settings.awsBucket)
         files = bucket.list(self.settings.awsPrefix)
@@ -167,15 +192,15 @@
                     l = line.strip(u'\n\r')
                     l = manualLogRe.sub('', l, 1)
                     if u' WIKI\t' in l:
-                        self.writeLine(dst, last)
+                        writeLine(dst, last)
                         last = l
                     elif len(l) > 2 and l[0] == u'2' and l[1] == u'0':
-                        self.writeLine(dst, last)
+                        writeLine(dst, last)
                         last = False
                     elif isinstance(last, basestring):
                         last = last + '\t' + l
 
-                self.writeLine(dst, last)
+                writeLine(dst, last)
                 if fileDate and (not self.settings.lastProcessedTs or self.settings.lastProcessedTs < fileDate):
                     self.settings.lastProcessedTs = fileDate
 
@@ -210,35 +235,9 @@
 
         os.remove(appendingDataFile)
 
-    def writeLine(self, dst, line):
-        if not line:
-            return
-        line = line.replace(u'\0', u'\\0')
-        parts = line.split('\t')
-        if parts[1][0] == u'+':
-            return
-        parts = [p[2:-1]
-                 if (p.startswith(u"u'") and p.endswith(u"'")) or (p.startswith(u'u"') and p.endswith(u'"'))
-                 else p for p in parts]
-        tmp = parts[0]
-        parts[0] = parts[1]
-        parts[1] = tmp \
-            .replace(u' [VumiRedis,client]', u'') \
-            .replace(u' [HTTP11ClientProtocol,client]', u'') \
-            .replace(u' WIKI', u'') \
-            .replace(u'+0000', u'')
-
-        if len(parts) > 5 and parts[5].startswith(u'content='):
-            parts[5] = u'content=' + str(len(parts[5]) - 10)
-
-        if len(parts) > 6:
-            parts[6] = parts[6].replace(u'\0', u'\\0')
-
-        dst.write(u'\t'.join(parts) + u'\n')
-
     def generateGraphData(self, skipParsing=False):
         stats = smsgraphs.Stats(self.combinedFilePath, self.pathGraphs, self.statsFilePath, self.settings.partnerMap,
-                               self.settings.partnerDirMap, self.settings.salt)
+                                self.settings.partnerDirMap, self.settings.salt)
         if not skipParsing:
             safePrint(u'\nParsing data')
             stats.process()
diff --git a/scripts/weblogs.py b/scripts/weblogs.py
index 0b8ebd5..4abb5b0 100644
--- a/scripts/weblogs.py
+++ b/scripts/weblogs.py
@@ -47,6 +47,9 @@
         import api
 
         site = api.wikimedia('zero', 'wikimedia', 'https')
+        if self.proxy:
+            site.session.proxies = {"http": "http://%s:%d" % (self.proxy, self.proxyPort)}
+
         site.login(self.settings.apiUsername, self.settings.apiPassword)
         # https://zero.wikimedia.org/w/api.php?action=zeroportal&type=analyticsconfig&format=jsonfm
         configs = site('zeroportal', type='analyticsconfig').zeroportal

-- 
To view, visit https://gerrit.wikimedia.org/r/154232
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iba6069cff72ffbcec3df8132c2496e45c5470c6d
Gerrit-PatchSet: 2
Gerrit-Project: analytics/zero-sms
Gerrit-Branch: master
Gerrit-Owner: Yurik <yu...@wikimedia.org>
Gerrit-Reviewer: Yurik <yu...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to