Adamw has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/91570


Change subject: Log rather than cronspam.  Log like bloody hell.
......................................................................

Log rather than cronspam.  Log like bloody hell.

Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
---
M banner_screenshot/config.yaml.example
M banner_screenshot/rasterize.js
M banner_screenshot/shoot_banners
M dedupe/README
M process/lock.py
A process/logging.py
6 files changed, 87 insertions(+), 32 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/tools refs/changes/70/91570/1

diff --git a/banner_screenshot/config.yaml.example b/banner_screenshot/config.yaml.example
index ca3fa56..2e423f9 100644
--- a/banner_screenshot/config.yaml.example
+++ b/banner_screenshot/config.yaml.example
@@ -8,7 +8,5 @@
     "/tmp/banner_screenshots"
 banner_screenshot_format:
     png
-crop_height:
-    500
 banner_name_regex:
     "^B13_.*_(?P<lang>[a-z]{2})(?P<country>[A-Z0-9]{2})$"
diff --git a/banner_screenshot/rasterize.js b/banner_screenshot/rasterize.js
index 7d608ef..8ce9e58 100644
--- a/banner_screenshot/rasterize.js
+++ b/banner_screenshot/rasterize.js
@@ -19,17 +19,18 @@
     page.viewportSize = { width: 1024, height: 728 };
     page.open(address, function (status) {
         if (status !== 'success') {
-            console.log('Unable to load the address!');
+            console.error('Unable to load the address!');
         } else {
             //console.log(JSON.stringify(phantom.cookies, null, 2));
             window.setTimeout(function () {
                 page.clipRect = page.evaluate(function() {
                     var cn = $('#centralNotice');
 
-                    // FIXME: workaround for broken dropdown banner css, see FR #1085
+                    // workaround for broken banner css
                     var divHeight = cn.height();
                     if ( divHeight === 0 ) {
-                        divHeight = 728;
+                        divHeight = page.viewportSize.height;
+                        console.log("No height found, using default of " + divHeight);
                     }
 
                     return {
@@ -39,7 +40,7 @@
                         height: divHeight
                     };
                 });
-                console.log(page.clipRect.width + " x " + page.clipRect.height);
+                console.debug("#centralNotice was " + page.clipRect.width + "px x " + page.clipRect.height + "px");
                 page.render(output);
                 phantom.exit();
             }, 1000);
diff --git a/banner_screenshot/shoot_banners b/banner_screenshot/shoot_banners
index ebd6d6d..88d4e7d 100755
--- a/banner_screenshot/shoot_banners
+++ b/banner_screenshot/shoot_banners
@@ -6,26 +6,31 @@
 import re
 import sys
 
+from process.logging import Logger as log
 from process.globals import load_config
 load_config("banners")
-
 from process.globals import config
+
 from mediawiki.centralnotice.api import get_campaign_logs
 from mediawiki.centralnotice.time_util import str_time_offset
 from process.lock import begin, end
 
 def reduce_banners(campaign_logs):
-    '''Return a map from banner names to most recent campaign settings.'''
+    '''Return a map of banner names, to their most recent campaign settings.'''
+    # TODO: this would be in error if a banner were linked from different campaigns...
     banners = dict()
     for entry in campaign_logs:
         settings = entry['end']
         campaign_banners = settings['banners']
 
-        # we only need one country...
         settings['country'] = "US"
         if settings['geo'] == "1" and settings['countries']:
+            # FIXME: unfudge country list.  The campaign should specify whether it displays regional variation.
             settings['country'] = settings['countries'][0]
+            if len(settings['countries']) > 1:
+                log.debug("Multi-country campaign found, however, we woefully ignore country-based locale variation.")
 
+        # XXX why not "not empty campaign_banners"?
         if hasattr(campaign_banners, 'keys'):
             banners.update(
                 dict.fromkeys(
@@ -38,34 +43,41 @@
 def get_screenshot_path(name, lang):
     return os.path.join(
         config.banner_screenshots_dir,
-        "%(banner)s/%(banner)s_%(lang)s.%(ext)s" % {
-            "banner": name,
-            "lang": lang,
-            "ext": config.banner_screenshot_format,
-        }
+        "{banner}", "{banner}_{lang}.{ext}"
+    ).format(
+        banner=name,
+        lang=lang,
+        ext=config.banner_screenshot_format,
     )
 
-def banner_screenshot_exists(name, lang):
-    return os.path.exists(get_screenshot_path(name, lang))
-
 def render(name, lang, country):
+    '''Render a localized banner to file'''
     global JS_RENDER_SCRIPT
 
-    url = config.article_url % { "banner": name, "lang": lang, "country": country }
+    url = config.article_url % {"banner": name, "lang": lang, "country": country}
     path = get_screenshot_path(name, lang)
+    # TODO: option/default to update
+    if os.path.exists(path):
+        log.info("Banner screenshot already saved to {path}, not updating.".format(path=path))
+        return
     dir = os.path.dirname(path)
     if not os.path.exists(dir):
+        log.info("Beginning dumps for banner {banner}".format(banner=name))
         os.makedirs(dir)
 
-    print "Fetching " + url + " into " + path
-    subprocess.check_call([config.phantomjs, JS_RENDER_SCRIPT, url, path])
+    log.info("Fetching {url} into {path}".format(url=url, path=path))
+    cmd = [config.phantomjs, JS_RENDER_SCRIPT, url, path]
+    buf = subprocess.check_output(cmd)
+    log.info("phantom> {output}".format(output=buf))
 
 def process_banners():
+    log.info("Getting campaigns changed in the last two days...")
     banners = reduce_banners(get_campaign_logs(since=str_time_offset(days=-2)))
     for name, campaign_settings in banners.items():
         country = "US"
         m = re.match(config.banner_name_regex, name)
         if m:
+            # "yy" means, "all languages", and "YY" all countries.
             explicit_lang = m.group('lang')
             if explicit_lang != "yy":
                 campaign_settings['languages'] = [ explicit_lang ]
@@ -73,16 +85,23 @@
             if explicit_country != "YY":
                 campaign_settings['country'] = explicit_country
 
-        for lang in campaign_settings['languages']:
-            if not banner_screenshot_exists(name, lang):
-                render(name, lang, campaign_settings['country'])
+            log.debug("Parsed banner naming magic to get lang={lang}, country={country}".format(lang=explicit_lang, country=explicit_country))
+        else:
+            log.debug("Unrecognized banner naming magic: {name}".format(name=name))
 
+        log.debug("Rendering for languages: {languages}.".format(languages=", ".join(campaign_settings['languages'])))
+        for lang in campaign_settings['languages']:
+            render(name, lang, campaign_settings['country'])
+
+
+# set a magic global
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+JS_RENDER_SCRIPT = os.path.join(__dir__, "rasterize.js")
 
 if __name__ == "__main__":
     try:
+        log.info("Beginning banner scrape")
         begin()
-
-        JS_RENDER_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "rasterize.js")
 
         if len(sys.argv) > 1:
             for name in sys.argv[1:]:
@@ -91,3 +110,4 @@
             process_banners()
     finally:
         end()
+        log.info("Done.")
diff --git a/dedupe/README b/dedupe/README
index 453d27c..f624cd4 100644
--- a/dedupe/README
+++ b/dedupe/README
@@ -1 +1 @@
-pip install python-Levenshtein MySQL-python
+apt-get install python-yaml python-Levenshtein python-mysqldb
diff --git a/process/lock.py b/process/lock.py
index 1e69ead..da0eef5 100644
--- a/process/lock.py
+++ b/process/lock.py
@@ -6,6 +6,8 @@
 import os, os.path
 import sys
 
+from logging import Logger as log
+
 lockfile = None
 
 def begin(filename=None, failopen=False):
@@ -15,7 +17,7 @@
         filename = "/tmp/%s-%s.lock" % (unique, cmd)
 
     if os.path.exists(filename):
-        print "Lockfile found!"
+        log.warn("Lockfile found!")
         f = open(filename, "r")
         pid = None
         try:
@@ -24,18 +26,18 @@
             pass
         f.close()
         if not pid:
-            print "Invalid lockfile contents."
+            log.error("Invalid lockfile contents.")
         else:
             try:
                 os.getpgid(pid)
-                print "Aborting! Previous process (%d) is still alive. Remove lockfile manually if in error: %s" % (pid, filename, )
+                log.error("Aborting! Previous process ({pid}) is still alive. Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
                 sys.exit(1)
             except OSError:
                 if failopen:
-                    print "Aborting until stale lockfile is investigated: %s" % filename
+                    log.fatal("Aborting until stale lockfile is investigated: {path}".format(path=filename))
                     sys.exit(1)
-                print "Lockfile is stale."
-        print "Removing old lockfile."
+                log.error("Lockfile is stale.")
+        log.info("Removing old lockfile.")
         os.unlink(filename)
 
     f = open(filename, "w")
diff --git a/process/logging.py b/process/logging.py
new file mode 100644
index 0000000..4a58b5c
--- /dev/null
+++ b/process/logging.py
@@ -0,0 +1,34 @@
+import sys
+import syslog
+
+class Logger(object):
+    
+    @staticmethod
+    def debug(message):
+        Logger.log(message, syslog.LOG_DEBUG)
+
+    @staticmethod
+    def info(message):
+        Logger.log(message, syslog.LOG_INFO)
+
+    @staticmethod
+    def warn(message):
+        Logger.log(message, syslog.LOG_WARNING)
+
+    @staticmethod
+    def error(message):
+        Logger.log(message, syslog.LOG_ERR)
+
+    @staticmethod
+    def fatal(message):
+        Logger.log(message, syslog.LOG_CRIT)
+        print >>sys.stderr, message
+
+    @staticmethod
+    def log(message, severity):
+        syslog.openlog()
+        syslog.syslog(severity, message)
+        syslog.closelog()
+
+        if sys.stdout.isatty():
+            print(message)

-- 
To view, visit https://gerrit.wikimedia.org/r/91570
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Adamw <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to