[MediaWiki-commits] [Gerrit] Log rather than cronspam. Log like bloody hell. - change (wikimedia...tools)

Mwalker (Code Review) Thu, 24 Oct 2013 10:50:48 -0700

Mwalker has submitted this change and it was merged.

Change subject: Log rather than cronspam.  Log like bloody hell.
......................................................................



Log rather than cronspam.  Log like bloody hell.

Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
---
M banner_screenshot/config.yaml.example
M banner_screenshot/rasterize.js
M banner_screenshot/shoot_banners
M database/db.py
M dedupe/README
M dedupe/contact_cache.py
M dedupe/quick_autoreview.py
M dedupe/review_queue.py
M process/globals.py
M process/lock.py
A process/logging.py
11 files changed, 113 insertions(+), 35 deletions(-)

Approvals:
  Mwalker: Looks good to me, approved
  Jgreen: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/banner_screenshot/config.yaml.example b/banner_screenshot/config.yaml.example
index ca3fa56..2e423f9 100644
--- a/banner_screenshot/config.yaml.example
+++ b/banner_screenshot/config.yaml.example
@@ -8,7 +8,5 @@
     "/tmp/banner_screenshots"
 banner_screenshot_format:
     png
-crop_height:
-    500
 banner_name_regex:
     "^B13_.*_(?P<lang>[a-z]{2})(?P<country>[A-Z0-9]{2})$"
diff --git a/banner_screenshot/rasterize.js b/banner_screenshot/rasterize.js
index 7d608ef..4ad4e37 100644
--- a/banner_screenshot/rasterize.js
+++ b/banner_screenshot/rasterize.js
@@ -19,17 +19,18 @@
     page.viewportSize = { width: 1024, height: 728 };
     page.open(address, function (status) {
         if (status !== 'success') {
-            console.log('Unable to load the address!');
+            console.error('Unable to load the address!');
         } else {
             //console.log(JSON.stringify(phantom.cookies, null, 2));
             window.setTimeout(function () {
                 page.clipRect = page.evaluate(function() {
                     var cn = $('#centralNotice');
 
-                    // FIXME: workaround for broken dropdown banner css, see FR #1085
+                    // workaround for broken banner css
                     var divHeight = cn.height();
                     if ( divHeight === 0 ) {
                         divHeight = 728;
+                        console.log("No height found, using default of " + divHeight);
                     }
 
                     return {
@@ -39,7 +40,7 @@
                         height: divHeight
                     };
                 });
-                console.log(page.clipRect.width + " x " + page.clipRect.height);
+                console.debug("#centralNotice was " + page.clipRect.width + "px x " + page.clipRect.height + "px");
                 page.render(output);
                 phantom.exit();
             }, 1000);
diff --git a/banner_screenshot/shoot_banners b/banner_screenshot/shoot_banners
index ebd6d6d..0b5990a 100755
--- a/banner_screenshot/shoot_banners
+++ b/banner_screenshot/shoot_banners
@@ -6,26 +6,31 @@
 import re
 import sys
 
+from process.logging import Logger as log
 from process.globals import load_config
-load_config("banners")
-
+load_config("banner_screenshot")
 from process.globals import config
+
 from mediawiki.centralnotice.api import get_campaign_logs
 from mediawiki.centralnotice.time_util import str_time_offset
 from process.lock import begin, end
 
 def reduce_banners(campaign_logs):
-    '''Return a map from banner names to most recent campaign settings.'''
+    '''Return a map of banner names, to their most recent campaign settings.'''
+    # TODO: this would be in error if a banner were linked from different campaigns...
     banners = dict()
     for entry in campaign_logs:
         settings = entry['end']
         campaign_banners = settings['banners']
 
-        # we only need one country...
         settings['country'] = "US"
         if settings['geo'] == "1" and settings['countries']:
+            # FIXME: unfudge country list.  The campaign should specify whether it displays regional variation.
             settings['country'] = settings['countries'][0]
+            if len(settings['countries']) > 1:
+                log.debug("Multi-country campaign found, however, we woefully ignore country-based locale variation.")
 
+        # XXX why not "not empty campaign_banners"?
         if hasattr(campaign_banners, 'keys'):
             banners.update(
                 dict.fromkeys(
@@ -38,34 +43,41 @@
 def get_screenshot_path(name, lang):
     return os.path.join(
         config.banner_screenshots_dir,
-        "%(banner)s/%(banner)s_%(lang)s.%(ext)s" % {
-            "banner": name,
-            "lang": lang,
-            "ext": config.banner_screenshot_format,
-        }
+        "{banner}", "{banner}_{lang}.{ext}"
+    ).format(
+        banner=name,
+        lang=lang,
+        ext=config.banner_screenshot_format,
     )
 
-def banner_screenshot_exists(name, lang):
-    return os.path.exists(get_screenshot_path(name, lang))
-
 def render(name, lang, country):
+    '''Render a localized banner to file'''
     global JS_RENDER_SCRIPT
 
-    url = config.article_url % { "banner": name, "lang": lang, "country": country }
+    url = config.article_url % {"banner": name, "lang": lang, "country": country}
     path = get_screenshot_path(name, lang)
+    # TODO: option/default to update
+    if os.path.exists(path):
+        log.info("Banner screenshot already saved to {path}, not updating.".format(path=path))
+        return
     dir = os.path.dirname(path)
     if not os.path.exists(dir):
+        log.info("Beginning dumps for banner {banner}".format(banner=name))
         os.makedirs(dir)
 
-    print "Fetching " + url + " into " + path
-    subprocess.check_call([config.phantomjs, JS_RENDER_SCRIPT, url, path])
+    log.info("Fetching {url} into {path}".format(url=url, path=path))
+    cmd = [config.phantomjs, JS_RENDER_SCRIPT, url, path]
+    buf = subprocess.check_output(cmd)
+    log.info("phantom> {output}".format(output=buf))
 
 def process_banners():
+    log.info("Getting campaigns changed in the last two days...")
     banners = reduce_banners(get_campaign_logs(since=str_time_offset(days=-2)))
     for name, campaign_settings in banners.items():
         country = "US"
         m = re.match(config.banner_name_regex, name)
         if m:
+            # "yy" means, "all languages", and "YY" all countries.
             explicit_lang = m.group('lang')
             if explicit_lang != "yy":
                 campaign_settings['languages'] = [ explicit_lang ]
@@ -73,16 +85,23 @@
             if explicit_country != "YY":
                 campaign_settings['country'] = explicit_country
 
-        for lang in campaign_settings['languages']:
-            if not banner_screenshot_exists(name, lang):
-                render(name, lang, campaign_settings['country'])
+            log.debug("Parsed banner naming magic to get lang={lang}, country={country}".format(lang=explicit_lang, country=explicit_country))
+        else:
+            log.debug("Unrecognized banner naming magic: {name}".format(name=name))
 
+        log.debug("Rendering for languages: {languages}.".format(languages=", ".join(campaign_settings['languages'])))
+        for lang in campaign_settings['languages']:
+            render(name, lang, campaign_settings['country'])
+
+
+# set a magic global
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+JS_RENDER_SCRIPT = os.path.join(__dir__, "rasterize.js")
 
 if __name__ == "__main__":
     try:
+        log.info("Beginning banner scrape")
         begin()
-
-        JS_RENDER_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "rasterize.js")
 
         if len(sys.argv) > 1:
             for name in sys.argv[1:]:
@@ -91,3 +110,4 @@
             process_banners()
     finally:
         end()
+        log.info("Done.")
diff --git a/database/db.py b/database/db.py
index 9db6663..02616b3 100644
--- a/database/db.py
+++ b/database/db.py
@@ -4,6 +4,7 @@
 import MySQLdb as Dbi
 import atexit
 
+from process.logging import Logger as log
 from process.globals import config
 
 class Connection(object):
@@ -18,7 +19,10 @@
         cursor = self.db_conn.cursor(cursorclass=Dbi.cursors.DictCursor)
 
         if self.debug:
-            print sql, params
+            if params:
+                log.debug(str(sql) + " % " + repr(params))
+            else:
+                log.debug(str(sql))
 
         if params:
             cursor.execute(sql, params)
diff --git a/dedupe/README b/dedupe/README
index 453d27c..f624cd4 100644
--- a/dedupe/README
+++ b/dedupe/README
@@ -1 +1 @@
-pip install python-Levenshtein MySQL-python
+apt-get install python-yaml python-Levenshtein python-mysqldb
diff --git a/dedupe/contact_cache.py b/dedupe/contact_cache.py
index 8fc353d..6475875 100644
--- a/dedupe/contact_cache.py
+++ b/dedupe/contact_cache.py
@@ -1,5 +1,6 @@
 '''Optimized retrieval and in-memory storage of a small amount of information across many contacts.'''
 
+from process.logging import Logger as log
 from process.globals import config
 from database import db
 
@@ -75,6 +76,7 @@
 
     def buildQuery(self):
         query = super(PagedGroup, self).buildQuery()
+        log.info("Limiting batch contact retrieval to {num} records.".format(num=self.pagesize))
         query.limit = self.pagesize
         query.offset = self.offset
         return query
diff --git a/dedupe/quick_autoreview.py b/dedupe/quick_autoreview.py
index 2e8fa6a..7f75ac0 100755
--- a/dedupe/quick_autoreview.py
+++ b/dedupe/quick_autoreview.py
@@ -2,6 +2,7 @@
 
 '''Find low-hanging dupe fruits and mark them for the manual review queue'''
 
+from process.logging import Logger as log
 from process.globals import load_config
 load_config("dedupe")
 from process.globals import config
@@ -62,7 +63,15 @@
 
             ReviewQueue.tag(contact['id'], QuickAutoreview.QUICK_REVIEWED)
 
+        if not self.contactCache.contacts:
+            log.warn("Searched an empty batch of contacts!")
+        else:
+            last_seen = self.contactCache.contacts[-1]['id']
+            log.info("End of batch.  Last contact scanned was ID {id}".format(id=last_seen))
+
+
 if __name__ == '__main__':
+    log.info("Begin quick_autoreview deduper")
     lock.begin()
 
     job = QuickAutoreview()
@@ -70,3 +79,4 @@
     ReviewQueue.commit()
 
     lock.end()
+    log.info("End quick_autoreview deduper")
diff --git a/dedupe/review_queue.py b/dedupe/review_queue.py
index b71355a..5f721b1 100644
--- a/dedupe/review_queue.py
+++ b/dedupe/review_queue.py
@@ -1,3 +1,4 @@
+from process.logging import Logger as log
 from process.globals import config
 from database import db
 
@@ -7,6 +8,7 @@
 
     @staticmethod
     def addMatch(job_id, oldId, newId, action, match):
+        log.info("Found a match: {old} -> {new} : {match}".format(old=oldId, new=newId, match=match))
         db.get_db(config.drupal_schema).execute("""
             INSERT INTO donor_review_queue
                 SET
@@ -35,7 +37,9 @@
 
     @staticmethod
     def commit():
+        log.info("Committing tags...")
         for tag, contacts in ReviewQueue.cached_tags.items():
+            log.info("Bulk tagging {num} contacts with tag <{tag}>".format(num=len(contacts), tag=tag.name))
             ReviewQueue.tag_many(contacts, tag)
 
     @staticmethod
diff --git a/process/globals.py b/process/globals.py
index 4ae2b04..a89f0a0 100644
--- a/process/globals.py
+++ b/process/globals.py
@@ -2,6 +2,8 @@
 import os.path
 from yaml import safe_load as load_yaml
 
+from process.logging import Logger as log
+
 # n.b. Careful not to import `config` by value
 config = dict()
 
@@ -11,10 +13,10 @@
     search_filenames = [
         os.path.expanduser("~/.fundraising/%s.yaml" % app_name),
         os.path.expanduser("~/.%s.yaml" % app_name),
-        "config.yaml",
+        os.path.dirname(__file__) + "/../%s/config.yaml" % app_name,
         "/etc/fundraising/%s.yaml" % app_name,
         "/etc/%s.yaml" % app_name,
-        "%s.yaml" % app_name,
+        os.path.dirname(__file__) + "/../%s/%s.yaml" % (app_name, app_name,)
     ]
     # TODO: if getops.get(--config/-f): search_filenames.append
 
@@ -22,6 +24,7 @@
         if not os.path.exists(filename):
             continue
 
+        log.info("Found config file {path}, loading...".format(path=filename))
         config = DictAsAttrDict(load_yaml(file(filename, 'r')))
 
         return
diff --git a/process/lock.py b/process/lock.py
index 1e69ead..da0eef5 100644
--- a/process/lock.py
+++ b/process/lock.py
@@ -6,6 +6,8 @@
 import os, os.path
 import sys
 
+from logging import Logger as log
+
 lockfile = None
 
 def begin(filename=None, failopen=False):
@@ -15,7 +17,7 @@
         filename = "/tmp/%s-%s.lock" % (unique, cmd)
 
     if os.path.exists(filename):
-        print "Lockfile found!"
+        log.warn("Lockfile found!")
         f = open(filename, "r")
         pid = None
         try:
@@ -24,18 +26,18 @@
             pass
         f.close()
         if not pid:
-            print "Invalid lockfile contents."
+            log.error("Invalid lockfile contents.")
         else:
             try:
                 os.getpgid(pid)
-                print "Aborting! Previous process (%d) is still alive. Remove lockfile manually if in error: %s" % (pid, filename, )
+                log.error("Aborting! Previous process ({pid}) is still alive. Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
                 sys.exit(1)
             except OSError:
                 if failopen:
-                    print "Aborting until stale lockfile is investigated: %s" % filename
+                    log.fatal("Aborting until stale lockfile is investigated: {path}".format(path=filename))
                     sys.exit(1)
-                print "Lockfile is stale."
-        print "Removing old lockfile."
+                log.error("Lockfile is stale.")
+        log.info("Removing old lockfile.")
         os.unlink(filename)
 
     f = open(filename, "w")
diff --git a/process/logging.py b/process/logging.py
new file mode 100644
index 0000000..4a58b5c
--- /dev/null
+++ b/process/logging.py
@@ -0,0 +1,34 @@
+import sys
+import syslog
+
+class Logger(object):
+    
+    @staticmethod
+    def debug(message):
+        Logger.log(message, syslog.LOG_DEBUG)
+
+    @staticmethod
+    def info(message):
+        Logger.log(message, syslog.LOG_INFO)
+
+    @staticmethod
+    def warn(message):
+        Logger.log(message, syslog.LOG_WARNING)
+
+    @staticmethod
+    def error(message):
+        Logger.log(message, syslog.LOG_ERR)
+
+    @staticmethod
+    def fatal(message):
+        Logger.log(message, syslog.LOG_CRIT)
+        print >>sys.stderr, message
+
+    @staticmethod
+    def log(message, severity):
+        syslog.openlog()
+        syslog.syslog(severity, message)
+        syslog.closelog()
+
+        if sys.stdout.isatty():
+            print(message)

-- 
To view, visit https://gerrit.wikimedia.org/r/91570
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
Gerrit-PatchSet: 3
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Adamw <[email protected]>
Gerrit-Reviewer: Jgreen <[email protected]>
Gerrit-Reviewer: Katie Horn <[email protected]>
Gerrit-Reviewer: Mwalker <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] Log rather than cronspam. Log like bloody hell. - change (wikimedia...tools)

Reply via email to