Adamw has uploaded a new change for review.
https://gerrit.wikimedia.org/r/91570
Change subject: Log rather than cronspam. Log like bloody hell.
......................................................................
Log rather than cronspam. Log like bloody hell.
Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
---
M banner_screenshot/config.yaml.example
M banner_screenshot/rasterize.js
M banner_screenshot/shoot_banners
M dedupe/README
M process/lock.py
A process/logging.py
6 files changed, 87 insertions(+), 32 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/tools refs/changes/70/91570/1
diff --git a/banner_screenshot/config.yaml.example b/banner_screenshot/config.yaml.example
index ca3fa56..2e423f9 100644
--- a/banner_screenshot/config.yaml.example
+++ b/banner_screenshot/config.yaml.example
@@ -8,7 +8,5 @@
"/tmp/banner_screenshots"
banner_screenshot_format:
png
-crop_height:
- 500
banner_name_regex:
"^B13_.*_(?P<lang>[a-z]{2})(?P<country>[A-Z0-9]{2})$"
diff --git a/banner_screenshot/rasterize.js b/banner_screenshot/rasterize.js
index 7d608ef..8ce9e58 100644
--- a/banner_screenshot/rasterize.js
+++ b/banner_screenshot/rasterize.js
@@ -19,17 +19,18 @@
page.viewportSize = { width: 1024, height: 728 };
page.open(address, function (status) {
if (status !== 'success') {
- console.log('Unable to load the address!');
+ console.error('Unable to load the address!');
} else {
//console.log(JSON.stringify(phantom.cookies, null, 2));
window.setTimeout(function () {
page.clipRect = page.evaluate(function() {
var cn = $('#centralNotice');
- // FIXME: workaround for broken dropdown banner css, see
FR #1085
+ // workaround for broken banner css
var divHeight = cn.height();
if ( divHeight === 0 ) {
- divHeight = 728;
+ divHeight = page.viewportSize.height;
+ console.log("No height found, using default of " +
divHeight);
}
return {
@@ -39,7 +40,7 @@
height: divHeight
};
});
- console.log(page.clipRect.width + " x " +
page.clipRect.height);
+ console.debug("#centralNotice was " + page.clipRect.width +
"px x " + page.clipRect.height + "px");
page.render(output);
phantom.exit();
}, 1000);
diff --git a/banner_screenshot/shoot_banners b/banner_screenshot/shoot_banners
index ebd6d6d..88d4e7d 100755
--- a/banner_screenshot/shoot_banners
+++ b/banner_screenshot/shoot_banners
@@ -6,26 +6,31 @@
import re
import sys
+from process.logging import Logger as log
from process.globals import load_config
load_config("banners")
-
from process.globals import config
+
from mediawiki.centralnotice.api import get_campaign_logs
from mediawiki.centralnotice.time_util import str_time_offset
from process.lock import begin, end
def reduce_banners(campaign_logs):
- '''Return a map from banner names to most recent campaign settings.'''
+ '''Return a map of banner names, to their most recent campaign settings.'''
+ # TODO: this would be in error if a banner were linked from different
campaigns...
banners = dict()
for entry in campaign_logs:
settings = entry['end']
campaign_banners = settings['banners']
- # we only need one country...
settings['country'] = "US"
if settings['geo'] == "1" and settings['countries']:
+ # FIXME: unfudge country list. The campaign should specify
whether it displays regional variation.
settings['country'] = settings['countries'][0]
+ if len(settings['countries']) > 1:
+ log.debug("Multi-country campaign found, however, we woefully
ignore country-based locale variation.")
+ # XXX why not "not empty campaign_banners"?
if hasattr(campaign_banners, 'keys'):
banners.update(
dict.fromkeys(
@@ -38,34 +43,41 @@
def get_screenshot_path(name, lang):
return os.path.join(
config.banner_screenshots_dir,
- "%(banner)s/%(banner)s_%(lang)s.%(ext)s" % {
- "banner": name,
- "lang": lang,
- "ext": config.banner_screenshot_format,
- }
+ "{banner}", "{banner}_{lang}.{ext}"
+ ).format(
+ banner=name,
+ lang=lang,
+ ext=config.banner_screenshot_format,
)
-def banner_screenshot_exists(name, lang):
- return os.path.exists(get_screenshot_path(name, lang))
-
def render(name, lang, country):
+ '''Render a localized banner to file'''
global JS_RENDER_SCRIPT
- url = config.article_url % { "banner": name, "lang": lang, "country":
country }
+ url = config.article_url % {"banner": name, "lang": lang, "country":
country}
path = get_screenshot_path(name, lang)
+ # TODO: option/default to update
+ if os.path.exists(path):
+ log.info("Banner screenshot already saved to {path}, not
updating.".format(path=path))
+ return
dir = os.path.dirname(path)
if not os.path.exists(dir):
+ log.info("Beginning dumps for banner {banner}".format(banner=name))
os.makedirs(dir)
- print "Fetching " + url + " into " + path
- subprocess.check_call([config.phantomjs, JS_RENDER_SCRIPT, url, path])
+ log.info("Fetching {url} into {path}".format(url=url, path=path))
+ cmd = [config.phantomjs, JS_RENDER_SCRIPT, url, path]
+ buf = subprocess.check_output(cmd)
+ log.info("phantom> {output}".format(output=buf))
def process_banners():
+ log.info("Getting campaigns changed in the last two days...")
banners = reduce_banners(get_campaign_logs(since=str_time_offset(days=-2)))
for name, campaign_settings in banners.items():
country = "US"
m = re.match(config.banner_name_regex, name)
if m:
+ # "yy" means, "all languages", and "YY" all countries.
explicit_lang = m.group('lang')
if explicit_lang != "yy":
campaign_settings['languages'] = [ explicit_lang ]
@@ -73,16 +85,23 @@
if explicit_country != "YY":
campaign_settings['country'] = explicit_country
- for lang in campaign_settings['languages']:
- if not banner_screenshot_exists(name, lang):
- render(name, lang, campaign_settings['country'])
+ log.debug("Parsed banner naming magic to get lang={lang},
country={country}".format(lang=explicit_lang, country=explicit_country))
+ else:
+ log.debug("Unrecognized banner naming magic:
{name}".format(name=name))
+ log.debug("Rendering for languages: {languages}.".format(languages=",
".join(campaign_settings['languages'])))
+ for lang in campaign_settings['languages']:
+ render(name, lang, campaign_settings['country'])
+
+
+# set a magic global
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+JS_RENDER_SCRIPT = os.path.join(__dir__, "rasterize.js")
if __name__ == "__main__":
try:
+ log.info("Beginning banner scrape")
begin()
-
- JS_RENDER_SCRIPT =
os.path.join(os.path.dirname(os.path.abspath(__file__)), "rasterize.js")
if len(sys.argv) > 1:
for name in sys.argv[1:]:
@@ -91,3 +110,4 @@
process_banners()
finally:
end()
+ log.info("Done.")
diff --git a/dedupe/README b/dedupe/README
index 453d27c..f624cd4 100644
--- a/dedupe/README
+++ b/dedupe/README
@@ -1 +1 @@
-pip install python-Levenshtein MySQL-python
+apt-get install python-yaml python-Levenshtein python-mysqldb
diff --git a/process/lock.py b/process/lock.py
index 1e69ead..da0eef5 100644
--- a/process/lock.py
+++ b/process/lock.py
@@ -6,6 +6,8 @@
import os, os.path
import sys
+from logging import Logger as log
+
lockfile = None
def begin(filename=None, failopen=False):
@@ -15,7 +17,7 @@
filename = "/tmp/%s-%s.lock" % (unique, cmd)
if os.path.exists(filename):
- print "Lockfile found!"
+ log.warn("Lockfile found!")
f = open(filename, "r")
pid = None
try:
@@ -24,18 +26,18 @@
pass
f.close()
if not pid:
- print "Invalid lockfile contents."
+ log.error("Invalid lockfile contents.")
else:
try:
os.getpgid(pid)
- print "Aborting! Previous process (%d) is still alive. Remove
lockfile manually if in error: %s" % (pid, filename, )
+ log.error("Aborting! Previous process ({pid}) is still alive.
Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
sys.exit(1)
except OSError:
if failopen:
- print "Aborting until stale lockfile is investigated: %s"
% filename
+ log.fatal("Aborting until stale lockfile is investigated:
{path}".format(path=filename))
sys.exit(1)
- print "Lockfile is stale."
- print "Removing old lockfile."
+ log.error("Lockfile is stale.")
+ log.info("Removing old lockfile.")
os.unlink(filename)
f = open(filename, "w")
diff --git a/process/logging.py b/process/logging.py
new file mode 100644
index 0000000..4a58b5c
--- /dev/null
+++ b/process/logging.py
@@ -0,0 +1,34 @@
+import sys
+import syslog
+
+class Logger(object):
+
+ @staticmethod
+ def debug(message):
+ Logger.log(message, syslog.LOG_DEBUG)
+
+ @staticmethod
+ def info(message):
+ Logger.log(message, syslog.LOG_INFO)
+
+ @staticmethod
+ def warn(message):
+ Logger.log(message, syslog.LOG_WARNING)
+
+ @staticmethod
+ def error(message):
+ Logger.log(message, syslog.LOG_ERR)
+
+ @staticmethod
+ def fatal(message):
+ Logger.log(message, syslog.LOG_CRIT)
+ print >>sys.stderr, message
+
+ @staticmethod
+ def log(message, severity):
+ syslog.openlog()
+ syslog.syslog(severity, message)
+ syslog.closelog()
+
+ if sys.stdout.isatty():
+ print(message)
--
To view, visit https://gerrit.wikimedia.org/r/91570
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Adamw <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits