Mwalker has submitted this change and it was merged.
Change subject: Log rather than cronspam. Log like bloody hell.
......................................................................
Log rather than cronspam. Log like bloody hell.
Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
---
M banner_screenshot/config.yaml.example
M banner_screenshot/rasterize.js
M banner_screenshot/shoot_banners
M database/db.py
M dedupe/README
M dedupe/contact_cache.py
M dedupe/quick_autoreview.py
M dedupe/review_queue.py
M process/globals.py
M process/lock.py
A process/logging.py
11 files changed, 113 insertions(+), 35 deletions(-)
Approvals:
Mwalker: Looks good to me, approved
Jgreen: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/banner_screenshot/config.yaml.example
b/banner_screenshot/config.yaml.example
index ca3fa56..2e423f9 100644
--- a/banner_screenshot/config.yaml.example
+++ b/banner_screenshot/config.yaml.example
@@ -8,7 +8,5 @@
"/tmp/banner_screenshots"
banner_screenshot_format:
png
-crop_height:
- 500
banner_name_regex:
"^B13_.*_(?P<lang>[a-z]{2})(?P<country>[A-Z0-9]{2})$"
diff --git a/banner_screenshot/rasterize.js b/banner_screenshot/rasterize.js
index 7d608ef..4ad4e37 100644
--- a/banner_screenshot/rasterize.js
+++ b/banner_screenshot/rasterize.js
@@ -19,17 +19,18 @@
page.viewportSize = { width: 1024, height: 728 };
page.open(address, function (status) {
if (status !== 'success') {
- console.log('Unable to load the address!');
+ console.error('Unable to load the address!');
} else {
//console.log(JSON.stringify(phantom.cookies, null, 2));
window.setTimeout(function () {
page.clipRect = page.evaluate(function() {
var cn = $('#centralNotice');
- // FIXME: workaround for broken dropdown banner css, see
FR #1085
+ // workaround for broken banner css
var divHeight = cn.height();
if ( divHeight === 0 ) {
divHeight = 728;
+ console.log("No height found, using default of " +
divHeight);
}
return {
@@ -39,7 +40,7 @@
height: divHeight
};
});
- console.log(page.clipRect.width + " x " +
page.clipRect.height);
+ console.debug("#centralNotice was " + page.clipRect.width +
"px x " + page.clipRect.height + "px");
page.render(output);
phantom.exit();
}, 1000);
diff --git a/banner_screenshot/shoot_banners b/banner_screenshot/shoot_banners
index ebd6d6d..0b5990a 100755
--- a/banner_screenshot/shoot_banners
+++ b/banner_screenshot/shoot_banners
@@ -6,26 +6,31 @@
import re
import sys
+from process.logging import Logger as log
from process.globals import load_config
-load_config("banners")
-
+load_config("banner_screenshot")
from process.globals import config
+
from mediawiki.centralnotice.api import get_campaign_logs
from mediawiki.centralnotice.time_util import str_time_offset
from process.lock import begin, end
def reduce_banners(campaign_logs):
- '''Return a map from banner names to most recent campaign settings.'''
+ '''Return a map of banner names, to their most recent campaign settings.'''
+ # TODO: this would be in error if a banner were linked from different
campaigns...
banners = dict()
for entry in campaign_logs:
settings = entry['end']
campaign_banners = settings['banners']
- # we only need one country...
settings['country'] = "US"
if settings['geo'] == "1" and settings['countries']:
+ # FIXME: unfudge country list. The campaign should specify
whether it displays regional variation.
settings['country'] = settings['countries'][0]
+ if len(settings['countries']) > 1:
+ log.debug("Multi-country campaign found, however, we woefully
ignore country-based locale variation.")
+ # XXX why not "not empty campaign_banners"?
if hasattr(campaign_banners, 'keys'):
banners.update(
dict.fromkeys(
@@ -38,34 +43,41 @@
def get_screenshot_path(name, lang):
return os.path.join(
config.banner_screenshots_dir,
- "%(banner)s/%(banner)s_%(lang)s.%(ext)s" % {
- "banner": name,
- "lang": lang,
- "ext": config.banner_screenshot_format,
- }
+ "{banner}", "{banner}_{lang}.{ext}"
+ ).format(
+ banner=name,
+ lang=lang,
+ ext=config.banner_screenshot_format,
)
-def banner_screenshot_exists(name, lang):
- return os.path.exists(get_screenshot_path(name, lang))
-
def render(name, lang, country):
+ '''Render a localized banner to file'''
global JS_RENDER_SCRIPT
- url = config.article_url % { "banner": name, "lang": lang, "country":
country }
+ url = config.article_url % {"banner": name, "lang": lang, "country":
country}
path = get_screenshot_path(name, lang)
+ # TODO: option/default to update
+ if os.path.exists(path):
+ log.info("Banner screenshot already saved to {path}, not
updating.".format(path=path))
+ return
dir = os.path.dirname(path)
if not os.path.exists(dir):
+ log.info("Beginning dumps for banner {banner}".format(banner=name))
os.makedirs(dir)
- print "Fetching " + url + " into " + path
- subprocess.check_call([config.phantomjs, JS_RENDER_SCRIPT, url, path])
+ log.info("Fetching {url} into {path}".format(url=url, path=path))
+ cmd = [config.phantomjs, JS_RENDER_SCRIPT, url, path]
+ buf = subprocess.check_output(cmd)
+ log.info("phantom> {output}".format(output=buf))
def process_banners():
+ log.info("Getting campaigns changed in the last two days...")
banners = reduce_banners(get_campaign_logs(since=str_time_offset(days=-2)))
for name, campaign_settings in banners.items():
country = "US"
m = re.match(config.banner_name_regex, name)
if m:
+ # "yy" means, "all languages", and "YY" all countries.
explicit_lang = m.group('lang')
if explicit_lang != "yy":
campaign_settings['languages'] = [ explicit_lang ]
@@ -73,16 +85,23 @@
if explicit_country != "YY":
campaign_settings['country'] = explicit_country
- for lang in campaign_settings['languages']:
- if not banner_screenshot_exists(name, lang):
- render(name, lang, campaign_settings['country'])
+ log.debug("Parsed banner naming magic to get lang={lang},
country={country}".format(lang=explicit_lang, country=explicit_country))
+ else:
+ log.debug("Unrecognized banner naming magic:
{name}".format(name=name))
+ log.debug("Rendering for languages: {languages}.".format(languages=",
".join(campaign_settings['languages'])))
+ for lang in campaign_settings['languages']:
+ render(name, lang, campaign_settings['country'])
+
+
+# set a magic global
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+JS_RENDER_SCRIPT = os.path.join(__dir__, "rasterize.js")
if __name__ == "__main__":
try:
+ log.info("Beginning banner scrape")
begin()
-
- JS_RENDER_SCRIPT =
os.path.join(os.path.dirname(os.path.abspath(__file__)), "rasterize.js")
if len(sys.argv) > 1:
for name in sys.argv[1:]:
@@ -91,3 +110,4 @@
process_banners()
finally:
end()
+ log.info("Done.")
diff --git a/database/db.py b/database/db.py
index 9db6663..02616b3 100644
--- a/database/db.py
+++ b/database/db.py
@@ -4,6 +4,7 @@
import MySQLdb as Dbi
import atexit
+from process.logging import Logger as log
from process.globals import config
class Connection(object):
@@ -18,7 +19,10 @@
cursor = self.db_conn.cursor(cursorclass=Dbi.cursors.DictCursor)
if self.debug:
- print sql, params
+ if params:
+ log.debug(str(sql) + " % " + repr(params))
+ else:
+ log.debug(str(sql))
if params:
cursor.execute(sql, params)
diff --git a/dedupe/README b/dedupe/README
index 453d27c..f624cd4 100644
--- a/dedupe/README
+++ b/dedupe/README
@@ -1 +1 @@
-pip install python-Levenshtein MySQL-python
+apt-get install python-yaml python-Levenshtein python-mysqldb
diff --git a/dedupe/contact_cache.py b/dedupe/contact_cache.py
index 8fc353d..6475875 100644
--- a/dedupe/contact_cache.py
+++ b/dedupe/contact_cache.py
@@ -1,5 +1,6 @@
'''Optimized retrieval and in-memory storage of a small amount of information
across many contacts.'''
+from process.logging import Logger as log
from process.globals import config
from database import db
@@ -75,6 +76,7 @@
def buildQuery(self):
query = super(PagedGroup, self).buildQuery()
+ log.info("Limiting batch contact retrieval to {num}
records.".format(num=self.pagesize))
query.limit = self.pagesize
query.offset = self.offset
return query
diff --git a/dedupe/quick_autoreview.py b/dedupe/quick_autoreview.py
index 2e8fa6a..7f75ac0 100755
--- a/dedupe/quick_autoreview.py
+++ b/dedupe/quick_autoreview.py
@@ -2,6 +2,7 @@
'''Find low-hanging dupe fruits and mark them for the manual review queue'''
+from process.logging import Logger as log
from process.globals import load_config
load_config("dedupe")
from process.globals import config
@@ -62,7 +63,15 @@
ReviewQueue.tag(contact['id'], QuickAutoreview.QUICK_REVIEWED)
+ if not self.contactCache.contacts:
+ log.warn("Searched an empty batch of contacts!")
+ else:
+ last_seen = self.contactCache.contacts[-1]['id']
+ log.info("End of batch. Last contact scanned was ID
{id}".format(id=last_seen))
+
+
if __name__ == '__main__':
+ log.info("Begin quick_autoreview deduper")
lock.begin()
job = QuickAutoreview()
@@ -70,3 +79,4 @@
ReviewQueue.commit()
lock.end()
+ log.info("End quick_autoreview deduper")
diff --git a/dedupe/review_queue.py b/dedupe/review_queue.py
index b71355a..5f721b1 100644
--- a/dedupe/review_queue.py
+++ b/dedupe/review_queue.py
@@ -1,3 +1,4 @@
+from process.logging import Logger as log
from process.globals import config
from database import db
@@ -7,6 +8,7 @@
@staticmethod
def addMatch(job_id, oldId, newId, action, match):
+ log.info("Found a match: {old} -> {new} : {match}".format(old=oldId,
new=newId, match=match))
db.get_db(config.drupal_schema).execute("""
INSERT INTO donor_review_queue
SET
@@ -35,7 +37,9 @@
@staticmethod
def commit():
+ log.info("Committing tags...")
for tag, contacts in ReviewQueue.cached_tags.items():
+ log.info("Bulk tagging {num} contacts with tag
<{tag}>".format(num=len(contacts), tag=tag.name))
ReviewQueue.tag_many(contacts, tag)
@staticmethod
diff --git a/process/globals.py b/process/globals.py
index 4ae2b04..a89f0a0 100644
--- a/process/globals.py
+++ b/process/globals.py
@@ -2,6 +2,8 @@
import os.path
from yaml import safe_load as load_yaml
+from process.logging import Logger as log
+
# n.b. Careful not to import `config` by value
config = dict()
@@ -11,10 +13,10 @@
search_filenames = [
os.path.expanduser("~/.fundraising/%s.yaml" % app_name),
os.path.expanduser("~/.%s.yaml" % app_name),
- "config.yaml",
+ os.path.dirname(__file__) + "/../%s/config.yaml" % app_name,
"/etc/fundraising/%s.yaml" % app_name,
"/etc/%s.yaml" % app_name,
- "%s.yaml" % app_name,
+ os.path.dirname(__file__) + "/../%s/%s.yaml" % (app_name, app_name,)
]
# TODO: if getops.get(--config/-f): search_filenames.append
@@ -22,6 +24,7 @@
if not os.path.exists(filename):
continue
+ log.info("Found config file {path}, loading...".format(path=filename))
config = DictAsAttrDict(load_yaml(file(filename, 'r')))
return
diff --git a/process/lock.py b/process/lock.py
index 1e69ead..da0eef5 100644
--- a/process/lock.py
+++ b/process/lock.py
@@ -6,6 +6,8 @@
import os, os.path
import sys
+from logging import Logger as log
+
lockfile = None
def begin(filename=None, failopen=False):
@@ -15,7 +17,7 @@
filename = "/tmp/%s-%s.lock" % (unique, cmd)
if os.path.exists(filename):
- print "Lockfile found!"
+ log.warn("Lockfile found!")
f = open(filename, "r")
pid = None
try:
@@ -24,18 +26,18 @@
pass
f.close()
if not pid:
- print "Invalid lockfile contents."
+ log.error("Invalid lockfile contents.")
else:
try:
os.getpgid(pid)
- print "Aborting! Previous process (%d) is still alive. Remove
lockfile manually if in error: %s" % (pid, filename, )
+ log.error("Aborting! Previous process ({pid}) is still alive.
Remove lockfile manually if in error: {path}".format(pid=pid, path=filename))
sys.exit(1)
except OSError:
if failopen:
- print "Aborting until stale lockfile is investigated: %s"
% filename
+ log.fatal("Aborting until stale lockfile is investigated:
{path}".format(path=filename))
sys.exit(1)
- print "Lockfile is stale."
- print "Removing old lockfile."
+ log.error("Lockfile is stale.")
+ log.info("Removing old lockfile.")
os.unlink(filename)
f = open(filename, "w")
diff --git a/process/logging.py b/process/logging.py
new file mode 100644
index 0000000..4a58b5c
--- /dev/null
+++ b/process/logging.py
@@ -0,0 +1,34 @@
+import sys
+import syslog
+
+class Logger(object):
+
+ @staticmethod
+ def debug(message):
+ Logger.log(message, syslog.LOG_DEBUG)
+
+ @staticmethod
+ def info(message):
+ Logger.log(message, syslog.LOG_INFO)
+
+ @staticmethod
+ def warn(message):
+ Logger.log(message, syslog.LOG_WARNING)
+
+ @staticmethod
+ def error(message):
+ Logger.log(message, syslog.LOG_ERR)
+
+ @staticmethod
+ def fatal(message):
+ Logger.log(message, syslog.LOG_CRIT)
+ print >>sys.stderr, message
+
+ @staticmethod
+ def log(message, severity):
+ syslog.openlog()
+ syslog.syslog(severity, message)
+ syslog.closelog()
+
+ if sys.stdout.isatty():
+ print(message)
--
To view, visit https://gerrit.wikimedia.org/r/91570
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3b406f677ed22b655ba53389e80b0121bace2ee9
Gerrit-PatchSet: 3
Gerrit-Project: wikimedia/fundraising/tools
Gerrit-Branch: master
Gerrit-Owner: Adamw <[email protected]>
Gerrit-Reviewer: Jgreen <[email protected]>
Gerrit-Reviewer: Katie Horn <[email protected]>
Gerrit-Reviewer: Mwalker <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits