Ori.livneh has uploaded a new change for review. https://gerrit.wikimedia.org/r/60794
Change subject: Script for reporting daily activity on Wikimedia blog ...................................................................... Script for reporting daily activity on Wikimedia blog This script produces a report of activity on Wikimedia blog for the previous day, taken as midnight UTC to midnight UTC, and sends it via e-mail. Change-Id: I7076c8c64fe24692dd83dd10bb1211ad46e7cb31 --- A blogreport.py 1 file changed, 151 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/analytics/blog refs/changes/94/60794/1 diff --git a/blogreport.py b/blogreport.py new file mode 100644 index 0000000..ea74393 --- /dev/null +++ b/blogreport.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Produce a report of daily activity on Wikimedia blog and e-mail it + + Copyright (C) 2013 Wikimedia Foundation + Licensed under the GNU Public License, version 2 + +""" +import sys +reload(sys) +sys.setdefaultencoding('utf-8') + +import collections +import operator +import os +import re +import socket +import subprocess +import urlparse + +from cStringIO import StringIO +from datetime import datetime, timedelta +from email.mime.text import MIMEText + +from sqlalchemy import * +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import sessionmaker, scoped_session + + +# FIXME: Load configs from a file. 
# Runtime configuration, read from the environment (a missing variable
# raises KeyError at start-up).
db_url = os.environ['BLOGREPORT_DB']
email_sender = os.environ['BLOGREPORT_FROM']
email_recipient = os.environ['BLOGREPORT_TO']
email_cc = os.environ['BLOGREPORT_CC']

# The report covers the previous day, selected by its UTC date prefix.
yesterday = datetime.utcnow() - timedelta(days=1)

Base = declarative_base()
Base.metadata.bind = create_engine(db_url)
Session = sessionmaker(bind=Base.metadata.bind, autocommit=True)

# Referrers on the blog itself are shortened to their path.
BLOG_ORIGIN = 'https://blog.wikimedia.org'
# Previews, test blogs and WordPress admin pages are not counted.
EXCLUDED_URL_RE = re.compile(r'[&?]preview=|testblog|\/wp-')
# Any request carrying an 's' query parameter is treated as a site search.
SEARCH_URL_RE = re.compile(r'[&?]s=')


def send_email(sender, recipient, subject, text, cc=None):
    """Send a plain-text e-mail by shelling out to 'sendmail'.

    sender, recipient and subject populate the From, To and Subject
    headers; text is the message body. cc, when not None, populates the
    Cc header. The exit status of sendmail is not checked.
    """
    message = MIMEText(text)
    message['From'] = sender
    message['To'] = recipient
    if cc is not None:
        message['Cc'] = cc
    message['Subject'] = subject
    # With '-t', sendmail takes the recipients from the message headers.
    p = subprocess.Popen(('/usr/sbin/sendmail', '-t'), stdin=subprocess.PIPE)
    p.communicate(message.as_string().encode('utf8'))


class BlogVisit(Base):
    """ORM mapping of the blog visit table; columns are reflected from the database."""
    __table__ = Table('WikimediaBlogVisit_5308166', Base.metadata, autoload=True)


session = Session()
q = session.query(BlogVisit).filter(BlogVisit.webHost == 'blog.wikimedia.org')
q = q.filter(BlogVisit.timestamp.startswith(yesterday.strftime('%Y%m%d')))

uniques = set()
visits = 0
referrers = collections.Counter()
searches = collections.Counter()
urls = collections.Counter()
ref_domains = collections.Counter()

for visit in q:
    # Work on a local copy: assigning to the mapped attribute would mark the
    # row as modified in the session.
    request_url = visit.event_requestUrl
    if EXCLUDED_URL_RE.search(request_url):
        continue
    # Collapse all searches into a single '(search)' page entry.
    if SEARCH_URL_RE.search(request_url):
        try:
            query_string = request_url.rsplit('?', 1)[1]
            search = dict(urlparse.parse_qsl(query_string)).get('s', '')
        except (IndexError, ValueError):
            # No '?' in the URL, or an undecodable query string: the hit is
            # still counted as a search page, but no query term is recorded.
            pass
        else:
            searches[search] += 1
        request_url = '(search)'
    urls[request_url] += 1
    visits += 1
    uniques.add(visit.clientIp)
    ref = visit.event_referrerUrl
    if ref is not None:
        if ref.startswith(BLOG_ORIGIN):
            ref = ref[len(BLOG_ORIGIN):]
        domain = urlparse.urlparse(visit.event_referrerUrl).hostname
        if domain:
            if domain.startswith('www.'):
                domain = domain[4:]
            ref_domains[domain] += 1
    # Visits without a referrer are tallied under the None key.
    referrers[ref] += 1

report_date = yesterday.strftime('%Y-%m-%d')

# First e-mail: page views.
body = StringIO()

body.write('Total visits: %d\n' % visits)
body.write('Unique visitors: %d\n' % len(uniques))
body.write('\n')

body.write('\n')
body.write('Pages / hits (ordered by number of hits):\n')
body.write('=========================================\n')
for url, count in urls.most_common():
    body.write('%s\t%s\n' % (url, count))

# Addresses come from the environment configuration rather than being
# hard-coded, so both reports go through the same settings.
send_email(
    email_sender,
    email_recipient,
    'Wikimedia blog stats for %s: pageviews' % report_date,
    body.getvalue(),
    email_cc,
)

body.close()

# Second e-mail: searches and referrers.
body = StringIO()

body.write('Search queries / count (sorted by number of queries):\n')
body.write('=====================================================\n')
for search, count in searches.most_common():
    body.write('"%s"\t%s\n' % (search, count))

body.write('\n')
body.write('Referring domain names / referrals (sorted by number of referrals):\n')
body.write('===================================================================\n')
for hostname, count in ref_domains.most_common():
    body.write('%s\t%s\n' % (hostname, count))

body.write('\n')
body.write('Referrers / count (sorted alphabetically):\n')
body.write('==========================================\n')
# Ascending order, as the heading promises; the None key (no referrer)
# sorts as an empty string and is therefore listed first.
for url, count in sorted(referrers.items(), key=lambda item: item[0] or ''):
    if url is None:
        url = '(no referrer)'
    body.write('%s\t%s\n' % (url, count))

send_email(
    email_sender,
    email_recipient,
    'Wikimedia blog stats for %s: referrers & searches' % report_date,
    body.getvalue(),
    email_cc,
)

body.close()

# (end of patch; e-mail footer follows and continues on the next line)
# -- To view, visit https://gerrit.wikimedia.org/r/60794 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id:
I7076c8c64fe24692dd83dd10bb1211ad46e7cb31 Gerrit-PatchSet: 1 Gerrit-Project: analytics/blog Gerrit-Branch: master Gerrit-Owner: Ori.livneh <o...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits