Ori.livneh has submitted this change and it was merged.
Change subject: Script for reporting daily activity on Wikimedia blog
......................................................................
Script for reporting daily activity on Wikimedia blog
This script produces a report of activity on Wikimedia blog for the previous
day, taken as midnight UTC to midnight UTC, and sends it via e-mail.
Change-Id: I7076c8c64fe24692dd83dd10bb1211ad46e7cb31
---
A blogreport.py
1 file changed, 151 insertions(+), 0 deletions(-)
Approvals:
Ori.livneh: Verified; Looks good to me, approved
diff --git a/blogreport.py b/blogreport.py
new file mode 100644
index 0000000..ea74393
--- /dev/null
+++ b/blogreport.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ Produce a report of daily activity on Wikimedia blog and e-mail it
+
+ Copyright (C) 2013 Wikimedia Foundation
+ Licensed under the GNU Public License, version 2
+
+"""
+import sys
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+import collections
+import operator
+import os
+import re
+import socket
+import subprocess
+import urlparse
+
+from cStringIO import StringIO
+from datetime import datetime, timedelta
+from email.mime.text import MIMEText
+
+from sqlalchemy import *
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, scoped_session
+
+
+# FIXME: Load configs from a file.
+# All four settings are mandatory: a missing environment variable raises
+# KeyError as soon as the script starts.
+db_url, email_sender, email_recipient, email_cc = (
+    os.environ[name] for name in (
+        'BLOGREPORT_DB',
+        'BLOGREPORT_FROM',
+        'BLOGREPORT_TO',
+        'BLOGREPORT_CC',
+    )
+)
+
+# One day before the current UTC time; formatted later to pick the day
+# being reported on.
+yesterday = datetime.utcnow() - timedelta(days=1)
+
+# Declarative base bound to the engine built from db_url; sessions share
+# that same engine.
+Base = declarative_base()
+Base.metadata.bind = create_engine(db_url)
+Session = sessionmaker(autocommit=True, bind=Base.metadata.bind)
+
+
+def send_email(sender, recipient, subject, text, cc=None):
+    """Send a plain-text e-mail by shelling out to 'sendmail'.
+
+    :param sender: value for the From header.
+    :param recipient: value for the To header.
+    :param subject: value for the Subject header.
+    :param text: message body, expected to be UTF-8 encoded.
+    :param cc: optional value for the Cc header; omitted when None.
+
+    Nothing is returned. A non-zero sendmail exit status is reported on
+    stderr but does not raise.
+    """
+    # Declare the charset explicitly: the MIMEText default is us-ascii,
+    # which mislabels any non-ASCII URL or search query in the report.
+    message = MIMEText(text, 'plain', 'utf-8')
+    message['From'] = sender
+    message['To'] = recipient
+    if cc is not None:
+        message['Cc'] = cc
+    message['Subject'] = subject
+    p = subprocess.Popen(('/usr/sbin/sendmail', '-t'), stdin=subprocess.PIPE)
+    p.communicate(message.as_string().encode('utf8'))
+    if p.returncode != 0:
+        # Surface the failure instead of dropping it silently, but keep
+        # sending best-effort so one failed mail does not abort the run.
+        sys.stderr.write('sendmail exited with status %d\n' % p.returncode)
+
+
+class BlogVisit(Base):
+    """ORM mapping for the 'WikimediaBlogVisit_5308166' table.
+
+    No columns are declared here; the table definition is loaded from the
+    database bound to Base.metadata (autoload=True).
+    """
+    __table__ = Table(
+        'WikimediaBlogVisit_5308166',
+        Base.metadata,
+        autoload=True,
+    )
+
+# Select yesterday's visits to blog.wikimedia.org. Rows are matched on the
+# 'YYYYMMDD' prefix of their timestamp column.
+session = Session()
+q = session.query(BlogVisit).filter(BlogVisit.webHost == 'blog.wikimedia.org')
+q = q.filter(BlogVisit.timestamp.startswith(yesterday.strftime('%Y%m%d')))
+
+# Tallies filled in by the loop below and consumed by the reports.
+uniques = set()                       # distinct client IPs
+visits = 0                            # counted page views
+referrers = collections.Counter()     # referrer URL (or None) -> hits
+searches = collections.Counter()      # search query -> hits
+urls = collections.Counter()          # requested URL -> hits
+ref_domains = collections.Counter()   # referrer hostname -> hits
+
+# Compiled once instead of on every iteration.
+excluded_re = re.compile(r'[&?]preview=|testblog|\/wp-')
+search_re = re.compile(r'[&?]s=')
+blog_prefix = 'https://blog.wikimedia.org'
+
+for visit in q:
+    url = visit.event_requestUrl
+    # Exclude previews, testblogs, and WP admin pages
+    if excluded_re.search(url):
+        continue
+    # Transform all searches into '(search)'
+    if search_re.search(url):
+        try:
+            query_string = url.rsplit('?', 1)[1]
+            search = dict(urlparse.parse_qsl(query_string)).pop('s', '')
+            searches[search] += 1
+        except Exception:
+            # Best effort: a URL whose query string cannot be extracted
+            # (e.g. it has no '?') still counts as a search page view.
+            pass
+        # Rebind the local name rather than assigning to the mapped
+        # attribute, so the ORM object is never marked as modified.
+        url = '(search)'
+    urls[url] += 1
+    visits += 1
+    uniques.add(visit.clientIp)
+    ref = visit.event_referrerUrl
+    if ref is not None:
+        # Shorten internal referrers to the part after the blog prefix.
+        if ref.startswith(blog_prefix):
+            ref = ref[len(blog_prefix):]
+        # The domain is always taken from the full, unshortened referrer.
+        domain = urlparse.urlparse(visit.event_referrerUrl).hostname
+        if domain:
+            if domain.startswith('www.'):
+                domain = domain[4:]
+            ref_domains[domain] += 1
+    # Visits without a referrer are tallied under the None key.
+    referrers[ref] += 1
+
+# First e-mail: overall totals followed by per-page hit counts.
+body = StringIO()
+
+body.write('Total visits: %d\n' % visits)
+body.write('Unique visitors: %d\n' % len(uniques))
+body.write('\n')
+
+body.write('\n')
+body.write('Pages / hits (ordered by number of hits):\n')
+body.write('=========================================\n')
+for url, count in sorted(urls.iteritems(), key=operator.itemgetter(1),
+                         reverse=True):
+    body.write('%s\t%s\n' % (url, count))
+
+# Rewind so that read() returns everything written above.
+body.seek(0)
+
+# Addresses come from the BLOGREPORT_FROM / BLOGREPORT_TO / BLOGREPORT_CC
+# settings loaded at the top of the script rather than being hard-coded.
+send_email(
+    email_sender,
+    email_recipient,
+    'Wikimedia blog stats for %s: pageviews' % yesterday.strftime('%Y-%m-%d'),
+    body.read(),
+    email_cc,
+)
+
+body.close()
+# Second e-mail: search queries, referring domains and referrer URLs.
+body = StringIO()
+
+body.write('Search queries / count (sorted by number of queries):\n')
+body.write('=====================================================\n')
+for search, count in sorted(searches.iteritems(),
+                            key=operator.itemgetter(1), reverse=True):
+    body.write('"%s"\t%s\n' % (search, count))
+
+body.write('\n')
+body.write('Referring domain names / referrals (sorted by number of referrals):\n')
+body.write('===================================================================\n')
+for hostname, count in sorted(ref_domains.iteritems(),
+                              key=operator.itemgetter(1), reverse=True):
+    body.write('%s\t%s\n' % (hostname, count))
+
+body.write('\n')
+body.write('Referrers / count (sorted alphabetically):\n')
+body.write('==========================================\n')
+# Ascending sort on the URL so the listing matches its heading.
+for url, count in sorted(referrers.iteritems(), key=operator.itemgetter(0)):
+    # Visits that carried no referrer are stored under the None key.
+    if url is None:
+        url = '(no referrer)'
+    body.write('%s\t%s\n' % (url, count))
+
+# Rewind so that read() returns everything written above.
+body.seek(0)
+
+send_email(
+    'blogreport@' + socket.getfqdn(),
+    email_recipient,
+    'Wikimedia blog stats for %s: referrers & searches' %
+    yesterday.strftime('%Y-%m-%d'),
+    body.read(),
+    email_cc,
+)
--
To view, visit https://gerrit.wikimedia.org/r/60794
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7076c8c64fe24692dd83dd10bb1211ad46e7cb31
Gerrit-PatchSet: 1
Gerrit-Project: analytics/blog
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <[email protected]>
Gerrit-Reviewer: HaeB <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits