Ori.livneh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/60794


Change subject: Script for reporting daily activity on Wikimedia blog
......................................................................

Script for reporting daily activity on Wikimedia blog

This script produces a report of activity on Wikimedia blog for the previous
day, taken as midnight UTC to midnight UTC, and sends it via e-mail.

Change-Id: I7076c8c64fe24692dd83dd10bb1211ad46e7cb31
---
A blogreport.py
1 file changed, 151 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/blog refs/changes/94/60794/1

diff --git a/blogreport.py b/blogreport.py
new file mode 100644
index 0000000..ea74393
--- /dev/null
+++ b/blogreport.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+  Produce a report of daily activity on Wikimedia blog and e-mail it
+
+  Copyright (C) 2013 Wikimedia Foundation
+  Licensed under the GNU Public License, version 2
+
+"""
+import sys
+reload(sys)
+sys.setdefaultencoding('utf-8')
+
+import collections
+import operator
+import os
+import re
+import socket
+import subprocess
+import urlparse
+
+from cStringIO import StringIO
+from datetime import datetime, timedelta
+from email.mime.text import MIMEText
+
+from sqlalchemy import *
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, scoped_session
+
+
+# FIXME: Load configs from a file.
+# All four BLOGREPORT_* variables are required: os.environ[...] raises
+# KeyError when one of them is unset.
+db_url = os.environ['BLOGREPORT_DB']
+email_sender = os.environ['BLOGREPORT_FROM']
+email_recipient = os.environ['BLOGREPORT_TO']
+email_cc = os.environ['BLOGREPORT_CC']
+
+# The current UTC time minus one day; the script formats it with strftime()
+# to obtain the date being reported on.
+yesterday = datetime.utcnow() - timedelta(days=1)
+
+# Declarative base whose metadata is bound to the engine built from db_url.
+Base = declarative_base()
+Base.metadata.bind = create_engine(db_url)
+# Session factory on that same engine, created with autocommit=True.
+Session = sessionmaker(bind=Base.metadata.bind, autocommit=True)
+
+
+def send_email(sender, recipient, subject, text, cc=None):
+    """Send a plain-text e-mail by shelling out to 'sendmail'.
+
+    sender, recipient and subject become the From, To and Subject headers;
+    cc, when not None, becomes the Cc header.  sendmail is invoked with
+    '-t', so it takes the recipients from those headers.
+
+    Returns the exit status of the sendmail process.
+    """
+    # Declare the body as UTF-8: MIMEText defaults to us-ascii, which
+    # mislabels any non-ASCII text (URLs, search queries) in the report.
+    message = MIMEText(text, 'plain', 'utf-8')
+    message['From'] = sender
+    message['To'] = recipient
+    if cc is not None:
+        message['Cc'] = cc
+    message['Subject'] = subject
+    p = subprocess.Popen(('/usr/sbin/sendmail', '-t'), stdin=subprocess.PIPE)
+    # communicate() writes the message, closes stdin and waits for exit.
+    p.communicate(message.as_string().encode('utf8'))
+    return p.returncode
+
+
+class BlogVisit(Base):
+    """ORM class for the blog visit table; columns are loaded via autoload."""
+    __table__ = Table('WikimediaBlogVisit_5308166', Base.metadata,
+                      autoload=True)
+
+session = Session()
+q = session.query(BlogVisit).filter(BlogVisit.webHost == 'blog.wikimedia.org')
+# Select yesterday's rows by matching the YYYYMMDD prefix of the timestamp.
+q = q.filter(BlogVisit.timestamp.startswith(yesterday.strftime('%Y%m%d')))
+
+uniques = set()                      # distinct client IPs
+visits = 0                           # visits kept after the exclusions below
+referrers = collections.Counter()    # referrer URL (or None) -> count
+searches = collections.Counter()     # value of the 's' parameter -> count
+urls = collections.Counter()         # request URL or '(search)' -> count
+ref_domains = collections.Counter()  # referrer hostname -> count
+
+# Compiled once here instead of being looked up on every iteration.
+excluded_re = re.compile(r'[&?]preview=|testblog|\/wp-')
+search_re = re.compile(r'[&?]s=')
+
+for visit in q:
+    # Work on a local copy: assigning to visit.event_requestUrl would mark
+    # the mapped row as modified in the session.
+    url = visit.event_requestUrl
+    # Exclude previews, testblogs, and WP admin pages
+    if excluded_re.search(url):
+        continue
+    # Transform all searches into '(search)'
+    if search_re.search(url):
+        # The pattern can match a URL that contains no '?'; such a URL has
+        # no query string to parse, so only its '(search)' hit is recorded.
+        if '?' in url:
+            params = dict(urlparse.parse_qsl(url.rsplit('?', 1)[1]))
+            # parse_qsl drops blank values, so an empty 's=' counts as ''.
+            searches[params.get('s', '')] += 1
+        url = '(search)'
+    urls[url] += 1
+    visits += 1
+    uniques.add(visit.clientIp)
+    ref = visit.event_referrerUrl
+    if ref is not None:
+        # Take the hostname from the full URL, before the prefix is stripped.
+        domain = urlparse.urlparse(ref).hostname
+        if domain:
+            if domain.startswith('www.'):
+                domain = domain[4:]
+            ref_domains[domain] += 1
+        # Report referrers from the blog itself without the site prefix.
+        if ref.startswith('https://blog.wikimedia.org'):
+            ref = ref[len('https://blog.wikimedia.org'):]
+    # None (no referrer) is counted as well and labelled when reported.
+    referrers[ref] += 1
+
+# First e-mail: overall totals and per-page hit counts.
+body = StringIO()
+
+body.write('Total visits: %d\n' % visits)
+body.write('Unique visitors: %d\n' % len(uniques))
+body.write('\n')
+
+body.write('\n')
+body.write('Pages / hits (ordered by number of hits):\n')
+body.write('=========================================\n')
+# most_common() with no argument lists every entry, highest count first.
+for url, count in urls.most_common():
+    body.write('%s\t%s\n' % (url, count))
+
+# Addresses come from the BLOGREPORT_* environment settings rather than
+# being hard-coded here.
+send_email(
+    email_sender,
+    email_recipient,
+    'Wikimedia blog stats for %s: pageviews' % yesterday.strftime('%Y-%m-%d'),
+    body.getvalue(),
+    email_cc,
+)
+
+body.close()
+# Second e-mail: search queries, referring domains and referrer URLs.
+body = StringIO()
+
+body.write('Search queries / count (sorted by number of queries):\n')
+body.write('=====================================================\n')
+# most_common() with no argument lists every entry, highest count first.
+for search, count in searches.most_common():
+    body.write('"%s"\t%s\n' % (search, count))
+
+body.write('\n')
+body.write('Referring domain names / referrals (sorted by number of referrals):\n')
+body.write('===================================================================\n')
+for hostname, count in ref_domains.most_common():
+    body.write('%s\t%s\n' % (hostname, count))
+
+body.write('\n')
+body.write('Referrers / count (sorted alphabetically):\n')
+body.write('==========================================\n')
+# One ascending sort on the URL, as the heading promises.  Sorting the
+# (url, count) pairs a second time with reverse=True listed them in
+# reverse alphabetical order instead.
+for url, count in sorted(referrers.iteritems(), key=operator.itemgetter(0)):
+    if url is None:
+        url = '(no referrer)'
+    body.write('%s\t%s\n' % (url, count))
+
+report_date = yesterday.strftime('%Y-%m-%d')
+send_email(
+    'blogreport@' + socket.getfqdn(),
+    email_recipient,
+    'Wikimedia blog stats for %s: referrers & searches' % report_date,
+    body.getvalue(),
+    email_cc,
+)

-- 
To view, visit https://gerrit.wikimedia.org/r/60794
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7076c8c64fe24692dd83dd10bb1211ad46e7cb31
Gerrit-PatchSet: 1
Gerrit-Project: analytics/blog
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <o...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to