Nuria has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/335145 )

Change subject: Changes UA string to JSON map
......................................................................

Changes UA string to JSON map

Uses ua_parser to generate a JSON object with properties obtained from
the user agent string. The capsule schema remains unchanged.

Custom code parses the WMF app version

Bug: T153207
Change-Id: I165214a8b12ff573115381ff1d2d0305e8310e93
---
M eventlogging/parse.py
M eventlogging/utils.py
M requirements.txt
M tests/test_parser.py
4 files changed, 62 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/eventlogging 
refs/changes/45/335145/1

diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 98bf758..982cfa8 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -42,6 +42,7 @@
 
 from .compat import json, unquote_plus, uuid5
 from .event import Event
+from .utils import parse_ua
 
 __all__ = (
     'LogParser', 'ncsa_to_unix',
@@ -155,6 +156,9 @@
         event = {k: f(match.group(k)) for f, k in caster_key_pairs}
         event.update(event.pop('capsule'))
         event['uuid'] = capsule_uuid(event)
+        if ('userAgent' in event) and event['userAgent']:
+            parsed_ua = parse_ua(event['userAgent'])
+            event['userAgent'] = parsed_ua
         return Event(event)
 
     def __repr__(self):
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index a0cfa62..55ab8e5 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -12,6 +12,7 @@
 import copy
 import datetime
 import dateutil.parser
+import json
 import logging
 import re
 import os
@@ -20,6 +21,7 @@
 import threading
 import traceback
 import uuid
+from ua_parser import user_agent_parser
 
 from .compat import (
     items, monotonic_clock, urisplit, urlencode, parse_qsl,
@@ -291,3 +293,46 @@
         # Set module logging level to INFO, DEBUG is too noisy.
         logging.getLogger("kafka").setLevel(logging.INFO)
         logging.getLogger("kazoo").setLevel(logging.INFO)
+
+
+def parse_ua(userAgent):
+    """
+    Returns a json string containing the parsed User Agent data
+    from a request's UA string. Uses the following format:
+    {
+        "device_family":"Other",
+        "browser_major":"11",
+        "os_family":"Windows",
+        "os_major":"-",
+        "browser_family":"IE",
+        "os_minor":"-",
+        "wmf_app_version":"-"
+    }
+    Only wmf_app_version is defaulted to "-"; the rest comes from ua_parser.
+
+    App version in user agents is parsed as follows:
+    WikipediaApp/5.3.1.1011 (iOS 10.0.2; Phone)
+    "wmf_app_version":"5.3.1.1011"
+    WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone) Google Play
+    "wmf_app_version":"2.4.160-r-2016-10-14"
+    """
+    parsed_ua = user_agent_parser.Parse(userAgent)
+    formatted_ua = {
+        'device_family': parsed_ua['device']['family'],
+        'browser_major': parsed_ua['user_agent']['major'],
+        'os_family': parsed_ua['os']['family'],
+        'os_major': parsed_ua['os']['major'],
+        'browser_family': parsed_ua['user_agent']['family'],
+        'os_minor': parsed_ua['os']['minor'],
+        'wmf_app_version': '-',
+    }
+    app_prefix = 'WikipediaApp/'
+    # Version is whatever follows the prefix in the token that carries it.
+    for token in userAgent.split():
+        if token.startswith(app_prefix):
+            formatted_ua['wmf_app_version'] = token[len(app_prefix):]
+            break
+
+    # Serialized to a string: the capsule schema types userAgent as string.
+    return json.dumps(formatted_ua)
diff --git a/requirements.txt b/requirements.txt
index 44a567b..fd3d7b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@
 statsd>=3.0
 tornado>=4.0
 sprockets.mixins.statsd>=1.3.1
+ua_parser>=0.7.2
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 8d0c117..5d64b71 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -10,6 +10,7 @@
 
 import calendar
 import datetime
+import json
 import unittest
 
 import eventlogging
@@ -39,7 +40,16 @@
                '2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
                'D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp3022.esa'
                'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
-               'Mozilla/5.0')
+               'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
+               ' Gecko/20100101 Firefox/10.0')
+        ua = json.dumps({
+                'os_minor': None,
+                'os_major': None,
+                'device_family': 'Other',
+                'os_family': 'Linux',
+                'browser_major': '10',
+                'browser_family': 'Firefox'
+            })
         parsed = {
             'uuid': '799341a01ba957c79b15dc4d2d950864',
             'recvFrom': 'cp3022.esams.wikimedia.org',
@@ -49,7 +59,7 @@
             'timestamp': 1358637398,
             'schema': 'Generic',
             'revision': 13,
-            'userAgent': 'Mozilla/5.0',
+            'userAgent': ua,
             'event': {
                 'articleTitle': 'Héctor Elizondo',
                 'articleId': 1

-- 
To view, visit https://gerrit.wikimedia.org/r/335145
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I165214a8b12ff573115381ff1d2d0305e8310e93
Gerrit-PatchSet: 1
Gerrit-Project: eventlogging
Gerrit-Branch: master
Gerrit-Owner: Nuria <nu...@wikimedia.org>
Gerrit-Reviewer: Fdans <fd...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to