Ottomata has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/388255 )
Change subject: Resolve EventCapsule / MySQL schema discrepancies
......................................................................
Resolve EventCapsule / MySQL schema discrepancies
- Add map:// reader and writer to take in custom map/filter function
- find_function can now find functions in globals()
- LogParser now supports %D format for ISO-8601 `dt` field
- LogParser now supports %u format for parsed userAgent object
- `dt` used in uuid5, rather than `timestamp`
- EventCapsule bumped to revision 17397982, which changes:
-- Adds `dt` optional string
-- `timestamp` is now optional and just a number type;
utc-milliseconds type has been removed in jsonschema draft 4
-- `userAgent` is now either an object or string, rather than a string.
(Actually, it is type: any, because EventLogging extension does not
allow adding union types in wiki schema editor, even though
JSONSchema draft 3 supports it.)
- object properties schemas are only used if the field is definitely
an 'object' type, not one of possible types
- MediaWikiTimestamp serializer in jrm.py supports incoming integer or
string timestamps
In WMF Analytics production, this change is intended to be used as follows.
- mysql_mapper functions added as eventlogging plugin
- eventlogging-processor format changed to use %D for `dt` instead of
`timestamp`
- eventlogging-processor format changed to use %u for parsed `userAgent`
nested object
This will make `dt` the main time field, and remove the `timestamp`
field in all downstream events.
- eventlogging-consumer mysql writer modified to add integer `timestamp`,
remove `dt`, filter out bots and convert `userAgent` to string using
map:// with mysql_mapper function, e.g.
kafka://... map://kafka://...&function=mysql_mapper
This will make events headed to MySQL conform to existing schemas there.
Bug: T179625
Change-Id: I23263b93f119274cb5b75955f61edb76de2bbdda
---
M bin/eventlogging-processor
M eventlogging/handlers.py
M eventlogging/jrm.py
M eventlogging/parse.py
M eventlogging/schema.py
M eventlogging/utils.py
M tests/fixtures.py
M tests/test_event.py
M tests/test_parser.py
M tests/test_schema.py
M tests/test_utils.py
M tox.ini
12 files changed, 220 insertions(+), 88 deletions(-)
Approvals:
Ottomata: Looks good to me, approved
jenkins-bot: Verified
diff --git a/bin/eventlogging-processor b/bin/eventlogging-processor
index dd0c420..686bbae 100755
--- a/bin/eventlogging-processor
+++ b/bin/eventlogging-processor
@@ -21,7 +21,9 @@
formatters:
%j JSON object
%q Query-string-encoded JSON
- %t Timestamp in NCSA format.
+ %D Timestamp in ISO-8601 format.
+ %t Timestamp in NCSA format -> integer timestamp
+ %u userAgent string to be parsed into an object
%o Omit/Ignore the value
:copyright: (c) 2012 by Ori Livneh <[email protected]>
diff --git a/eventlogging/handlers.py b/eventlogging/handlers.py
index 3f3d3c6..5e00fdd 100644
--- a/eventlogging/handlers.py
+++ b/eventlogging/handlers.py
@@ -27,7 +27,7 @@
from .compat import items, json
from .event import Event
-from .factory import writes, reads, get_reader
+from .factory import writes, reads, get_reader, get_writer
from .streams import stream, pub_socket, sub_socket, udp_socket
from .jrm import store_sql_events
from .topic import TopicNotFound
@@ -70,6 +70,8 @@
Arguments:
*function_name (str): name of the function specified in url params
"""
+ if function_name in globals():
+ return globals()[function_name]
function = plugin_functions.get(function_name)
if not function:
raise NotImplementedError(
@@ -479,6 +481,7 @@
try:
while True:
event = (yield)
+
# Group the event stream by schema (and revision)
scid = event.scid()
try:
@@ -600,6 +603,36 @@
else:
sock.sendto(json.dumps(event), (hostname, port))
+
+@writes('map')
+def map_writer(uri, function):
+ """
+ Receives events and runs a map function on them.
+ The map function is specified in the url's parameters.
+ It should either return a new mapped event, or None if you want
+ to exclude (filter) that event from the stream.
+
+ Arguments:
+ *uri: a writer uri, with a custom "map" scheme, e.g.
+ map://{kafka_uri}?function={map_function}
+ *function (str): name of the map function as given by the
+ url's parameters
+ """
+ if not callable(function):
+ function = find_function(function)
+
+ # Remove 'map://'' and 'function' query arg from uri.
+ writer_uri = uri_delete_query_item(uri.replace("map://", ""), 'function')
+
+ writer = get_writer(writer_uri)
+
+ while True:
+ event = (yield)
+ mapped_event = function(event)
+ if mapped_event is not None:
+ writer.send(mapped_event)
+
+
#
# Readers
#
@@ -625,6 +658,9 @@
return stream(udp_socket(hostname, port), raw)
+# TODO: Remove this in favor of the more versitile
+# map:// reader/writer after it is no longer used in
+# production. T179625
@reads('filter')
def filtered_reader(uri, function):
"""
@@ -642,6 +678,32 @@
return (e for e in get_reader(reader_uri) if filter_function(e))
+@reads('map')
+def map_reader(uri, function):
+ """
+ Receives events and runs a map function on them.
+ The map function is specified in the url's parameters.
+ It should either return a new mapped event, or None if you want
+ to exclude (filter) that event from the stream.
+
+ Arguments:
+ *uri: a reader uri, with a custom "map" scheme, e.g.
+ map://{kafka_uri}?function={map_function}
+ *function (str): name of the map function as given by the
+ url's parameters
+ """
+ if not callable(function):
+ function = find_function(function)
+
+ # Remove 'map://'' and 'function' query arg from uri.
+ reader_uri = uri_delete_query_item(uri.replace("map://", ""), 'function')
+
+ # Apply map function to each item in the stream to create a new stream.
+ new_stream = (function(e) for e in get_reader(reader_uri))
+ # Return a new stream with Nones removed.
+ return (e for e in new_stream if e is not None)
+
+
# Can be addressed as default kafka:// handler, and as specific
# kafka client name, kafka-python://
@reads('kafka')
diff --git a/eventlogging/jrm.py b/eventlogging/jrm.py
index c451e9a..9384a94 100644
--- a/eventlogging/jrm.py
+++ b/eventlogging/jrm.py
@@ -91,10 +91,15 @@
def process_bind_param(self, value, dialect=None):
"""Convert an integer timestamp (specifying number of seconds or
miliseconds since UNIX epoch) to MediaWiki timestamp format."""
- if value > 1e12:
- value /= 1000
- value = datetime.datetime.utcfromtimestamp(value).strftime(
- MEDIAWIKI_TIMESTAMP)
+
+ if isinstance(value, str):
+ dt = dateutil.parser.parse(value)
+ else:
+ if value > 1e12:
+ value /= 1000
+ dt = datetime.datetime.utcfromtimestamp(value)
+
+ value = dt.strftime(MEDIAWIKI_TIMESTAMP)
if hasattr(value, 'decode'):
value = value.decode('utf-8')
return value
@@ -162,7 +167,6 @@
'array': {'type_': JsonSerde},
}),
('format', {
- 'utc-millisec': {'type_': MediaWikiTimestamp, 'index': True},
'uuid5-hex': {'type_': sqlalchemy.CHAR(32), 'index': True,
'unique': True},
# Add indexes to datetime fields: T170925
@@ -198,10 +202,14 @@
'id': {
'index': True,
'unique': True
+ },
+ # 'timestamp' should be indexed and trasformed to Mediawiki Timestamp
+ # for backwards compatibility: T179540
+ 'timestamp': {
+ 'type_': MediaWikiTimestamp,
+ 'index': True
}
}
-
-print(mappers)
def typecast(property, name=None):
@@ -213,15 +221,15 @@
:param name: JSONSchema field name (optional)
"""
options = COLUMN_DEFAULTS.copy()
- # jsonschema attribute -> jsonschema attribute value -> sqlalchmey option,
+ # jsonschema attribute -> jsonschema attribute value -> sqlalchemy option,
for attribute, mapping in items(mappers):
# Get the jsonschema attribute value from the jsonschema property
value = property.get(attribute)
# if this attribute's sqlachemy option mapping contains a setting
# for this value, update the options.
- options.update(mapping.get(value, ()))
+ options.update(mapping.get(str(value), ()))
- # field names are the most specific sqlalchmey option,
+ # field names are the most specific sqlalchemy option,
# update the options if the name_mappers
# has a field name key that matches this properties' name.
if name and name in name_mappers:
@@ -385,7 +393,13 @@
and their types from schema."""
key, val = item
if isinstance(val, dict):
- if 'properties' in val:
+ if ('properties' in val and
+ # Only use properties if no type is given
+ # (top level schema) or the type of the field
+ # is explicitly an 'object'. This avoids
+ # using the object properties for object fields
+ # with multiple possible types.
+ ('type' not in val or val['type'] == 'object')):
val = val['properties']
elif 'type' in val:
val = typecast(val, key)
diff --git a/eventlogging/parse.py b/eventlogging/parse.py
index 9e8a382..ebf7093 100644
--- a/eventlogging/parse.py
+++ b/eventlogging/parse.py
@@ -23,6 +23,10 @@
+--------+-----------------------------+
| %t | Timestamp in NCSA format |
+--------+-----------------------------+
+ | %D | Timestamp in ISO-8601 format|
+ +--------+-----------------------------+
+ | %u | User agent to be parsed |
+ +--------+-----------------------------+
| %{..}i | Tab-delimited string |
+--------+-----------------------------+
| %{..}s | Space-delimited string |
@@ -42,7 +46,7 @@
from .compat import json, unquote_plus, uuid5
from .event import Event
-from .utils import parse_ua
+from .utils import parse_ua, iso8601_from_timestamp
__all__ = (
'LogParser', 'ncsa_to_unix',
@@ -57,10 +61,11 @@
# origin hostname, sequence ID, and timestamp. This combination is
# guaranteed to be unique. Example::
#
-# event://vanadium.eqiad.wmnet/?seqId=438763×tamp=1359702955
+# event://cp1054.eqiad.wmnet/?seqId=438763&dt=2013-01-21T18:10:34
#
EVENTLOGGING_URL_FORMAT = (
- 'event://%(recvFrom)s/?seqId=%(seqId)s×tamp=%(timestamp).10s')
+ 'event://%(recvFrom)s/?seqId=%(seqId)s&dt=%(dt)s'
+)
def capsule_uuid(capsule):
@@ -72,10 +77,19 @@
..seealso:: `RFC 4122 <https://www.ietf.org/rfc/rfc4122.txt>`_.
:param capsule: A capsule object (or any dictionary that defines
- `recvFrom`, `seqId`, and `timestamp`).
+ `recvFrom`, `seqId`, and `dt`).
"""
- id = uuid5(uuid.NAMESPACE_URL, EVENTLOGGING_URL_FORMAT % capsule)
+ uuid_fields = {
+ 'recvFrom': capsule.get('recvFrom'),
+ 'seqId': capsule.get('seqId'),
+ # TODO: remove this timestamp default as part of T179625
+ 'dt': capsule.get('dt', iso8601_from_timestamp(
+ capsule.get('timestamp', time.time())
+ ))
+ }
+
+ id = uuid5(uuid.NAMESPACE_URL, EVENTLOGGING_URL_FORMAT % uuid_fields)
return '%032x' % id.int
@@ -110,7 +124,8 @@
"""
self.format = format
- # A mapping of format specifiers to a tuple of (regexp, caster).
+ # A mapping of format specifiers (%d, %i, etc.)
+ # to a tuple of (regexp, caster).
self.format_specifiers = {
'd': (r'(?P<%s>\d+)', int),
'i': (r'(?P<%s>[^\t]+)', str),
@@ -119,6 +134,9 @@
's': (r'(?P<%s>\S+)', str),
't': (r'(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',
ncsa_to_unix),
+ 'D': (r'(?P<dt>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})',
+ iso8601_from_timestamp),
+ 'u': (r'(?P<userAgent>[^\t]+)', parse_ua),
# Set caster to None for the ignore/omit format specifier
# so the corresponding value doesn't end up in the parsed event
'o': (r'(?P<omit>\S+)', None),
@@ -128,9 +146,22 @@
# format string.
self.casters = []
- # Compiled regexp.
+ # Convert the format string to a regex that will match
+ # the incoming lines into named groups and include them
+ # into the parsed dict.
+
+ # Space chars in format string should match any number of
+ # space characters.
format = re.sub(' ', r'\s+', format)
- raw = re.sub(r'(?<!%)%({(\w+)})?([dijqsto])', self._repl, format)
+
+ # Pattern that converts from format specifiers (e.g. %t, %d, etc.)
+ # into a regex will extract into a matched group key name.
+ format_to_regex_pattern = '(?<!%%)%%({(\w+)})?([%s])' % (
+ ''.join(self.format_specifiers.keys())
+ )
+ raw = re.sub(
+ re.compile(format_to_regex_pattern), self._repl, format
+ )
self.re = re.compile(raw)
def _repl(self, spec):
@@ -156,8 +187,11 @@
event = {k: f(match.group(k)) for f, k in caster_key_pairs}
event.update(event.pop('capsule'))
event['uuid'] = capsule_uuid(event)
- if ('userAgent' in event) and event['userAgent']:
- event['userAgent'] = parse_ua(event['userAgent'])
+
+ # TODO: remove this code in favor of %u format specifier
+ # after %{userAgent}i is not used. T179625
+ if 'userAgent' in event and isinstance(event['userAgent'], str):
+ event['userAgent'] = json.dumps(parse_ua(event['userAgent']))
return Event(event)
def __repr__(self):
diff --git a/eventlogging/schema.py b/eventlogging/schema.py
index 68f276b..d3ca651 100644
--- a/eventlogging/schema.py
+++ b/eventlogging/schema.py
@@ -64,7 +64,7 @@
SCHEMA_URI_FORMAT = '%s/%s'
# SCID of the metadata object which wraps each capsule-style event.
-CAPSULE_SCID = ('EventCapsule', 15423246)
+CAPSULE_SCID = ('EventCapsule', 17397982)
# TODO: Make new meta style EventError on meta.
ERROR_SCID = ('EventError', 14035058)
diff --git a/eventlogging/utils.py b/eventlogging/utils.py
index 92d6f68..7dd1e2e 100644
--- a/eventlogging/utils.py
+++ b/eventlogging/utils.py
@@ -12,7 +12,6 @@
import copy
import datetime
import dateutil.parser
-import json
import logging
import re
import os
@@ -34,7 +33,8 @@
__all__ = ('EventConsumer', 'PeriodicThread', 'flatten', 'is_subset_dict',
'setup_logging', 'unflatten', 'update_recursive',
'uri_delete_query_item', 'uri_append_query_items', 'uri_force_raw',
- 'parse_etcd_uri', 'datetime_from_uuid1', 'datetime_from_timestamp')
+ 'parse_etcd_uri', 'datetime_from_uuid1', 'datetime_from_timestamp',
+ 'iso8601_from_timestamp')
# Regex extending uaparser's bot/spider detection, comes from
# Webrequest.java in refinery-source/core
@@ -294,6 +294,10 @@
return dt
+def iso8601_from_timestamp(t):
+ return datetime_from_timestamp(t).isoformat()
+
+
def setup_logging(config_file=None):
if config_file:
fileConfig(config_file)
@@ -313,7 +317,7 @@
def parse_ua(user_agent):
"""
- Returns a json string containing the parsed User Agent data
+ Returns a dict containing the parsed User Agent data
from a request's UA string. Uses the following format:
{
"device_family": "Other",
@@ -356,7 +360,7 @@
# escape json so it doesn't cause problems when validating
# to string (per capsule definition)
- return json.dumps(formatted_ua)
+ return formatted_ua
def is_bot(device_family, user_agent):
diff --git a/tests/fixtures.py b/tests/fixtures.py
index 01f193b..82f4667 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -94,8 +94,12 @@
},
'timestamp': {
'type': 'number',
- 'required': True,
- 'format': 'utc-millisec'
+ 'required': False,
+ },
+ 'dt': {
+ 'type': 'string',
+ 'required': False,
+ 'format': 'date-time'
},
'uuid': {
'type': 'string',
@@ -103,9 +107,41 @@
'format': 'uuid5-hex'
},
'userAgent': {
- 'type': 'string',
- 'description': 'User Agent from HTTP request',
- 'required': False
+ 'type': 'any',
+ 'description': 'Parsed User Agent from HTTP request',
+ 'required': False,
+ 'properties': {
+ 'browser_family': {
+ 'type': 'string'
+ },
+ 'browser_major': {
+ 'type': 'string'
+ },
+ 'browser_minor': {
+ 'type': 'string'
+ },
+ 'device_family': {
+ 'type': 'string'
+ },
+ 'os_family': {
+ 'type': 'string'
+ },
+ 'os_major': {
+ 'type': 'string'
+ },
+ 'os_minor': {
+ 'type': 'string'
+ },
+ 'wmf_app_version': {
+ 'type': 'string'
+ },
+ 'is_bot': {
+ 'type': 'boolean'
+ },
+ 'is_mediawiki': {
+ 'type': 'boolean'
+ }
+ }
}
},
'additionalProperties': False
@@ -217,6 +253,7 @@
},
'seqId': 12345,
'timestamp': 1358791834912,
+ 'dt': '2013-01-21T18:10:34.912000',
'wiki': 'enwiki',
'webHost': 'en.m.wikipedia.org',
'recvFrom': 'fenari',
@@ -234,6 +271,7 @@
'event': [],
'seqId': 12345,
'timestamp': 1358791834912,
+ 'dt': '2013-01-21T18:10:34.912000',
'wiki': 'enwiki',
'webHost': 'en.m.wikipedia.org',
'recvFrom': 'fenari',
diff --git a/tests/test_event.py b/tests/test_event.py
index 4824a4d..38b0e23 100644
--- a/tests/test_event.py
+++ b/tests/test_event.py
@@ -163,7 +163,7 @@
"""Test that a datetime can be extracted from event meta"""
self.assertEqual(
self.event.datetime(),
- eventlogging.utils.datetime_from_timestamp(self.event['timestamp'])
+ eventlogging.utils.datetime_from_timestamp(self.event['dt'])
)
self.assertEqual(
self.event_with_meta.datetime(),
diff --git a/tests/test_parser.py b/tests/test_parser.py
index f2ca534..a27d339 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -10,7 +10,6 @@
import calendar
import datetime
-import json
import unittest
import eventlogging
@@ -34,7 +33,7 @@
def test_parse_client_side_events(self):
"""Parser test: client-side events."""
parser = eventlogging.LogParser(
- '%q %{recvFrom}s %{seqId}d %t %o %{userAgent}i')
+ '%q %{recvFrom}s %{seqId}d %D %o %u')
raw = ('?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
'%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
'2%3A1%2C%22articleTitle%22%3A%22H%C3%A9ctor%20Elizondo%22%7'
@@ -42,7 +41,7 @@
'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
' Gecko/20100101 Firefox/10.0')
- ua = json.dumps({
+ ua = {
'os_minor': None,
'os_major': None,
'device_family': 'Other',
@@ -53,14 +52,14 @@
'wmf_app_version': '-',
'is_bot': False,
'is_mediawiki': False
- })
+ }
parsed = {
- 'uuid': '799341a01ba957c79b15dc4d2d950864',
+ 'uuid': '5202f558f6aa5894978062d7aa039486',
'recvFrom': 'cp3022.esams.wikimedia.org',
'wiki': 'testwiki',
'webHost': 'test.wikipedia.org',
'seqId': 132073,
- 'timestamp': 1358637398,
+ 'dt': '2013-01-19T23:16:38',
'schema': 'Generic',
'revision': 13,
'userAgent': ua,
@@ -72,37 +71,15 @@
fromParser = parser.parse(raw)
for key in parsed:
if key == 'userAgent':
- # Python changes the order of keys when dumping objects into
- # a string, so we need to compare the ua separately parsing
- # it into an object.
- self.assertEqual(json.loads(parsed[key]),
- json.loads(fromParser[key]))
+ self.assertEqual(parsed[key],
+ fromParser[key])
else:
self.assertEqual(fromParser[key], parsed[key])
- def test_parser_server_side_events(self):
- """Parser test: server-side events."""
- parser = eventlogging.LogParser('%{seqId}d EventLogging %j')
- raw = ('99 EventLogging {"revision":123,"timestamp":1358627115,"sche'
- 'ma":"FakeSchema","wiki":"enwiki","event":{"action":"save\\u0'
- '020page"},"recvFrom":"fenari"}')
- parsed = {
- 'uuid': '67cc2c1afa5752ba80bbbd7c5fc41f28',
- 'recvFrom': 'fenari',
- 'timestamp': 1358627115,
- 'wiki': 'enwiki',
- 'seqId': 99,
- 'schema': 'FakeSchema',
- 'revision': 123,
- 'event': {
- 'action': 'save page'
- }
- }
- self.assertEqual(parser.parse(raw), parsed)
def test_parser_bot_requests(self):
parser = eventlogging.LogParser(
- '%q %{recvFrom}s %{seqId}d %t %o %{userAgent}i')
+ '%q %{recvFrom}s %{seqId}d %D %o %u')
# Bot - recognised by uaparser
raw = ('?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22Generic'
'%22%2C%22revision%22%3A13%2C%22event%22%3A%7B%22articleId%2'
@@ -111,7 +88,7 @@
'ms.wikimedia.org 132073 2013-01-19T23:16:38 - '
'AppEngine-Google; (+http://code.google.com/appengine; appid'
': webetrex)')
- ua_map = json.loads(parser.parse(raw)['userAgent'])
+ ua_map = parser.parse(raw)['userAgent']
self.assertEqual(ua_map['is_bot'], True)
# Bot - not recognised by uaparser
raw = ('?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22G'
@@ -120,7 +97,7 @@
'o%22%7D%2C%22webHost%22%3A%22test.wikipedia.org%22%7D; cp30'
'22.esams.wikimedia.org 132073 2013-01-19T23:16:38 - '
'WikiDemo/10.2.0;')
- ua_map = json.loads(parser.parse(raw)['userAgent'])
+ ua_map = parser.parse(raw)['userAgent']
self.assertEqual(ua_map['is_bot'], True)
# Regular browser
raw = ('?%7B%22wiki%22%3A%22testwiki%22%2C%22schema%22%3A%22'
@@ -130,8 +107,9 @@
'022.esams.wikimedia.org 132073 2013-01-19T23:16:38 - '
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0)'
' Gecko/20100101 Firefox/10.0')
- ua_map = json.loads(parser.parse(raw)['userAgent'])
+ ua_map = parser.parse(raw)['userAgent']
self.assertEqual(ua_map['is_bot'], False)
+
def test_parse_failure(self):
"""Parse failure raises ValueError exception."""
@@ -139,6 +117,7 @@
with self.assertRaises(ValueError):
parser.parse('Fails to parse.')
+
def test_repr(self):
"""Calling 'repr' on LogParser returns canonical string
representation."""
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 044448a..7b7113a 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -152,7 +152,7 @@
def test_missing_property(self):
"""Missing property in capsule object triggers validation failure."""
- self.event.pop('timestamp')
+ self.event.pop('uuid')
self.assertIsInvalid(self.event)
def test_missing_nested_property(self):
@@ -183,7 +183,7 @@
def test_capsule_uuid(self):
"""capsule_uuid() generates a unique UUID for capsule objects."""
self.assertEqual(eventlogging.capsule_uuid(self.event),
- 'babb66f34a0a5de3be0c6513088be33e')
+ '93e0f58d0c605e90a3c4861b1f00c407')
def test_empty_event(self):
"""An empty event with no mandatory properties should validate"""
diff --git a/tests/test_utils.py b/tests/test_utils.py
index ede38a9..8513adb 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -11,7 +11,6 @@
import datetime
import unittest
import uuid
-import json
import eventlogging
from eventlogging.compat import long
@@ -146,7 +145,7 @@
def test_ua_parse_ios(self):
ios_ua = 'WikipediaApp/5.3.3.1038 (iOS 10.2; Phone)'
- parsed = json.dumps({
+ parsed = {
'os_minor': '2',
'os_major': '10',
'device_family': 'Other',
@@ -157,13 +156,13 @@
'wmf_app_version': '5.3.3.1038',
'is_bot': False,
'is_mediawiki': False
- })
- self.assertEqual(json.loads(parsed),
- json.loads(eventlogging.utils.parse_ua(ios_ua)))
+ }
+ self.assertEqual(parsed,
+ eventlogging.utils.parse_ua(ios_ua))
def test_ua_parse_android(self):
android_ua = 'WikipediaApp/2.4.160-r-2016-10-14 (Android 4.4.2; Phone)'
- parsed = json.dumps({
+ parsed = {
'os_major': '4',
'wmf_app_version': '2.4.160-r-2016-10-14',
'os_family': 'Android',
@@ -174,13 +173,13 @@
'os_minor': '4',
'is_bot': False,
'is_mediawiki': False
- })
- self.assertEqual(json.loads(parsed),
- json.loads(eventlogging.utils.parse_ua(android_ua)))
+ }
+ self.assertEqual(parsed,
+ eventlogging.utils.parse_ua(android_ua))
def test_ua_parse_empty(self):
ua = ""
- parsed = json.dumps({
+ parsed = {
'os_minor': None,
'os_major': None,
'device_family': 'Other',
@@ -191,13 +190,13 @@
'wmf_app_version': '-',
'is_bot': False,
'is_mediawiki': False
- })
- self.assertEqual(json.loads(parsed),
- json.loads(eventlogging.utils.parse_ua(ua)))
+ }
+ self.assertEqual(parsed,
+ eventlogging.utils.parse_ua(ua))
def test_ua_parse_mediawiki(self):
mw_ua = 'MediaWiki 1.28'
- parsed = json.dumps({
+ parsed = {
'os_major': None,
'wmf_app_version': '-',
'os_family': 'Other',
@@ -208,6 +207,6 @@
'os_minor': None,
'is_bot': False,
'is_mediawiki': True
- })
- self.assertEqual(json.loads(parsed),
- json.loads(eventlogging.utils.parse_ua(mw_ua)))
+ }
+ self.assertEqual(parsed,
+ eventlogging.utils.parse_ua(mw_ua))
diff --git a/tox.ini b/tox.ini
index 0736e71..0bd0ac3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -23,7 +23,7 @@
commands = python setup.py test {posargs}
[testenv:flake8]
-commands = flake8
+commands = flake8 eventlogging
deps = flake8
[testenv:flake8-bin]
--
To view, visit https://gerrit.wikimedia.org/r/388255
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I23263b93f119274cb5b75955f61edb76de2bbdda
Gerrit-PatchSet: 19
Gerrit-Project: eventlogging
Gerrit-Branch: master
Gerrit-Owner: Ottomata <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Nuria <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits