jenkins-bot has submitted this change and it was merged.
Change subject: Tidy up some rough edges.
......................................................................
Tidy up some rough edges.
* Schemas are validated upon retrieval. Previously we only verified that the
schema decodes to a dict, and trusted that the server will not allow invalid
schemas to save.
* Consequently, 'http_get_schema' now raises jsonschema.SchemaError exceptions
rather than ValueError. Updated test to reflect this and added local binding
for SchemaError.
* Switch from using 'unquote_plus' to 'unquote'; we're not encoding form fields
so the '+' = space thing doesn't apply.
* Added a helper, 'ncsa_utcnow', which produces a string timestamp of the
current time (UTC) in NCSA Common Log Format. Needed by el-devserver.
* Replace dependency on 'zmq.util.json' with our own ranking (ujson, than
json). The simplejson in CPython 2.7's standard lib (i.e., 'json') includes
the C extension, so it's already quite fast. Compared to alternatives, only
ujson is significantly faster.
Change-Id: Id146ea42de6e3cc709514d5787a3dc76a2362654
---
M server/eventlogging/__init__.py
M server/eventlogging/compat.py
M server/eventlogging/parse.py
M server/eventlogging/schema.py
M server/tests/test_schema.py
5 files changed, 43 insertions(+), 21 deletions(-)
Approvals:
Ori.livneh: Looks good to me, approved
jenkins-bot: Verified
diff --git a/server/eventlogging/__init__.py b/server/eventlogging/__init__.py
index 43ced2e..415c916 100644
--- a/server/eventlogging/__init__.py
+++ b/server/eventlogging/__init__.py
@@ -24,6 +24,9 @@
# The fact that schema validation is entrusted to a third-party module
# is an implementation detail that a consumer of this package's API
-# should not have to know or care about. We thus provide a local binding
-# for :exc:`jsonschema.ValidationError`.
-from jsonschema import ValidationError
+# should not have to know or care about. We thus provide package-local
+# bindings for :exc:`jsonschema.ValidationError` and
+# :exc:`jsonschema.SchemaError`.
+from jsonschema import ValidationError, SchemaError
+
+__version__ = '0.5'
diff --git a/server/eventlogging/compat.py b/server/eventlogging/compat.py
index f4db412..77742ff 100644
--- a/server/eventlogging/compat.py
+++ b/server/eventlogging/compat.py
@@ -20,21 +20,24 @@
import uuid
-from zmq.utils import jsonapi as json
+try:
+ import ujson as json
+except ImportError:
+ import json
-__all__ = ('items', 'json', 'unquote_plus', 'urlopen', 'uuid5')
+__all__ = ('items', 'json', 'unquote', 'urlopen', 'uuid5')
PY3 = sys.version_info[0] == 3
if PY3:
items = operator.methodcaller('items')
- from urllib.parse import unquote_plus
+ from urllib.parse import unquote
from urllib.request import urlopen
else:
items = operator.methodcaller('iteritems')
from urllib2 import urlopen
- from urllib import unquote_plus
+ from urllib import unquote
@functools.wraps(uuid.uuid5)
diff --git a/server/eventlogging/parse.py b/server/eventlogging/parse.py
index 7b6dc08..5cf173e 100644
--- a/server/eventlogging/parse.py
+++ b/server/eventlogging/parse.py
@@ -37,10 +37,10 @@
import re
import time
-from .compat import json, unquote_plus
+from .compat import json, unquote
-__all__ = ('LogParser',)
+__all__ = ('LogParser', 'ncsa_to_epoch', 'ncsa_utcnow')
#: Salt value for hashing IPs. Because this value is generated at
#: runtime, IPs cannot be compared across restarts. This limitation is
@@ -64,6 +64,11 @@
return calendar.timegm(time.strptime(ncsa_ts, NCSA_FORMAT))
+def ncsa_utcnow():
+ """Gets the current UTC date and time in NCSA Common Log Format"""
+ return time.strftime(NCSA_FORMAT, time.gmtime())
+
+
def hash_value(val):
"""Produces a salted SHA1 hash of any string value.
:param val: String to hash.
@@ -76,7 +81,7 @@
"""Decodes a QSON (query-string-encoded JSON) object.
:param qs: Query string.
"""
- return json.loads(unquote_plus(qson.strip('?;')))
+ return json.loads(unquote(qson.strip('?;')))
#: A mapping of format specifiers to a tuple of (regexp, caster).
diff --git a/server/eventlogging/schema.py b/server/eventlogging/schema.py
index ec3ba52..b7d12d2 100644
--- a/server/eventlogging/schema.py
+++ b/server/eventlogging/schema.py
@@ -17,12 +17,19 @@
from .compat import json, urlopen, uuid5
-__all__ = ('CAPSULE_SCID', 'capsule_uuid', 'get_schema', 'validate')
+__all__ = ('CAPSULE_SCID', 'capsule_uuid', 'get_schema', 'SCHEMA_URL_FORMAT',
+ 'validate')
-#: Template for schema retrieval URLs. Interpolates SCIDs.
-url_format = ('http://meta.wikimedia.org/w/index.php?action=raw'
- '&title=Schema:%s&oldid=%s')
+#: URL of index.php on the schema wiki (same as
+#: '$wgEventLoggingSchemaIndexUri').
+SCHEMA_WIKI_INDEX_PHP = 'http://meta.wikimedia.org/w/index.php'
+
+#: Template for schema article URLs. Interpolates SCIDs.
+SCHEMA_URL_FORMAT = SCHEMA_WIKI_INDEX_PHP + '?title=Schema:%s&oldid=%s'
+
+#: Template for raw schema URLs. Interpolates SCIDs.
+RAW_SCHEMA_URL_FORMAT = SCHEMA_URL_FORMAT + '&action=raw'
#: Schemas retrieved via HTTP are cached in this dictionary.
schema_cache = {}
@@ -75,11 +82,15 @@
def http_get_schema(scid):
"""Retrieve schema via HTTP."""
- req = urlopen(url_format % scid)
- content = req.read().decode('utf-8')
- schema = json.loads(content)
- if not isinstance(schema, dict):
- raise ValueError('HTTP response did not decode into dict: %s', schema)
+ url = RAW_SCHEMA_URL_FORMAT % scid
+ try:
+ content = urlopen(url).read().decode('utf-8')
+ schema = json.loads(content)
+ except EnvironmentError as ex:
+ raise jsonschema.SchemaError('Failed to retrieve schema: %s' % ex)
+ except ValueError:
+ raise jsonschema.SchemaError('Could not decode JSON Schema at ' + url)
+ jsonschema.Draft3Validator.check_schema(schema)
return schema
@@ -88,7 +99,7 @@
:raises :exc:`jsonschema.ValidationError`: If event is invalid.
"""
try:
- scid = (capsule['schema'], capsule['revision'])
+ scid = capsule['schema'], capsule['revision']
except KeyError as ex:
# If `schema`, `revision` or `event` keys are missing, a
# KeyError exception will be raised. We re-raise it as a
diff --git a/server/tests/test_schema.py b/server/tests/test_schema.py
index 697f844..aee533b 100644
--- a/server/tests/test_schema.py
+++ b/server/tests/test_schema.py
@@ -39,7 +39,7 @@
def test_invalid_resp(self):
"""Test handling of HTTP response not containing valid schema."""
self.http_resp = b'"foo"'
- with self.assertRaises(ValueError):
+ with self.assertRaises(eventlogging.SchemaError):
eventlogging.schema.http_get_schema(TEST_SCHEMA_SCID)
--
To view, visit https://gerrit.wikimedia.org/r/49462
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id146ea42de6e3cc709514d5787a3dc76a2362654
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/EventLogging
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <[email protected]>
Gerrit-Reviewer: Ori.livneh <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits