Ori.livneh has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/49462


Change subject: Tidy up some rough edges.
......................................................................

Tidy up some rough edges.

* Schemas are validated upon retrieval. Previously we only verified that the
  schema decodes to a dict, and trusted that the server will not allow invalid
  schemas to save.
* Consequently, 'http_get_schema' now raises jsonschema.SchemaError exceptions
  rather than ValueError. Updated test to reflect this and added local binding
  for SchemaError.
* Switch from using 'unquote_plus' to 'unquote'; we're not encoding form fields
  so the '+' = space thing doesn't apply.
* Added a helper, 'ncsa_utcnow', which produces a string timestamp of the
  current time (UTC) in NCSA Common Log Format. Needed by el-devserver.
* Replace dependency on 'zmq.util.json' with our own ranking (ujson, than
  json). The simplejson in CPython 2.7's standard lib (i.e., 'json') includes
  the C extension, so it's already quite fast. Compared to alternatives, only
  ujson is significantly faster.

Change-Id: Id146ea42de6e3cc709514d5787a3dc76a2362654
---
M server/eventlogging/__init__.py
M server/eventlogging/compat.py
M server/eventlogging/parse.py
M server/eventlogging/schema.py
M server/tests/test_schema.py
5 files changed, 43 insertions(+), 21 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/EventLogging 
refs/changes/62/49462/1

diff --git a/server/eventlogging/__init__.py b/server/eventlogging/__init__.py
index 43ced2e..415c916 100644
--- a/server/eventlogging/__init__.py
+++ b/server/eventlogging/__init__.py
@@ -24,6 +24,9 @@
 
 # The fact that schema validation is entrusted to a third-party module
 # is an implementation detail that a consumer of this package's API
-# should not have to know or care about. We thus provide a local binding
-# for :exc:`jsonschema.ValidationError`.
-from jsonschema import ValidationError
+# should not have to know or care about. We thus provide package-local
+# bindings for :exc:`jsonschema.ValidationError` and
+# :exc:`jsonschema.SchemaError`.
+from jsonschema import ValidationError, SchemaError
+
+__version__ = '0.5'
diff --git a/server/eventlogging/compat.py b/server/eventlogging/compat.py
index f4db412..77742ff 100644
--- a/server/eventlogging/compat.py
+++ b/server/eventlogging/compat.py
@@ -20,21 +20,24 @@
 import uuid
 
 
-from zmq.utils import jsonapi as json
+try:
+    import ujson as json
+except ImportError:
+    import json
 
 
-__all__ = ('items', 'json', 'unquote_plus', 'urlopen', 'uuid5')
+__all__ = ('items', 'json', 'unquote', 'urlopen', 'uuid5')
 
 PY3 = sys.version_info[0] == 3
 
 if PY3:
     items = operator.methodcaller('items')
-    from urllib.parse import unquote_plus
+    from urllib.parse import unquote
     from urllib.request import urlopen
 else:
     items = operator.methodcaller('iteritems')
     from urllib2 import urlopen
-    from urllib import unquote_plus
+    from urllib import unquote
 
 
 @functools.wraps(uuid.uuid5)
diff --git a/server/eventlogging/parse.py b/server/eventlogging/parse.py
index 7b6dc08..5cf173e 100644
--- a/server/eventlogging/parse.py
+++ b/server/eventlogging/parse.py
@@ -37,10 +37,10 @@
 import re
 import time
 
-from .compat import json, unquote_plus
+from .compat import json, unquote
 
 
-__all__ = ('LogParser',)
+__all__ = ('LogParser', 'ncsa_to_epoch', 'ncsa_utcnow')
 
 #: Salt value for hashing IPs. Because this value is generated at
 #: runtime, IPs cannot be compared across restarts. This limitation is
@@ -64,6 +64,11 @@
     return calendar.timegm(time.strptime(ncsa_ts, NCSA_FORMAT))
 
 
+def ncsa_utcnow():
+    """Gets the current UTC date and time in NCSA Common Log Format"""
+    return time.strftime(NCSA_FORMAT, time.gmtime())
+
+
 def hash_value(val):
     """Produces a salted SHA1 hash of any string value.
     :param val: String to hash.
@@ -76,7 +81,7 @@
     """Decodes a QSON (query-string-encoded JSON) object.
     :param qs: Query string.
     """
-    return json.loads(unquote_plus(qson.strip('?;')))
+    return json.loads(unquote(qson.strip('?;')))
 
 
 #: A mapping of format specifiers to a tuple of (regexp, caster).
diff --git a/server/eventlogging/schema.py b/server/eventlogging/schema.py
index ec3ba52..b7d12d2 100644
--- a/server/eventlogging/schema.py
+++ b/server/eventlogging/schema.py
@@ -17,12 +17,19 @@
 
 from .compat import json, urlopen, uuid5
 
-__all__ = ('CAPSULE_SCID', 'capsule_uuid', 'get_schema', 'validate')
+__all__ = ('CAPSULE_SCID', 'capsule_uuid', 'get_schema', 'SCHEMA_URL_FORMAT',
+           'validate')
 
 
-#: Template for schema retrieval URLs. Interpolates SCIDs.
-url_format = ('http://meta.wikimedia.org/w/index.php?action=raw'
-              '&title=Schema:%s&oldid=%s')
+#: URL of index.php on the schema wiki (same as
+#: '$wgEventLoggingSchemaIndexUri').
+SCHEMA_WIKI_INDEX_PHP = 'http://meta.wikimedia.org/w/index.php'
+
+#: Template for schema article URLs. Interpolates SCIDs.
+SCHEMA_URL_FORMAT = SCHEMA_WIKI_INDEX_PHP + '?title=Schema:%s&oldid=%s'
+
+#: Template for raw schema URLs. Interpolates SCIDs.
+RAW_SCHEMA_URL_FORMAT = SCHEMA_URL_FORMAT + '&action=raw'
 
 #: Schemas retrieved via HTTP are cached in this dictionary.
 schema_cache = {}
@@ -75,11 +82,15 @@
 
 def http_get_schema(scid):
     """Retrieve schema via HTTP."""
-    req = urlopen(url_format % scid)
-    content = req.read().decode('utf-8')
-    schema = json.loads(content)
-    if not isinstance(schema, dict):
-        raise ValueError('HTTP response did not decode into dict: %s', schema)
+    url = RAW_SCHEMA_URL_FORMAT % scid
+    try:
+        content = urlopen(url).read().decode('utf-8')
+        schema = json.loads(content)
+    except EnvironmentError as ex:
+        raise jsonschema.SchemaError('Failed to retrieve schema: %s' % ex)
+    except ValueError:
+        raise jsonschema.SchemaError('Could not decode JSON Schema at ' + url)
+    jsonschema.Draft3Validator.check_schema(schema)
     return schema
 
 
@@ -88,7 +99,7 @@
     :raises :exc:`jsonschema.ValidationError`: If event is invalid.
     """
     try:
-        scid = (capsule['schema'], capsule['revision'])
+        scid = capsule['schema'], capsule['revision']
     except KeyError as ex:
         # If `schema`, `revision` or `event` keys are missing, a
         # KeyError exception will be raised. We re-raise it as a
diff --git a/server/tests/test_schema.py b/server/tests/test_schema.py
index 697f844..aee533b 100644
--- a/server/tests/test_schema.py
+++ b/server/tests/test_schema.py
@@ -39,7 +39,7 @@
     def test_invalid_resp(self):
         """Test handling of HTTP response not containing valid schema."""
         self.http_resp = b'"foo"'
-        with self.assertRaises(ValueError):
+        with self.assertRaises(eventlogging.SchemaError):
             eventlogging.schema.http_get_schema(TEST_SCHEMA_SCID)
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/49462
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id146ea42de6e3cc709514d5787a3dc76a2362654
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/EventLogging
Gerrit-Branch: master
Gerrit-Owner: Ori.livneh <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to