Ottomata has submitted this change and it was merged.

Change subject: Include schema and revision of errored event in EventError if 
it can be parsed
......................................................................


Include schema and revision of errored event in EventError if it can be parsed

Bug: T115121
Change-Id: Ia85228e244690b0517023be10b59469113db0b7a
---
M server/bin/eventlogging-processor
M server/eventlogging/schema.py
M server/tests/fixtures.py
M server/tests/test_schema.py
4 files changed, 195 insertions(+), 22 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/server/bin/eventlogging-processor 
b/server/bin/eventlogging-processor
index e298848..be6f901 100755
--- a/server/bin/eventlogging-processor
+++ b/server/bin/eventlogging-processor
@@ -122,14 +122,30 @@
     args.input = uri_append_query_items(args.input, {'identity': args.sid})
 
 
-def write_event_error(writer, raw_event, error_message, error_code):
+def write_event_error(
+    writer,
+    raw_event,
+    error_message,
+    error_code,
+    parsed_event=None
+):
+    """
+    Constructs an EventError object and sends it to writer.
+    """
     try:
-        event_error = create_event_error(raw_event, error_message, error_code)
+        writer.send(
+            create_event_error(
+                raw_event,
+                error_message,
+                error_code,
+                parsed_event
+            )
+        )
     except Exception as e:
         logging.error('Unable to create EventError object: %s' % e.message)
-    writer.send(event_error)
 
 for raw_event in get_reader(args.input):
+    event = None
     try:
         event = parser.parse(raw_event)
         event.pop('clientValidated', None)
@@ -141,14 +157,14 @@
         logging.error('Unable to validate: %s (%s)', raw_event, e.message)
         if writer_invalid:
             write_event_error(
-                writer_invalid, raw_event, e.message, 'validation'
+                writer_invalid, raw_event, e.message, 'validation', event
             )
 
     except Exception as e:
         logging.error('Unable to process: %s (%s)', raw_event, e.message)
         if writer_invalid:
             write_event_error(
-                writer_invalid, raw_event, e.message, 'processor'
+                writer_invalid, raw_event, e.message, 'processor', event
             )
 
     else:
diff --git a/server/eventlogging/schema.py b/server/eventlogging/schema.py
index 62e34f5..75a7a7a 100644
--- a/server/eventlogging/schema.py
+++ b/server/eventlogging/schema.py
@@ -29,7 +29,16 @@
 
 
 # Regular expression which matches valid schema names.
-SCHEMA_RE = re.compile(r'^[a-zA-Z0-9_-]{1,63}$')
+SCHEMA_RE_PATTERN = r'[a-zA-Z0-9_-]{1,63}'
+SCHEMA_RE = re.compile(r'^{0}$'.format(SCHEMA_RE_PATTERN))
+
+# These REs will be used when constructing an ErrorEvent
+# to extract the schema and revision out of a raw event
+# string in the case it cannot be parsed as JSON.
+RAW_SCHEMA_RE = re.compile(
+    r'%22schema%22%3A%22({0})%22'.format(SCHEMA_RE_PATTERN)
+)
+RAW_REVISION_RE = re.compile(r'%22revision%22%3A(\d+)')
 
 # URL of index.php on the schema wiki (same as
 # '$wgEventLoggingSchemaApiUri').
@@ -47,7 +56,7 @@
 CAPSULE_SCID = ('EventCapsule', 10981547)
 
 # TODO:
-ERROR_SCID = ('EventError', 12407995)
+ERROR_SCID = ('EventError', 14035058)
 
 
 def get_schema(scid, encapsulate=False):
@@ -105,11 +114,38 @@
     jsonschema.Draft3Validator(schema).validate(capsule)
 
 
-def create_event_error(raw_event, error_message, error_code):
+def create_event_error(
+    raw_event,
+    error_message,
+    error_code,
+    parsed_event=None
+):
     """
-    Creates an EventError around this
-    unparsed and unvalidated raw_event string.
+    Creates an EventError around this raw_event string.
+    If parsed_event is provided, The raw event's schema and revision
+    will be included in the ErrorEvent as event.schema and event.revision.
+    Otherwise these will be attempted to be extracted from the raw_event via
+    a regex.  If this still fails, these will be set to 'unknown' and -1.
     """
+    errored_schema = 'unknown'
+    errored_revision = -1
+
+    # If we've got a parsed event, then we can just get the schema
+    # and revision out of the object.
+    if parsed_event:
+        errored_schema = parsed_event.get('schema', 'unknown')
+        errored_revision = int(parsed_event.get('revision', -1))
+
+    # otherwise attempt to get them out of the raw_event with a regex
+    else:
+        schema_match = RAW_SCHEMA_RE.search(raw_event)
+        if schema_match:
+            errored_schema = schema_match.group(1)
+
+        revision_match = RAW_REVISION_RE.search(raw_event)
+        if revision_match:
+            errored_revision = int(revision_match.group(1))
+
     return {
         'schema': ERROR_SCID[0],
         'revision': ERROR_SCID[1],
@@ -120,6 +156,8 @@
         'event': {
             'rawEvent': raw_event,
             'message': error_message,
-            'code': error_code
+            'code': error_code,
+            'schema': errored_schema,
+            'revision': errored_revision
         }
     }
diff --git a/server/tests/fixtures.py b/server/tests/fixtures.py
index ac6df58..6abfaef 100644
--- a/server/tests/fixtures.py
+++ b/server/tests/fixtures.py
@@ -73,14 +73,22 @@
                 'type': 'string',
                 'required': True
             },
-            "code": {
-                "type": "string",
-                "required": True,
-                "enum": [
-                    "processor",
-                    "consumer",
-                    "validation"
+            'code': {
+                'type': 'string',
+                'required': True,
+                'enum': [
+                    'processor',
+                    'consumer',
+                    'validation'
                 ],
+            },
+            'schema': {
+                'type': 'string',
+                'required': True
+            },
+            'revision': {
+                'type': 'integer',
+                'required': True
             }
         }
     },
diff --git a/server/tests/test_schema.py b/server/tests/test_schema.py
index 346962a..1c7f634 100644
--- a/server/tests/test_schema.py
+++ b/server/tests/test_schema.py
@@ -124,8 +124,11 @@
         """An empty event with no mandatory properties should validate"""
         self.assertIsValid(self.incorrectly_serialized_empty_event)
 
-    def test_create_event_error(self):
-        """create_event_error() should create a valid EventError object."""
+    def test_create_event_error_unparsed(self):
+        """
+        create_event_error() should create a valid
+        EventError object without schema or revision set.
+        """
 
         invalid_raw_event = "Duh this won't validate against any schema."
         error_message = "This is just a test."
@@ -153,7 +156,115 @@
             event_error['event']['message'],
             error_message
         )
+        # assert that schema and revision are the defaults, since there's
+        # there is no parsed_event and these are not even present in this
+        # raw_event.
         self.assertEqual(
-            event_error['event']['code'],
-            error_code
+            event_error['event']['schema'],
+            'unknown'
+        )
+        self.assertEqual(
+            event_error['event']['revision'],
+            -1
+        )
+
+    def test_create_event_error_parsed(self):
+        """
+        create_event_error() should create a valid
+        EventError object with schema and revision set.
+        """
+
+        invalid_raw_event = "Duh this won't validate against any schema."
+        error_message = "This is just a test."
+        error_code = "processor"
+        parsed_event = {
+            'schema': 'Nonya',
+            'revision': 12345
+        }
+
+        event_error = eventlogging.create_event_error(
+            invalid_raw_event,
+            error_message,
+            error_code,
+            parsed_event
+        )
+        # Test that this event validates against the EventError schema.
+        self.assertIsValid(event_error)
+        self.assertEqual(
+            event_error['schema'],
+            eventlogging.schema.ERROR_SCID[0]
+        )
+        self.assertEqual(
+            event_error['revision'],
+            eventlogging.schema.ERROR_SCID[1]
+        )
+        self.assertEqual(
+            event_error['event']['rawEvent'],
+            invalid_raw_event
+        )
+        self.assertEqual(
+            event_error['event']['message'],
+            error_message
+        )
+        # assert that schema and revision the same as in parsed_event
+        self.assertEqual(
+            event_error['event']['schema'],
+            'Nonya'
+        )
+        self.assertEqual(
+            event_error['event']['revision'],
+            12345
+        )
+
+    def test_create_event_error_raw_schema_and_revision(self):
+        """
+        create_event_error() should create a valid
+        EventError object with schema and revision set, extracted
+        via a regex out of the raw_event.
+        """
+
+        invalid_raw_event = '?%7B%22event%22%3A%7B%22mobileMode%22%3A' \
+            '%22stable%22%2C%22name%22%3A%22home%22%2C%22destination%22%3A' \
+            '%22%2Fwiki%2FPagina_principale%22%7D%2C%22revision%22%3A' \
+            '11568715%2C%22schema%22%3A' \
+            '%22MobileWebMainMenuClickTracking%22%2C' \
+            '%22webHost%22%3A%12345terfdit.m.wikipedia.org%22%2C%22wiki%22' \
+            '%3A%22itwiki%22%7D;       cp3013.esams.wmnet      4724275 ' \
+            '2015-09-21T21:55:27       1.2.3.4 "Mozilla"'
+
+        print(invalid_raw_event)
+        error_message = "This is just a test."
+        error_code = "processor"
+
+        event_error = eventlogging.create_event_error(
+            invalid_raw_event,
+            error_message,
+            error_code,
+        )
+        # Test that this event validates against the EventError schema.
+        self.assertIsValid(event_error)
+        self.assertEqual(
+            event_error['schema'],
+            eventlogging.schema.ERROR_SCID[0]
+        )
+        self.assertEqual(
+            event_error['revision'],
+            eventlogging.schema.ERROR_SCID[1]
+        )
+        self.assertEqual(
+            event_error['event']['rawEvent'],
+            invalid_raw_event
+        )
+        self.assertEqual(
+            event_error['event']['message'],
+            error_message
+        )
+        # assert that schema and revision the same as in the invalid raw event
+        self.assertEqual(
+            event_error['event']['schema'],
+            'MobileWebMainMenuClickTracking'
+        )
+        self.assertEqual(
+            event_error['event']['revision'],
+            11568715
         )

-- 
To view, visit https://gerrit.wikimedia.org/r/244729
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia85228e244690b0517023be10b59469113db0b7a
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/EventLogging
Gerrit-Branch: master
Gerrit-Owner: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: Mforns <mfo...@wikimedia.org>
Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org>
Gerrit-Reviewer: Ottomata <o...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to