Ottomata has submitted this change and it was merged. Change subject: Include schema and revision of errored event in EventError if it can be parsed ......................................................................
Include schema and revision of errored event in EventError if it can be parsed Bug: T115121 Change-Id: Ia85228e244690b0517023be10b59469113db0b7a --- M server/bin/eventlogging-processor M server/eventlogging/schema.py M server/tests/fixtures.py M server/tests/test_schema.py 4 files changed, 195 insertions(+), 22 deletions(-) Approvals: Ottomata: Verified; Looks good to me, approved diff --git a/server/bin/eventlogging-processor b/server/bin/eventlogging-processor index e298848..be6f901 100755 --- a/server/bin/eventlogging-processor +++ b/server/bin/eventlogging-processor @@ -122,14 +122,30 @@ args.input = uri_append_query_items(args.input, {'identity': args.sid}) -def write_event_error(writer, raw_event, error_message, error_code): +def write_event_error( + writer, + raw_event, + error_message, + error_code, + parsed_event=None +): + """ + Constructs an EventError object and sends it to writer. + """ try: - event_error = create_event_error(raw_event, error_message, error_code) + writer.send( + create_event_error( + raw_event, + error_message, + error_code, + parsed_event + ) + ) except Exception as e: logging.error('Unable to create EventError object: %s' % e.message) - writer.send(event_error) for raw_event in get_reader(args.input): + event = None try: event = parser.parse(raw_event) event.pop('clientValidated', None) @@ -141,14 +157,14 @@ logging.error('Unable to validate: %s (%s)', raw_event, e.message) if writer_invalid: write_event_error( - writer_invalid, raw_event, e.message, 'validation' + writer_invalid, raw_event, e.message, 'validation', event ) except Exception as e: logging.error('Unable to process: %s (%s)', raw_event, e.message) if writer_invalid: write_event_error( - writer_invalid, raw_event, e.message, 'processor' + writer_invalid, raw_event, e.message, 'processor', event ) else: diff --git a/server/eventlogging/schema.py b/server/eventlogging/schema.py index 62e34f5..75a7a7a 100644 --- a/server/eventlogging/schema.py +++ b/server/eventlogging/schema.py @@ -29,7 +29,16 @@ # Regular expression which matches valid schema names. -SCHEMA_RE = re.compile(r'^[a-zA-Z0-9_-]{1,63}$') +SCHEMA_RE_PATTERN = r'[a-zA-Z0-9_-]{1,63}' +SCHEMA_RE = re.compile(r'^{0}$'.format(SCHEMA_RE_PATTERN)) + +# These REs will be used when constructing an ErrorEvent +# to extract the schema and revision out of a raw event +# string in the case it cannot be parsed as JSON. +RAW_SCHEMA_RE = re.compile( + r'%22schema%22%3A%22({0})%22'.format(SCHEMA_RE_PATTERN) +) +RAW_REVISION_RE = re.compile(r'%22revision%22%3A(\d+)') # URL of index.php on the schema wiki (same as # '$wgEventLoggingSchemaApiUri'). @@ -47,7 +56,7 @@ CAPSULE_SCID = ('EventCapsule', 10981547) # TODO: -ERROR_SCID = ('EventError', 12407995) +ERROR_SCID = ('EventError', 14035058) def get_schema(scid, encapsulate=False): @@ -105,11 +114,38 @@ jsonschema.Draft3Validator(schema).validate(capsule) -def create_event_error(raw_event, error_message, error_code): +def create_event_error( + raw_event, + error_message, + error_code, + parsed_event=None +): """ - Creates an EventError around this - unparsed and unvalidated raw_event string. + Creates an EventError around this raw_event string. + If parsed_event is provided, The raw event's schema and revision + will be included in the ErrorEvent as event.schema and event.revision. + Otherwise these will be attempted to be extracted from the raw_event via + a regex. If this still fails, these will be set to 'unknown' and -1. """ + errored_schema = 'unknown' + errored_revision = -1 + + # If we've got a parsed event, then we can just get the schema + # and revision out of the object. + if parsed_event: + errored_schema = parsed_event.get('schema', 'unknown') + errored_revision = int(parsed_event.get('revision', -1)) + + # otherwise attempt to get them out of the raw_event with a regex + else: + schema_match = RAW_SCHEMA_RE.search(raw_event) + if schema_match: + errored_schema = schema_match.group(1) + + revision_match = RAW_REVISION_RE.search(raw_event) + if revision_match: + errored_revision = int(revision_match.group(1)) + return { 'schema': ERROR_SCID[0], 'revision': ERROR_SCID[1], @@ -120,6 +156,8 @@ 'event': { 'rawEvent': raw_event, 'message': error_message, - 'code': error_code + 'code': error_code, + 'schema': errored_schema, + 'revision': errored_revision } } diff --git a/server/tests/fixtures.py b/server/tests/fixtures.py index ac6df58..6abfaef 100644 --- a/server/tests/fixtures.py +++ b/server/tests/fixtures.py @@ -73,14 +73,22 @@ 'type': 'string', 'required': True }, - "code": { - "type": "string", - "required": True, - "enum": [ - "processor", - "consumer", - "validation" + 'code': { + 'type': 'string', + 'required': True, + 'enum': [ + 'processor', + 'consumer', + 'validation' ], + }, + 'schema': { + 'type': 'string', + 'required': True + }, + 'revision': { + 'type': 'integer', + 'required': True } } }, diff --git a/server/tests/test_schema.py b/server/tests/test_schema.py index 346962a..1c7f634 100644 --- a/server/tests/test_schema.py +++ b/server/tests/test_schema.py @@ -124,8 +124,11 @@ """An empty event with no mandatory properties should validate""" self.assertIsValid(self.incorrectly_serialized_empty_event) - def test_create_event_error(self): - """create_event_error() should create a valid EventError object.""" + def test_create_event_error_unparsed(self): + """ + create_event_error() should create a valid + EventError object without schema or revision set. + """ invalid_raw_event = "Duh this won't validate against any schema." error_message = "This is just a test." @@ -153,7 +156,115 @@ event_error['event']['message'], error_message ) + # assert that schema and revision are the defaults, since there's + # there is no parsed_event and these are not even present in this + # raw_event. self.assertEqual( - event_error['event']['code'], - error_code + event_error['event']['schema'], + 'unknown' + ) + self.assertEqual( + event_error['event']['revision'], + -1 + ) + + def test_create_event_error_parsed(self): + """ + create_event_error() should create a valid + EventError object with schema and revision set. + """ + + invalid_raw_event = "Duh this won't validate against any schema." + error_message = "This is just a test." + error_code = "processor" + parsed_event = { + 'schema': 'Nonya', + 'revision': 12345 + } + + event_error = eventlogging.create_event_error( + invalid_raw_event, + error_message, + error_code, + parsed_event + ) + # Test that this event validates against the EventError schema. + self.assertIsValid(event_error) + self.assertEqual( + event_error['schema'], + eventlogging.schema.ERROR_SCID[0] + ) + self.assertEqual( + event_error['revision'], + eventlogging.schema.ERROR_SCID[1] + ) + self.assertEqual( + event_error['event']['rawEvent'], + invalid_raw_event + ) + self.assertEqual( + event_error['event']['message'], + error_message + ) + # assert that schema and revision the same as in parsed_event + self.assertEqual( + event_error['event']['schema'], + 'Nonya' + ) + self.assertEqual( + event_error['event']['revision'], + 12345 + ) + + def test_create_event_error_raw_schema_and_revision(self): + """ + create_event_error() should create a valid + EventError object with schema and revision set, extracted + via a regex out of the raw_event. + """ + + invalid_raw_event = '?%7B%22event%22%3A%7B%22mobileMode%22%3A' \ + '%22stable%22%2C%22name%22%3A%22home%22%2C%22destination%22%3A' \ + '%22%2Fwiki%2FPagina_principale%22%7D%2C%22revision%22%3A' \ + '11568715%2C%22schema%22%3A' \ + '%22MobileWebMainMenuClickTracking%22%2C' \ + '%22webHost%22%3A%12345terfdit.m.wikipedia.org%22%2C%22wiki%22' \ + '%3A%22itwiki%22%7D; cp3013.esams.wmnet 4724275 ' \ + '2015-09-21T21:55:27 1.2.3.4 "Mozilla"' + + print(invalid_raw_event) + error_message = "This is just a test." + error_code = "processor" + + event_error = eventlogging.create_event_error( + invalid_raw_event, + error_message, + error_code, + ) + # Test that this event validates against the EventError schema. + self.assertIsValid(event_error) + self.assertEqual( + event_error['schema'], + eventlogging.schema.ERROR_SCID[0] + ) + self.assertEqual( + event_error['revision'], + eventlogging.schema.ERROR_SCID[1] + ) + self.assertEqual( + event_error['event']['rawEvent'], + invalid_raw_event + ) + self.assertEqual( + event_error['event']['message'], + error_message + ) + # assert that schema and revision the same as in the invalid raw event + self.assertEqual( + event_error['event']['schema'], + 'MobileWebMainMenuClickTracking' + ) + self.assertEqual( + event_error['event']['revision'], + 11568715 ) -- To view, visit https://gerrit.wikimedia.org/r/244729 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia85228e244690b0517023be10b59469113db0b7a Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/extensions/EventLogging Gerrit-Branch: master Gerrit-Owner: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: Mforns <mfo...@wikimedia.org> Gerrit-Reviewer: Ori.livneh <o...@wikimedia.org> Gerrit-Reviewer: Ottomata <o...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits