Madhuvishy has uploaded a new change for review. https://gerrit.wikimedia.org/r/271728
Change subject: [WIP] Allow sensitive fields like clientIP to be collected only by schemas that need it ...................................................................... [WIP] Allow sensitive fields like clientIP to be collected only by schemas that need it Bug: T126366 Change-Id: I1fcc8becfe4a41018b9986e5fb8a978344d5952b --- M eventlogging/parse.py 1 file changed, 26 insertions(+), 1 deletion(-) git pull ssh://gerrit.wikimedia.org:29418/eventlogging refs/changes/28/271728/1 diff --git a/eventlogging/parse.py b/eventlogging/parse.py index cb61d6e..4b4bc10 100644 --- a/eventlogging/parse.py +++ b/eventlogging/parse.py @@ -69,6 +69,13 @@ # used to anonymize IP addresses. KEY_LIFESPAN = datetime.timedelta(days=90) +# Define special sensitive fields that may be included in the schema, and +# their mapping to keys extracted from raw Eventlogging data. These +# fields are only stored with the event if they are specified +# explicitly in the schema. Any information that is already present in those +# fields will be overridden by eventlogging. So far only clientIp is supported. +SENSITIVE_FIELDS_MAP = {'__clientIp': 'clientIp'} + def capsule_uuid(capsule): """Generate a UUID for a capsule object. @@ -158,6 +165,24 @@ self.casters.append(caster) return matcher + def drop_sensitive_fields(self, event): + """Sensitive fields like ip are only included if the special __ip + like field name is present in the Schema. Drop them if the special + fields listed in SENSITIVE_FIELDS are absent in the JSON schema. + """ + for field, key in SENSITIVE_FIELDS_MAP.iteritems(): + # If the special field is present, pop it - the actual value + # associated with the key is left untouched. For e.g if __clientIp + # is present, pop __clientIp. clientIp is already extracted and + # remains in the event + if field in event.schema(): + event.pop(field) + # If the field is not present remove the actual key. e.g clientIp + # is popped if __clientIp is not present in the schema. 
+ else: + event.pop(key) + return event + def parse(self, line): """Parse a log line into a map of field names / values.""" match = self.re.match(line) @@ -167,7 +192,7 @@ event = {k: f(match.group(k)) for f, k in zip(self.casters, keys)} event.update(event.pop('capsule')) event['uuid'] = capsule_uuid(event) - return Event(event) + return self.drop_sensitive_fields(Event(event)) def __repr__(self): return '<LogParser(\'%s\')>' % self.format -- To view, visit https://gerrit.wikimedia.org/r/271728 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1fcc8becfe4a41018b9986e5fb8a978344d5952b Gerrit-PatchSet: 1 Gerrit-Project: eventlogging Gerrit-Branch: master Gerrit-Owner: Madhuvishy <mviswanat...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits