[ https://issues.apache.org/jira/browse/METRON-157?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15862733#comment-15862733 ]
ASF GitHub Bot commented on METRON-157: --------------------------------------- Github user simonellistonball commented on a diff in the pull request: https://github.com/apache/incubator-metron/pull/451#discussion_r100688919 --- Diff: metron-platform/metron-parsers/src/main/java/org/apache/metron/parsers/cef/CEFParser.java --- @@ -0,0 +1,272 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.metron.parsers.cef; + +import java.nio.charset.Charset; +import java.time.Clock; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.metron.parsers.BasicParser; +import org.apache.metron.parsers.ParseException; +import org.apache.metron.parsers.utils.DateUtils; +import org.apache.metron.parsers.utils.SyslogUtils; +import org.json.simple.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CEFParser extends BasicParser { + private static final long serialVersionUID = 1L; + + protected static final Logger LOG = LoggerFactory.getLogger(CEFParser.class); + private static final String HEADER_CAPTURE_PATTERN = "[^\\|]*"; + private static final String EXTENSION_CAPTURE_PATTERN = "(?<!\\\\)="; + private static final Charset UTF_8 = Charset.forName("UTF-8"); + + private Pattern p; + private Pattern pext; + + public void init() { + + // CEF Headers: Device Vendor|Device Product|Device Version|Device Event + // Class ID|Name|Severity + + String syslogTime = "(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\\b +(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) (?!<[0-9])(?:2[0123]|[01]?[0-9]):(?:[0-5][0-9])(?::(?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?))(?![0-9])?"; + String syslogPriority = "<(?:[0-9]+)>"; + String syslogHost = "[a-z0-9\\.\\\\-_]+"; + + StringBuilder sb = new StringBuilder("(?<syslogTime>"); + + sb.append(syslogTime); + sb.append(")?"); + + sb.append("(?<syslogHost>"); + sb.append(syslogHost); + sb.append(")?"); + + sb.append("(?<syslogPriority>"); + sb.append(syslogPriority); + sb.append(")?"); + + sb.append(".*"); + + sb.append("CEF:0\\|"); + + headerBlock("DeviceVendor", sb); + sb.append("\\|"); + headerBlock("DeviceProduct", sb); + 
sb.append("\\|"); + headerBlock("DeviceVersion", sb); + sb.append("\\|"); + headerBlock("DeviceEvent", sb); + sb.append("\\|"); + headerBlock("Name", sb); + sb.append("\\|"); + headerBlock("Severity", sb); + sb.append("\\|"); + + // extension capture: + sb.append("(?<extensions>.*)"); + String pattern = sb.toString(); + + p = Pattern.compile(pattern); + + // key finder for extensions + pext = Pattern.compile(EXTENSION_CAPTURE_PATTERN); + } + + @SuppressWarnings("unchecked") + public List<JSONObject> parse(byte[] rawMessage) { + List<JSONObject> messages = new ArrayList<>(); + + String cefString = new String(rawMessage, UTF_8); + + Matcher matcher = p.matcher(cefString); + + while (matcher.find()) { + JSONObject obj = new JSONObject(); + if (matcher.matches()) { + LOG.info(String.format("Found %d groups", matcher.groupCount())); + obj.put("DeviceVendor", matcher.group("DeviceVendor")); + obj.put("DeviceProduct", matcher.group("DeviceProduct")); + obj.put("DeviceVersion", matcher.group("DeviceVersion")); + obj.put("DeviceEvent", matcher.group("DeviceEvent")); + obj.put("Name", matcher.group("Name")); + obj.put("Severity", standardizeSeverity(matcher.group("Severity"))); + } + + String ext = matcher.group("extensions"); + Matcher m = pext.matcher(ext); + + int index = 0; + String key = null; + String value = null; + Map<String, String> labelMap = new HashMap<String, String>(); + + while (m.find()) { + if (key == null) { + key = ext.substring(index, m.start()); + index = m.end(); + if (!m.find()) { + break; + } + } + value = ext.substring(index, m.start()); + index = m.end(); + int v = value.lastIndexOf(" "); + if (v > 0) { + String temp = value.substring(0, v).trim(); + if (key.endsWith("Label")) { + labelMap.put(key.substring(0, key.length() - 5), temp); + } else { + obj.put(key, temp); + } + key = value.substring(v).trim(); + } + } + value = ext.substring(index); + + // Build a map of Label extensions to apply later + if (key.endsWith("Label")) { + 
labelMap.put(key.substring(0, key.length() - 5), value); + } else { + obj.put(key, value); + } + + // Apply the labels to custom fields + for (Entry<String, String> label : labelMap.entrySet()) { + mutate(obj, label.getKey(), label.getValue()); + } + + // Rename standard CEF fields to comply with Metron standards + obj = mutate(obj, "dst", "ip_dst_addr"); + obj = mutate(obj, "dpt", "ip_dst_port"); + obj = convertToInt(obj, "ip_dst_port"); + + obj = mutate(obj, "src", "ip_src_addr"); + obj = mutate(obj, "spt", "ip_src_port"); + obj = convertToInt(obj, "ip_src_port"); + + obj = mutate(obj, "act", "deviceAction"); + // applicationProtocol + obj = mutate(obj, "app", "protocol"); + + obj.put("original_string", cefString); + + // apply timestamp from message if present, using syslog timestamp, --- End diff -- Good point, submitting a fix now. > Create CEF Parser > ----------------- > > Key: METRON-157 > URL: https://issues.apache.org/jira/browse/METRON-157 > Project: Metron > Issue Type: New Feature > Reporter: Domenic Puzio > Priority: Minor > Labels: platform > > Create a parser for CEF (Common Event Format). CEF is a very common > format for security data sources; it is used by FireEye, Adallom, Imperva > WAF, CyberArk, and others. The parser should be flexible enough to work for > any of these data sources. CEF uses shorthand field names, so field names > should be changed to human-readable and Metron-friendly equivalents. CEF > custom labels (cs1Label, flexString1Label, etc.) should be converted > appropriately. > Below are sample messages and their expected parsed output. 
> Adallom CEF > 2016-04-01T09:29:11.356-0400 > CEF:0|Adallom|Adallom|1.0|56fe779ee4b0459f4e9a484a|ALERT_CABINET_EVENT_MATCH_AUDIT|0|msg=Activity > policy 'User download/view file' was triggered by 'scolb...@gmail.com' > suser=wander...@rock.com start=1459517280810 end=1459517280810 > audits=["AVPR-4oIPeFmuZ3CKKrg","AVPR-wx80cd9PUpAu2aj","AVPR-6XGPeFmuZ3CKKvx","AVPSALn_qE4Kgs_8_yK9","AVPSASW3gw_f3aEvgEmi"] > services=["APPID_SXC"] users=["lva...@hotmail.com"] > cs6=https://abcd-remote.console.arc.com/#/alerts/56fe779ee4b0459f4e9a484a > cs6Label=consoleUrl > ... > {"source.type":"adallom","device_version":"1.0","severity":"0","device_product":"Adallom","services":"[\"APPID_SXC\"]","src_username":"wander...@rock.com","message":"Activity > policy 'User download\/view file' was triggered by > 'scolb...@gmail.com'","users":"[\"lva...@hotmail.com\"]","consoleUrl":"https:\/\/abcd-remote.console.arc.com\/#\/alerts\/56fe779ee4b0459f4e9a484a","event_class_id":"56fe779ee4b0459f4e9a484a","original_string":"2016-04-01T09:29:11.356-0400 > > CEF:0|Adallom|Adallom|1.0|56fe779ee4b0459f4e9a484a|ALERT_CABINET_EVENT_MATCH_AUDIT|0|msg=Activity > policy 'User download\/view file' was triggered by 'scolb...@gmail.com' > suser=wander...@rock.com start=1459517280810 end=1459517280810 > audits=[\"AVPR-4oIPeFmuZ3CKKrg\",\"AVPR-wx80cd9PUpAu2aj\",\"AVPR-6XGPeFmuZ3CKKvx\",\"AVPSALn_qE4Kgs_8_yK9\",\"AVPSASW3gw_f3aEvgEmi\"] > services=[\"APPID_SXC\"] users=[\"lva...@hotmail.com\"] > cs6=https:\/\/abcd-remote.console.arc.com\/#\/alerts\/56fe779ee4b0459f4e9a484a > cs6Label=consoleUrl","header":"2016-04-01T09:29:11.356-0400 > CEF:0","event_name":"ALERT_CABINET_EVENT_MATCH_AUDIT","startTime":"1459517280810","device_vendor":"Adallom","endTime":"1459517280810","audits":"[\"AVPR-4oIPeFmuZ3CKKrg\",\"AVPR-wx80cd9PUpAu2aj\",\"AVPR-6XGPeFmuZ3CKKvx\",\"AVPSALn_qE4Kgs_8_yK9\",\"AVPSASW3gw_f3aEvgEmi\"]","timestamp":1459502951000} > CyberArk CEF > Mar 21 14:05:02 HHHPVATN1 
CEF:0|Cyber-Ark|Vault|7.20.0091|295|Retrieve > password|5|act=Retrieve password suser=spilgrim fname=Root\ABC phobos3 - COMP > dvc=120.99.70.3 shost=10.44.134.78 dhost= duser= externalId= app= reason= > cs1Label="Affected User Name" cs1= cs2Label="Safe Name" cs2=Security > Vulnerability Mgmt cs3Label="Device Type" cs3= cs4Label="Database" cs4= > cs5Label="Other info" cs5=101.198.70.93 cn1Label="Request Id" cn1= > cn2Label="Ticket Id" cn2=Needed to verify config files being pulled > msg=Needed to verify config files being pulled > ... > {"timestamp":1458569102000,"source.type":"cyberark","device_version":"7.20.0091","device_product":"Vault","fileName":"Root\\ABC > phobos3 - COMP","src_username":"spilgrim","\"Other > info\"":"101.198.70.93","\"Ticket Id\"":"Needed to verify config files being > pulled > ","deviceAddress":"120.99.70.3","severity":"5","deviceAction":"Retrieve > password","message":"Needed to verify config files being > pulled","event_class_id":"295","original_string":"Mar 21 14:05:02 HHHPVATN1 > CEF:0|Cyber-Ark|Vault|7.20.0091|295|Retrieve password|5|act=Retrieve password > suser=spilgrim fname=Root\\ABC phobos3 - COMP dvc=120.99.70.3 > shost=10.44.134.78 dhost= duser= externalId= app= reason= cs1Label=\"Affected > User Name\" cs1= cs2Label=\"Safe Name\" cs2=Security Vulnerability Mgmt > cs3Label=\"Device Type\" cs3= cs4Label=\"Database\" cs4= cs5Label=\"Other > info\" cs5=101.198.70.93 cn1Label=\"Request Id\" cn1= cn2Label=\"Ticket Id\" > cn2=Needed to verify config files being pulled msg=Needed to verify config > files being pulled","\"Safe Name\"":"Security Vulnerability > Mgmt","header":"Mar 21 14:05:02 HHHPVATN1 CEF:0","event_name":"Retrieve > password","device_vendor":"Cyber-Ark","src_hostname":"10.44.134.78"} > WAF CEF > <14>CEF:0|Imperva Inc.|SecureSphere|10.0.0.4_16|ABC - Secure Login.vm Page > Rate Limit UK - Source IP||High|act=alert dst=17.43.200.42 dpt=88 > duser=${Alert.username} src=10.31.45.69 spt=34435 proto=TCP rt=31 March 2016 > 
13:04:55 cat=Alert cs1= cs1Label=Policy cs2=ABC-Secure cs2Label=ServerGroup > cs3=servers_svc cs3Label=ServiceName cs4=server_app cs4Label=ApplicationName > cs5=QA cs5Label=Description > ... > {"source.type":"waf","device_version":"10.0.0.4_16","severity":"High","device_product":"SecureSphere","ServerGroup":"ABC-Secure","ApplicationName":"server_app","Description":"QA","deviceAction":"alert","ip_dst_port":"88","dst_username":"${Alert.username}","priority":"14","deviceEventCategory":"Alert","protocol":"TCP","ip_dst_addr":"17.43.200.42","ip_src_port":"34435","event_class_id":"ABC > - Secure Login.vm Page Rate Limit UK - Source > IP","ServiceName":"servers_svc","original_string":"<14>CEF:0|Imperva > Inc.|SecureSphere|10.0.0.4_16|ABC - Secure Login.vm Page Rate Limit UK - > Source IP||High|act=alert dst=17.43.200.42 dpt=88 duser=${Alert.username} > src=10.31.45.69 spt=34435 proto=TCP rt=31 March 2016 13:04:55 cat=Alert cs1= > cs1Label=Policy cs2=ABC-Secure cs2Label=ServerGroup cs3=servers_svc > cs3Label=ServiceName cs4=server_app cs4Label=ApplicationName cs5=QA > cs5Label=Description","header":"<14>CEF:0","device_vendor":"Imperva > Inc.","ip_src_addr":"10.31.45.69","timestamp":1459429495000} -- This message was sent by Atlassian JIRA (v6.3.15#6346)