[ 
https://issues.apache.org/jira/browse/NIFI-2341?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15562687#comment-15562687
 ] 

ASF GitHub Bot commented on NIFI-2341:
--------------------------------------

Github user mattyb149 commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/785#discussion_r82633006
  
    --- Diff: nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ParseCEF.java ---
    @@ -0,0 +1,323 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.nifi.processors.standard;
    +
    +import com.fasterxml.jackson.core.JsonGenerator;
    +import com.fasterxml.jackson.core.JsonProcessingException;
    +import com.fasterxml.jackson.databind.JsonSerializer;
    +import com.fasterxml.jackson.databind.ObjectMapper;
    +import com.fasterxml.jackson.databind.SerializerProvider;
    +import com.fasterxml.jackson.databind.module.SimpleModule;
    +
    +import com.fasterxml.jackson.databind.node.ObjectNode;
    +import com.fluenda.parcefone.event.CEFHandlingException;
    +import com.fluenda.parcefone.event.CommonEvent;
    +import com.fluenda.parcefone.parser.CEFParser;
    +
    +import com.martiansoftware.macnificent.MacAddress;
    +
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
    +import org.apache.nifi.annotation.behavior.SideEffectFree;
    +import org.apache.nifi.annotation.behavior.SupportsBatching;
    +import org.apache.nifi.annotation.behavior.WritesAttribute;
    +import org.apache.nifi.annotation.behavior.WritesAttributes;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.SeeAlso;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.annotation.lifecycle.OnScheduled;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.processor.AbstractProcessor;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.io.InputStreamCallback;
    +import org.apache.nifi.processor.io.OutputStreamCallback;
    +import org.apache.nifi.processor.util.StandardValidators;
    +import org.apache.nifi.stream.io.BufferedOutputStream;
    +import org.apache.nifi.stream.io.StreamUtils;
    +
    +import java.io.IOException;
    +import java.io.InputStream;
    +import java.io.OutputStream;
    +import java.net.InetAddress;
    +import java.text.SimpleDateFormat;
    +import java.time.ZoneId;
    +import java.time.ZonedDateTime;
    +import java.time.format.DateTimeFormatter;
    +import java.util.ArrayList;
    +import java.util.Date;
    +import java.util.HashMap;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Set;
    +import java.util.TimeZone;
    +
    +@EventDriven
    +@SideEffectFree
    +@SupportsBatching
    +@InputRequirement(Requirement.INPUT_REQUIRED)
    +@Tags({"logs", "cef", "attributes", "system", "event", "message"})
    +@CapabilityDescription("Parses the contents of a CEF formatted message and 
adds attributes to the FlowFile for " +
    +        "headers and extensions of the parts of the CEF message.\n" +
    +        "Note: This Processor expects CEF messages WITHOUT the syslog 
headers (i.e. starting at \"CEF:0\"")
    +@WritesAttributes({@WritesAttribute(attribute = "cef.header.version", description = "The version of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.deviceVendor", description = "The Device Vendor of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.deviceProduct", description = "The Device Product of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.deviceVersion", description = "The Device Version of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.deviceEventClassId", description = "The Device Event Class ID of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.name", description = "The name of the CEF message."),
    +    @WritesAttribute(attribute = "cef.header.severity", description = "The severity of the CEF message."),
    +    @WritesAttribute(attribute = "cef.extension.*", description = "The key and value generated by the parsing of the message.")})
    +@SeeAlso({ParseSyslog.class})
    +
    +public class ParseCEF extends AbstractProcessor {
    +
    +    // There should be no date format other than internationally agreed formats...
    +    // flowfile-attributes uses Java 8 time to handle dates (as Date objects are not timezone-aware)
    +    private final static DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
    +
    +    // For some reason Jackson doesn't seem to be able to use DateTimeFormatter,
    +    // so we use a SimpleDateFormat to format dates within flowfile-content
    +    private final SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
    +
    +
    +    // add a TZ object to be used by flowfile-attribute routine
    +    private String tzId = null;
    +
    +    // Add serializer and mapper
    +    private static final ObjectMapper mapper = new ObjectMapper();
    +
    +    public static final String DESTINATION_CONTENT = "flowfile-content";
    +    public static final String DESTINATION_ATTRIBUTES = "flowfile-attribute";
    +    public static final PropertyDescriptor FIELDS_DESTINATION = new PropertyDescriptor.Builder()
    +        .name("FIELDS_DESTINATION")
    +        .displayName("Parsed fields destination")
    +        .description(
    +                "Indicates whether the results of the CEF parser are written " +
    +                "to the FlowFile content or a FlowFile attribute; if using " + DESTINATION_ATTRIBUTES +
    +                ", fields will be populated as attributes. " +
    +                "If set to " + DESTINATION_CONTENT + ", the CEF extension field will be converted into " +
    +                "a flat JSON object.")
    +        .required(true)
    +        .allowableValues(DESTINATION_CONTENT, DESTINATION_ATTRIBUTES)
    +        .defaultValue(DESTINATION_CONTENT)
    +        .build();
    +
    +    public static final PropertyDescriptor APPEND_RAW_MESSAGE_TO_JSON = new PropertyDescriptor.Builder()
    +            .name("APPEND_RAW_MESSAGE_TO_JSON")
    +            .displayName("Append raw message to JSON")
    +            .description("When using flowfile-content, add the original flow content as JSON node _raw, " +
    +                    "ignored otherwise.")
    +            .addValidator(StandardValidators.BOOLEAN_VALIDATOR)
    +            .required(true)
    +            .defaultValue("true")
    +            .build();
    +
    +    public static final String UTC = "UTC";
    +    public static final String LOCAL_TZ = "Local Timezone (system Default)";
    +    public static final PropertyDescriptor TIME_REPRESENTATION = new PropertyDescriptor.Builder()
    +        .name("TIME_REPRESENTATION")
    +        .displayName("Timezone")
    +        .description("Timezone to be used when representing date fields. UTC will convert all " +
    +                "dates to UTC, while Local Timezone will convert them to the timezone used by NiFi.")
    +        .allowableValues(UTC, LOCAL_TZ)
    +        .required(true)
    +        .defaultValue(LOCAL_TZ)
    +        .build();
    +
    +    static final Relationship REL_FAILURE = new Relationship.Builder()
    +        .name("failure")
    +        .description("Any FlowFile that could not be parsed as a CEF 
message will be transferred to this Relationship without any attributes being 
added")
    +        .build();
    +    static final Relationship REL_SUCCESS = new Relationship.Builder()
    +        .name("success")
    +        .description("Any FlowFile that is successfully parsed as a CEF 
message will be to this Relationship.")
    +        .build();
    +
    +    @Override
    +    public List<PropertyDescriptor> getSupportedPropertyDescriptors() {
    +        final List<PropertyDescriptor> properties = new ArrayList<>();
    +        properties.add(FIELDS_DESTINATION);
    +        properties.add(APPEND_RAW_MESSAGE_TO_JSON);
    +        properties.add(TIME_REPRESENTATION);
    +        return properties;
    +    }
    +
    +    @Override
    +    public Set<Relationship> getRelationships() {
    +        final Set<Relationship> relationships = new HashSet<>();
    +        relationships.add(REL_FAILURE);
    +        relationships.add(REL_SUCCESS);
    +        return relationships;
    +    }
    +
    +    @OnScheduled
    +    public void OnScheduled(final ProcessContext context) {
    +
    +        // Configure jackson mapper before spawning onTriggers
    +        final SimpleModule module = new SimpleModule()
    +                                        .addSerializer(MacAddress.class, new MacAddressToStringSerializer());
    +        this.mapper.registerModule(module);
    +        this.mapper.setDateFormat(this.simpleDateFormat);
    +
    +        switch (context.getProperty(TIME_REPRESENTATION).getValue()) {
    +            case LOCAL_TZ:
    +                // set the mapper TZ to local TZ
    +                this.mapper.setTimeZone(TimeZone.getDefault());
    +                tzId = TimeZone.getDefault().getID();
    +                break;
    +            case UTC:
    +                // set the mapper TZ to UTC
    +                this.mapper.setTimeZone(TimeZone.getTimeZone(UTC));
    +                tzId = UTC;
    +                break;
    +        }
    +    }
    +
    +    @Override
    +    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    +        FlowFile flowFile = session.get();
    +        if (flowFile == null) {
    +            return;
    +        }
    +
    +        final CEFParser parser = new CEFParser();
    +        final byte[] buffer = new byte[(int) flowFile.getSize()];
    +        session.read(flowFile, new InputStreamCallback() {
    +            @Override
    +            public void process(final InputStream in) throws IOException {
    +                StreamUtils.fillBuffer(in, buffer);
    +            }
    +        });
    +
    +        CommonEvent event;
    +
    +        try {
    +            event = parser.parse(buffer, true);
    +        } catch (Exception e) {
    +            // This should never trigger but adding in here as a fencing mechanism to
    +            // address possible ParCEFone bugs.
    +            getLogger().error("Parser returned unexpected Exception {} while processing {}; routing to failure", new Object[] {e, flowFile});
    +            session.transfer(flowFile, REL_FAILURE);
    +            return;
    +        }
    +
    +
    +        // ParCEFone returns null every time it cannot parse an
    +        // event, so we test for that here
    +        if (event == null) {
    +            getLogger().error("Failed to parse {} as a CEF message: it does not conform to the CEF standard; routing to failure", new Object[] {flowFile});
    +            session.transfer(flowFile, REL_FAILURE);
    +            return;
    +        }
    +
    +
    +        try {
    +            final String destination = context.getProperty(FIELDS_DESTINATION).getValue();
    +
    +            switch (destination) {
    +                case DESTINATION_ATTRIBUTES:
    +
    +                    final Map<String, String> attributes = new HashMap<>();
    +
    +                    // Process KVs of the Header field
    +                    for (Map.Entry<String, Object> entry : event.getHeader().entrySet()) {
    +                        attributes.put("cef.header." + entry.getKey(), prettyResult(entry.getValue(), tzId));
    +                    }
    +
    +                    // Process KVs composing the Extension field
    +                    for (Map.Entry<String, Object> entry : event.getExtension(true).entrySet()) {
    +                        attributes.put("cef.extension." + entry.getKey(), prettyResult(entry.getValue(), tzId));
    +                    }
    +
    +                    flowFile = session.putAllAttributes(flowFile, attributes);
    +                    break;
    +
    +                case DESTINATION_CONTENT:
    +
    +                    ObjectNode results = mapper.createObjectNode();
    +
    +                    // Add two JSON objects containing one CEF field each
    +                    results.set("header", mapper.valueToTree(event.getHeader()));
    +                    results.set("extension", mapper.valueToTree(event.getExtension(true)));
    +
    +                    // Add the original CEF content
    +                    // to the resulting JSON
    +                    if (context.getProperty(APPEND_RAW_MESSAGE_TO_JSON).asBoolean()) {
    +                        results.set("_raw", mapper.valueToTree(new String(buffer)));
    +                    }
    +
    +                    flowFile = session.write(flowFile, new OutputStreamCallback() {
    +                        @Override
    +                        public void process(OutputStream out) throws IOException {
    +                            try (OutputStream outputStream = new BufferedOutputStream(out)) {
    +                                outputStream.write(mapper.writeValueAsBytes(results));
    +                            }
    +                        }
    +                    });
    +
    +                    // Update the provenance for good measure
    +                    session.getProvenanceReporter().modifyContent(flowFile, "Replaced content with parsed CEF fields and values");
    --- End diff ---
    
    Since the output in this case is JSON, it should update the "mime.type" attribute with "application/json". I don't see a MIME type for CEF, so text/plain is probably fine for the DESTINATION_ATTRIBUTES case.
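
    For reference, a minimal sketch (not part of the PR) of one way this suggestion could be applied in the DESTINATION_CONTENT branch, assuming the flowFile/session variables from onTrigger() in the diff above and NiFi's CoreAttributes enum:

        // Hypothetical addition, not in the diff above.
        // Requires: import org.apache.nifi.flowfile.attributes.CoreAttributes;
        // Placed after session.write(...) has replaced the content with the parsed JSON:
        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");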


> Create a processor to parse logs formatted using CEF
> ----------------------------------------------------
>
>                 Key: NIFI-2341
>                 URL: https://issues.apache.org/jira/browse/NIFI-2341
>             Project: Apache NiFi
>          Issue Type: Improvement
>            Reporter: Andre
>            Assignee: Andre
>             Fix For: 1.1.0
>
>
> As NiFi continues to increase its ability to complement SIEM, Splunk and ELK 
> deployments, a number of users will be looking to parse CEF formatted 
> logs[1][2].
> CEF is a format specified by ArcSight (now part of HPE) and is described in 
> detail here:
> https://www.protect724.hpe.com/docs/DOC-1072
> [1] 
> http://apache-nifi.1125220.n5.nabble.com/Suggestion-of-processors-td9795.html
> [2] 
> https://community.hortonworks.com/questions/43185/which-processor-is-used-to-parse-cef-format-logs.html



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
