[ 
https://issues.apache.org/jira/browse/NIFI-3726?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15984887#comment-15984887
 ] 

ASF GitHub Bot commented on NIFI-3726:
--------------------------------------

Github user trixpan commented on a diff in the pull request:

    https://github.com/apache/nifi/pull/1692#discussion_r113463146
  
    --- Diff: 
nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash.java
 ---
    @@ -0,0 +1,358 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.nifi.processors.cybersecurity;
    +
    +import com.idealista.tlsh.digests.Digest;
    +import com.idealista.tlsh.digests.DigestBuilder;
    +import info.debatty.java.spamsum.SpamSum;
    +
    +import org.apache.nifi.annotation.behavior.EventDriven;
    +import org.apache.nifi.annotation.behavior.InputRequirement;
    +import org.apache.nifi.annotation.behavior.SideEffectFree;
    +import org.apache.nifi.annotation.behavior.SupportsBatching;
    +import org.apache.nifi.annotation.behavior.WritesAttribute;
    +import org.apache.nifi.annotation.behavior.WritesAttributes;
    +import org.apache.nifi.annotation.documentation.CapabilityDescription;
    +import org.apache.nifi.annotation.documentation.SeeAlso;
    +import org.apache.nifi.annotation.documentation.Tags;
    +import org.apache.nifi.annotation.lifecycle.OnScheduled;
    +import org.apache.nifi.components.AllowableValue;
    +import org.apache.nifi.components.PropertyDescriptor;
    +import org.apache.nifi.flowfile.FlowFile;
    +import org.apache.nifi.logging.ComponentLog;
    +import org.apache.nifi.processor.ProcessContext;
    +import org.apache.nifi.processor.ProcessSession;
    +import org.apache.nifi.processor.ProcessorInitializationContext;
    +import org.apache.nifi.processor.Relationship;
    +import org.apache.nifi.processor.exception.ProcessException;
    +import org.apache.nifi.processor.util.StandardValidators;
    +
    +import java.io.BufferedReader;
    +import java.io.File;
    +import java.io.FileInputStream;
    +import java.io.FileNotFoundException;
    +import java.io.IOException;
    +import java.io.InputStreamReader;
    +import java.util.ArrayList;
    +import java.util.Collections;
    +import java.util.HashSet;
    +import java.util.List;
    +import java.util.Map;
    +import java.util.Scanner;
    +import java.util.Set;
    +import java.util.concurrent.ConcurrentHashMap;
    +
    +
    +@EventDriven
    +@SideEffectFree
    +@SupportsBatching
    +@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
    +@SeeAlso({FuzzyHashContent.class})
    +@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
    +@CapabilityDescription("Compares an attribute containing a Fuzzy Hash 
against a file containing a list of fuzzy hashes, " +
    +        "appending an attribute to the FlowFile in case of a successful 
match.")
    +
    +@WritesAttributes({
    +        @WritesAttribute(attribute = "XXXX.N.match", description = "The 
match that ressambles the attribute specified " +
    +                "by the <Hash Attribute Name> property. Note that: 'XXX' 
gets replaced with the <Hash Attribute Name>"),
    +        @WritesAttribute(attribute = "XXXX.N.similarity", description = 
"The similarity score between this flowfile" +
    +                "and the its match of the same number N. Note that: 'XXX' 
gets replaced with the <Hash Attribute Name>")})
    +
    +public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
    +    public static final AllowableValue singleMatch = new AllowableValue(
    +            "single",
    +            "single",
    +            "Send FlowFile to matched after the first match above 
threshold");
    +    public static final AllowableValue multiMatch = new AllowableValue(
    +            "multi-match",
    +            "multi-match",
    +            "Iterate full list of hashes before deciding to send FlowFile 
to matched or unmatched");
    +
    +    public static final PropertyDescriptor HASH_LIST_FILE = new 
PropertyDescriptor.Builder()
    +            .name("HASH_LIST_FILE")
    +            .displayName("Hash List source file")
    +            .description("Path to the file containing hashes to be 
validated against")
    +            .required(true)
    +            .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
    +            .build();
    +
    +    // Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME 
from parent class
    +
    +
    +
    +    public static final PropertyDescriptor MATCH_THRESHOLD = new 
PropertyDescriptor.Builder()
    +            // Note that while both TLSH and SSDeep seems to return int, 
we treat them as double in code.
    +            // The rationale behind being the expectation that other 
algorithms thatmay return double values
    +            // may be added to the processor later on.
    +            .name("MATCH_THRESHOLD")
    +            .displayName("Match threshold")
    +            .description("The similarity score must exceed or be equal to 
in order for" +
    +                    "match to be considered true. Refer to Additional 
Information for differences between TLSH " +
    +                    "and SSDEEP scores and how they relate to this 
property.")
    +            .required(true)
    +            .addValidator(StandardValidators.NUMBER_VALIDATOR)
    +            .build();
    +
    +    public static final PropertyDescriptor MATCHING_MODE = new 
PropertyDescriptor.Builder()
    +            .name("MATCHING_MODE")
    +            .displayName("Matching mode")
    +            .description("The ")
    +            .required(true)
    +            .allowableValues(singleMatch,multiMatch)
    +            .defaultValue(singleMatch.getValue())
    +            .build();
    +
    +    public static final Relationship REL_MATCH = new Relationship.Builder()
    +            .name("Matched")
    +            .description("Any FlowFile that is successfully matched to an 
existing hash will be sent to this Relationship.")
    +            .build();
    +
    +    public static final Relationship REL_NON_MATCH = new 
Relationship.Builder()
    +            .name("non-match")
    --- End diff --
    
    Good call. Renamed to "found" and "not found", as per GeoEnrichIP, and 
also added "failure".


> Create FuzzyHash comparison processor
> -------------------------------------
>
>                 Key: NIFI-3726
>                 URL: https://issues.apache.org/jira/browse/NIFI-3726
>             Project: Apache NiFi
>          Issue Type: Improvement
>            Reporter: Andre F de Miranda
>            Assignee: Andre F de Miranda
>
> Now that the NiFi cyber-security package supports "Fuzzy Hashing", it may be a 
> good idea to add a processor that makes use of it for comparison and 
> routing of matches.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to