[
https://issues.apache.org/jira/browse/NIFI-3726?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15984883#comment-15984883
]
ASF GitHub Bot commented on NIFI-3726:
--------------------------------------
Github user trixpan commented on a diff in the pull request:
https://github.com/apache/nifi/pull/1692#discussion_r113462459
--- Diff:
nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/CompareFuzzyHash.java
---
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity;
+
+import com.idealista.tlsh.digests.Digest;
+import com.idealista.tlsh.digests.DigestBuilder;
+import info.debatty.java.spamsum.SpamSum;
+
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@SeeAlso({FuzzyHashContent.class})
+@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
+@CapabilityDescription("Compares an attribute containing a Fuzzy Hash
against a file containing a list of fuzzy hashes, " +
+ "appending an attribute to the FlowFile in case of a successful
match.")
+
+@WritesAttributes({
+ @WritesAttribute(attribute = "XXXX.N.match", description = "The
match that ressambles the attribute specified " +
+ "by the <Hash Attribute Name> property. Note that: 'XXX'
gets replaced with the <Hash Attribute Name>"),
+ @WritesAttribute(attribute = "XXXX.N.similarity", description =
"The similarity score between this flowfile" +
+ "and the its match of the same number N. Note that: 'XXX'
gets replaced with the <Hash Attribute Name>")})
+
+public class CompareFuzzyHash extends AbstractFuzzyHashProcessor {
+ public static final AllowableValue singleMatch = new AllowableValue(
+ "single",
+ "single",
+ "Send FlowFile to matched after the first match above
threshold");
+ public static final AllowableValue multiMatch = new AllowableValue(
+ "multi-match",
+ "multi-match",
+ "Iterate full list of hashes before deciding to send FlowFile
to matched or unmatched");
+
+ public static final PropertyDescriptor HASH_LIST_FILE = new
PropertyDescriptor.Builder()
+ .name("HASH_LIST_FILE")
+ .displayName("Hash List source file")
+ .description("Path to the file containing hashes to be
validated against")
+ .required(true)
+ .addValidator(StandardValidators.FILE_EXISTS_VALIDATOR)
+ .build();
+
+ // Note we add a PropertyDescriptor HASH_ALGORITHM and ATTRIBUTE_NAME
from parent class
+
+
+
+ public static final PropertyDescriptor MATCH_THRESHOLD = new
PropertyDescriptor.Builder()
+ // Note that while both TLSH and SSDeep seem to return int,
we treat them as double in code.
+ // The rationale is the expectation that other
algorithms that may return double values
+ // may be added to the processor later on.
+ .name("MATCH_THRESHOLD")
+ .displayName("Match threshold")
+ .description("The similarity score must exceed or be equal to
in order for" +
+ "match to be considered true. Refer to Additional
Information for differences between TLSH " +
+ "and SSDEEP scores and how they relate to this
property.")
+ .required(true)
+ .addValidator(StandardValidators.NUMBER_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor MATCHING_MODE = new
PropertyDescriptor.Builder()
+ .name("MATCHING_MODE")
+ .displayName("Matching mode")
+ .description("The ")
--- End diff --
fixed
> Create FuzzyHash comparison processor
> -------------------------------------
>
> Key: NIFI-3726
> URL: https://issues.apache.org/jira/browse/NIFI-3726
> Project: Apache NiFi
> Issue Type: Improvement
> Reporter: Andre F de Miranda
> Assignee: Andre F de Miranda
>
> Now that the NiFi cyber-security package supports "Fuzzy Hashing", it may be a
> good idea to add a processor that makes use of it for comparison and
> routing of matches.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)