[
https://issues.apache.org/jira/browse/NIFI-2747?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15945914#comment-15945914
]
ASF GitHub Bot commented on NIFI-2747:
--------------------------------------
Github user apiri commented on a diff in the pull request:
https://github.com/apache/nifi/pull/1619#discussion_r108529113
--- Diff:
nifi-nar-bundles/nifi-cybersecurity-bundle/nifi-cybersecurity-processors/src/main/java/org/apache/nifi/processors/cybersecurity/FuzzyHashContent.java
---
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.cybersecurity;
+
+import com.idealista.tlsh.TLSH;
+import com.idealista.tlsh.exceptions.InsufficientComplexityException;
+import info.debatty.java.spamsum.SpamSum;
+
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.ReadsAttribute;
+import org.apache.nifi.annotation.behavior.ReadsAttributes;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.SeeAlso;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+
+import org.apache.nifi.components.AllowableValue;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.processors.standard.HashContent;
+
+import org.apache.nifi.stream.io.StreamUtils;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
+
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@Tags({"hashing", "fuzzy-hashing", "cyber-security"})
+@CapabilityDescription("Calculates a fuzzy/locality-sensitive hash value
for the Content of a FlowFile and puts that " +
+ "hash value on the FlowFile as an attribute whose name is
determined by the <Hash Attribute Name> property." +
+ "Note: this processor only offers non-criptographic hash
algorithms. And it should be not be " +
+ "seen as a replacement to the HashContent processor")
+
+@SeeAlso({HashContent.class})
+@ReadsAttributes({@ReadsAttribute(attribute="", description="")})
+@WritesAttributes({@WritesAttribute(attribute = "<Hash Attribute Name>",
description = "This Processor adds an attribute whose value is the result of
Hashing the "
+ + "existing FlowFile content. The name of this attribute is
specified by the <Hash Attribute Name> property")})
+
+public class FuzzyHashContent extends AbstractProcessor {
+
+ public static final AllowableValue allowableValueSSDEEP = new
AllowableValue(
+ "ssdeep",
+ "ssdeep",
+ "Uses ssdeep / SpamSum 'context triggered piecewise hash'.");
+ public static final AllowableValue allowableValueTLSH = new
AllowableValue(
+ "tlsh",
+ "tlsh",
+ "Uses TLSH (Trend 'Locality Sensitive Hash'). Note: FlowFile
Content must be at least 512 characters long");
+
+ public static final PropertyDescriptor ATTRIBUTE_NAME = new
PropertyDescriptor.Builder()
+ .name("ATTRIBUTE_NAME")
+ .displayName("Hash Attribute Name")
+ .description("The name of the FlowFile Attribute into which
the Hash Value should be written. " +
+ "If the value already exists, it will be overwritten")
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .defaultValue("fuzzyhash.value")
+ .build();
+
+ public static final PropertyDescriptor HASH_ALGORITHM = new
PropertyDescriptor.Builder()
+ .name("HASH_ALGORITHM")
+ .displayName("Hashing Algorithm")
+ .description("The hashing algorithm utilised")
+ .allowableValues(allowableValueSSDEEP, allowableValueTLSH)
+ .required(true)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final Relationship REL_SUCCESS = new
Relationship.Builder()
+ .name("Success")
+ .description("Any FlowFile that is successfully hashed will be
sent to this Relationship.")
+ .build();
+
+ public static final Relationship REL_FAILURE = new
Relationship.Builder()
+ .name("Failure")
+ .description("Any FlowFile that is successfully hashed will be
sent to this Relationship.")
+ .build();
+
+ private List<PropertyDescriptor> descriptors;
+
+ private Set<Relationship> relationships;
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ final List<PropertyDescriptor> descriptors = new
ArrayList<PropertyDescriptor>();
+ descriptors.add(ATTRIBUTE_NAME);
+ descriptors.add(HASH_ALGORITHM);
+ this.descriptors = Collections.unmodifiableList(descriptors);
+
+ final Set<Relationship> relationships = new
HashSet<Relationship>();
+ relationships.add(REL_SUCCESS);
+ relationships.add(REL_FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return this.relationships;
+ }
+
+ @Override
+ public final List<PropertyDescriptor>
getSupportedPropertyDescriptors() {
+ return descriptors;
+ }
+
+ @OnScheduled
+ public void onScheduled(final ProcessContext context) {
+
+ }
+
+ @Override
+ public void onTrigger(final ProcessContext context, final
ProcessSession session) throws ProcessException {
+ FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final ComponentLog logger = getLogger();
+ final String algorithm =
context.getProperty(HASH_ALGORITHM).getValue();
+
+
+ final AtomicReference<String> hashValueHolder = new
AtomicReference<>(null);
+
+ try {
+ session.read(flowFile, new InputStreamCallback() {
+ @Override
+ public void process(final InputStream in) throws
IOException {
+ try (ByteArrayOutputStream holder = new
ByteArrayOutputStream()) {
+ StreamUtils.copy(in,holder);
+
+ if
(algorithm.equals(allowableValueSSDEEP.getValue())) {
+ hashValueHolder.set(new
SpamSum().HashString(holder.toString()));
+ }
+
+ if
(algorithm.equals(allowableValueTLSH.getValue())) {
--- End diff --
Same as above
> Add Processor support SSDeep (SpamSum) hashes (and possibly others e.g. TLSH)
> -----------------------------------------------------------------------------
>
> Key: NIFI-2747
> URL: https://issues.apache.org/jira/browse/NIFI-2747
> Project: Apache NiFi
> Issue Type: Improvement
> Affects Versions: 1.0.0
> Reporter: Andre F de Miranda
> Assignee: Andre F de Miranda
>
> It would be great if we had a processor to support fuzzy hashing algorithms
> such as SpamSum/SSDeep.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)