[
https://issues.apache.org/jira/browse/NIFI-1965?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15399838#comment-15399838
]
ASF GitHub Bot commented on NIFI-1965:
--------------------------------------
Github user pvillard31 commented on a diff in the pull request:
https://github.com/apache/nifi/pull/496#discussion_r72842053
--- Diff:
nifi-nar-bundles/nifi-enrich-bundle/nifi-enrich-processors/src/main/java/org/apache/nifi/processors/enrich/QueryDNS.java
---
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.processors.enrich;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import javax.naming.Context;
+import javax.naming.NameNotFoundException;
+import javax.naming.NamingEnumeration;
+import javax.naming.NamingException;
+import javax.naming.directory.Attributes;
+import javax.naming.directory.BasicAttributes;
+import javax.naming.directory.DirContext;
+import javax.naming.directory.InitialDirContext;
+
+import org.apache.commons.lang3.StringUtils;
+
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SideEffectFree;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+
+
+@EventDriven
+@SideEffectFree
+@SupportsBatching
+@Tags({"dns", "enrich", "ip"})
+@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
+@CapabilityDescription("A powerful DNS query processor primary designed to
enrich DataFlows with DNS based APIs " +
+ "(e.g. RBLs, ShadowServer's ASN lookup) but that can be also used
to perform regular DNS lookups.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "enrich.dns.record*.group*",
description = "The captured fields of the DNS query response for each of the
records received"),
+})
+public class QueryDNS extends AbstractEnrichProcessor {
+
+ public static final PropertyDescriptor DNS_QUERY_TYPE = new
PropertyDescriptor.Builder()
+ .name("DNS Query Type")
+ .description("The DNS query type to be used by the processor
(e.g. TXT, A)")
+ .required(true)
+ .defaultValue("TXT")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor DNS_SERVER = new
PropertyDescriptor.Builder()
+ .name("DNS Servers")
+ .description("A comma separated list of DNS servers to be
used. (Defaults to system wide if none is used)")
+ .required(false)
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor DNS_TIMEOUT = new
PropertyDescriptor.Builder()
+ .name("DNS Query Timeout")
+ .description("The amount of milliseconds to wait until
considering a query as failed")
+ .required(true)
+ .defaultValue("1500")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor DNS_RETRIES = new
PropertyDescriptor.Builder()
+ .name("DNS Query Retries")
+ .description("The number of attempts before giving up and
moving on")
+ .required(true)
+ .defaultValue("1")
+ .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+
+ private final static List<PropertyDescriptor> propertyDescriptors;
+ private final static Set<Relationship> relationships;
+
+ private DirContext ictx;
+
+ // Assign the default and generally used contextFactory value
+ private String contextFactory =
com.sun.jndi.dns.DnsContextFactory.class.getName();;
+
+ static {
+ List<PropertyDescriptor> props = new ArrayList<>();
+ props.add(QUERY_INPUT);
+ props.add(QUERY_PARSER);
+ props.add(QUERY_PARSER_INPUT);
+ props.add(DNS_RETRIES);
+ props.add(DNS_TIMEOUT);
+ props.add(DNS_SERVER);
+ props.add(DNS_QUERY_TYPE);
+ propertyDescriptors = Collections.unmodifiableList(props);
+
+ Set<Relationship> rels = new HashSet<>();
+ rels.add(REL_FOUND);
+ rels.add(REL_NOT_FOUND);
+ relationships = Collections.unmodifiableSet(rels);
+ }
+
+ private AtomicBoolean initialized = new AtomicBoolean(false);
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return propertyDescriptors;
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return relationships;
+ }
+
+ @Override
+ public void onTrigger(ProcessContext context, ProcessSession session)
throws ProcessException {
+ if (!initialized.get()) {
+ initializeResolver(context);
+ getLogger().warn("Resolver was initialized at onTrigger
instead of onScheduled");
+
+ }
+
+ FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final String queryType =
context.getProperty(DNS_QUERY_TYPE).getValue();
+ final String queryInput =
context.getProperty(QUERY_INPUT).evaluateAttributeExpressions(flowFile).getValue();
+ final String queryParser =
context.getProperty(QUERY_PARSER).getValue();
+ final String queryRegex =
context.getProperty(QUERY_PARSER_INPUT).getValue();
+
+ boolean found = false;
+ try {
+ Attributes results = doLookup(queryInput, queryType);
+ // NOERROR & NODATA seem to return empty Attributes handled
bellow
+ // but defaulting to not found in any case
+ if (results.size() < 1) {
+ found = false;
+
+
+ } else {
+ int recordNumber = 0;
+ NamingEnumeration<?> dnsEntryIterator =
results.get(queryType).getAll();
+ while (dnsEntryIterator.hasMoreElements()) {
+ String dnsRecord = dnsEntryIterator.next().toString();
+ // While NXDOMAIN is being generated by doLookup catch
+ if (dnsRecord != "NXDOMAIN") {
+ Map<String, String> parsedResults =
parseResponse(recordNumber, dnsRecord, queryParser, queryRegex, "dns");
+ flowFile = session.putAllAttributes(flowFile,
parsedResults);
+ found = true;
+
+
+ } else {
+ // Otherwise treat as not found
+ found = false;
+
+
+ }
+ // Increase the counter and iterate over next
record....
+ recordNumber++;
+ }
+ }
+ } catch (NamingException e) {
+ session.rollback(true);
+ getLogger().error("Something went wrong while processing the
field. Please review your configuration");
--- End diff --
Please log the exception as well: ``error(message, exception)``.
Also, there is another issue here: you roll back the session (and the flow file
with it) but then still try to transfer the flow file to a relationship. This
leads to the following exception:
````
2016-07-29 20:38:24,693 WARN [Timer-Driven Process Thread-1]
o.a.n.c.t.ContinuallyRunProcessorTask
org.apache.nifi.processor.exception.FlowFileHandlingException:
StandardFlowFileRecord[uuid=0b54f258-cf0e-47c3-a9b3-79fa56d5d6b0,claim=,offset=0,name=496405305778152,size=0]
is not known in this session (StandardProcessSession[id=841971])
at
org.apache.nifi.controller.repository.StandardProcessSession.validateRecordState(StandardProcessSession.java:2589)
~[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.controller.repository.StandardProcessSession.transfer(StandardProcessSession.java:1562)
~[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.processors.enrich.QueryDNS.onTrigger(QueryDNS.java:200) ~[na:na]
at
org.apache.nifi.processor.AbstractProcessor.onTrigger(AbstractProcessor.java:27)
~[nifi-api-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.controller.StandardProcessorNode.onTrigger(StandardProcessorNode.java:1060)
~[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:136)
[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.controller.tasks.ContinuallyRunProcessorTask.call(ContinuallyRunProcessorTask.java:47)
[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
org.apache.nifi.controller.scheduling.TimerDrivenSchedulingAgent$1.run(TimerDrivenSchedulingAgent.java:123)
[nifi-framework-core-1.0.0-SNAPSHOT.jar:1.0.0-SNAPSHOT]
at
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
[na:1.8.0_77]
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
[na:1.8.0_77]
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
[na:1.8.0_77]
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
[na:1.8.0_77]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[na:1.8.0_77]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[na:1.8.0_77]
at java.lang.Thread.run(Thread.java:745) [na:1.8.0_77]
````
> Create a QueryDNS processor
> ---------------------------
>
> Key: NIFI-1965
> URL: https://issues.apache.org/jira/browse/NIFI-1965
> Project: Apache NiFi
> Issue Type: Bug
> Reporter: Andre
>
> As part of a data pipeline, security teams frequently must enrich data using
> DNS enabled APIs such as:
> ShadowServer BGP and ASN lookup via DNS
> https://www.shadowserver.org/wiki/pmwiki.php/Services/IP-BGP#toc7
> Team Cymru Malware Hash Registry
> http://www.team-cymru.org/MHR.html
> Spamhaus (SBL, XBL, etc)
> and others
> QueryDNS will use an expression language enabled property to run a query
> against DNS and add the raw result to an attribute (for later processing if
> necessary).
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)