Repository: nifi
Updated Branches:
  refs/heads/master a3edd34db -> 28067a29f


NIFI-4869 Added SAX utility method for SplitXML processor.
Added unit tests.
Added test resources to RAT exclude list.
This closes #2466


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/28067a29
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/28067a29
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/28067a29

Branch: refs/heads/master
Commit: 28067a29fd13cdf8e21b440fc65c6dd67872522f
Parents: a3edd34
Author: Andy LoPresto <alopre...@apache.org>
Authored: Mon Feb 12 21:10:16 2018 -0800
Committer: Matt Gilman <matt.c.gil...@gmail.com>
Committed: Tue Feb 13 15:29:29 2018 -0500

----------------------------------------------------------------------
 .../org/apache/nifi/security/xml/XmlUtils.java  |  27 ++-
 .../nifi-standard-processors/pom.xml            |   2 +
 .../nifi/processors/standard/SplitXml.java      |  24 +-
 .../processors/standard/SplitXmlTest.groovy     |  85 +++++++
 .../src/test/resources/xxe_from_report.xml      |   2 +
 .../src/test/resources/xxe_template.xml         | 230 +++++++++++++++++++
 6 files changed, 354 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java
----------------------------------------------------------------------
diff --git 
a/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java
 
b/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java
index 99c90a6..e384c5b 100644
--- 
a/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java
+++ 
b/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java
@@ -16,11 +16,17 @@
  */
 package org.apache.nifi.security.xml;
 
+import java.io.InputStream;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 import javax.xml.transform.stream.StreamSource;
-import java.io.InputStream;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
 
 public class XmlUtils {
 
@@ -41,4 +47,23 @@ public class XmlUtils {
         xif.setProperty(XMLInputFactory.SUPPORT_DTD, false);
         return xif.createXMLStreamReader(source);
     }
+
+    public static XMLReader createSafeSaxReader(SAXParserFactory 
saxParserFactory, ContentHandler contentHandler) throws SAXException, 
ParserConfigurationException {
+        if (saxParserFactory == null) {
+            throw new IllegalArgumentException("The provided SAX parser 
factory cannot be null");
+        }
+
+        if (contentHandler == null) {
+            throw new IllegalArgumentException("The provided SAX content 
handler cannot be null");
+        }
+
+        
saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities",
 false);
+        
saxParserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl",
 true);
+
+        SAXParser saxParser = saxParserFactory.newSAXParser();
+        XMLReader xmlReader = saxParser.getXMLReader();
+        xmlReader.setContentHandler(contentHandler);
+
+        return xmlReader;
+    }
 }

http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
index 3cdd787..0fffbb8 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml
@@ -515,6 +515,8 @@
                         <!-- This file is copied from 
https://github.com/jeremyh/jBCrypt
                             because the binary is compiled for Java 8 and we 
must support Java 7 -->
                         
<exclude>src/main/java/org/apache/nifi/security/util/crypto/bcrypt/BCrypt.java</exclude>
+                        <exclude>src/test/resources/xxe_template.xml</exclude>
+                        
<exclude>src/test/resources/xxe_from_report.xml</exclude>
                     </excludes>
                 </configuration>
             </plugin>

http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java
index 502f7f3..de513c8 100644
--- 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java
@@ -16,6 +16,12 @@
  */
 package org.apache.nifi.processors.standard;
 
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
+import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
+
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -28,11 +34,8 @@ import java.util.TreeMap;
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-
 import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
-
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.nifi.annotation.behavior.EventDriven;
 import org.apache.nifi.annotation.behavior.InputRequirement;
@@ -54,7 +57,7 @@ import 
org.apache.nifi.processor.ProcessorInitializationContext;
 import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processor.util.StandardValidators;
 import org.apache.nifi.processors.standard.util.XmlElementNotifier;
-import org.apache.nifi.stream.io.BufferedInputStream;
+import org.apache.nifi.security.xml.XmlUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.Attributes;
@@ -64,12 +67,6 @@ import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
 
-import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
-import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
-import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
-import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
-import static 
org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
-
 @EventDriven
 @SideEffectFree
 @SupportsBatching
@@ -175,12 +172,9 @@ public class SplitXml extends AbstractProcessor {
 
         final AtomicBoolean failed = new AtomicBoolean(false);
         session.read(original, rawIn -> {
-            try (final InputStream in = new BufferedInputStream(rawIn)) {
-                SAXParser saxParser = null;
+            try (final InputStream in = new 
java.io.BufferedInputStream(rawIn)) {
                 try {
-                    saxParser = saxParserFactory.newSAXParser();
-                    final XMLReader reader = saxParser.getXMLReader();
-                    reader.setContentHandler(parser);
+                    final XMLReader reader = 
XmlUtils.createSafeSaxReader(saxParserFactory, parser);
                     reader.parse(new InputSource(in));
                 } catch (final ParserConfigurationException | SAXException e) {
                     logger.error("Unable to parse {} due to {}", new 
Object[]{original, e});

http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy
new file mode 100644
index 0000000..f04dca6
--- /dev/null
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.standard
+
+import org.apache.nifi.util.TestRunner
+import org.apache.nifi.util.TestRunners
+import org.junit.After
+import org.junit.Before
+import org.junit.BeforeClass
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.junit.runners.JUnit4
+import org.slf4j.Logger
+import org.slf4j.LoggerFactory
+
+import java.nio.file.Paths
+
+
+@RunWith(JUnit4.class)
+class SplitXmlTest extends GroovyTestCase {
+    private static final Logger logger = 
LoggerFactory.getLogger(SplitXmlTest.class)
+
+    @BeforeClass
+    static void setUpOnce() throws Exception {
+        logger.metaClass.methodMissing = { String name, args ->
+            logger.info("[${name?.toUpperCase()}] ${(args as List).join(" ")}")
+        }
+    }
+
+    @Before
+    void setUp() throws Exception {
+
+    }
+
+    @After
+    void tearDown() throws Exception {
+
+    }
+
+    @Test
+    void testShouldHandleXXEInTemplate() {
+        // Arrange
+        final String XXE_TEMPLATE_FILEPATH = 
"src/test/resources/xxe_template.xml"
+        final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
+        runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
+        runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
+
+        // Act
+        runner.run()
+        logger.info("SplitXML processor ran")
+
+        // Assert
+        runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
+    }
+
+    @Test
+    void testShouldHandleRemoteCallXXE() {
+        // Arrange
+        final String XXE_TEMPLATE_FILEPATH = 
"src/test/resources/xxe_from_report.xml"
+        final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
+        runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
+        runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
+
+        // Act
+        runner.run()
+        logger.info("SplitXML processor ran")
+
+        // Assert
+        runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
+    }
+}

http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml
new file mode 100644
index 0000000..42b22a0
--- /dev/null
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml
@@ -0,0 +1,2 @@
+<!DOCTYPE ANY SYSTEM "http://some.external.nifi.apache.org:8888/xxe">
+<a>1</b>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml
----------------------------------------------------------------------
diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml
 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml
new file mode 100644
index 0000000..82674e0
--- /dev/null
+++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml
@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE netspi 
[<!ENTITY xxe SYSTEM "file:///etc/passwd" >]>
+<template>
+    <name>&xxe;</name>
+  <description>A simple template which generates flowfiles and logs them. 
</description>
+  <groupId>3a204982-015e-1000-eaa2-19d352ec8394</groupId>
+  <snippet>
+    <connections>
+      <id>0fbe8be5-306c-3b6c-0000-000000000000</id>
+      <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
+      <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
+      <backPressureObjectThreshold>10000</backPressureObjectThreshold>
+      <destination>
+        <groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
+        <id>fd90023d-a235-30f6-0000-000000000000</id>
+        <type>PROCESSOR</type>
+      </destination>
+      <flowFileExpiration>0 sec</flowFileExpiration>
+      <labelIndex>1</labelIndex>
+      <name></name>
+      <selectedRelationships>success</selectedRelationships>
+      <source>
+        <groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
+        <id>ff49910d-06bb-37ee-0000-000000000000</id>
+        <type>PROCESSOR</type>
+      </source>
+      <zIndex>0</zIndex>
+    </connections>
+    <processors>
+      <id>fd90023d-a235-30f6-0000-000000000000</id>
+      <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
+      <position>
+        <x>0.0</x>
+        <y>318.3128613789876</y>
+      </position>
+      <bundle>
+        <artifact>nifi-standard-nar</artifact>
+        <group>org.apache.nifi</group>
+        <version>1.4.0-SNAPSHOT</version>
+      </bundle>
+      <config>
+        <bulletinLevel>WARN</bulletinLevel>
+        <comments></comments>
+        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
+        <descriptors>
+          <entry>
+            <key>Log Level</key>
+            <value>
+              <name>Log Level</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Log Payload</key>
+            <value>
+              <name>Log Payload</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Attributes to Log</key>
+            <value>
+              <name>Attributes to Log</name>
+            </value>
+          </entry>
+          <entry>
+            <key>attributes-to-log-regex</key>
+            <value>
+              <name>attributes-to-log-regex</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Attributes to Ignore</key>
+            <value>
+              <name>Attributes to Ignore</name>
+            </value>
+          </entry>
+          <entry>
+            <key>attributes-to-ignore-regex</key>
+            <value>
+              <name>attributes-to-ignore-regex</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Log prefix</key>
+            <value>
+              <name>Log prefix</name>
+            </value>
+          </entry>
+          <entry>
+            <key>character-set</key>
+            <value>
+              <name>character-set</name>
+            </value>
+          </entry>
+        </descriptors>
+        <executionNode>ALL</executionNode>
+        <lossTolerant>false</lossTolerant>
+        <penaltyDuration>30 sec</penaltyDuration>
+        <properties>
+          <entry>
+            <key>Log Level</key>
+            <value>info</value>
+          </entry>
+          <entry>
+            <key>Log Payload</key>
+            <value>true</value>
+          </entry>
+          <entry>
+            <key>Attributes to Log</key>
+          </entry>
+          <entry>
+            <key>attributes-to-log-regex</key>
+            <value>.*</value>
+          </entry>
+          <entry>
+            <key>Attributes to Ignore</key>
+          </entry>
+          <entry>
+            <key>attributes-to-ignore-regex</key>
+          </entry>
+          <entry>
+            <key>Log prefix</key>
+          </entry>
+          <entry>
+            <key>character-set</key>
+            <value>UTF-8</value>
+          </entry>
+        </properties>
+        <runDurationMillis>0</runDurationMillis>
+        <schedulingPeriod>0 sec</schedulingPeriod>
+        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
+        <yieldDuration>1 sec</yieldDuration>
+      </config>
+      <name>LogAttribute</name>
+      <relationships>
+        <autoTerminate>true</autoTerminate>
+        <name>success</name>
+      </relationships>
+      <state>STOPPED</state>
+      <style></style>
+      <type>org.apache.nifi.processors.standard.LogAttribute</type>
+    </processors>
+    <processors>
+      <id>ff49910d-06bb-37ee-0000-000000000000</id>
+      <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
+      <position>
+        <x>1.1368683772161603E-13</x>
+        <y>0.0</y>
+      </position>
+      <bundle>
+        <artifact>nifi-standard-nar</artifact>
+        <group>org.apache.nifi</group>
+        <version>1.4.0-SNAPSHOT</version>
+      </bundle>
+      <config>
+        <bulletinLevel>WARN</bulletinLevel>
+        <comments></comments>
+        <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
+        <descriptors>
+          <entry>
+            <key>File Size</key>
+            <value>
+              <name>File Size</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Batch Size</key>
+            <value>
+              <name>Batch Size</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Data Format</key>
+            <value>
+              <name>Data Format</name>
+            </value>
+          </entry>
+          <entry>
+            <key>Unique FlowFiles</key>
+            <value>
+              <name>Unique FlowFiles</name>
+            </value>
+          </entry>
+          <entry>
+            <key>generate-ff-custom-text</key>
+            <value>
+              <name>generate-ff-custom-text</name>
+            </value>
+          </entry>
+        </descriptors>
+        <executionNode>ALL</executionNode>
+        <lossTolerant>false</lossTolerant>
+        <penaltyDuration>30 sec</penaltyDuration>
+        <properties>
+          <entry>
+            <key>File Size</key>
+            <value>0B</value>
+          </entry>
+          <entry>
+            <key>Batch Size</key>
+            <value>1</value>
+          </entry>
+          <entry>
+            <key>Data Format</key>
+            <value>Text</value>
+          </entry>
+          <entry>
+            <key>Unique FlowFiles</key>
+            <value>false</value>
+          </entry>
+          <entry>
+            <key>generate-ff-custom-text</key>
+            <value>This is a plaintext message. </value>
+          </entry>
+        </properties>
+        <runDurationMillis>0</runDurationMillis>
+        <schedulingPeriod>1 sec</schedulingPeriod>
+        <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
+        <yieldDuration>1 sec</yieldDuration>
+      </config>
+      <name>GenerateFlowFile</name>
+      <relationships>
+        <autoTerminate>false</autoTerminate>
+        <name>success</name>
+      </relationships>
+      <state>STOPPED</state>
+      <style></style>
+      <type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
+    </processors>
+  </snippet>
+  <timestamp>09/05/2017 14:51:01 PDT</timestamp>
+</template>

Reply via email to