Repository: nifi Updated Branches: refs/heads/master a3edd34db -> 28067a29f
NIFI-4869 Added SAX utility method for SplitXML processor. Added unit tests. Added test resources to RAT exclude list. This closes #2466 Project: http://git-wip-us.apache.org/repos/asf/nifi/repo Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/28067a29 Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/28067a29 Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/28067a29 Branch: refs/heads/master Commit: 28067a29fd13cdf8e21b440fc65c6dd67872522f Parents: a3edd34 Author: Andy LoPresto <alopre...@apache.org> Authored: Mon Feb 12 21:10:16 2018 -0800 Committer: Matt Gilman <matt.c.gil...@gmail.com> Committed: Tue Feb 13 15:29:29 2018 -0500 ---------------------------------------------------------------------- .../org/apache/nifi/security/xml/XmlUtils.java | 27 ++- .../nifi-standard-processors/pom.xml | 2 + .../nifi/processors/standard/SplitXml.java | 24 +- .../processors/standard/SplitXmlTest.groovy | 85 +++++++ .../src/test/resources/xxe_from_report.xml | 2 + .../src/test/resources/xxe_template.xml | 230 +++++++++++++++++++ 6 files changed, 354 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java ---------------------------------------------------------------------- diff --git a/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java b/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java index 99c90a6..e384c5b 100644 --- a/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java +++ b/nifi-commons/nifi-security-utils/src/main/java/org/apache/nifi/security/xml/XmlUtils.java @@ -16,11 +16,17 @@ */ package org.apache.nifi.security.xml; +import java.io.InputStream; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import javax.xml.transform.stream.StreamSource; -import java.io.InputStream; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; public class XmlUtils { @@ -41,4 +47,23 @@ public class XmlUtils { xif.setProperty(XMLInputFactory.SUPPORT_DTD, false); return xif.createXMLStreamReader(source); } + + public static XMLReader createSafeSaxReader(SAXParserFactory saxParserFactory, ContentHandler contentHandler) throws SAXException, ParserConfigurationException { + if (saxParserFactory == null) { + throw new IllegalArgumentException("The provided SAX parser factory cannot be null"); + } + + if (contentHandler == null) { + throw new IllegalArgumentException("The provided SAX content handler cannot be null"); + } + + saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false); + saxParserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + + SAXParser saxParser = saxParserFactory.newSAXParser(); + XMLReader xmlReader = saxParser.getXMLReader(); + xmlReader.setContentHandler(contentHandler); + + return xmlReader; + } } http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml index 3cdd787..0fffbb8 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/pom.xml @@ -515,6 +515,8 @@ <!-- This file is copied from https://github.com/jeremyh/jBCrypt because the binary is compiled for Java 8 and we must support Java 7 --> <exclude>src/main/java/org/apache/nifi/security/util/crypto/bcrypt/BCrypt.java</exclude> + <exclude>src/test/resources/xxe_template.xml</exclude> + <exclude>src/test/resources/xxe_from_report.xml</exclude> </excludes> </configuration> </plugin> http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java index 502f7f3..de513c8 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitXml.java @@ -16,6 +16,12 @@ */ package org.apache.nifi.processors.standard; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME; +import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal; + import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; @@ -28,11 +34,8 @@ import java.util.TreeMap; import java.util.UUID; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; - import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; - import org.apache.commons.lang3.StringEscapeUtils; import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.InputRequirement; @@ -54,7 +57,7 @@ import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.processors.standard.util.XmlElementNotifier; -import org.apache.nifi.stream.io.BufferedInputStream; +import org.apache.nifi.security.xml.XmlUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; @@ -64,12 +67,6 @@ import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; -import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT; -import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID; -import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX; -import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME; -import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal; - @EventDriven @SideEffectFree @SupportsBatching @@ -175,12 +172,9 @@ public class SplitXml extends AbstractProcessor { final AtomicBoolean failed = new AtomicBoolean(false); session.read(original, rawIn -> { - try (final InputStream in = new BufferedInputStream(rawIn)) { - SAXParser saxParser = null; + try (final InputStream in = new java.io.BufferedInputStream(rawIn)) { try { - saxParser = saxParserFactory.newSAXParser(); - final XMLReader reader = saxParser.getXMLReader(); - reader.setContentHandler(parser); + final XMLReader reader = XmlUtils.createSafeSaxReader(saxParserFactory, parser); reader.parse(new InputSource(in)); } catch (final ParserConfigurationException | SAXException e) { logger.error("Unable to parse {} due to {}", new Object[]{original, e}); http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy new file mode 100644 index 0000000..f04dca6 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/groovy/org/apache/nifi/processors/standard/SplitXmlTest.groovy @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.standard + +import org.apache.nifi.util.TestRunner +import org.apache.nifi.util.TestRunners +import org.junit.After +import org.junit.Before +import org.junit.BeforeClass +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +import java.nio.file.Paths + + +@RunWith(JUnit4.class) +class SplitXmlTest extends GroovyTestCase { + private static final Logger logger = LoggerFactory.getLogger(SplitXmlTest.class) + + @BeforeClass + static void setUpOnce() throws Exception { + logger.metaClass.methodMissing = { String name, args -> + logger.info("[${name?.toUpperCase()}] ${(args as List).join(" ")}") + } + } + + @Before + void setUp() throws Exception { + + } + + @After + void tearDown() throws Exception { + + } + + @Test + void testShouldHandleXXEInTemplate() { + // Arrange + final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_template.xml" + final TestRunner runner = TestRunners.newTestRunner(new SplitXml()) + runner.setProperty(SplitXml.SPLIT_DEPTH, "3") + runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH)) + + // Act + runner.run() + logger.info("SplitXML processor ran") + + // Assert + runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE) + } + + @Test + void testShouldHandleRemoteCallXXE() { + // Arrange + final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_from_report.xml" + final TestRunner runner = TestRunners.newTestRunner(new SplitXml()) + runner.setProperty(SplitXml.SPLIT_DEPTH, "3") + runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH)) + + // Act + runner.run() + logger.info("SplitXML processor ran") + + // Assert + runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE) + } +} http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml new file mode 100644 index 0000000..42b22a0 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_from_report.xml @@ -0,0 +1,2 @@ +<!DOCTYPE ANY SYSTEM "http://some.external.nifi.apache.org:8888/xxe"> +<a>1</b> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/nifi/blob/28067a29/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml ---------------------------------------------------------------------- diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml new file mode 100644 index 0000000..82674e0 --- /dev/null +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/resources/xxe_template.xml @@ -0,0 +1,230 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE netspi [<!ENTITY xxe SYSTEM "file:///etc/passwd" >]> +<template> + <name>&xxe;</name> + <description>A simple template which generates flowfiles and logs them. </description> + <groupId>3a204982-015e-1000-eaa2-19d352ec8394</groupId> + <snippet> + <connections> + <id>0fbe8be5-306c-3b6c-0000-000000000000</id> + <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId> + <backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold> + <backPressureObjectThreshold>10000</backPressureObjectThreshold> + <destination> + <groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId> + <id>fd90023d-a235-30f6-0000-000000000000</id> + <type>PROCESSOR</type> + </destination> + <flowFileExpiration>0 sec</flowFileExpiration> + <labelIndex>1</labelIndex> + <name></name> + <selectedRelationships>success</selectedRelationships> + <source> + <groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId> + <id>ff49910d-06bb-37ee-0000-000000000000</id> + <type>PROCESSOR</type> + </source> + <zIndex>0</zIndex> + </connections> + <processors> + <id>fd90023d-a235-30f6-0000-000000000000</id> + <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId> + <position> + <x>0.0</x> + <y>318.3128613789876</y> + </position> + <bundle> + <artifact>nifi-standard-nar</artifact> + <group>org.apache.nifi</group> + <version>1.4.0-SNAPSHOT</version> + </bundle> + <config> + <bulletinLevel>WARN</bulletinLevel> + <comments></comments> + <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> + <descriptors> + <entry> + <key>Log Level</key> + <value> + <name>Log Level</name> + </value> + </entry> + <entry> + <key>Log Payload</key> + <value> + <name>Log Payload</name> + </value> + </entry> + <entry> + <key>Attributes to Log</key> + <value> + <name>Attributes to Log</name> + </value> + </entry> + <entry> + <key>attributes-to-log-regex</key> + <value> + <name>attributes-to-log-regex</name> + </value> + </entry> + <entry> + <key>Attributes to Ignore</key> + <value> + <name>Attributes to Ignore</name> + </value> + </entry> + <entry> + <key>attributes-to-ignore-regex</key> + <value> + <name>attributes-to-ignore-regex</name> + </value> + </entry> + <entry> + <key>Log prefix</key> + <value> + <name>Log prefix</name> + </value> + </entry> + <entry> + <key>character-set</key> + <value> + <name>character-set</name> + </value> + </entry> + </descriptors> + <executionNode>ALL</executionNode> + <lossTolerant>false</lossTolerant> + <penaltyDuration>30 sec</penaltyDuration> + <properties> + <entry> + <key>Log Level</key> + <value>info</value> + </entry> + <entry> + <key>Log Payload</key> + <value>true</value> + </entry> + <entry> + <key>Attributes to Log</key> + </entry> + <entry> + <key>attributes-to-log-regex</key> + <value>.*</value> + </entry> + <entry> + <key>Attributes to Ignore</key> + </entry> + <entry> + <key>attributes-to-ignore-regex</key> + </entry> + <entry> + <key>Log prefix</key> + </entry> + <entry> + <key>character-set</key> + <value>UTF-8</value> + </entry> + </properties> + <runDurationMillis>0</runDurationMillis> + <schedulingPeriod>0 sec</schedulingPeriod> + <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> + <yieldDuration>1 sec</yieldDuration> + </config> + <name>LogAttribute</name> + <relationships> + <autoTerminate>true</autoTerminate> + <name>success</name> + </relationships> + <state>STOPPED</state> + <style></style> + <type>org.apache.nifi.processors.standard.LogAttribute</type> + </processors> + <processors> + <id>ff49910d-06bb-37ee-0000-000000000000</id> + <parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId> + <position> + <x>1.1368683772161603E-13</x> + <y>0.0</y> + </position> + <bundle> + <artifact>nifi-standard-nar</artifact> + <group>org.apache.nifi</group> + <version>1.4.0-SNAPSHOT</version> + </bundle> + <config> + <bulletinLevel>WARN</bulletinLevel> + <comments></comments> + <concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount> + <descriptors> + <entry> + <key>File Size</key> + <value> + <name>File Size</name> + </value> + </entry> + <entry> + <key>Batch Size</key> + <value> + <name>Batch Size</name> + </value> + </entry> + <entry> + <key>Data Format</key> + <value> + <name>Data Format</name> + </value> + </entry> + <entry> + <key>Unique FlowFiles</key> + <value> + <name>Unique FlowFiles</name> + </value> + </entry> + <entry> + <key>generate-ff-custom-text</key> + <value> + <name>generate-ff-custom-text</name> + </value> + </entry> + </descriptors> + <executionNode>ALL</executionNode> + <lossTolerant>false</lossTolerant> + <penaltyDuration>30 sec</penaltyDuration> + <properties> + <entry> + <key>File Size</key> + <value>0B</value> + </entry> + <entry> + <key>Batch Size</key> + <value>1</value> + </entry> + <entry> + <key>Data Format</key> + <value>Text</value> + </entry> + <entry> + <key>Unique FlowFiles</key> + <value>false</value> + </entry> + <entry> + <key>generate-ff-custom-text</key> + <value>This is a plaintext message. </value> + </entry> + </properties> + <runDurationMillis>0</runDurationMillis> + <schedulingPeriod>1 sec</schedulingPeriod> + <schedulingStrategy>TIMER_DRIVEN</schedulingStrategy> + <yieldDuration>1 sec</yieldDuration> + </config> + <name>GenerateFlowFile</name> + <relationships> + <autoTerminate>false</autoTerminate> + <name>success</name> + </relationships> + <state>STOPPED</state> + <style></style> + <type>org.apache.nifi.processors.standard.GenerateFlowFile</type> + </processors> + </snippet> + <timestamp>09/05/2017 14:51:01 PDT</timestamp> +</template>