This is an automated email from the ASF dual-hosted git repository.
hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git
The following commit(s) were added to refs/heads/main by this push:
new 41769179be Harden the pipeline and workflow XML parser, fixes #5804
(#5937)
41769179be is described below
commit 41769179be11af30dfedaecc03980a7d1c5023d6
Author: Matt Casters <[email protected]>
AuthorDate: Mon Nov 3 11:24:15 2025 +0100
Harden the pipeline and workflow XML parser, fixes #5804 (#5937)
* Issue #5730 (parquet file input improvements)
* Issue #5804 (Harden the pipeline and workflow XML parser)
* spotless/import fixes
---------
Co-authored-by: Matt Casters <[email protected]>
Co-authored-by: Hans Van Akelyen <[email protected]>
---
.../hop/core/xml/XmlParserFactoryProducer.java | 36 ++++++++++++++++++++++
.../apache/hop/core/xml/XmlHandlerUnitTest.java | 24 ---------------
.../transforms/input/ParquetValueConverter.java | 1 -
.../xml/xsdvalidator/XsdValidatorIntTest.java | 7 +++++
.../hop/pipeline/transforms/xml/xslt/XsltTest.java | 7 +++++
5 files changed, 50 insertions(+), 25 deletions(-)
diff --git
a/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
b/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
index 6852ca2576..b39f496fff 100644
--- a/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
+++ b/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
@@ -22,6 +22,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.hop.core.Const;
+import org.apache.hop.core.logging.LogChannel;
import org.apache.hop.core.util.EnvUtil;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
@@ -47,6 +48,41 @@ public class XmlParserFactoryProducer {
"http://apache.org/xml/features/disallow-doctype-decl",
"N".equals(EnvUtil.getSystemProperty(Const.XML_ALLOW_DOCTYPE_DECL)));
+ String[] featuresToDisable = {
+ // Xerces 1 -
http://xerces.apache.org/xerces-j/features.html#external-general-entities
+ // Xerces 2 -
http://xerces.apache.org/xerces2-j/features.html#external-general-entities
+ // JDK7+ - http://xml.org/sax/features/external-general-entities
+ // This feature has to be used together with the following one,
otherwise it will not protect
+ // you from XXE for sure
+ "http://xml.org/sax/features/external-general-entities",
+
+ // Xerces 1 -
http://xerces.apache.org/xerces-j/features.html#external-parameter-entities
+ // Xerces 2 -
http://xerces.apache.org/xerces2-j/features.html#external-parameter-entities
+ // JDK7+ - http://xml.org/sax/features/external-parameter-entities
+ // This feature has to be used together with the previous one, otherwise
it will not protect
+ // you from XXE for sure
+ "http://xml.org/sax/features/external-parameter-entities",
+
+ // Disable external DTDs as well
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd"
+ };
+ for (String feature : featuresToDisable) {
+ try {
+ docBuilderFactory.setFeature(feature, false);
+ } catch (ParserConfigurationException e) {
+ // This should catch a failed setFeature feature
+ if (LogChannel.GENERAL.isDetailed()) {
+ LogChannel.GENERAL.logDetailed(
+ "ParserConfigurationException was thrown. The feature '"
+ + feature
+ + "' is probably not supported by your XML processor.");
+ }
+ }
+ }
+
+ docBuilderFactory.setXIncludeAware(false);
+ docBuilderFactory.setExpandEntityReferences(false);
+ docBuilderFactory.setValidating(false);
return docBuilderFactory;
}
diff --git a/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
b/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
index 9fbfb36d81..20400214f3 100644
--- a/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
+++ b/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
@@ -30,14 +30,12 @@ import java.util.Calendar;
import java.util.GregorianCalendar;
import javax.xml.parsers.DocumentBuilder;
import org.apache.hop.core.Const;
-import org.apache.hop.core.exception.HopXmlException;
import org.apache.hop.junit.rules.RestoreHopEnvironment;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
-import org.xml.sax.SAXParseException;
public class XmlHandlerUnitTest {
@ClassRule public static RestoreHopEnvironment env = new
RestoreHopEnvironment();
@@ -259,28 +257,6 @@ public class XmlHandlerUnitTest {
assertEquals(expectedStrAfterConversion, result);
}
- @Test(expected = SAXParseException.class)
- public void
createdDocumentBuilderThrowsExceptionWhenParsingXmlWithABigAmountOfExternalEntities()
- throws Exception {
- DocumentBuilder builder = XmlHandler.createDocumentBuilder(false, false);
- builder.parse(new ByteArrayInputStream(MALICIOUS_XML.getBytes()));
- }
-
- @Test(expected = HopXmlException.class)
- public void
loadingXmlFromStreamThrowsExceptionWhenParsingXmlWithBigAmountOfExternalEntities()
- throws Exception {
- XmlHandler.loadXmlFile(
- new ByteArrayInputStream(MALICIOUS_XML.getBytes()), "<def>", false,
false);
- }
-
- @Test(expected = HopXmlException.class)
- public void
loadingXmlFromURLThrowsExceptionWhenParsingXmlWithBigAmountOfExternalEntities()
- throws Exception {
- File tmpFile = createTmpFile(MALICIOUS_XML);
-
- XmlHandler.loadXmlFile(tmpFile.toURI().toURL());
- }
-
private File createTmpFile(String content) throws Exception {
File tmpFile = File.createTempFile("XmlHandlerUnitTest", ".xml");
tmpFile.deleteOnExit();
diff --git
a/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
b/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
index 5611dba77c..6328809ac0 100644
---
a/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
+++
b/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
@@ -86,7 +86,6 @@ public class ParquetValueConverter extends PrimitiveConverter
{
object = timestamp;
break;
}
-
default:
throw new RuntimeException(
"Unable to convert Binary source data to type " +
valueMeta.getTypeDesc());
diff --git
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
index 8442a24936..6730e3d57e 100644
---
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
+++
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
@@ -29,6 +29,7 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.vfs2.FileObject;
+import org.apache.hop.core.HopClientEnvironment;
import org.apache.hop.core.HopEnvironment;
import org.apache.hop.core.RowMetaAndData;
import org.apache.hop.core.exception.HopException;
@@ -42,6 +43,7 @@ import org.apache.hop.pipeline.PipelineMeta;
import org.apache.hop.pipeline.transforms.xml.PipelineTestFactory;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
class XsdValidatorIntTest {
@@ -52,6 +54,11 @@ class XsdValidatorIntTest {
private static FileObject schemaRamFile = null;
private static FileObject dataRamFile = null;
+ @BeforeEach
+ public void init() throws Exception {
+ HopClientEnvironment.init();
+ }
+
@BeforeAll
static void setUpBeforeClass() throws HopException {
HopEnvironment.init();
diff --git
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
index 4fcd008164..cc24ff2888 100644
---
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
+++
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
@@ -25,6 +25,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import org.apache.hop.core.HopClientEnvironment;
import org.apache.hop.core.HopEnvironment;
import org.apache.hop.core.RowMetaAndData;
import org.apache.hop.core.exception.HopValueException;
@@ -45,6 +46,7 @@ import org.apache.hop.pipeline.transform.TransformMeta;
import org.apache.hop.pipeline.transforms.dummy.DummyMeta;
import org.apache.hop.pipeline.transforms.injector.InjectorMeta;
import org.apache.hop.pipeline.transforms.xml.RowTransformCollector;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
class XsltTest {
@@ -64,6 +66,11 @@ class XsltTest {
private static final String TEST1_FNAME = "template.xsl";
+ @BeforeEach
+ public void init() throws Exception {
+ HopClientEnvironment.init();
+ }
+
/**
* Write the file to be used as input (as a temporary file).
*