This is an automated email from the ASF dual-hosted git repository.

hansva pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hop.git


The following commit(s) were added to refs/heads/main by this push:
     new 41769179be Harden the pipeline and workflow XML parser, fixes #5804 
(#5937)
41769179be is described below

commit 41769179be11af30dfedaecc03980a7d1c5023d6
Author: Matt Casters <[email protected]>
AuthorDate: Mon Nov 3 11:24:15 2025 +0100

    Harden the pipeline and workflow XML parser, fixes #5804 (#5937)
    
    * Issue #5730 (parquet file input improvements)
    
    * Issue #5804 (Harden the pipeline and workflow XML parser)
    
    * spotless/import fixes
    
    ---------
    
    Co-authored-by: Matt Casters <[email protected]>
    Co-authored-by: Hans Van Akelyen <[email protected]>
---
 .../hop/core/xml/XmlParserFactoryProducer.java     | 36 ++++++++++++++++++++++
 .../apache/hop/core/xml/XmlHandlerUnitTest.java    | 24 ---------------
 .../transforms/input/ParquetValueConverter.java    |  1 -
 .../xml/xsdvalidator/XsdValidatorIntTest.java      |  7 +++++
 .../hop/pipeline/transforms/xml/xslt/XsltTest.java |  7 +++++
 5 files changed, 50 insertions(+), 25 deletions(-)

diff --git 
a/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java 
b/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
index 6852ca2576..b39f496fff 100644
--- a/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
+++ b/core/src/main/java/org/apache/hop/core/xml/XmlParserFactoryProducer.java
@@ -22,6 +22,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParserFactory;
 import org.apache.hop.core.Const;
+import org.apache.hop.core.logging.LogChannel;
 import org.apache.hop.core.util.EnvUtil;
 import org.xml.sax.SAXNotRecognizedException;
 import org.xml.sax.SAXNotSupportedException;
@@ -47,6 +48,41 @@ public class XmlParserFactoryProducer {
         "http://apache.org/xml/features/disallow-doctype-decl";,
         "N".equals(EnvUtil.getSystemProperty(Const.XML_ALLOW_DOCTYPE_DECL)));
 
+    String[] featuresToDisable = {
+      // Xerces 1 - 
http://xerces.apache.org/xerces-j/features.html#external-general-entities
+      // Xerces 2 - 
http://xerces.apache.org/xerces2-j/features.html#external-general-entities
+      // JDK7+ - http://xml.org/sax/features/external-general-entities
+      // This feature has to be used together with the following one, 
otherwise it will not protect
+      // you from XXE for sure
+      "http://xml.org/sax/features/external-general-entities";,
+
+      // Xerces 1 - 
http://xerces.apache.org/xerces-j/features.html#external-parameter-entities
+      // Xerces 2 - 
http://xerces.apache.org/xerces2-j/features.html#external-parameter-entities
+      // JDK7+ - http://xml.org/sax/features/external-parameter-entities
+      // This feature has to be used together with the previous one, otherwise 
it will not protect
+      // you from XXE for sure
+      "http://xml.org/sax/features/external-parameter-entities";,
+
+      // Disable external DTDs as well
+      "http://apache.org/xml/features/nonvalidating/load-external-dtd";
+    };
+    for (String feature : featuresToDisable) {
+      try {
+        docBuilderFactory.setFeature(feature, false);
+      } catch (ParserConfigurationException e) {
+        // This should catch a failed setFeature feature
+        if (LogChannel.GENERAL.isDetailed()) {
+          LogChannel.GENERAL.logDetailed(
+              "ParserConfigurationException was thrown. The feature '"
+                  + feature
+                  + "' is probably not supported by your XML processor.");
+        }
+      }
+    }
+
+    docBuilderFactory.setXIncludeAware(false);
+    docBuilderFactory.setExpandEntityReferences(false);
+    docBuilderFactory.setValidating(false);
     return docBuilderFactory;
   }
 
diff --git a/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java 
b/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
index 9fbfb36d81..20400214f3 100644
--- a/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
+++ b/core/src/test/java/org/apache/hop/core/xml/XmlHandlerUnitTest.java
@@ -30,14 +30,12 @@ import java.util.Calendar;
 import java.util.GregorianCalendar;
 import javax.xml.parsers.DocumentBuilder;
 import org.apache.hop.core.Const;
-import org.apache.hop.core.exception.HopXmlException;
 import org.apache.hop.junit.rules.RestoreHopEnvironment;
 import org.junit.ClassRule;
 import org.junit.Ignore;
 import org.junit.Test;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
-import org.xml.sax.SAXParseException;
 
 public class XmlHandlerUnitTest {
   @ClassRule public static RestoreHopEnvironment env = new 
RestoreHopEnvironment();
@@ -259,28 +257,6 @@ public class XmlHandlerUnitTest {
     assertEquals(expectedStrAfterConversion, result);
   }
 
-  @Test(expected = SAXParseException.class)
-  public void 
createdDocumentBuilderThrowsExceptionWhenParsingXmlWithABigAmountOfExternalEntities()
-      throws Exception {
-    DocumentBuilder builder = XmlHandler.createDocumentBuilder(false, false);
-    builder.parse(new ByteArrayInputStream(MALICIOUS_XML.getBytes()));
-  }
-
-  @Test(expected = HopXmlException.class)
-  public void 
loadingXmlFromStreamThrowsExceptionWhenParsingXmlWithBigAmountOfExternalEntities()
-      throws Exception {
-    XmlHandler.loadXmlFile(
-        new ByteArrayInputStream(MALICIOUS_XML.getBytes()), "<def>", false, 
false);
-  }
-
-  @Test(expected = HopXmlException.class)
-  public void 
loadingXmlFromURLThrowsExceptionWhenParsingXmlWithBigAmountOfExternalEntities()
-      throws Exception {
-    File tmpFile = createTmpFile(MALICIOUS_XML);
-
-    XmlHandler.loadXmlFile(tmpFile.toURI().toURL());
-  }
-
   private File createTmpFile(String content) throws Exception {
     File tmpFile = File.createTempFile("XmlHandlerUnitTest", ".xml");
     tmpFile.deleteOnExit();
diff --git 
a/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
 
b/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
index 5611dba77c..6328809ac0 100644
--- 
a/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
+++ 
b/plugins/tech/parquet/src/main/java/org/apache/hop/parquet/transforms/input/ParquetValueConverter.java
@@ -86,7 +86,6 @@ public class ParquetValueConverter extends PrimitiveConverter 
{
           object = timestamp;
           break;
         }
-
       default:
         throw new RuntimeException(
             "Unable to convert Binary source data to type " + 
valueMeta.getTypeDesc());
diff --git 
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
 
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
index 8442a24936..6730e3d57e 100644
--- 
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
+++ 
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xsdvalidator/XsdValidatorIntTest.java
@@ -29,6 +29,7 @@ import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.commons.vfs2.FileObject;
+import org.apache.hop.core.HopClientEnvironment;
 import org.apache.hop.core.HopEnvironment;
 import org.apache.hop.core.RowMetaAndData;
 import org.apache.hop.core.exception.HopException;
@@ -42,6 +43,7 @@ import org.apache.hop.pipeline.PipelineMeta;
 import org.apache.hop.pipeline.transforms.xml.PipelineTestFactory;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 class XsdValidatorIntTest {
@@ -52,6 +54,11 @@ class XsdValidatorIntTest {
   private static FileObject schemaRamFile = null;
   private static FileObject dataRamFile = null;
 
+  @BeforeEach
+  public void init() throws Exception {
+    HopClientEnvironment.init();
+  }
+
   @BeforeAll
   static void setUpBeforeClass() throws HopException {
     HopEnvironment.init();
diff --git 
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
 
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
index 4fcd008164..cc24ff2888 100644
--- 
a/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
+++ 
b/plugins/transforms/xml/src/test/java/org/apache/hop/pipeline/transforms/xml/xslt/XsltTest.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
+import org.apache.hop.core.HopClientEnvironment;
 import org.apache.hop.core.HopEnvironment;
 import org.apache.hop.core.RowMetaAndData;
 import org.apache.hop.core.exception.HopValueException;
@@ -45,6 +46,7 @@ import org.apache.hop.pipeline.transform.TransformMeta;
 import org.apache.hop.pipeline.transforms.dummy.DummyMeta;
 import org.apache.hop.pipeline.transforms.injector.InjectorMeta;
 import org.apache.hop.pipeline.transforms.xml.RowTransformCollector;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 class XsltTest {
@@ -64,6 +66,11 @@ class XsltTest {
 
   private static final String TEST1_FNAME = "template.xsl";
 
+  @BeforeEach
+  public void init() throws Exception {
+    HopClientEnvironment.init();
+  }
+
   /**
    * Write the file to be used as input (as a temporary file).
    *

Reply via email to