This is an automated email from the ASF dual-hosted git repository.

rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new ba80b9156a [CALCITE-5263] Improve XmlFunctions by using an XML 
DocumentBuilder
ba80b9156a is described below

commit ba80b9156afc0db26b194d97a031fcc0dc7f4c03
Author: rubenada <rube...@gmail.com>
AuthorDate: Sat Sep 3 19:25:25 2022 +0100

    [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
    
    Co-authored-by: David Handermann <exceptionfact...@apache.org>
---
 .../org/apache/calcite/runtime/XmlFunctions.java   | 67 +++++++++++++++++++---
 .../apache/calcite/test/SqlXmlFunctionsTest.java   | 48 ++++++++++++++++
 2 files changed, 106 insertions(+), 9 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
index 8c81647525..03134321a4 100644
--- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
@@ -24,7 +24,9 @@ import org.checkerframework.checker.nullness.qual.Nullable;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
 
+import java.io.IOException;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.ArrayList;
@@ -33,6 +35,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.transform.ErrorListener;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Source;
@@ -48,6 +54,7 @@ import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathExpressionException;
 import javax.xml.xpath.XPathFactory;
+import javax.xml.xpath.XPathFactoryConfigurationException;
 
 import static org.apache.calcite.linq4j.Nullness.castNonNull;
 import static org.apache.calcite.util.Static.RESOURCE;
@@ -60,13 +67,41 @@ import static java.util.Objects.requireNonNull;
 public class XmlFunctions {
 
   private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY =
-      ThreadLocal.withInitial(XPathFactory::newInstance);
+      ThreadLocal.withInitial(() -> {
+        final XPathFactory xPathFactory = XPathFactory.newInstance();
+        try {
+          xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, 
true);
+        } catch (XPathFactoryConfigurationException e) {
+          throw new IllegalStateException("XPath Factory configuration 
failed", e);
+        }
+        return xPathFactory;
+      });
   private static final ThreadLocal<@Nullable TransformerFactory> 
TRANSFORMER_FACTORY =
       ThreadLocal.withInitial(() -> {
-        TransformerFactory transformerFactory = 
TransformerFactory.newInstance();
+        final TransformerFactory transformerFactory = 
TransformerFactory.newInstance();
         transformerFactory.setErrorListener(new InternalErrorListener());
+        try {
+          
transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+        } catch (TransformerConfigurationException e) {
+          throw new IllegalStateException("Transformer Factory configuration 
failed", e);
+        }
         return transformerFactory;
       });
+  private static final ThreadLocal<@Nullable DocumentBuilderFactory> 
DOCUMENT_BUILDER_FACTORY =
+      ThreadLocal.withInitial(() -> {
+        final DocumentBuilderFactory documentBuilderFactory = 
DocumentBuilderFactory.newInstance();
+        documentBuilderFactory.setXIncludeAware(false);
+        documentBuilderFactory.setExpandEntityReferences(false);
+        documentBuilderFactory.setNamespaceAware(true);
+        try {
+          
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+          documentBuilderFactory
+              
.setFeature("http://apache.org/xml/features/disallow-doctype-decl";, true);
+        } catch (final ParserConfigurationException e) {
+          throw new IllegalStateException("Document Builder configuration 
failed", e);
+        }
+        return documentBuilderFactory;
+      });
 
   private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
       .compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( 
[0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
@@ -81,10 +116,11 @@ public class XmlFunctions {
       return null;
     }
     try {
+      final Node documentNode = getDocumentNode(input);
       XPathExpression xpathExpression = 
castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath);
       try {
         NodeList nodes = (NodeList) xpathExpression
-            .evaluate(new InputSource(new StringReader(input)), 
XPathConstants.NODESET);
+            .evaluate(documentNode, XPathConstants.NODESET);
         List<@Nullable String> result = new ArrayList<>();
         for (int i = 0; i < nodes.getLength(); i++) {
           Node item = castNonNull(nodes.item(i));
@@ -94,9 +130,9 @@ public class XmlFunctions {
         }
         return StringUtils.join(result, " ");
       } catch (XPathExpressionException e) {
-        return xpathExpression.evaluate(new InputSource(new 
StringReader(input)));
+        return xpathExpression.evaluate(documentNode);
       }
-    } catch (XPathExpressionException ex) {
+    } catch (IllegalArgumentException | XPathExpressionException ex) {
       throw RESOURCE.invalidInputForExtractValue(input, xpath).ex();
     }
   }
@@ -140,17 +176,18 @@ public class XmlFunctions {
 
       XPathExpression xpathExpression = xPath.compile(xpath);
 
+      final Node documentNode = getDocumentNode(xml);
       try {
         List<String> result = new ArrayList<>();
         NodeList nodes = (NodeList) xpathExpression
-            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODESET);
+            .evaluate(documentNode, XPathConstants.NODESET);
         for (int i = 0; i < nodes.getLength(); i++) {
           result.add(convertNodeToString(castNonNull(nodes.item(i))));
         }
         return StringUtils.join(result, "");
       } catch (XPathExpressionException e) {
         Node node = (Node) xpathExpression
-            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODE);
+            .evaluate(documentNode, XPathConstants.NODE);
         return convertNodeToString(node);
       }
     } catch (IllegalArgumentException | XPathExpressionException | 
TransformerException ex) {
@@ -174,16 +211,17 @@ public class XmlFunctions {
       }
 
       XPathExpression xpathExpression = xPath.compile(xpath);
+      final Node documentNode = getDocumentNode(xml);
       try {
         NodeList nodes = (NodeList) xpathExpression
-            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODESET);
+            .evaluate(documentNode, XPathConstants.NODESET);
         if (nodes != null && nodes.getLength() > 0) {
           return 1;
         }
         return 0;
       } catch (XPathExpressionException e) {
         Node node = (Node) xpathExpression
-            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODE);
+            .evaluate(documentNode, XPathConstants.NODE);
         if (node != null) {
           return 1;
         }
@@ -215,6 +253,17 @@ public class XmlFunctions {
     return writer.toString();
   }
 
+  private static Node getDocumentNode(final String xml) {
+    try {
+      final DocumentBuilder documentBuilder =
+          castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder();
+      final InputSource inputSource = new InputSource(new StringReader(xml));
+      return documentBuilder.parse(inputSource);
+    } catch (final ParserConfigurationException | SAXException | IOException 
e) {
+      throw new IllegalArgumentException("XML parsing failed", e);
+    }
+  }
+
   /** The internal default ErrorListener for Transformer. Just rethrows errors 
to
    * discontinue the XML transformation. */
   private static class InternalErrorListener implements ErrorListener {
diff --git 
a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
index 1457ef6908..046d935705 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
@@ -21,9 +21,13 @@ import org.apache.calcite.runtime.SqlFunctions;
 import org.apache.calcite.runtime.XmlFunctions;
 import org.apache.calcite.util.BuiltInMethod;
 
+import org.checkerframework.checker.nullness.qual.Nullable;
 import org.hamcrest.Matcher;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.function.Supplier;
 
 import static org.hamcrest.CoreMatchers.is;
@@ -36,6 +40,23 @@ import static org.junit.jupiter.api.Assertions.fail;
  */
 class SqlXmlFunctionsTest {
 
+  private static final String XML = "<document>string</document>";
+  private static final String XSLT =
+      "<xsl:stylesheet 
xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\";></xsl:stylesheet>";
+  private static final String DOCUMENT_PATH = "/document";
+  private static @Nullable String xmlExternalEntity = null;
+  private static @Nullable String xsltExternalEntity = null;
+
+  @BeforeAll public static void setup() throws Exception {
+    final Path testFile = Files.createTempFile("foo", "temp");
+    testFile.toFile().deleteOnExit();
+    final String filePath = "file:///" + testFile.toAbsolutePath();
+    xmlExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + 
filePath
+        + "\"> ]><document>&entity;</document>";
+    xsltExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + 
filePath
+        + "\"> ]><xsl:stylesheet 
xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\";>&entity;</xsl:stylesheet>";
+  }
+
   @Test void testExtractValue() {
     assertExtractValue("<a>ccc<b>ddd</b></a>", "/a", is("ccc"));
 
@@ -45,6 +66,33 @@ class SqlXmlFunctionsTest {
     assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected));
   }
 
+  @Test void testExtractValueExternalEntity() {
+    String message = "Invalid input for EXTRACTVALUE: xml: '"
+        + xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'";
+    CalciteException expected = new CalciteException(message, null);
+    assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH,
+        Matchers.expectThrowable(expected));
+  }
+
+  @Test void testExistsNodeExternalEntity() {
+    String message = "Invalid input for EXISTSNODE xpath: '"
+        + DOCUMENT_PATH + "', namespace: '" + null + "'";
+    CalciteException expected = new CalciteException(message, null);
+    assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null,
+        Matchers.expectThrowable(expected));
+  }
+
+  @Test void testXmlTransformExternalEntity() {
+    String message = "Invalid input for XMLTRANSFORM xml: '" + 
xmlExternalEntity + "'";
+    CalciteException expected = new CalciteException(message, null);
+    assertXmlTransformFailed(xmlExternalEntity, XSLT, 
Matchers.expectThrowable(expected));
+  }
+
+  @Test void testXmlTransformExternalEntityXslt() {
+    String message = "Illegal xslt specified : '" + xsltExternalEntity + "'";
+    CalciteException expected = new CalciteException(message, null);
+    assertXmlTransformFailed(XML, xsltExternalEntity, 
Matchers.expectThrowable(expected));
+  }
 
   @Test void testXmlTransform() {
     assertXmlTransform(null, "", nullValue());

Reply via email to