This is an automated email from the ASF dual-hosted git repository. rubenql pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new ba80b9156a [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder ba80b9156a is described below commit ba80b9156afc0db26b194d97a031fcc0dc7f4c03 Author: rubenada <rube...@gmail.com> AuthorDate: Sat Sep 3 19:25:25 2022 +0100 [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder Co-authored-by: David Handermann <exceptionfact...@apache.org> --- .../org/apache/calcite/runtime/XmlFunctions.java | 67 +++++++++++++++++++--- .../apache/calcite/test/SqlXmlFunctionsTest.java | 48 ++++++++++++++++ 2 files changed, 106 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java index 8c81647525..03134321a4 100644 --- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java @@ -24,7 +24,9 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.util.ArrayList; @@ -33,6 +35,10 @@ import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.ErrorListener; import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; @@ -48,6 +54,7 @@ import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; +import javax.xml.xpath.XPathFactoryConfigurationException; import static org.apache.calcite.linq4j.Nullness.castNonNull; import static org.apache.calcite.util.Static.RESOURCE; @@ -60,13 +67,41 @@ import static java.util.Objects.requireNonNull; public class XmlFunctions { private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY = - ThreadLocal.withInitial(XPathFactory::newInstance); + ThreadLocal.withInitial(() -> { + final XPathFactory xPathFactory = XPathFactory.newInstance(); + try { + xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + } catch (XPathFactoryConfigurationException e) { + throw new IllegalStateException("XPath Factory configuration failed", e); + } + return xPathFactory; + }); private static final ThreadLocal<@Nullable TransformerFactory> TRANSFORMER_FACTORY = ThreadLocal.withInitial(() -> { - TransformerFactory transformerFactory = TransformerFactory.newInstance(); + final TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setErrorListener(new InternalErrorListener()); + try { + transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + } catch (TransformerConfigurationException e) { + throw new IllegalStateException("Transformer Factory configuration failed", e); + } return transformerFactory; }); + private static final ThreadLocal<@Nullable DocumentBuilderFactory> DOCUMENT_BUILDER_FACTORY = + ThreadLocal.withInitial(() -> { + final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setXIncludeAware(false); + documentBuilderFactory.setExpandEntityReferences(false); + documentBuilderFactory.setNamespaceAware(true); + try { + documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + documentBuilderFactory + .setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); + } catch (final ParserConfigurationException e) { + throw new IllegalStateException("Document Builder configuration failed", e); + } + return documentBuilderFactory; + }); private static final Pattern VALID_NAMESPACE_PATTERN = Pattern .compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( [0-9a-zA-Z:_-]+=\"[^\"]*\")*)$"); @@ -81,10 +116,11 @@ public class XmlFunctions { return null; } try { + final Node documentNode = getDocumentNode(input); XPathExpression xpathExpression = castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath); try { NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(input)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); List<@Nullable String> result = new ArrayList<>(); for (int i = 0; i < nodes.getLength(); i++) { Node item = castNonNull(nodes.item(i)); @@ -94,9 +130,9 @@ public class XmlFunctions { } return StringUtils.join(result, " "); } catch (XPathExpressionException e) { - return xpathExpression.evaluate(new InputSource(new StringReader(input))); + return xpathExpression.evaluate(documentNode); } - } catch (XPathExpressionException ex) { + } catch (IllegalArgumentException | XPathExpressionException ex) { throw RESOURCE.invalidInputForExtractValue(input, xpath).ex(); } } @@ -140,17 +176,18 @@ public class XmlFunctions { XPathExpression xpathExpression = xPath.compile(xpath); + final Node documentNode = getDocumentNode(xml); try { List<String> result = new ArrayList<>(); NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); for (int i = 0; i < nodes.getLength(); i++) { result.add(convertNodeToString(castNonNull(nodes.item(i)))); } return StringUtils.join(result, ""); } catch (XPathExpressionException e) { Node node = (Node) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE); + .evaluate(documentNode, XPathConstants.NODE); return convertNodeToString(node); } } catch (IllegalArgumentException | XPathExpressionException | TransformerException ex) { @@ -174,16 +211,17 @@ public class XmlFunctions { } XPathExpression xpathExpression = xPath.compile(xpath); + final Node documentNode = getDocumentNode(xml); try { NodeList nodes = (NodeList) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET); + .evaluate(documentNode, XPathConstants.NODESET); if (nodes != null && nodes.getLength() > 0) { return 1; } return 0; } catch (XPathExpressionException e) { Node node = (Node) xpathExpression - .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE); + .evaluate(documentNode, XPathConstants.NODE); if (node != null) { return 1; } @@ -215,6 +253,17 @@ public class XmlFunctions { return writer.toString(); } + private static Node getDocumentNode(final String xml) { + try { + final DocumentBuilder documentBuilder = + castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder(); + final InputSource inputSource = new InputSource(new StringReader(xml)); + return documentBuilder.parse(inputSource); + } catch (final ParserConfigurationException | SAXException | IOException e) { + throw new IllegalArgumentException("XML parsing failed", e); + } + } + /** The internal default ErrorListener for Transformer. Just rethrows errors to * discontinue the XML transformation. */ private static class InternalErrorListener implements ErrorListener { diff --git a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java index 1457ef6908..046d935705 100644 --- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java +++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java @@ -21,9 +21,13 @@ import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.runtime.XmlFunctions; import org.apache.calcite.util.BuiltInMethod; +import org.checkerframework.checker.nullness.qual.Nullable; import org.hamcrest.Matcher; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.function.Supplier; import static org.hamcrest.CoreMatchers.is; @@ -36,6 +40,23 @@ import static org.junit.jupiter.api.Assertions.fail; */ class SqlXmlFunctionsTest { + private static final String XML = "<document>string</document>"; + private static final String XSLT = + "<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"></xsl:stylesheet>"; + private static final String DOCUMENT_PATH = "/document"; + private static @Nullable String xmlExternalEntity = null; + private static @Nullable String xsltExternalEntity = null; + + @BeforeAll public static void setup() throws Exception { + final Path testFile = Files.createTempFile("foo", "temp"); + testFile.toFile().deleteOnExit(); + final String filePath = "file:///" + testFile.toAbsolutePath(); + xmlExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath + + "\"> ]><document>&entity;</document>"; + xsltExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath + + "\"> ]><xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">&entity;</xsl:stylesheet>"; + } + @Test void testExtractValue() { assertExtractValue("<a>ccc<b>ddd</b></a>", "/a", is("ccc")); @@ -45,6 +66,33 @@ class SqlXmlFunctionsTest { assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected)); } + @Test void testExtractValueExternalEntity() { + String message = "Invalid input for EXTRACTVALUE: xml: '" + + xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'"; + CalciteException expected = new CalciteException(message, null); + assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH, + Matchers.expectThrowable(expected)); + } + + @Test void testExistsNodeExternalEntity() { + String message = "Invalid input for EXISTSNODE xpath: '" + + DOCUMENT_PATH + "', namespace: '" + null + "'"; + CalciteException expected = new CalciteException(message, null); + assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null, + Matchers.expectThrowable(expected)); + } + + @Test void testXmlTransformExternalEntity() { + String message = "Invalid input for XMLTRANSFORM xml: '" + xmlExternalEntity + "'"; + CalciteException expected = new CalciteException(message, null); + assertXmlTransformFailed(xmlExternalEntity, XSLT, Matchers.expectThrowable(expected)); + } + + @Test void testXmlTransformExternalEntityXslt() { + String message = "Illegal xslt specified : '" + xsltExternalEntity + "'"; + CalciteException expected = new CalciteException(message, null); + assertXmlTransformFailed(XML, xsltExternalEntity, Matchers.expectThrowable(expected)); + } @Test void testXmlTransform() { assertXmlTransform(null, "", nullValue());