This is an automated email from the ASF dual-hosted git repository.
chunwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new 3302a49 [CALCITE-3580] Oracle EXTRACT(XML) Function Support
(ritesh-kapoor)
3302a49 is described below
commit 3302a494767b4d64b5212e21f53d2c8cc49fe1c7
Author: Ritesh Kapoor <[email protected]>
AuthorDate: Sat Dec 28 10:40:08 2019 +0530
[CALCITE-3580] Oracle EXTRACT(XML) Function Support (ritesh-kapoor)
close apache/calcite#1701
---
.../calcite/adapter/enumerable/RexImpTable.java | 2 +
.../apache/calcite/runtime/CalciteResource.java | 3 +
.../org/apache/calcite/runtime/XmlFunctions.java | 66 ++++++++++++++++++++
.../calcite/sql/fun/SqlLibraryOperators.java | 5 ++
.../org/apache/calcite/sql/type/OperandTypes.java | 5 ++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
.../calcite/util/SimpleNamespaceContext.java | 71 ++++++++++++++++++++++
.../calcite/runtime/CalciteResource.properties | 1 +
.../calcite/sql/test/SqlOperatorBaseTest.java | 36 +++++++++++
.../apache/calcite/test/SqlXmlFunctionsTest.java | 26 ++++++++
core/src/test/resources/sql/functions.iq | 14 +++++
site/_docs/reference.md | 1 +
12 files changed, 231 insertions(+)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 05a4af3..4f432bc 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -98,6 +98,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.CHR;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.DAYNAME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.DIFFERENCE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_DEPTH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_KEYS;
@@ -532,6 +533,7 @@ public class RexImpTable {
// Xml Operators
defineMethod(EXTRACT_VALUE, BuiltInMethod.EXTRACT_VALUE.method,
NullPolicy.ARG0);
defineMethod(XML_TRANSFORM, BuiltInMethod.XML_TRANSFORM.method,
NullPolicy.ARG0);
+ defineMethod(EXTRACT_XML, BuiltInMethod.EXTRACT_XML.method,
NullPolicy.ARG0);
// Json Operators
defineMethod(JSON_VALUE_EXPRESSION,
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index 53caa36..c5f8392 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -910,6 +910,9 @@ public interface CalciteResource {
@BaseMessage("Invalid input for XMLTRANSFORM xml: ''{0}''")
ExInst<CalciteException> invalidInputForXmlTransform(String xml);
+ @BaseMessage("Invalid input for EXTRACT xpath: ''{0}'', namespace: ''{1}''")
+ ExInst<CalciteException> invalidInputForExtractXml(String xpath, String
namespace);
+
@BaseMessage("Invalid input for EXTRACTVALUE: xml: ''{0}'', xpath
expression: ''{1}''")
ExInst<CalciteException> invalidInputForExtractValue(String xml, String
xpath);
}
diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
index a1ce84c..bcf386e 100644
--- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
@@ -16,22 +16,32 @@
*/
package org.apache.calcite.runtime;
+import org.apache.calcite.util.SimpleNamespaceContext;
+
import org.apache.commons.lang3.StringUtils;
+import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
+import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
@@ -49,6 +59,11 @@ public class XmlFunctions {
private static final ThreadLocal<TransformerFactory> TRANSFORMER_FACTORY =
ThreadLocal.withInitial(TransformerFactory::newInstance);
+ private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
+ .compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")(
[0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
+ private static final Pattern EXTRACT_NAMESPACE_PATTERN = Pattern
+ .compile("([0-9a-zA-Z:_-]+)=(['\"])((?!\\2).+?)\\2");
+
private XmlFunctions() {
}
@@ -92,4 +107,55 @@ public class XmlFunctions {
throw RESOURCE.invalidInputForXmlTransform(xml).ex();
}
}
+
+ public static String extractXml(String xml, String xpath) {
+ return extractXml(xml, xpath, null);
+ }
+
+ public static String extractXml(String xml, String xpath, String namespace) {
+ if (xml == null || xpath == null) {
+ return null;
+ }
+ try {
+ XPath xPath = XPATH_FACTORY.get().newXPath();
+
+ if (namespace != null) {
+ if (!VALID_NAMESPACE_PATTERN.matcher(namespace).find()) {
+ throw new IllegalArgumentException("Invalid namespace " + namespace);
+ }
+ Map<String, String> namespaceMap = new HashMap<>();
+ Matcher matcher = EXTRACT_NAMESPACE_PATTERN.matcher(namespace);
+ while (matcher.find()) {
+ namespaceMap.put(matcher.group(1), matcher.group(3));
+ }
+ xPath.setNamespaceContext(new SimpleNamespaceContext(namespaceMap));
+ }
+
+ XPathExpression xpathExpression = xPath.compile(xpath);
+
+ try {
+ List<String> result = new ArrayList<>();
+ NodeList nodes = (NodeList) xpathExpression
+ .evaluate(new InputSource(new StringReader(xml)),
XPathConstants.NODESET);
+ for (int i = 0; i < nodes.getLength(); i++) {
+ result.add(convertNodeToString(nodes.item(i)));
+ }
+ return StringUtils.join(result, "");
+ } catch (XPathExpressionException e) {
+ Node node = (Node) xpathExpression
+ .evaluate(new InputSource(new StringReader(xml)),
XPathConstants.NODE);
+ return convertNodeToString(node);
+ }
+ } catch (IllegalArgumentException | XPathExpressionException |
TransformerException ex) {
+ throw RESOURCE.invalidInputForExtractXml(xpath, namespace).ex();
+ }
+ }
+
+ private static String convertNodeToString(Node node) throws
TransformerException {
+ StringWriter writer = new StringWriter();
+ Transformer transformer = TRANSFORMER_FACTORY.get().newTransformer();
+ transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+ transformer.transform(new DOMSource(node), new StreamResult(writer));
+ return writer.toString();
+ }
}
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index d71349a..5ff3cc2 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -184,6 +184,11 @@ public abstract class SqlLibraryOperators {
ReturnTypes.cascade(ReturnTypes.VARCHAR_2000,
SqlTypeTransforms.FORCE_NULLABLE),
null, OperandTypes.STRING_STRING, SqlFunctionCategory.SYSTEM);
+ @LibraryOperator(libraries = {ORACLE})
+ public static final SqlFunction EXTRACT_XML = new SqlFunction(
+ "EXTRACT", SqlKind.OTHER_FUNCTION,
+ ReturnTypes.cascade(ReturnTypes.VARCHAR_2000,
SqlTypeTransforms.FORCE_NULLABLE),
+ null, OperandTypes.STRING_STRING_OPTIONAL_STRING,
SqlFunctionCategory.SYSTEM);
/** The "MONTHNAME(datetime)" function; returns the name of the month,
* in the current locale, of a TIMESTAMP or DATE argument. */
diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
index 51c182f..eddba18 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
@@ -231,6 +231,11 @@ public abstract class OperandTypes {
public static final FamilyOperandTypeChecker STRING_STRING_STRING =
family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING);
+ public static final FamilyOperandTypeChecker STRING_STRING_OPTIONAL_STRING =
+ family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING,
SqlTypeFamily.STRING),
+ // Third operand optional (operand index 0, 1, 2)
+ number -> number == 2);
+
public static final SqlSingleOperandTypeChecker CHARACTER =
family(SqlTypeFamily.CHARACTER);
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 457ec2f..92ce6d4 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -324,6 +324,7 @@ public enum BuiltInMethod {
SHA1(SqlFunctions.class, "sha1", String.class),
EXTRACT_VALUE(XmlFunctions.class, "extractValue", String.class,
String.class),
XML_TRANSFORM(XmlFunctions.class, "xmlTransform", String.class,
String.class),
+ EXTRACT_XML(XmlFunctions.class, "extractXml", String.class, String.class,
String.class),
JSONIZE(JsonFunctions.class, "jsonize", Object.class),
DEJSONIZE(JsonFunctions.class, "dejsonize", String.class),
JSON_VALUE_EXPRESSION(JsonFunctions.class, "jsonValueExpression",
diff --git
a/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java
b/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java
new file mode 100644
index 0000000..55a872c
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
+
+/**
+ * Simple {@link javax.xml.namespace.NamespaceContext} implementation. Follows
the standard
+ * NamespaceContext contract, and is loadable via a {@link java.util.Map}
+ */
+public class SimpleNamespaceContext implements NamespaceContext {
+
+ private final Map<String, String> prefixToNamespaceUri = new HashMap<>();
+ private final Map<String, Set<String>> namespaceUriToPrefixes = new
HashMap<>();
+
+ public SimpleNamespaceContext(Map<String, String> bindings) {
+ bindNamespaceUri(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
+ bindNamespaceUri(XMLConstants.XMLNS_ATTRIBUTE,
XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
+ bindNamespaceUri(XMLConstants.DEFAULT_NS_PREFIX, "");
+ bindings.forEach(this::bindNamespaceUri);
+ }
+
+ @Override public String getNamespaceURI(String prefix) {
+ if (this.prefixToNamespaceUri.containsKey(prefix)) {
+ return this.prefixToNamespaceUri.get(prefix);
+ }
+ return "";
+ }
+
+ @Override public String getPrefix(String namespaceUri) {
+ Set<String> prefixes = getPrefixesSet(namespaceUri);
+ return !prefixes.isEmpty() ? prefixes.iterator().next() : null;
+ }
+
+ @Override public Iterator<String> getPrefixes(String namespaceUri) {
+ return getPrefixesSet(namespaceUri).iterator();
+ }
+
+ private Set<String> getPrefixesSet(String namespaceUri) {
+ Set<String> prefixes = this.namespaceUriToPrefixes.get(namespaceUri);
+ return prefixes != null ? Collections.unmodifiableSet(prefixes) :
Collections.emptySet();
+ }
+
+ private void bindNamespaceUri(String prefix, String namespaceUri) {
+ this.prefixToNamespaceUri.put(prefix, namespaceUri);
+ Set<String> prefixes = this.namespaceUriToPrefixes
+ .computeIfAbsent(namespaceUri, k -> new LinkedHashSet<>());
+ prefixes.add(prefix);
+ }
+}
diff --git
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index f5d18e7..57da204 100644
---
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -299,4 +299,5 @@ InvalidInputForRegexpReplace=Not a valid input for
REGEXP_REPLACE: ''{0}''
IllegalXslt=Illegal xslt specified : ''{0}''
InvalidInputForXmlTransform=Invalid input for XMLTRANSFORM xml: ''{0}''
InvalidInputForExtractValue=Invalid input for EXTRACTVALUE: xml: ''{0}'',
xpath expression: ''{1}''
+InvalidInputForExtractXml=Invalid input for EXTRACT xpath: ''{0}'', namespace:
''{1}''
# End CalciteResource.properties
diff --git
a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
index 4300ba4..5b362b4 100644
--- a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
@@ -5226,6 +5226,42 @@ public abstract class SqlOperatorBaseTest {
"VARCHAR(2000)");
}
+ @Test public void testExtractXml() {
+ SqlTester sqlTester = tester(SqlLibrary.ORACLE);
+
+ sqlTester.checkFails("\"EXTRACT\"('', '<','a')",
+ "Invalid input for EXTRACT xpath: '.*", true);
+ sqlTester.checkFails("\"EXTRACT\"('', '<')",
+ "Invalid input for EXTRACT xpath: '.*", true);
+ sqlTester.checkNull("\"EXTRACT\"('', NULL)");
+ sqlTester.checkNull("\"EXTRACT\"(NULL,'')");
+
+ sqlTester.checkString(
+
"\"EXTRACT\"('<Article><Title>Article1</Title><Authors><Author>Foo</Author><Author>Bar"
+ + "</Author></Authors><Body>article text"
+ + ".</Body></Article>', '/Article/Title')",
+ "<Title>Article1</Title>",
+ "VARCHAR(2000)");
+
+ sqlTester.checkString(
+
"\"EXTRACT\"('<Article><Title>Article1</Title><Title>Article2</Title><Authors><Author>Foo"
+ + "</Author><Author>Bar</Author></Authors><Body>article text"
+ + ".</Body></Article>', '/Article/Title')",
+ "<Title>Article1</Title><Title>Article2</Title>",
+ "VARCHAR(2000)");
+
+ sqlTester.checkString(
+ "\"EXTRACT\"(\n"
+ + "'<books xmlns=\"http://www.contoso"
+ + ".com/books\"><book><title>Title</title><author>Author
Name</author><price>5"
+ + ".50</price></book></books>'"
+ + ", '/books:books/books:book',
'books=\"http://www.contoso.com/books\"'"
+ + ")",
+ "<book
xmlns=\"http://www.contoso.com/books\"><title>Title</title><author>Author "
+ + "Name</author><price>5.50</price></book>",
+ "VARCHAR(2000)");
+ }
+
@Test public void testLowerFunc() {
tester.setFor(SqlStdOperatorTable.LOWER);
diff --git
a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
index a506cbe..7da17cd 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
@@ -56,6 +56,32 @@ public class SqlXmlFunctionsTest {
assertXmlTransformFailed("", xslt, Matchers.expectThrowable(expected));
}
+ @Test public void testExtractXml() {
+ assertExtractXml(null, "", null, nullValue());
+ assertExtractXml("", null, null, nullValue());
+
+ String xpath = "<";
+ String namespace = "a";
+ String message =
+ "Invalid input for EXTRACT xpath: '" + xpath + "', namespace: '" +
namespace + "'";
+ CalciteException expected = new CalciteException(message, null);
+ assertExtractXmlFailed("", xpath, namespace,
Matchers.expectThrowable(expected));
+ }
+
+ private void assertExtractXml(String xml, String xpath, String namespace,
+ Matcher<? super String> matcher) {
+ String methodDesc = BuiltInMethod.EXTRACT_XML.getMethodName()
+ + "(" + String.join(", ", xml, xpath, namespace) + ")";
+ assertThat(methodDesc, XmlFunctions.extractXml(xml, xpath, namespace),
matcher);
+ }
+
+ private void assertExtractXmlFailed(String xml, String xpath, String
namespace,
+ Matcher<? super Throwable> matcher) {
+ String methodDesc = BuiltInMethod.EXTRACT_XML.getMethodName()
+ + "(" + String.join(", ", xml, xpath, namespace) + ")";
+ assertFailed(methodDesc, () -> XmlFunctions.extractXml(xml, xpath,
namespace), matcher);
+ }
+
private void assertXmlTransform(String xml, String xslt,
Matcher<? super String> matcher) {
String methodDesc =
diff --git a/core/src/test/resources/sql/functions.iq
b/core/src/test/resources/sql/functions.iq
index c5cd279..2927770 100644
--- a/core/src/test/resources/sql/functions.iq
+++ b/core/src/test/resources/sql/functions.iq
@@ -59,4 +59,18 @@ SELECT XMLTRANSFORM(
!ok
+SELECT "EXTRACT"(
+
'<Article><Title>Article1</Title><Authors><Author>Foo</Author><Author>Bar</Author></Authors><Body>article
text</Body></Article>'
+ , '/Article/Title'
+ );
++-------------------------+
+| EXPR$0 |
++-------------------------+
+| <Title>Article1</Title> |
++-------------------------+
+(1 row)
+
+!ok
+
+
# End functions.iq
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index ddb041c..37ef3f2 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2281,6 +2281,7 @@ semantics.
| m | DAYNAME(datetime) | Returns the name, in
the connection's locale, of the weekday in *datetime*; for example, it returns
'星期日' for both DATE '2020-02-10' and TIMESTAMP '2020-02-10 10:10:10'
| o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) |
Compares *value* to each *valueN* value one by one; if *value* is equal to a
*valueN*, returns the corresponding *resultN*, else returns *default*, or NULL
if *default* is not specified
| p | DIFFERENCE(string, string) | Returns a measure of
the similarity of two strings, namely the number of character positions that
their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0
if the `SOUNDEX` values are totally different
+| o | EXTRACT(xml, xpath, [, namespace ]) | Returns the xml
fragment of the element or elements matched by the XPath expression. The
optional namespace value that specifies a default mapping or namespace mapping
for prefixes, which is used when evaluating the XPath expression
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the
first text node which is a child of the element or elements matched by the
XPath expression.
| o | GREATEST(expr [, expr ]*) | Returns the greatest of
the expressions
| m | JSON_TYPE(jsonValue) | Returns a string value
indicating the type of a *jsonValue*