This is an automated email from the ASF dual-hosted git repository.

chunwei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new 3302a49  [CALCITE-3580] Oracle EXTRACT(XML) Function Support 
(ritesh-kapoor)
3302a49 is described below

commit 3302a494767b4d64b5212e21f53d2c8cc49fe1c7
Author: Ritesh Kapoor <[email protected]>
AuthorDate: Sat Dec 28 10:40:08 2019 +0530

    [CALCITE-3580] Oracle EXTRACT(XML) Function Support (ritesh-kapoor)
    
    close apache/calcite#1701
---
 .../calcite/adapter/enumerable/RexImpTable.java    |  2 +
 .../apache/calcite/runtime/CalciteResource.java    |  3 +
 .../org/apache/calcite/runtime/XmlFunctions.java   | 66 ++++++++++++++++++++
 .../calcite/sql/fun/SqlLibraryOperators.java       |  5 ++
 .../org/apache/calcite/sql/type/OperandTypes.java  |  5 ++
 .../org/apache/calcite/util/BuiltInMethod.java     |  1 +
 .../calcite/util/SimpleNamespaceContext.java       | 71 ++++++++++++++++++++++
 .../calcite/runtime/CalciteResource.properties     |  1 +
 .../calcite/sql/test/SqlOperatorBaseTest.java      | 36 +++++++++++
 .../apache/calcite/test/SqlXmlFunctionsTest.java   | 26 ++++++++
 core/src/test/resources/sql/functions.iq           | 14 +++++
 site/_docs/reference.md                            |  1 +
 12 files changed, 231 insertions(+)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 05a4af3..4f432bc 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -98,6 +98,7 @@ import static 
org.apache.calcite.sql.fun.SqlLibraryOperators.CHR;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.DAYNAME;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.DIFFERENCE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_DEPTH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.JSON_KEYS;
@@ -532,6 +533,7 @@ public class RexImpTable {
     // Xml Operators
     defineMethod(EXTRACT_VALUE, BuiltInMethod.EXTRACT_VALUE.method, 
NullPolicy.ARG0);
     defineMethod(XML_TRANSFORM, BuiltInMethod.XML_TRANSFORM.method, 
NullPolicy.ARG0);
+    defineMethod(EXTRACT_XML, BuiltInMethod.EXTRACT_XML.method, 
NullPolicy.ARG0);
 
     // Json Operators
     defineMethod(JSON_VALUE_EXPRESSION,
diff --git a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java 
b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
index 53caa36..c5f8392 100644
--- a/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
+++ b/core/src/main/java/org/apache/calcite/runtime/CalciteResource.java
@@ -910,6 +910,9 @@ public interface CalciteResource {
   @BaseMessage("Invalid input for XMLTRANSFORM xml: ''{0}''")
   ExInst<CalciteException> invalidInputForXmlTransform(String xml);
 
+  @BaseMessage("Invalid input for EXTRACT xpath: ''{0}'', namespace: ''{1}''")
+  ExInst<CalciteException> invalidInputForExtractXml(String xpath, String 
namespace);
+
   @BaseMessage("Invalid input for EXTRACTVALUE: xml: ''{0}'', xpath 
expression: ''{1}''")
   ExInst<CalciteException> invalidInputForExtractValue(String xml, String 
xpath);
 }
diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
index a1ce84c..bcf386e 100644
--- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
@@ -16,22 +16,32 @@
  */
 package org.apache.calcite.runtime;
 
+import org.apache.calcite.util.SimpleNamespaceContext;
+
 import org.apache.commons.lang3.StringUtils;
 
+import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.InputSource;
 
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Source;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerConfigurationException;
 import javax.xml.transform.TransformerException;
 import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 import javax.xml.transform.stream.StreamSource;
+import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
 import javax.xml.xpath.XPathExpressionException;
@@ -49,6 +59,11 @@ public class XmlFunctions {
   private static final ThreadLocal<TransformerFactory> TRANSFORMER_FACTORY =
       ThreadLocal.withInitial(TransformerFactory::newInstance);
 
+  private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
+      .compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( 
[0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
+  private static final Pattern EXTRACT_NAMESPACE_PATTERN = Pattern
+      .compile("([0-9a-zA-Z:_-]+)=(['\"])((?!\\2).+?)\\2");
+
   private XmlFunctions() {
   }
 
@@ -92,4 +107,55 @@ public class XmlFunctions {
       throw RESOURCE.invalidInputForXmlTransform(xml).ex();
     }
   }
+
+  public static String extractXml(String xml, String xpath) {
+    return extractXml(xml, xpath, null);
+  }
+
+  public static String extractXml(String xml, String xpath, String namespace) {
+    if (xml == null || xpath == null) {
+      return null;
+    }
+    try {
+      XPath xPath = XPATH_FACTORY.get().newXPath();
+
+      if (namespace != null) {
+        if (!VALID_NAMESPACE_PATTERN.matcher(namespace).find()) {
+          throw new IllegalArgumentException("Invalid namespace " + namespace);
+        }
+        Map<String, String> namespaceMap = new HashMap<>();
+        Matcher matcher = EXTRACT_NAMESPACE_PATTERN.matcher(namespace);
+        while (matcher.find()) {
+          namespaceMap.put(matcher.group(1), matcher.group(3));
+        }
+        xPath.setNamespaceContext(new SimpleNamespaceContext(namespaceMap));
+      }
+
+      XPathExpression xpathExpression = xPath.compile(xpath);
+
+      try {
+        List<String> result = new ArrayList<>();
+        NodeList nodes = (NodeList) xpathExpression
+            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODESET);
+        for (int i = 0; i < nodes.getLength(); i++) {
+          result.add(convertNodeToString(nodes.item(i)));
+        }
+        return StringUtils.join(result, "");
+      } catch (XPathExpressionException e) {
+        Node node = (Node) xpathExpression
+            .evaluate(new InputSource(new StringReader(xml)), 
XPathConstants.NODE);
+        return convertNodeToString(node);
+      }
+    } catch (IllegalArgumentException | XPathExpressionException | 
TransformerException ex) {
+      throw RESOURCE.invalidInputForExtractXml(xpath, namespace).ex();
+    }
+  }
+
+  private static String convertNodeToString(Node node) throws 
TransformerException {
+    StringWriter writer = new StringWriter();
+    Transformer transformer = TRANSFORMER_FACTORY.get().newTransformer();
+    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+    transformer.transform(new DOMSource(node), new StreamResult(writer));
+    return writer.toString();
+  }
 }
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index d71349a..5ff3cc2 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -184,6 +184,11 @@ public abstract class SqlLibraryOperators {
       ReturnTypes.cascade(ReturnTypes.VARCHAR_2000, 
SqlTypeTransforms.FORCE_NULLABLE),
       null, OperandTypes.STRING_STRING, SqlFunctionCategory.SYSTEM);
 
+  @LibraryOperator(libraries = {ORACLE})
+  public static final SqlFunction EXTRACT_XML = new SqlFunction(
+      "EXTRACT", SqlKind.OTHER_FUNCTION,
+      ReturnTypes.cascade(ReturnTypes.VARCHAR_2000, 
SqlTypeTransforms.FORCE_NULLABLE),
+      null, OperandTypes.STRING_STRING_OPTIONAL_STRING, 
SqlFunctionCategory.SYSTEM);
 
   /** The "MONTHNAME(datetime)" function; returns the name of the month,
    * in the current locale, of a TIMESTAMP or DATE argument. */
diff --git a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java 
b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
index 51c182f..eddba18 100644
--- a/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
+++ b/core/src/main/java/org/apache/calcite/sql/type/OperandTypes.java
@@ -231,6 +231,11 @@ public abstract class OperandTypes {
   public static final FamilyOperandTypeChecker STRING_STRING_STRING =
       family(SqlTypeFamily.STRING, SqlTypeFamily.STRING, SqlTypeFamily.STRING);
 
+  public static final FamilyOperandTypeChecker STRING_STRING_OPTIONAL_STRING =
+      family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING, 
SqlTypeFamily.STRING),
+          // Third operand optional (operand index 0, 1, 2)
+          number -> number == 2);
+
   public static final SqlSingleOperandTypeChecker CHARACTER =
       family(SqlTypeFamily.CHARACTER);
 
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index 457ec2f..92ce6d4 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -324,6 +324,7 @@ public enum BuiltInMethod {
   SHA1(SqlFunctions.class, "sha1", String.class),
   EXTRACT_VALUE(XmlFunctions.class, "extractValue", String.class, 
String.class),
   XML_TRANSFORM(XmlFunctions.class, "xmlTransform", String.class, 
String.class),
+  EXTRACT_XML(XmlFunctions.class, "extractXml", String.class, String.class, 
String.class),
   JSONIZE(JsonFunctions.class, "jsonize", Object.class),
   DEJSONIZE(JsonFunctions.class, "dejsonize", String.class),
   JSON_VALUE_EXPRESSION(JsonFunctions.class, "jsonValueExpression",
diff --git 
a/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java 
b/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java
new file mode 100644
index 0000000..55a872c
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/util/SimpleNamespaceContext.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.util;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
+
+/**
+ * Simple {@link javax.xml.namespace.NamespaceContext} implementation. Follows 
the standard
+ * NamespaceContext contract, and is loadable via a {@link java.util.Map}
+ */
+public class SimpleNamespaceContext implements NamespaceContext {
+
+  private final Map<String, String> prefixToNamespaceUri = new HashMap<>();
+  private final Map<String, Set<String>> namespaceUriToPrefixes = new 
HashMap<>();
+
+  public SimpleNamespaceContext(Map<String, String> bindings) {
+    bindNamespaceUri(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
+    bindNamespaceUri(XMLConstants.XMLNS_ATTRIBUTE, 
XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
+    bindNamespaceUri(XMLConstants.DEFAULT_NS_PREFIX, "");
+    bindings.forEach(this::bindNamespaceUri);
+  }
+
+  @Override public String getNamespaceURI(String prefix) {
+    if (this.prefixToNamespaceUri.containsKey(prefix)) {
+      return this.prefixToNamespaceUri.get(prefix);
+    }
+    return "";
+  }
+
+  @Override public String getPrefix(String namespaceUri) {
+    Set<String> prefixes = getPrefixesSet(namespaceUri);
+    return !prefixes.isEmpty() ? prefixes.iterator().next() : null;
+  }
+
+  @Override public Iterator<String> getPrefixes(String namespaceUri) {
+    return getPrefixesSet(namespaceUri).iterator();
+  }
+
+  private Set<String> getPrefixesSet(String namespaceUri) {
+    Set<String> prefixes = this.namespaceUriToPrefixes.get(namespaceUri);
+    return prefixes != null ? Collections.unmodifiableSet(prefixes) : 
Collections.emptySet();
+  }
+
+  private void bindNamespaceUri(String prefix, String namespaceUri) {
+    this.prefixToNamespaceUri.put(prefix, namespaceUri);
+    Set<String> prefixes = this.namespaceUriToPrefixes
+        .computeIfAbsent(namespaceUri, k -> new LinkedHashSet<>());
+    prefixes.add(prefix);
+  }
+}
diff --git 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
index f5d18e7..57da204 100644
--- 
a/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
+++ 
b/core/src/main/resources/org/apache/calcite/runtime/CalciteResource.properties
@@ -299,4 +299,5 @@ InvalidInputForRegexpReplace=Not a valid input for 
REGEXP_REPLACE: ''{0}''
 IllegalXslt=Illegal xslt specified : ''{0}''
 InvalidInputForXmlTransform=Invalid input for XMLTRANSFORM xml: ''{0}''
 InvalidInputForExtractValue=Invalid input for EXTRACTVALUE: xml: ''{0}'', 
xpath expression: ''{1}''
+InvalidInputForExtractXml=Invalid input for EXTRACT xpath: ''{0}'', namespace: 
''{1}''
 # End CalciteResource.properties
diff --git 
a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java 
b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
index 4300ba4..5b362b4 100644
--- a/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
+++ b/core/src/test/java/org/apache/calcite/sql/test/SqlOperatorBaseTest.java
@@ -5226,6 +5226,42 @@ public abstract class SqlOperatorBaseTest {
         "VARCHAR(2000)");
   }
 
+  @Test public void testExtractXml() {
+    SqlTester sqlTester = tester(SqlLibrary.ORACLE);
+
+    sqlTester.checkFails("\"EXTRACT\"('', '<','a')",
+        "Invalid input for EXTRACT xpath: '.*", true);
+    sqlTester.checkFails("\"EXTRACT\"('', '<')",
+        "Invalid input for EXTRACT xpath: '.*", true);
+    sqlTester.checkNull("\"EXTRACT\"('', NULL)");
+    sqlTester.checkNull("\"EXTRACT\"(NULL,'')");
+
+    sqlTester.checkString(
+        
"\"EXTRACT\"('<Article><Title>Article1</Title><Authors><Author>Foo</Author><Author>Bar"
+            + "</Author></Authors><Body>article text"
+            + ".</Body></Article>', '/Article/Title')",
+        "<Title>Article1</Title>",
+        "VARCHAR(2000)");
+
+    sqlTester.checkString(
+        
"\"EXTRACT\"('<Article><Title>Article1</Title><Title>Article2</Title><Authors><Author>Foo"
+            + "</Author><Author>Bar</Author></Authors><Body>article text"
+            + ".</Body></Article>', '/Article/Title')",
+        "<Title>Article1</Title><Title>Article2</Title>",
+        "VARCHAR(2000)");
+
+    sqlTester.checkString(
+        "\"EXTRACT\"(\n"
+            + "'<books xmlns=\"http://www.contoso";
+            + ".com/books\"><book><title>Title</title><author>Author 
Name</author><price>5"
+            + ".50</price></book></books>'"
+            + ", '/books:books/books:book', 
'books=\"http://www.contoso.com/books\";'"
+            + ")",
+        "<book 
xmlns=\"http://www.contoso.com/books\";><title>Title</title><author>Author "
+            + "Name</author><price>5.50</price></book>",
+        "VARCHAR(2000)");
+  }
+
   @Test public void testLowerFunc() {
     tester.setFor(SqlStdOperatorTable.LOWER);
 
diff --git 
a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java 
b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
index a506cbe..7da17cd 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
@@ -56,6 +56,32 @@ public class SqlXmlFunctionsTest {
     assertXmlTransformFailed("", xslt, Matchers.expectThrowable(expected));
   }
 
+  @Test public void testExtractXml() {
+    assertExtractXml(null, "", null, nullValue());
+    assertExtractXml("", null, null, nullValue());
+
+    String xpath = "<";
+    String namespace = "a";
+    String message =
+        "Invalid input for EXTRACT xpath: '" + xpath + "', namespace: '" + 
namespace + "'";
+    CalciteException expected = new CalciteException(message, null);
+    assertExtractXmlFailed("", xpath, namespace, 
Matchers.expectThrowable(expected));
+  }
+
+  private void assertExtractXml(String xml, String xpath, String namespace,
+      Matcher<? super String> matcher) {
+    String methodDesc = BuiltInMethod.EXTRACT_XML.getMethodName()
+        + "(" + String.join(", ", xml, xpath, namespace) + ")";
+    assertThat(methodDesc, XmlFunctions.extractXml(xml, xpath, namespace), 
matcher);
+  }
+
+  private void assertExtractXmlFailed(String xml, String xpath, String 
namespace,
+      Matcher<? super Throwable> matcher) {
+    String methodDesc = BuiltInMethod.EXTRACT_XML.getMethodName()
+        + "(" + String.join(", ", xml, xpath, namespace) + ")";
+    assertFailed(methodDesc, () -> XmlFunctions.extractXml(xml, xpath, 
namespace), matcher);
+  }
+
   private void assertXmlTransform(String xml, String xslt,
       Matcher<? super String> matcher) {
     String methodDesc =
diff --git a/core/src/test/resources/sql/functions.iq 
b/core/src/test/resources/sql/functions.iq
index c5cd279..2927770 100644
--- a/core/src/test/resources/sql/functions.iq
+++ b/core/src/test/resources/sql/functions.iq
@@ -59,4 +59,18 @@ SELECT XMLTRANSFORM(
 !ok
 
 
+SELECT "EXTRACT"(
+            
'<Article><Title>Article1</Title><Authors><Author>Foo</Author><Author>Bar</Author></Authors><Body>article
 text</Body></Article>'
+            , '/Article/Title'
+        );
++-------------------------+
+| EXPR$0                  |
++-------------------------+
+| <Title>Article1</Title> |
++-------------------------+
+(1 row)
+
+!ok
+
+
 # End functions.iq
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index ddb041c..37ef3f2 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2281,6 +2281,7 @@ semantics.
 | m | DAYNAME(datetime)                              | Returns the name, in 
the connection's locale, of the weekday in *datetime*; for example, it returns 
'星期日' for both DATE '2020-02-10' and TIMESTAMP '2020-02-10 10:10:10'
 | o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | 
Compares *value* to each *valueN* value one by one; if *value* is equal to a 
*valueN*, returns the corresponding *resultN*, else returns *default*, or NULL 
if *default* is not specified
 | p | DIFFERENCE(string, string)                     | Returns a measure of 
the similarity of two strings, namely the number of character positions that 
their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 
if the `SOUNDEX` values are totally different
+| o | EXTRACT(xml, xpath, [, namespace ])            | Returns the xml 
fragment of the element or elements matched by the XPath expression. The 
optional namespace value that specifies a default mapping or namespace mapping 
for prefixes, which is used when evaluating the XPath expression
 | m | EXTRACTVALUE(xml, xpathExpr))                  | Returns the text of the 
first text node which is a child of the element or elements matched by the 
XPath expression.
 | o | GREATEST(expr [, expr ]*)                      | Returns the greatest of 
the expressions
 | m | JSON_TYPE(jsonValue)                           | Returns a string value 
indicating the type of a *jsonValue*

Reply via email to