This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d2e5c7045c9 [feature](functions) Impl func xpath_string (#49262)
d2e5c7045c9 is described below
commit d2e5c7045c9c2622fa06fd6cf1b0571d2102b85d
Author: Socrates <[email protected]>
AuthorDate: Wed Apr 2 21:05:59 2025 +0800
[feature](functions) Impl func xpath_string (#49262)
### What problem does this PR solve?
### Release note
Add xpath_string Function
#### Syntax
```sql
VARCHAR xpath_string(VARCHAR xml_string, VARCHAR xpath_expr)
```
#### Arguments
- `xml_string`: A string containing valid XML content
- `xpath_expr`: A valid XPath expression string
#### Return Value
- Returns VARCHAR type
- Returns NULL if either argument is NULL or an empty string
- Returns empty string if XPath doesn't match any nodes
#### Examples
```sql
-- Basic node value extraction
SELECT xpath_string('<a>123</a>', '/a'); -- Returns: '123'
-- Nested element extraction
SELECT xpath_string('<a><b>123</b></a>', '/a/b'); -- Returns: '123'
-- Using attributes
SELECT xpath_string('<a><b id="1">123</b></a>', '//b[@id="1"]'); -- Returns: '123'
-- Using position predicates
SELECT xpath_string('<a><b>1</b><b>2</b></a>', '/a/b[2]'); -- Returns: '2'
-- Handling CDATA and comments
SELECT xpath_string('<a><![CDATA[123]]></a>', '/a'); -- Returns: '123'
SELECT xpath_string('<a><!-- comment -->123</a>', '/a'); -- Returns: '123'
```
---
be/cmake/thirdparty.cmake | 3 +
be/src/vec/functions/function_string.cpp | 1 +
be/src/vec/functions/function_string.h | 136 +++++++++++++++++++
be/test/vec/function/function_string_test.cpp | 51 +++++++
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../expressions/functions/scalar/XpathString.java | 72 ++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 +
.../string_functions/test_xpath_string.out | Bin 0 -> 1850 bytes
.../string_functions/test_xpath_string.groovy | 149 +++++++++++++++++++++
9 files changed, 419 insertions(+)
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 1250e8ab1f5..b3d9202d5b0 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -176,6 +176,9 @@ add_thirdparty(icuuc LIB64)
add_thirdparty(icui18n LIB64)
add_thirdparty(icudata LIB64)
+
+add_thirdparty(pugixml LIB64)
+
if (BUILD_FAISS)
add_thirdparty(openblas LIB64)
add_thirdparty(faiss LIB64)
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index 1d292b038d3..ca4b20c8c76 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1281,6 +1281,7 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionOverlay>();
factory.register_function<FunctionStrcmp>();
factory.register_function<FunctionNgramSearch>();
+ factory.register_function<FunctionXPathString>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index d2ae6b6f9e2..fb18848c5b6 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -17,6 +17,7 @@
#pragma once
+#include <glog/logging.h>
#include <sys/types.h>
#include <algorithm>
@@ -83,6 +84,7 @@
#include <string_view>
#include "exprs/math_functions.h"
+#include "pugixml.hpp"
#include "udf/udf.h"
#include "util/md5.h"
#include "util/simd/vstring_function.h"
@@ -4588,4 +4590,138 @@ private:
}
};
+/// xpath_string(xml, xpath) -> String
+/// Returns the text content of the first node that matches the XPath
expression.
+/// Returns NULL if either xml or xpath is NULL or an empty string.
+/// Returns empty string if the XPath expression matches no nodes.
+/// The text content includes the node and all its descendants.
+/// Example:
+/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1'
+/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2'
+/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = ''
+/// xpath_string('invalid xml', '/a/b[1]') -> InvalidArgument error (XML fails to parse)
+/// xpath_string(NULL, '/a/b[1]') = NULL
+/// xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL
+class FunctionXPathString : public IFunction {
+public:
+ static constexpr auto name = "xpath_string";
+ static FunctionPtr create() { return
std::make_shared<FunctionXPathString>(); }
+ String get_name() const override { return name; }
+ size_t get_number_of_arguments() const override { return 2; }
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) const
override {
+ CHECK_EQ(arguments.size(), 2);
+ auto col_res = ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create());
+ const auto& [left_col, left_const] =
+ unpack_if_const(block.get_by_position(arguments[0]).column);
+ const auto& [right_col, right_const] =
+ unpack_if_const(block.get_by_position(arguments[1]).column);
+ const auto& xml_col = *assert_cast<const
ColumnString*>(left_col.get());
+ const auto& xpath_col = *assert_cast<const
ColumnString*>(right_col.get());
+
+ Status status;
+ if (left_const && right_const) {
+ status = execute_vector<true, true>(input_rows_count, xml_col,
xpath_col, *col_res);
+ } else if (left_const) {
+ status = execute_vector<true, false>(input_rows_count, xml_col,
xpath_col, *col_res);
+ } else if (right_const) {
+ status = execute_vector<false, true>(input_rows_count, xml_col,
xpath_col, *col_res);
+ } else {
+ status = execute_vector<false, false>(input_rows_count, xml_col,
xpath_col, *col_res);
+ }
+ if (!status.ok()) {
+ return status;
+ }
+
+ block.get_by_position(result).column = std::move(col_res);
+ return Status::OK();
+ }
+
+private:
+ // Build the text of the node and all its children.
+ static std::string get_text(const pugi::xml_node& node) {
+ std::string result;
+ build_text(node, result);
+ return result;
+ }
+
+ static void build_text(const pugi::xml_node& node, std::string& builder) {
+ if (node.type() == pugi::node_pcdata || node.type() ==
pugi::node_cdata) {
+ builder += node.value();
+ }
+ for (pugi::xml_node child : node.children()) {
+ build_text(child, builder);
+ }
+ }
+
+ static Status parse_xml(const StringRef& xml_str, pugi::xml_document&
xml_doc) {
+ pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data,
xml_str.size);
+ if (!result) {
+ return Status::InvalidArgument("Function {} failed to parse XML
string: {}", name,
+ result.description());
+ }
+ return Status::OK();
+ }
+
+ template <bool left_const, bool right_const>
+ static Status execute_vector(const size_t input_rows_count, const
ColumnString& xml_col,
+ const ColumnString& xpath_col,
ColumnNullable& res_col) {
+ pugi::xml_document xml_doc;
+ StringRef xpath_str;
+ // first check right_const, because we want to check empty input first
+ if constexpr (right_const) {
+ xpath_str = xpath_col.get_data_at(0);
+ if (xpath_str.empty()) {
+ // should return null if xpath_str is empty
+ res_col.insert_many_defaults(input_rows_count);
+ return Status::OK();
+ }
+ }
+ if constexpr (left_const) {
+ auto xml_str = xml_col.get_data_at(0);
+ if (xml_str.empty()) {
+ // should return null if xml_str is empty
+ res_col.insert_many_defaults(input_rows_count);
+ return Status::OK();
+ }
+ RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
+ }
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if constexpr (!right_const) {
+ xpath_str = xpath_col.get_data_at(i);
+ if (xpath_str.empty()) {
+ // should return null if xpath_str is empty
+ res_col.insert_default();
+ continue;
+ }
+ }
+ if constexpr (!left_const) {
+ auto xml_str = xml_col.get_data_at(i);
+ if (xml_str.empty()) {
+ // should return null if xml_str is empty
+ res_col.insert_default();
+ continue;
+ }
+ RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
+ }
+ // NOTE: don't use to_string_view() here, because xpath_str may not be null-terminated
+ pugi::xpath_node node =
xml_doc.select_node(xpath_str.to_string().c_str());
+ if (!node) {
+ // should return empty string if not found
+ auto empty_str = std::string("");
+ res_col.insert_data(empty_str.data(), empty_str.size());
+ continue;
+ }
+ auto text = get_text(node.node());
+ res_col.insert_data(text.data(), text.size());
+ }
+ return Status::OK();
+ }
+};
+
} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index 45a29fbfc05..28d83a4c1b1 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -3374,4 +3374,55 @@ TEST(function_string_test, function_rpad_test) {
check_function_all_arg_comb<DataTypeString, true>(func_name, input_types,
data_set);
}
+TEST(function_string_test, function_xpath_string_test) {
+ std::string func_name = "xpath_string";
+ BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+ DataSet data_set = {
+ {{std::string("<a>123</a>"), std::string("/a")},
std::string("123")},
+ {{std::string("<a><b>123</b></a>"), std::string("/a/b")},
std::string("123")},
+ {{std::string("<a><b>123</b><c>456</c></a>"),
std::string("/a/c")}, std::string("456")},
+ {{std::string("<a><b>123</b><c>456</c></a>"),
std::string("/a/d")}, std::string("")},
+ {{std::string("<a><b>123</b><b>456</b></a>"),
std::string("/a/b[1]")},
+ std::string("123")},
+ {{std::string("<a><b>123</b><b>456</b></a>"),
std::string("/a/b[2]")},
+ std::string("456")},
+ {{std::string("<a><b>123</b><b>456</b></a>"),
std::string("/a/b[3]")}, std::string("")},
+ {{std::string("<a><b attr='val'>123</b></a>"),
std::string("/a/b[@attr]")},
+ std::string("123")},
+ {{std::string("<a><b attr='val'>123</b></a>"),
std::string("/a/b[@attr='val']")},
+ std::string("123")},
+ {{std::string("<a><b attr='val'>123</b></a>"),
std::string("/a/b[@attr='wrong']")},
+ std::string("")},
+ {{std::string("<a><!-- comment -->123</a>"), std::string("/a")},
std::string("123")},
+ {{std::string("<a><![CDATA[123]]></a>"), std::string("/a")},
std::string("123")},
+ {{std::string("<a>123<b>456</b>789</a>"), std::string("/a")},
std::string("123456789")},
+ {{std::string("<a> 123 </a>"), std::string("/a")}, std::string("
123 ")},
+ {{std::string("<a></a>"), std::string("/a")}, std::string("")},
+ {{std::string("<a/>"), std::string("/a")}, std::string("")},
+ {{std::string("<a>123</a>"), std::string("")}, Null()},
+ {{std::string(""), std::string("/a")}, Null()},
+ {{Null(), std::string("/a")}, Null()},
+ {{std::string("<a>123</a>"), Null()}, Null()},
+ {{std::string("<book><title>Intro to Hive</title><author>John "
+ "Doe</author><publisher>Tech
Press</publisher></book>"),
+ std::string("//title/text()")},
+ std::string("Intro to Hive")},
+ {{std::string("<book><title>Intro to Hive</title><author>John "
+ "Doe</author><publisher>Tech
Press</publisher></book>"),
+ std::string("//author/text()")},
+ std::string("John Doe")},
+ {{std::string("<book><title>Intro to Hive</title><author>John "
+ "Doe</author><publisher>Tech
Press</publisher></book>"),
+ std::string("//publisher/text()")},
+ std::string("Tech Press")},
+ {{std::string("<book><title>Intro to Hive</title><author>John "
+ "Doe</author><publisher>Tech
Press</publisher></book>"),
+ std::string("/book")},
+ std::string("Intro to HiveJohn DoeTech Press")},
+ {{Null(), Null()}, Null()}};
+
+ check_function_all_arg_comb<DataTypeString, true>(func_name, input_types,
data_set);
+}
+
} // namespace doris::vectorized
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index c7e39967c0b..4248e5ef907 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -476,6 +476,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString;
import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
@@ -976,6 +977,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(XxHash32.class, "xxhash_32"),
scalar(XxHash64.class, "xxhash_64"),
scalar(Xor.class, "xor"),
+ scalar(XpathString.class, "xpath_string"),
scalar(Year.class, "year"),
scalar(YearCeil.class, "year_ceil"),
scalar(YearFloor.class, "year_floor"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
new file mode 100644
index 00000000000..734af357d71
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'xpath_string'.
+ */
+public class XpathString extends ScalarFunction
+ implements BinaryExpression, ExplicitlyCastableSignature,
AlwaysNullable {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+ FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+ .args(VarcharType.SYSTEM_DEFAULT,
VarcharType.SYSTEM_DEFAULT),
+ FunctionSignature.ret(StringType.INSTANCE)
+ .args(StringType.INSTANCE, StringType.INSTANCE)
+ );
+
+ /**
+ * constructor with 2 arguments.
+ */
+ public XpathString(Expression arg0, Expression arg1) {
+ super("xpath_string", arg0, arg1);
+ }
+
+ /**
+ * withChildren.
+ */
+ @Override
+ public XpathString withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 2);
+ return new XpathString(children.get(0), children.get(1));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitXpathString(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 5cfd3b62503..25e9036eddf 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -473,6 +473,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString;
import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
@@ -2334,6 +2335,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(xor, context);
}
+ default R visitXpathString(XpathString xpathString, C context) {
+ return visitScalarFunction(xpathString, context);
+ }
+
// struct function
default R visitCreateStruct(CreateStruct createStruct, C context) {
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
new file mode 100644
index 00000000000..3451f0737f4
Binary files /dev/null and
b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
new file mode 100644
index 00000000000..c73f70b26f2
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_xpath_string") {
+ sql "drop table if exists xpath_string_args;"
+ sql """
+ create table xpath_string_args (
+ k0 int,
+ xml_not_null string not null,
+ xml_null string null,
+ xpath_not_null string not null,
+ xpath_null string null
+ )
+ DISTRIBUTED BY HASH(k0)
+ PROPERTIES
+ (
+ "replication_num" = "1"
+ );
+ """
+
+ order_qt_empty_nullable "select xpath_string(xml_null, xpath_null) from
xpath_string_args"
+ order_qt_empty_not_nullable "select xpath_string(xml_not_null,
xpath_not_null) from xpath_string_args"
+ order_qt_empty_partial_nullable "select xpath_string(xml_null,
xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+
+ sql "insert into xpath_string_args values (1, '<a><b>123</b></a>', null,
'/a/b', null)"
+
+ order_qt_all_null "select xpath_string(xml_null, xpath_null) from
xpath_string_args"
+ order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null)
from xpath_string_args"
+ order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null),
xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+ order_qt_nullable_no_null "select xpath_string(xml_null,
nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null)
from xpath_string_args"
+
+ sql "truncate table xpath_string_args"
+
+ sql """
+ insert into xpath_string_args values
+ (2, '<a>123</a>', '<a>456</a>', '/a', '/a'),
+ (3, '<a><b>123</b><c>456</c></a>', null, '/a/c', '/a/b'),
+ (4, '<a><b>123</b><c>456</c></a>', '<a><d>789</d></a>', '/a/d', null),
+ (5, '<a><b>123</b><b>456</b></a>', '<a><b>789</b></a>', '/a/b[1]',
'/a/b'),
+ (6, '<a><b>123</b><b>456</b></a>', null, '/a/b[2]', '/a/b[1]'),
+ (7, '<a><b attr="val">123</b></a>', '<a><b attr="other">456</b></a>',
'/a/b[@attr]', '/a/b[@attr="val"]'),
+ (8, '<a><!-- comment -->123</a>', '<a>456</a>', '/a', null),
+ (9, '<a><![CDATA[123]]></a>', null, '/a', '/a'),
+ (10, '<a>123<b>456</b>789</a>', '<a><b>test</b></a>', '/a', '/a/b'),
+ (11, '<a> 123 </a>', '<a>456</a>', '/a', null),
+ (12, '<book><title>Intro to Hive</title><author>John
Doe</author></book>',
+ '<book><title>SQL Guide</title></book>',
+ '//title/text()',
+ '//author/text()'),
+ (13, '<root><user
id="1"><name>Alice</name><age>25</age></user></root>',
+ '<root><user id="2"><name>Bob</name></user></root>',
+ '/root/user[@id="1"]/name',
+ '/root/user/age'),
+ (14, '<products><item price="10.99">Book</item><item
price="20.99">Pen</item></products>',
+ null,
+ '/products/item[@price="20.99"]',
+ '/products/item[1]'),
+ (15, '<data><![CDATA[<nested>value</nested>]]></data>',
+ '<data><plain>text</plain></data>',
+ '/data',
+ '//plain/text()'),
+ (16,
'<menu><item>Coffee<price>3.99</price></item><item>Tea<price>2.99</price></item></menu>',
+ '<menu><item><price>5.99</price></item></menu>',
+ '//item[price="2.99"]',
+ '/menu/item[1]/price'),
+ (17, '<doc><section id="1">First</section><section
id="2">Second</section></doc>',
+ null,
+ '/doc/section[@id="2"]',
+ '/doc/section[1]'),
+ (18, '<list><elem pos="1">A</elem><elem pos="2">B</elem><elem
pos="3">C</elem></list>',
+ '<list><elem>X</elem></list>',
+ '/list/elem[@pos="2"]',
+ '/list/elem[last()]'),
+ (19, '<nested><a><b><c>Deep</c></b></a></nested>',
+ '<nested><x><y>Shallow</y></x></nested>',
+ '//c',
+ '/nested/x/y'),
+ (20, '<mixed>Text<b>Bold</b>Normal<i>Italic</i>End</mixed>',
+ '<mixed><b>Only Bold</b></mixed>',
+ '/mixed',
+ '//b/text()'),
+ (21, '<empty></empty>',
+ '<empty/>',
+ '/empty',
+ '/empty/text()')
+ """
+
+ order_qt_all_null "select xpath_string(xml_null, xpath_null) from
xpath_string_args"
+ order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null)
from xpath_string_args"
+ order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null),
xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+ order_qt_nullable_no_null "select xpath_string(xml_null,
nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null)
from xpath_string_args"
+
+ /// Constant arguments; most of these cases are covered by the BE unit tests.
+ order_qt_const_nullable "select xpath_string(xml_null, NULL),
xpath_string(NULL, xpath_null) from xpath_string_args"
+ order_qt_const_not_nullable "select xpath_string(xml_not_null, '/a/b'),
xpath_string('<a><b>123</b></a>', xpath_not_null) from xpath_string_args"
+ order_qt_const_partial_nullable "select xpath_string(xml_null,
nullable('/a/b')), xpath_string(xml_not_null, nullable(xpath_null)) from
xpath_string_args"
+ order_qt_const_nullable_no_null "select
xpath_string(nullable(xml_not_null), nullable('/a/b')),
xpath_string(nullable('<a><b>123</b></a>'), nullable(xpath_not_null)) from
xpath_string_args"
+
+ order_qt_1 "select xpath_string('', '')"
+ order_qt_2 "select xpath_string(NULL, NULL)"
+ order_qt_3 "select xpath_string(NULL, '/a/b')"
+ order_qt_4 "select xpath_string('<a><b>123</b></a>', NULL)"
+ order_qt_5 "select xpath_string('<a><b>123</b></a>', '/a/b')"
+ order_qt_6 "select xpath_string('<a>123</a>', '/a')"
+ order_qt_7 "select xpath_string('<a><b>123</b><c>456</c></a>', '/a/c')"
+ order_qt_8 "select xpath_string('<a><b>123</b><b>456</b></a>', '/a/b[1]')"
+ order_qt_9 "select xpath_string('<a><b attr=\"val\">123</b></a>',
'/a/b[@attr]')"
+ order_qt_10 "select xpath_string('<a><!-- comment -->123</a>', '/a')"
+ order_qt_11 "select xpath_string('<a><![CDATA[123]]></a>', '/a')"
+ order_qt_12 "select xpath_string('<book><title>Intro to
Hive</title></book>', '//title/text()')"
+ order_qt_13 "select xpath_string(nullable('<a><b>123</b></a>'),
nullable('/a/b'))"
+ order_qt_14 "select xpath_string('<a><b>123</b></a>', nullable('/a/b'))"
+ order_qt_15 "select xpath_string(nullable('<a><b>123</b></a>'), '/a/b')"
+ order_qt_16 "select xpath_string('<root><user
id=\"1\"><name>Alice</name></user></root>', '/root/user[@id=\"1\"]/name')"
+ order_qt_17 "select xpath_string('<products><item
price=\"10.99\">Book</item></products>', '/products/item[@price=\"10.99\"]')"
+ order_qt_18 "select
xpath_string('<menu><item><price>3.99</price></item></menu>',
'//item/price/text()')"
+ order_qt_19 "select xpath_string('<data><a>1</a><a>2</a><a>3</a></data>',
'/data/a[last()]')"
+ order_qt_20 "select
xpath_string('<nested><a><b><c>Deep</c></b></a></nested>', '//c/text()')"
+ order_qt_21 "select xpath_string('<mixed>Text<b>Bold</b>Normal</mixed>',
'/mixed/text()')"
+ order_qt_22 "select xpath_string('<doc><item
pos=\"1\">First</item></doc>', '/doc/item[@pos=\"1\"]/text()')"
+ order_qt_23 "select xpath_string('<test><a>x</a><b>y</b><c>z</c></test>',
'/test/*[2]')"
+ order_qt_24 "select
xpath_string('<data><![CDATA[<nested>value</nested>]]></data>', '/data')"
+ order_qt_25 "select xpath_string('<root><elem><!-- comment
-->value</elem></root>', '/root/elem')"
+ order_qt_26 "select
xpath_string('<doc><section><title>Test</title><para>Text</para></section></doc>',
'/doc/section[title=\"Test\"]/para')"
+ order_qt_27 "select xpath_string('<list><item val=\"1\"/><item
val=\"2\"/></list>', '/list/item[@val=\"2\"]')"
+ order_qt_28 "select
xpath_string('<data><group><name>A</name><value>1</value></group></data>',
'/data/group[name=\"A\"]/value')"
+ order_qt_29 "select
xpath_string('<root><a><b>1</b></a><a><b>2</b></a></root>', '//a[b=\"2\"]/b')"
+ order_qt_30 "select xpath_string('<doc><p
class=\"main\">Content</p></doc>', '//p[@class=\"main\"]/text()')"
+
+ /// error cases:
+ test {
+ sql """ select xpath_string('wrong xml', '//a/c') """
+ exception "Function xpath_string failed to parse XML string: No
document element found"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]