This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by
this push:
new d73a9b46ea6 [fix](function) fix json_extract function and support
unhex_null function (#49393)
d73a9b46ea6 is described below
commit d73a9b46ea6c267605b01d7c14814fc947b54438
Author: Tiewei Fang <[email protected]>
AuthorDate: Tue Mar 25 17:00:25 2025 +0800
[fix](function) fix json_extract function and support unhex_null function
(#49393)
picked from #49392
But this PR changes the original behavior
---
be/src/vec/functions/function_json.cpp | 18 ++++--
be/src/vec/functions/function_string.cpp | 40 ++++++++++++
be/src/vec/functions/function_totype.h | 38 ++++++++++-
.../doris/catalog/BuiltinScalarFunctions.java | 2 +
.../expressions/functions/scalar/UnhexNull.java | 71 +++++++++++++++++++++
.../expressions/visitor/ScalarFunctionVisitor.java | 5 ++
gensrc/script/doris_builtins_functions.py | 7 +-
.../string_functions/test_string_function.out | Bin 4892 -> 5056 bytes
.../string_functions/test_string_function.groovy | 8 +++
9 files changed, 182 insertions(+), 7 deletions(-)
diff --git a/be/src/vec/functions/function_json.cpp
b/be/src/vec/functions/function_json.cpp
index a299f547660..91bf15c3d41 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -909,11 +909,19 @@ struct FunctionJsonExtractImpl {
null_map[row] = 1;
result_column.insert_default();
} else {
- // write value as string
- buf.Clear();
- writer.Reset(buf);
- value.Accept(writer);
- result_column.insert_data(buf.GetString(), buf.GetSize());
+ // Check if the value is a string
+ if (value.IsString()) {
+ // Get the string value without quotes
+ const char* str_ptr = value.GetString();
+ size_t len = value.GetStringLength();
+ result_column.insert_data(str_ptr, len); // Insert without
quotes
+ } else {
+ // Write value as string for other types
+ buf.Clear();
+ writer.Reset(buf);
+ value.Accept(writer);
+ result_column.insert_data(buf.GetString(), buf.GetSize());
+ }
}
};
if (data_columns.size() == 2) {
diff --git a/be/src/vec/functions/function_string.cpp
b/be/src/vec/functions/function_string.cpp
index 1c328d5d145..9b829a8c58a 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -886,6 +886,44 @@ struct UnHexImpl {
return Status::OK();
}
+
+ static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ ColumnString::Chars& dst_data, ColumnString::Offsets&
dst_offsets,
+ ColumnUInt8::Container& null_map_data) {
+ auto rows_count = offsets.size();
+ dst_offsets.resize(rows_count);
+
+ for (int i = 0; i < rows_count; ++i) {
+ const auto* source = reinterpret_cast<const char*>(&data[offsets[i
- 1]]);
+ ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
+
+ if (srclen == 0) {
+ StringOP::push_null_string(i, dst_data, dst_offsets,
null_map_data);
+ continue;
+ }
+
+ char dst_array[MAX_STACK_CIPHER_LEN];
+ char* dst = dst_array;
+
+ int cipher_len = srclen / 2;
+ std::unique_ptr<char[]> dst_uptr;
+ if (cipher_len > MAX_STACK_CIPHER_LEN) {
+ dst_uptr.reset(new char[cipher_len]);
+ dst = dst_uptr.get();
+ }
+
+ int outlen = hex_decode(source, srclen, dst);
+ if (outlen == 0) {
+ LOG(INFO) << "--ftw: outlen == 0 ";
+ StringOP::push_null_string(i, dst_data, dst_offsets,
null_map_data);
+ continue;
+ }
+
+ StringOP::push_value_string(std::string_view(dst, outlen), i,
dst_data, dst_offsets);
+ }
+
+ return Status::OK();
+ }
};
struct NameStringSpace {
@@ -1154,6 +1192,7 @@ using FunctionToUpper =
FunctionStringToString<TransferImpl<NameToUpper>, NameTo
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
using FunctionUnHex = FunctionStringEncode<UnHexImpl>;
+using FunctionUnHexNullable = FunctionStringEncodeNullable<UnHexImpl>;
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl>;
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
@@ -1179,6 +1218,7 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionAutoPartitionName>();
factory.register_function<FunctionReverseCommon>();
factory.register_function<FunctionUnHex>();
+ factory.register_function<FunctionUnHexNullable>();
factory.register_function<FunctionToLower>();
factory.register_function<FunctionToUpper>();
factory.register_function<FunctionToInitcap>();
diff --git a/be/src/vec/functions/function_totype.h
b/be/src/vec/functions/function_totype.h
index 4e3e6dec8a4..953cf721eb9 100644
--- a/be/src/vec/functions/function_totype.h
+++ b/be/src/vec/functions/function_totype.h
@@ -504,7 +504,6 @@ public:
uint32_t result, size_t input_rows_count) const
override {
auto& col_ptr = block.get_by_position(arguments[0]).column;
- auto res = Impl::ColumnType::create();
if (const auto* col =
check_and_get_column<ColumnString>(col_ptr.get())) {
auto col_res = Impl::ColumnType::create();
static_cast<void>(Impl::vector(col->get_chars(),
col->get_offsets(),
@@ -519,4 +518,41 @@ public:
}
};
+template <typename Impl>
+class FunctionStringEncodeNullable : public IFunction {
+public:
+ static constexpr auto name = "unhex_null";
+
+ static FunctionPtr create() { return
std::make_shared<FunctionStringEncodeNullable>(); }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<typename Impl::ReturnType>());
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) const
override {
+ auto& col_ptr = block.get_by_position(arguments[0]).column;
+ auto null_map = ColumnUInt8::create(input_rows_count, 0);
+ // auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto& null_map_data = null_map->get_data();
+ if (const auto* col =
check_and_get_column<ColumnString>(col_ptr.get())) {
+ auto col_res = Impl::ColumnType::create();
+ static_cast<void>(Impl::vector(col->get_chars(),
col->get_offsets(),
+ col_res->get_chars(),
col_res->get_offsets(),
+ null_map_data));
+ // block.replace_by_position(result, std::move(col_res));
+ block.get_by_position(result).column =
+ ColumnNullable::create(std::move(col_res),
std::move(null_map));
+ } else {
+ return Status::RuntimeError("Illegal column {} of argument of
function {}",
+
block.get_by_position(arguments[0]).column->get_name(),
+ get_name());
+ }
+ return Status::OK();
+ }
+};
} // namespace doris::vectorized
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index 2cf23620b3e..2787cb0d9ca 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -460,6 +460,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.TrimIn;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uncompress;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Unhex;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.UnhexNull;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Upper;
import org.apache.doris.nereids.trees.expressions.functions.scalar.UrlDecode;
@@ -962,6 +963,7 @@ public class BuiltinScalarFunctions implements
FunctionHelper {
scalar(TrimIn.class, "trim_in"),
scalar(Truncate.class, "truncate"),
scalar(Unhex.class, "unhex"),
+ scalar(UnhexNull.class, "unhex_null"),
scalar(UnixTimestamp.class, "unix_timestamp"),
scalar(Upper.class, "ucase", "upper"),
scalar(Quote.class, "quote"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnhexNull.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnhexNull.java
new file mode 100644
index 00000000000..ba2ae5b59f1
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/UnhexNull.java
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import
org.apache.doris.nereids.trees.expressions.functions.PropagateNullLiteral;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'unhex_null'
+ */
+public class UnhexNull extends ScalarFunction
+ implements UnaryExpression, ExplicitlyCastableSignature,
AlwaysNullable, PropagateNullLiteral {
+
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE)
+ );
+
+ /**
+ * constructor with 1 argument.
+ */
+ public UnhexNull(Expression arg) {
+ super("unhex_null", arg);
+ }
+
+ /**
+ * withChildren.
+ */
+ @Override
+ public UnhexNull withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 1);
+ return new UnhexNull(children.get(0));
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitUnhexNull(this, context);
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 6b0450c1d8b..01c51c20c43 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -457,6 +457,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.TrimIn;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Uncompress;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Unhex;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.UnhexNull;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.UnixTimestamp;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Upper;
import org.apache.doris.nereids.trees.expressions.functions.scalar.UrlDecode;
@@ -2204,6 +2205,10 @@ public interface ScalarFunctionVisitor<R, C> {
return visitScalarFunction(unhex, context);
}
+ default R visitUnhexNull(UnhexNull unhexNull, C context) {
+ return visitScalarFunction(unhexNull, context);
+ }
+
default R visitUnixTimestamp(UnixTimestamp unixTimestamp, C context) {
return visitScalarFunction(unixTimestamp, context);
}
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index a14251a6bf5..4dd2e8a93e2 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1442,6 +1442,9 @@ visible_functions = {
[['round_bankers', 'round_bankers'], 'DOUBLE', ['DOUBLE', 'INT'], ''],
[['sign'], 'TINYINT', ['DOUBLE'], ''],
+ [['sign_float'], 'DECIMAL32', ['DECIMAL32'], ''],
+ [['sign_float'], 'DECIMAL32', ['DECIMAL64'], ''],
+ [['sign_float'], 'DECIMAL32', ['DECIMAL128'], ''],
[['sin'], 'DOUBLE', ['DOUBLE'], ''],
[['sqrt', 'dsqrt'], 'DOUBLE', ['DOUBLE'], ''],
@@ -1454,7 +1457,9 @@ visible_functions = {
[['truncate'], 'DECIMAL128', ['DECIMAL128', 'INT'], ''],
[['unhex'], 'VARCHAR', ['VARCHAR'], 'DEPEND_ON_ARGUMENT'],
- [['unhex'], 'STRING', ['STRING'], 'DEPEND_ON_ARGUMENT']
+ [['unhex'], 'STRING', ['STRING'], 'DEPEND_ON_ARGUMENT'],
+ [['unhex_null'], 'VARCHAR', ['VARCHAR'], 'DEPEND_ON_ARGUMENT'],
+ [['unhex_null'], 'STRING', ['STRING'], 'DEPEND_ON_ARGUMENT']
],
# Conditional Functions
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
index 4af2997eda2..433447d8fff 100644
Binary files
a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
and
b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
index 9a44bf6d756..2aae6e1deac 100644
---
a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy
@@ -71,6 +71,14 @@ suite("test_string_function", "arrow_flight_sql") {
qt_sql "select unhex('');"
qt_sql "select unhex(NULL);"
+
+ qt_sql_unhex_null "select unhex_null('@');"
+ qt_sql_unhex_null "select unhex_null('68656C6C6F2C646F726973');"
+ qt_sql_unhex_null "select unhex_null('41');"
+ qt_sql_unhex_null "select unhex_null('4142');"
+ qt_sql_unhex_null "select unhex_null('');"
+ qt_sql_unhex_null "select unhex_null(NULL);"
+
qt_sql_instr "select instr(\"abc\", \"b\");"
qt_sql_instr "select instr(\"abc\", \"d\");"
qt_sql_instr "select instr(\"abc\", null);"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]