This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 454b45b [feature](vectorize)(function) support regexp&&sm4&&aes functions (#8307)
454b45b is described below
commit 454b45bea3cb5d0c6eb0c2391465a07a19aceed4
Author: zhangstar333 <[email protected]>
AuthorDate: Tue Mar 8 13:14:02 2022 +0800
[feature](vectorize)(function) support regexp&&sm4&&aes functions (#8307)
---
be/src/exprs/encryption_functions.cpp | 21 --
be/src/exprs/encryption_functions.h | 21 +-
be/src/exprs/string_functions.cpp | 4 +-
be/src/exprs/string_functions.h | 3 +
be/src/vec/CMakeLists.txt | 3 +
.../aggregate_function_percentile_approx.cpp | 1 -
be/src/vec/functions/function_encryption.cpp | 245 +++++++++++++++++++++
be/src/vec/functions/function_hex.cpp | 183 +++++++++++++++
be/src/vec/functions/function_regexp.cpp | 216 ++++++++++++++++++
be/src/vec/functions/function_string.cpp | 132 +----------
be/src/vec/functions/function_string.h | 21 +-
be/src/vec/functions/math.cpp | 51 -----
be/src/vec/functions/simple_function_factory.h | 6 +
be/test/vec/function/function_like_test.cpp | 59 +++++
be/test/vec/function/function_string_test.cpp | 241 +++++++++++++++++++-
gensrc/script/doris_builtins_functions.py | 74 +++----
16 files changed, 1034 insertions(+), 247 deletions(-)
diff --git a/be/src/exprs/encryption_functions.cpp b/be/src/exprs/encryption_functions.cpp
index 19ec1a7..a4d93b7 100644
--- a/be/src/exprs/encryption_functions.cpp
+++ b/be/src/exprs/encryption_functions.cpp
@@ -22,34 +22,13 @@
#include "runtime/string_value.h"
#include "runtime/tuple_row.h"
#include "util/debug_util.h"
-#include "util/encryption_util.h"
#include "util/md5.h"
#include "util/sm3.h"
-#include "util/string_util.h"
#include "util/url_coding.h"
namespace doris {
void EncryptionFunctions::init() {}
-StringCaseUnorderedMap<EncryptionMode> aes_mode_map {
- {"AES_128_ECB", AES_128_ECB}, {"AES_192_ECB", AES_192_ECB},
- {"AES_256_ECB", AES_256_ECB}, {"AES_128_CBC", AES_128_CBC},
- {"AES_192_CBC", AES_192_CBC}, {"AES_256_CBC", AES_256_CBC},
- {"AES_128_CFB", AES_128_CFB}, {"AES_192_CFB", AES_192_CFB},
- {"AES_256_CFB", AES_256_CFB}, {"AES_128_CFB1", AES_128_CFB1},
- {"AES_192_CFB1", AES_192_CFB1}, {"AES_256_CFB1", AES_256_CFB1},
- {"AES_128_CFB8", AES_128_CFB8}, {"AES_192_CFB8", AES_192_CFB8},
- {"AES_256_CFB8", AES_256_CFB8}, {"AES_128_CFB128", AES_128_CFB128},
- {"AES_192_CFB128", AES_192_CFB128}, {"AES_256_CFB128", AES_256_CFB128},
- {"AES_128_CTR", AES_128_CTR}, {"AES_192_CTR", AES_192_CTR},
- {"AES_256_CTR", AES_256_CTR}, {"AES_128_OFB", AES_128_OFB},
- {"AES_192_OFB", AES_192_OFB}, {"AES_256_OFB", AES_256_OFB}};
-StringCaseUnorderedMap<EncryptionMode> sm4_mode_map {{"SM4_128_ECB",
SM4_128_ECB},
- {"SM4_128_CBC",
SM4_128_CBC},
- {"SM4_128_CFB128",
SM4_128_CFB128},
- {"SM4_128_OFB",
SM4_128_OFB},
- {"SM4_128_CTR",
SM4_128_CTR}};
-
StringVal encrypt(FunctionContext* ctx, const StringVal& src, const StringVal&
key,
const StringVal& iv, EncryptionMode mode) {
if (src.len == 0 || src.is_null) {
diff --git a/be/src/exprs/encryption_functions.h b/be/src/exprs/encryption_functions.h
index 8a7e45d..0046da8 100644
--- a/be/src/exprs/encryption_functions.h
+++ b/be/src/exprs/encryption_functions.h
@@ -22,13 +22,32 @@
#include "udf/udf.h"
#include "udf/udf_internal.h"
+#include "util/encryption_util.h"
+#include "util/string_util.h"
namespace doris {
class Expr;
struct ExprValue;
class TupleRow;
-
+static StringCaseUnorderedMap<EncryptionMode> aes_mode_map {
+ {"AES_128_ECB", AES_128_ECB}, {"AES_192_ECB", AES_192_ECB},
+ {"AES_256_ECB", AES_256_ECB}, {"AES_128_CBC", AES_128_CBC},
+ {"AES_192_CBC", AES_192_CBC}, {"AES_256_CBC", AES_256_CBC},
+ {"AES_128_CFB", AES_128_CFB}, {"AES_192_CFB", AES_192_CFB},
+ {"AES_256_CFB", AES_256_CFB}, {"AES_128_CFB1", AES_128_CFB1},
+ {"AES_192_CFB1", AES_192_CFB1}, {"AES_256_CFB1", AES_256_CFB1},
+ {"AES_128_CFB8", AES_128_CFB8}, {"AES_192_CFB8", AES_192_CFB8},
+ {"AES_256_CFB8", AES_256_CFB8}, {"AES_128_CFB128", AES_128_CFB128},
+ {"AES_192_CFB128", AES_192_CFB128}, {"AES_256_CFB128", AES_256_CFB128},
+ {"AES_128_CTR", AES_128_CTR}, {"AES_192_CTR", AES_192_CTR},
+ {"AES_256_CTR", AES_256_CTR}, {"AES_128_OFB", AES_128_OFB},
+ {"AES_192_OFB", AES_192_OFB}, {"AES_256_OFB", AES_256_OFB}};
+static StringCaseUnorderedMap<EncryptionMode> sm4_mode_map {{"SM4_128_ECB",
SM4_128_ECB},
+ {"SM4_128_CBC",
SM4_128_CBC},
+ {"SM4_128_CFB128",
SM4_128_CFB128},
+ {"SM4_128_OFB",
SM4_128_OFB},
+ {"SM4_128_CTR",
SM4_128_CTR}};
class EncryptionFunctions {
public:
static void init();
diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp
index 00f2643..825762c 100644
--- a/be/src/exprs/string_functions.cpp
+++ b/be/src/exprs/string_functions.cpp
@@ -488,7 +488,7 @@ bool StringFunctions::set_re2_options(const StringVal& match_parameter, std::str
}
// The caller owns the returned regex. Returns nullptr if the pattern could not be compiled.
-static re2::RE2* compile_regex(const StringVal& pattern, std::string*
error_str,
+re2::RE2* StringFunctions::compile_regex(const StringVal& pattern,
std::string* error_str,
const StringVal& match_parameter) {
re2::StringPiece pattern_sp(reinterpret_cast<char*>(pattern.ptr),
pattern.len);
re2::RE2::Options options;
@@ -512,7 +512,7 @@ static re2::RE2* compile_regex(const StringVal& pattern, std::string* error_str,
return nullptr;
}
return re;
-}
+}
void StringFunctions::regexp_prepare(FunctionContext* context,
FunctionContext::FunctionStateScope
scope) {
diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h
index af13f4f..dc14906 100644
--- a/be/src/exprs/string_functions.h
+++ b/be/src/exprs/string_functions.h
@@ -184,6 +184,9 @@ public:
static doris_udf::IntVal bit_length(doris_udf::FunctionContext* context,
const doris_udf::StringVal& str);
+ // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled.
+ static re2::RE2* compile_regex(const StringVal& pattern, std::string*
error_str,
+ const StringVal& match_parameter);
};
} // namespace doris
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 91f65f9..8365714 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -117,6 +117,9 @@ set(VEC_FILES
functions/function_case.cpp
functions/function_cast.cpp
functions/function_conv.cpp
+ functions/function_encryption.cpp
+ functions/function_regexp.cpp
+ functions/function_hex.cpp
functions/function_string.cpp
functions/function_timestamp.cpp
functions/function_utility.cpp
diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp
index 7179a65..976565f 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.cpp
@@ -28,7 +28,6 @@ AggregateFunctionPtr create_aggregate_function_percentile_approx(const std::stri
const
DataTypes& argument_types,
const Array&
parameters,
const bool
result_is_nullable) {
- LOG(INFO) << "percentile_approx function size is: " <<
argument_types.size();
if (argument_types.size() == 1) {
return
std::make_shared<AggregateFunctionPercentileApproxMerge>(argument_types);
diff --git a/be/src/vec/functions/function_encryption.cpp b/be/src/vec/functions/function_encryption.cpp
new file mode 100644
index 0000000..3c84d44
--- /dev/null
+++ b/be/src/vec/functions/function_encryption.cpp
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/encryption_functions.h"
+#include "runtime/string_search.hpp"
+#include "util/encryption_util.h"
+#include "util/string_util.h"
+#include "vec/common/pod_array_fwd.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/function_string_to_string.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+template <typename Impl, typename FunctionName>
+class FunctionEncryptionAndDecrypt : public IFunction {
+public:
+ static constexpr auto name = FunctionName::name;
+
+ String get_name() const override { return name; }
+
+ static FunctionPtr create() { return
std::make_shared<FunctionEncryptionAndDecrypt>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types_impl();
+ }
+
+ size_t get_number_of_arguments() const override {
+ return get_variadic_argument_types_impl().size();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ bool use_default_implementation_for_constants() const override { return
true; }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ size_t argument_size = arguments.size();
+ ColumnPtr argument_columns[argument_size];
+ std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
+ std::vector<const ColumnString::Chars*> chars_list(argument_size);
+
+ auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto result_data_column = ColumnString::create();
+
+ auto& result_data = result_data_column->get_chars();
+ auto& result_offset = result_data_column->get_offsets();
+ result_offset.resize(input_rows_count);
+
+ for (int i = 0; i < argument_size; ++i) {
+ argument_columns[i] =
+
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+ if (auto* nullable =
check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+ VectorizedUtils::update_null_map(result_null_map->get_data(),
+
nullable->get_null_map_data());
+ argument_columns[i] = nullable->get_nested_column_ptr();
+ }
+ }
+
+ for (size_t i = 0; i < argument_size; ++i) {
+ auto col_str = assert_cast<const
ColumnString*>(argument_columns[i].get());
+ offsets_list[i] = &col_str->get_offsets();
+ chars_list[i] = &col_str->get_chars();
+ }
+
+ Impl::vector_vector(offsets_list, chars_list, input_rows_count,
result_data, result_offset,
+ result_null_map->get_data());
+ block.get_by_position(result).column =
+ ColumnNullable::create(std::move(result_data_column),
std::move(result_null_map));
+ return Status::OK();
+ }
+};
+
+template <typename Impl, bool is_encrypt>
+static void exectue_result(std::vector<const ColumnString::Offsets*>&
offsets_list,
+ std::vector<const ColumnString::Chars*>&
chars_list, size_t i,
+ EncryptionMode& encryption_mode, const char* iv_raw,
+ ColumnString::Chars& result_data,
ColumnString::Offsets& result_offset,
+ NullMap& null_map) {
+ int src_size = (*offsets_list[0])[i] - (*offsets_list[0])[i - 1] - 1;
+ const auto src_raw =
+ reinterpret_cast<const
char*>(&(*chars_list[0])[(*offsets_list[0])[i - 1]]);
+ int key_size = (*offsets_list[1])[i] - (*offsets_list[1])[i - 1] - 1;
+ const auto key_raw =
+ reinterpret_cast<const
char*>(&(*chars_list[1])[(*offsets_list[1])[i - 1]]);
+ if (*src_raw == '\0' || src_size == 0) {
+ StringOP::push_null_string(i, result_data, result_offset, null_map);
+ return;
+ }
+ int cipher_len = src_size;
+ if constexpr (is_encrypt) {
+ cipher_len += 16;
+ }
+ std::unique_ptr<char[]> p;
+ p.reset(new char[cipher_len]);
+ int ret_code = 0;
+
+ ret_code = Impl::exectue_impl(encryption_mode, (unsigned char*)src_raw,
src_size,
+ (unsigned char*)key_raw, key_size, iv_raw,
true,
+ (unsigned char*)p.get());
+
+ if (ret_code < 0) {
+ StringOP::push_null_string(i, result_data, result_offset, null_map);
+ } else {
+ StringOP::push_value_string(std::string_view(p.get(), ret_code), i,
result_data,
+ result_offset);
+ }
+}
+
+template <typename Impl, EncryptionMode mode, bool is_encrypt>
+struct EncryptionAndDecryptTwoImpl {
+ static DataTypes get_variadic_argument_types_impl() {
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeString>()};
+ }
+
+ static Status vector_vector(std::vector<const ColumnString::Offsets*>&
offsets_list,
+ std::vector<const ColumnString::Chars*>&
chars_list,
+ size_t input_rows_count, ColumnString::Chars&
result_data,
+ ColumnString::Offsets& result_offset, NullMap&
null_map) {
+ for (int i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+ EncryptionMode encryption_mode = mode;
+ exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i,
encryption_mode, nullptr,
+ result_data, result_offset,
null_map);
+ }
+ return Status::OK();
+ }
+};
+
+template <typename Impl, EncryptionMode mode, bool is_encrypt, bool is_sm_mode>
+struct EncryptionAndDecryptFourImpl {
+ static DataTypes get_variadic_argument_types_impl() {
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeString>(),
+ std::make_shared<DataTypeString>(),
std::make_shared<DataTypeString>()};
+ }
+
+ static Status vector_vector(std::vector<const ColumnString::Offsets*>&
offsets_list,
+ std::vector<const ColumnString::Chars*>&
chars_list,
+ size_t input_rows_count, ColumnString::Chars&
result_data,
+ ColumnString::Offsets& result_offset, NullMap&
null_map) {
+ for (int i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+
+ EncryptionMode encryption_mode = mode;
+ int mode_size = (*offsets_list[3])[i] - (*offsets_list[3])[i - 1]
- 1;
+ const auto mode_raw =
+ reinterpret_cast<const
char*>(&(*chars_list[3])[(*offsets_list[3])[i - 1]]);
+ const auto iv_raw =
+ reinterpret_cast<const
char*>(&(*chars_list[2])[(*offsets_list[2])[i - 1]]);
+ if (*mode_raw != '\0' || mode_size != 0) {
+ std::string mode_str(mode_raw, mode_size);
+ if constexpr (is_sm_mode) {
+ if (sm4_mode_map.count(mode_str) == 0) {
+ StringOP::push_null_string(i, result_data,
result_offset, null_map);
+ continue;
+ }
+ encryption_mode = sm4_mode_map.at(mode_str);
+ } else {
+ if (aes_mode_map.count(mode_str) == 0) {
+ StringOP::push_null_string(i, result_data,
result_offset, null_map);
+ continue;
+ }
+ encryption_mode = aes_mode_map.at(mode_str);
+ }
+ }
+
+ exectue_result<Impl, is_encrypt>(offsets_list, chars_list, i,
encryption_mode, iv_raw,
+ result_data, result_offset,
null_map);
+ }
+ return Status::OK();
+ }
+};
+
+struct EncryptImpl {
+ static int exectue_impl(EncryptionMode mode, const unsigned char* source,
+ uint32_t source_length, const unsigned char* key,
uint32_t key_length,
+ const char* iv, bool padding, unsigned char*
encrypt) {
+ return EncryptionUtil::encrypt(mode, source, source_length, key,
key_length, iv, true,
+ encrypt);
+ }
+};
+
+struct DecryptImpl {
+ static int exectue_impl(EncryptionMode mode, const unsigned char* source,
+ uint32_t source_length, const unsigned char* key,
uint32_t key_length,
+ const char* iv, bool padding, unsigned char*
encrypt) {
+ return EncryptionUtil::decrypt(mode, source, source_length, key,
key_length, iv, true,
+ encrypt);
+ }
+};
+
+struct SM4EncryptName {
+ static constexpr auto name = "sm4_encrypt";
+};
+
+struct SM4DecryptName {
+ static constexpr auto name = "sm4_decrypt";
+};
+
+struct AESEncryptName {
+ static constexpr auto name = "aes_encrypt";
+};
+
+struct AESDecryptName {
+ static constexpr auto name = "aes_decrypt";
+};
+
+void register_function_encryption(SimpleFunctionFactory& factory) {
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<EncryptImpl,
SM4_128_ECB, true>, SM4EncryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<DecryptImpl,
SM4_128_ECB, false>, SM4DecryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<EncryptImpl,
AES_128_ECB, true>, AESEncryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptTwoImpl<DecryptImpl,
AES_128_ECB, false>, AESDecryptName>>();
+
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<EncryptImpl,
SM4_128_ECB, true, true>, SM4EncryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<DecryptImpl,
SM4_128_ECB, false, true>, SM4DecryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<EncryptImpl,
AES_128_ECB, true, false>, AESEncryptName>>();
+
factory.register_function<FunctionEncryptionAndDecrypt<EncryptionAndDecryptFourImpl<DecryptImpl,
AES_128_ECB, false, false>, AESDecryptName>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_hex.cpp b/be/src/vec/functions/function_hex.cpp
new file mode 100644
index 0000000..2e26104
--- /dev/null
+++ b/be/src/vec/functions/function_hex.cpp
@@ -0,0 +1,183 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+#include "vec/columns/column_complex.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_hll.h"
+#include "vec/functions/function_const.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/simple_function_factory.h"
+#include "util/simd/vstring_function.h" //place this header file at last to compile
+
+namespace doris::vectorized {
+template <typename Impl>
+class FunctionHexVariadic : public IFunction {
+public:
+ static constexpr auto name = "hex";
+
+ static FunctionPtr create() { return
std::make_shared<FunctionHexVariadic>(); }
+
+ String get_name() const override { return name; }
+
+ size_t get_number_of_arguments() const override { return 1; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return std::make_shared<DataTypeString>();
+ }
+
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types();
+ }
+
+ bool use_default_implementation_for_constants() const override { return
true; }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ ColumnPtr argument_column =
+
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+ auto result_data_column = ColumnString::create();
+ auto& result_data = result_data_column->get_chars();
+ auto& result_offset = result_data_column->get_offsets();
+
+ Impl::vector(argument_column, input_rows_count, result_data,
result_offset);
+ block.replace_by_position(result, std::move(result_data_column));
+ return Status::OK();
+ }
+};
+
+static void hex_encode(const unsigned char* source, size_t srclen, unsigned
char*& dst_data_ptr,
+ size_t& offset) {
+ if (srclen == 0) {
+ DCHECK(*source == '\0');
+ *dst_data_ptr = '\0';
+ dst_data_ptr++;
+ offset++;
+ } else {
+ doris::simd::VStringFunctions::hex_encode(source, srclen,
+
reinterpret_cast<char*>(dst_data_ptr));
+ dst_data_ptr[srclen * 2] = '\0';
+ dst_data_ptr += (srclen * 2 + 1);
+ offset += (srclen * 2 + 1);
+ }
+}
+
+struct HexStringImpl {
+ static DataTypes get_variadic_argument_types() { return
{std::make_shared<DataTypeString>()}; }
+
+ static Status vector(ColumnPtr argument_column, size_t input_rows_count,
+ ColumnString::Chars& dst_data, ColumnString::Offsets&
dst_offsets) {
+ const auto* str_col =
check_and_get_column<ColumnString>(argument_column.get());
+ auto& data = str_col->get_chars();
+ auto& offsets = str_col->get_offsets();
+ dst_offsets.resize(input_rows_count);
+ dst_data.resize(data.size() * 2);
+
+ size_t offset = 0;
+ auto dst_data_ptr = dst_data.data();
+ for (int i = 0; i < input_rows_count; ++i) {
+ auto source = reinterpret_cast<const unsigned
char*>(&data[offsets[i - 1]]);
+ size_t srclen = offsets[i] - offsets[i - 1] - 1;
+ hex_encode(source, srclen, dst_data_ptr, offset);
+ dst_offsets[i] = offset;
+ }
+ return Status::OK();
+ }
+};
+
+struct HexIntImpl {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<vectorized::DataTypeInt64>()};
+ }
+
+ static std::string_view hex(uint64_t num, char* ans) {
+ static constexpr auto hex_table = "0123456789ABCDEF";
+ // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F'
+ if (num == 0) {
+ return {hex_table, 1};
+ }
+
+ int i = 0;
+ while (num) {
+ ans[i++] = hex_table[num & 15];
+ num = num >> 4;
+ }
+ ans[i] = '\0';
+
+ // reverse
+ for (int k = 0, j = i - 1; k <= j && k <= 16; k++, j--) {
+ char tmp = ans[j];
+ ans[j] = ans[k];
+ ans[k] = tmp;
+ }
+
+ return {ans, static_cast<size_t>(i)};
+ }
+
+ static Status vector(ColumnPtr argument_column, size_t input_rows_count,
+ ColumnString::Chars& res_data, ColumnString::Offsets&
res_offsets) {
+ const auto* str_col =
check_and_get_column<ColumnVector<Int64>>(argument_column.get());
+ auto& data = str_col->get_data();
+
+ res_offsets.resize(input_rows_count);
+ char ans[17];
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ StringOP::push_value_string(hex(data[i], ans), i, res_data,
res_offsets);
+ }
+ return Status::OK();
+ }
+};
+
+struct HexHLLImpl {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<vectorized::DataTypeHLL>()};
+ }
+
+ static Status vector(ColumnPtr argument_column, size_t input_rows_count,
+ ColumnString::Chars& res_data, ColumnString::Offsets&
res_offsets) {
+ const auto* str_col =
check_and_get_column<ColumnHLL>(argument_column.get());
+ const auto& hll_data = str_col->get_data();
+ res_offsets.resize(input_rows_count);
+ size_t total_length = 0, offset = 0;
+ std::string hll_str;
+ unsigned char* dst_data_ptr = nullptr;
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ hll_str.resize(hll_data[i].max_serialized_size(), '0');
+ size_t actual_size =
hll_data[i].serialize((uint8_t*)hll_str.data());
+ hll_str.resize(actual_size);
+ total_length += actual_size;
+
+ res_data.resize(total_length * 2 + (i + 1));
+ dst_data_ptr = res_data.data() + offset;
+ hex_encode(reinterpret_cast<const unsigned char*>(hll_str.data()),
hll_str.length(),
+ dst_data_ptr, offset);
+ res_offsets[i] = offset;
+ hll_str.clear();
+ }
+ return Status::OK();
+ }
+};
+
+void register_function_hex_variadic(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionHexVariadic<HexStringImpl>>();
+ factory.register_function<FunctionHexVariadic<HexIntImpl>>();
+ factory.register_function<FunctionHexVariadic<HexHLLImpl>>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp
new file mode 100644
index 0000000..4613fa9
--- /dev/null
+++ b/be/src/vec/functions/function_regexp.cpp
@@ -0,0 +1,216 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <re2/re2.h>
+
+#include <random>
+
+#include "exprs/string_functions.h"
+#include "runtime/string_value.h"
+#include "udf/udf.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+namespace doris::vectorized {
+
+template <typename Impl>
+class FunctionRegexp : public IFunction {
+public:
+ static constexpr auto name = Impl::name;
+
+ static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); }
+
+ String get_name() const override { return name; }
+
+ bool use_default_implementation_for_constants() const override { return
false; }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
+ size_t get_number_of_arguments() const override { return 3; }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+
+ Status prepare(FunctionContext* context,
FunctionContext::FunctionStateScope scope) override {
+ if (scope != FunctionContext::FRAGMENT_LOCAL) {
+ return Status::OK();
+ }
+
+ if (context->is_col_constant(1)) {
+ const auto pattern_col = context->get_constant_col(1)->column_ptr;
+ const auto& pattern = pattern_col->get_data_at(0).to_string_val();
+ if (pattern.is_null) {
+ return Status::OK();
+ }
+
+ std::string error_str;
+ re2::RE2* re = StringFunctions::compile_regex(pattern, &error_str,
StringVal::null());
+ if (re == nullptr) {
+ context->set_error(error_str.c_str());
+ return Status::InvalidArgument(error_str);
+ }
+ context->set_function_state(scope, re);
+ }
+ return Status::OK();
+ }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ size_t argument_size = arguments.size();
+ ColumnPtr argument_columns[argument_size];
+ auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto result_data_column = ColumnString::create();
+
+ auto& result_data = result_data_column->get_chars();
+ auto& result_offset = result_data_column->get_offsets();
+ result_offset.resize(input_rows_count);
+
+ for (int i = 0; i < argument_size; ++i) {
+ argument_columns[i] =
+
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+ if (auto* nullable =
check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+ VectorizedUtils::update_null_map(result_null_map->get_data(),
+
nullable->get_null_map_data());
+ argument_columns[i] = nullable->get_nested_column_ptr();
+ }
+ }
+
+ Impl::execute_impl(context, argument_columns, input_rows_count,
result_data, result_offset,
+ result_null_map->get_data());
+
+ block.get_by_position(result).column =
+ ColumnNullable::create(std::move(result_data_column),
std::move(result_null_map));
+ return Status::OK();
+ }
+
+ Status close(FunctionContext* context, FunctionContext::FunctionStateScope
scope) override {
+ if (scope == FunctionContext::FRAGMENT_LOCAL) {
+ re2::RE2* re =
reinterpret_cast<re2::RE2*>(context->get_function_state(scope));
+ delete re;
+ }
+ return Status::OK();
+ }
+};
+
+struct RegexpReplaceImpl {
+ static constexpr auto name = "regexp_replace";
+
+ static Status execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
+ size_t input_rows_count, ColumnString::Chars&
result_data,
+ ColumnString::Offsets& result_offset, NullMap&
null_map) {
+ const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
+ const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
+ const auto* replace_col =
check_and_get_column<ColumnString>(argument_columns[2].get());
+
+ for (int i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+ re2::RE2* re = reinterpret_cast<re2::RE2*>(
+
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+ std::unique_ptr<re2::RE2> scoped_re; // destroys re if state->re is nullptr
+ if (re == nullptr) {
+ std::string error_str;
+ const auto& pattern =
pattern_col->get_data_at(i).to_string_val();
+ re = StringFunctions::compile_regex(pattern, &error_str,
StringVal::null());
+ if (re == nullptr) {
+ context->add_warning(error_str.c_str());
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+ scoped_re.reset(re);
+ }
+
+ re2::StringPiece replace_str =
+ re2::StringPiece(replace_col->get_data_at(i).to_string());
+ std::string result_str(str_col->get_data_at(i).to_string());
+ re2::RE2::GlobalReplace(&result_str, *re, replace_str);
+ StringOP::push_value_string(result_str, i, result_data,
result_offset);
+ }
+
+ return Status::OK();
+ }
+};
+
+struct RegexpExtractImpl {
+ static constexpr auto name = "regexp_extract";
+
+ static Status execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
+ size_t input_rows_count, ColumnString::Chars&
result_data,
+ ColumnString::Offsets& result_offset, NullMap&
null_map) {
+ const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
+ const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
+ const auto* index_col =
+
check_and_get_column<ColumnVector<Int64>>(argument_columns[2].get());
+ for (int i = 0; i < input_rows_count; ++i) {
+ if (null_map[i]) {
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+ const auto& index_data = index_col->get_int(i);
+ if (index_data < 0) {
+ StringOP::push_empty_string(i, result_data, result_offset);
+ continue;
+ }
+ re2::RE2* re = reinterpret_cast<re2::RE2*>(
+
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+ std::unique_ptr<re2::RE2> scoped_re;
+ if (re == nullptr) {
+ std::string error_str;
+ const auto& pattern =
pattern_col->get_data_at(i).to_string_val();
+ re = StringFunctions::compile_regex(pattern, &error_str,
StringVal::null());
+ if (re == nullptr) {
+ context->add_warning(error_str.c_str());
+ StringOP::push_null_string(i, result_data, result_offset,
null_map);
+ continue;
+ }
+ scoped_re.reset(re);
+ }
+ const auto& str = str_col->get_data_at(i);
+ re2::StringPiece str_sp = re2::StringPiece(str.data,str.size);
+
+ int max_matches = 1 + re->NumberOfCapturingGroups();
+ if (index_data >= max_matches) {
+ StringOP::push_empty_string(i, result_data, result_offset);
+ continue;
+ }
+
+ std::vector<re2::StringPiece> matches(max_matches);
+ bool success = re->Match(str_sp, 0, str.size,
re2::RE2::UNANCHORED, &matches[0],
+ max_matches);
+ if (!success) {
+ StringOP::push_empty_string(i, result_data, result_offset);
+ continue;
+ }
+ const re2::StringPiece& match = matches[index_data];
+ StringOP::push_value_string(std::string_view(match.data(),
match.size()), i,
+ result_data, result_offset);
+ }
+ return Status::OK();
+ }
+};
+
+void register_function_regexp_extract(SimpleFunctionFactory& factory) {
+ factory.register_function<FunctionRegexp<RegexpReplaceImpl>>();
+ factory.register_function<FunctionRegexp<RegexpExtractImpl>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 926e8ed..5d84f01 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -261,43 +261,6 @@ struct ReverseImpl {
}
};
-struct HexStringName {
- static constexpr auto name = "hex";
-};
-
-struct HexStringImpl {
- static DataTypes get_variadic_argument_types() { return
{std::make_shared<DataTypeString>()}; }
-
- static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
- ColumnString::Chars& dst_data, ColumnString::Offsets&
dst_offsets) {
- auto rows_count = offsets.size();
- dst_offsets.resize(rows_count);
- dst_data.resize(data.size() * 2);
-
- size_t offset = 0;
- auto dst_data_ptr = dst_data.data();
- for (int i = 0; i < rows_count; ++i) {
- auto source = reinterpret_cast<const unsigned
char*>(&data[offsets[i - 1]]);
- size_t srclen = offsets[i] - offsets[i - 1] - 1;
-
- if (srclen == 0) {
- DCHECK(*source == '\0');
- *dst_data_ptr = '\0';
- dst_data_ptr++;
- offset++;
- } else {
- simd::VStringFunctions::hex_encode(source, srclen,
-
reinterpret_cast<char*>(dst_data_ptr));
- dst_data_ptr[srclen * 2] = '\0';
- dst_data_ptr += (srclen * 2 + 1);
- offset += (srclen * 2 + 1);
- }
- dst_offsets[i] = offset;
- }
- return Status::OK();
- }
-};
-
struct NameToLower {
static constexpr auto name = "lower";
};
@@ -479,86 +442,6 @@ struct StringSpace {
}
};
-struct AesEncryptImpl {
- static constexpr auto name = "aes_encrypt";
- using Chars = ColumnString::Chars;
- using Offsets = ColumnString::Offsets;
- using ReturnType = DataTypeString;
- using ColumnType = ColumnString;
- static void vector_vector(FunctionContext* context, const Chars& ldata,
const Offsets& loffsets,
- const Chars& rdata, const Offsets& roffsets,
Chars& res_data,
- Offsets& res_offsets, NullMap& null_map_data) {
- DCHECK_EQ(loffsets.size(), roffsets.size());
- size_t input_rows_count = loffsets.size();
- res_offsets.resize(input_rows_count);
-
- for (size_t i = 0; i < input_rows_count; ++i) {
- int l_size = loffsets[i] - loffsets[i - 1] - 1;
- const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i
- 1]]);
-
- int r_size = roffsets[i] - roffsets[i - 1] - 1;
- const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i
- 1]]);
-
- if (*l_raw == '\0' || l_size == 0) {
- StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
- continue;
- }
-
- int cipher_len = l_size + 16;
- char p[cipher_len];
-
- int outlen = EncryptionUtil::encrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
- (unsigned char*)r_raw,
r_size, NULL, true,
- (unsigned char*)p);
- if (outlen < 0) {
- StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
- } else {
- StringOP::push_value_string(std::string_view(p, outlen), i,
res_data, res_offsets);
- }
- }
- }
-};
-
-struct AesDecryptImpl {
- static constexpr auto name = "aes_decrypt";
- using Chars = ColumnString::Chars;
- using Offsets = ColumnString::Offsets;
- using ReturnType = DataTypeString;
- using ColumnType = ColumnString;
- static void vector_vector(FunctionContext* context, const Chars& ldata,
const Offsets& loffsets,
- const Chars& rdata, const Offsets& roffsets,
Chars& res_data,
- Offsets& res_offsets, NullMap& null_map_data) {
- DCHECK_EQ(loffsets.size(), roffsets.size());
- size_t input_rows_count = loffsets.size();
- res_offsets.resize(input_rows_count);
-
- for (size_t i = 0; i < input_rows_count; ++i) {
- int l_size = loffsets[i] - loffsets[i - 1] - 1;
- const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i
- 1]]);
-
- int r_size = roffsets[i] - roffsets[i - 1] - 1;
- const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i
- 1]]);
-
- if (*l_raw == '\0' || l_size == 0) {
- StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
- continue;
- }
-
- int cipher_len = l_size;
- char p[cipher_len];
-
- int outlen = EncryptionUtil::decrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
- (unsigned char*)r_raw,
r_size, NULL, true,
- (unsigned char*)p);
- if (outlen < 0) {
- StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
- } else {
- StringOP::push_value_string(std::string_view(p, outlen), i,
res_data, res_offsets);
- }
- }
- }
-};
-
struct ToBase64Impl {
static constexpr auto name = "to_base64";
using ReturnType = DataTypeString;
@@ -724,8 +607,6 @@ using FunctionStringFindInSet =
using FunctionReverse = FunctionStringToString<ReverseImpl, NameReverse>;
-using FunctionHexString = FunctionStringToString<HexStringImpl, HexStringName>;
-
using FunctionUnHex = FunctionStringOperateToNullType<UnHexImpl>;
using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>,
NameToLower>;
@@ -738,10 +619,6 @@ using FunctionRTrim =
FunctionStringToString<TrimImpl<false, true>, NameRTrim>;
using FunctionTrim = FunctionStringToString<TrimImpl<true, true>, NameTrim>;
-using FunctionAesEncrypt =
FunctionBinaryStringOperateToNullType<AesEncryptImpl>;
-
-using FunctionAesDecrypt =
FunctionBinaryStringOperateToNullType<AesDecryptImpl>;
-
using FunctionToBase64 = FunctionStringOperateToNullType<ToBase64Impl>;
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
@@ -764,7 +641,6 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionStringLocate>();
factory.register_function<FunctionStringLocatePos>();
factory.register_function<FunctionReverse>();
- factory.register_function<FunctionHexString>();
factory.register_function<FunctionUnHex>();
factory.register_function<FunctionToLower>();
factory.register_function<FunctionToUpper>();
@@ -782,25 +658,25 @@ void register_function_string(SimpleFunctionFactory&
factory) {
factory.register_function<FunctionStringRepeat>();
factory.register_function<FunctionStringLPad>();
factory.register_function<FunctionStringRPad>();
- factory.register_function<FunctionAesEncrypt>();
- factory.register_function<FunctionAesDecrypt>();
factory.register_function<FunctionToBase64>();
factory.register_function<FunctionFromBase64>();
factory.register_function<FunctionSplitPart>();
- factory.register_function<FunctionStringMd5sum>();
+ factory.register_function<FunctionStringMd5AndSM3<MD5Sum>>();
factory.register_function<FunctionStringParseUrl>();
factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>();
+ factory.register_function<FunctionStringMd5AndSM3<SM3Sum>>();
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
factory.register_alias(SubstringUtil::name, "substr");
factory.register_alias(FunctionToLower::name, "lcase");
factory.register_alias(FunctionToUpper::name, "ucase");
- factory.register_alias(FunctionStringMd5sum::name, "md5");
+ factory.register_alias(FunctionStringMd5AndSM3<MD5Sum>::name, "md5");
factory.register_alias(FunctionStringUTF8Length::name, "character_length");
+ factory.register_alias(FunctionStringMd5AndSM3<SM3Sum>::name, "sm3");
}
} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index c14e639..613d4ca 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -30,6 +30,7 @@
#include "runtime/string_value.hpp"
#include "udf/udf.h"
#include "util/md5.h"
+#include "util/sm3.h"
#include "util/url_parser.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_nullable.h"
@@ -922,10 +923,21 @@ public:
}
};
-class FunctionStringMd5sum : public IFunction {
-public:
+struct SM3Sum { // Trait passed as Impl to FunctionStringMd5AndSM3 to select the SM3 digest.
+ static constexpr auto name = "sm3sum"; // becomes the function name through Impl::name
+ using ObjectData = SM3Digest; // digest type the function instantiates once per row as Impl::ObjectData
+};
+
+struct MD5Sum { // Trait passed as Impl to FunctionStringMd5AndSM3 to select the MD5 digest; it keeps the "md5sum" name of the class it replaces.
static constexpr auto name = "md5sum";
- static FunctionPtr create() { return
std::make_shared<FunctionStringMd5sum>(); }
+ using ObjectData = Md5Digest; // digest type the function instantiates once per row as Impl::ObjectData
+};
+
+template <typename Impl>
+class FunctionStringMd5AndSM3 : public IFunction {
+public:
+ static constexpr auto name = Impl::name;
+ static FunctionPtr create() { return
std::make_shared<FunctionStringMd5AndSM3>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 0; }
bool is_variadic() const override { return true; }
@@ -964,7 +976,8 @@ public:
res_offset.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i) {
- Md5Digest digest;
+ using ObjectData = typename Impl::ObjectData;
+ ObjectData digest;
for (size_t j = 0; j < offsets_list.size(); ++j) {
auto& current_offsets = *offsets_list[j];
auto& current_chars = *chars_list[j];
diff --git a/be/src/vec/functions/math.cpp b/be/src/vec/functions/math.cpp
index a265168..92ef20f 100644
--- a/be/src/vec/functions/math.cpp
+++ b/be/src/vec/functions/math.cpp
@@ -167,56 +167,6 @@ struct CeilName {
};
using FunctionCeil = FunctionMathUnary<UnaryFunctionVectorized<CeilName,
std::ceil, DataTypeInt64>>;
-struct HexIntName {
- static constexpr auto name = "hex";
-};
-
-struct HexIntImpl {
- using ReturnType = DataTypeString;
- static constexpr auto TYPE_INDEX = TypeIndex::Int64;
- using Type = Int64;
- using ReturnColumnType = ColumnString;
-
- static DataTypes get_variadic_argument_types() {
- return {std::make_shared<vectorized::DataTypeInt64>()};
- }
-
- static std::string_view hex(uint64_t num, char* ans){
- static constexpr auto hex_table = "0123456789ABCDEF";
- // uint64_t max value 0xFFFFFFFFFFFFFFFF , 16 'F'
- if (num == 0) { return {hex_table, 1};}
-
- int i = 0;
- while (num) {
- ans[i++] = hex_table[num & 15];
- num = num >> 4;
- }
- ans[i] = '\0';
-
- // reverse
- for (int k = 0, j = i - 1; k <= j && k <= 16; k++, j--) {
- char tmp = ans[j];
- ans[j] = ans[k];
- ans[k] = tmp;
- }
-
- return {ans, static_cast<size_t>(i)};
- }
-
- static Status vector(const ColumnInt64::Container& data,
ColumnString::Chars& res_data,
- ColumnString::Offsets& res_offsets) {
- res_offsets.resize(data.size());
- size_t input_size = res_offsets.size();
- char ans[17];
- for (size_t i = 0; i < input_size; ++i) {
- StringOP::push_value_string(hex(data[i], ans), i, res_data,
res_offsets);
- }
- return Status::OK();
- }
-};
-
-using FunctionHexInt = FunctionUnaryToType<HexIntImpl, HexIntName>;
-
template <typename A>
struct SignImpl {
using ResultType = Int8;
@@ -459,7 +409,6 @@ void register_function_math(SimpleFunctionFactory& factory)
{
factory.register_function<FunctionCeil>();
factory.register_alias("ceil", "dceil");
factory.register_alias("ceil", "ceiling");
- factory.register_function<FunctionHexInt>();
factory.register_function<FunctionE>();
factory.register_function<FunctionLn>();
factory.register_alias("ln", "dlog1");
diff --git a/be/src/vec/functions/simple_function_factory.h
b/be/src/vec/functions/simple_function_factory.h
index 7ab9061..f6fdb6c 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -74,6 +74,9 @@ void register_function_convert_tz(SimpleFunctionFactory&
factory);
void register_function_least_greast(SimpleFunctionFactory& factory);
void register_function_fake(SimpleFunctionFactory& factory);
+void register_function_encryption(SimpleFunctionFactory& factory);
+void register_function_regexp_extract(SimpleFunctionFactory& factory);
+void register_function_hex_variadic(SimpleFunctionFactory& factory);
class SimpleFunctionFactory {
using Creator = std::function<FunctionBuilderPtr()>;
using FunctionCreators = phmap::flat_hash_map<std::string, Creator>;
@@ -194,6 +197,9 @@ public:
register_function_convert_tz(instance);
register_function_least_greast(instance);
register_function_fake(instance);
+ register_function_encryption(instance);
+ register_function_regexp_extract(instance);
+ register_function_hex_variadic(instance);
});
return instance;
}
diff --git a/be/test/vec/function/function_like_test.cpp
b/be/test/vec/function/function_like_test.cpp
index e27c479..5674c07 100644
--- a/be/test/vec/function/function_like_test.cpp
+++ b/be/test/vec/function/function_like_test.cpp
@@ -106,6 +106,65 @@ TEST(FunctionLikeTest, regexp) {
check_function<DataTypeUInt8, true>(func_name, input_types, data_set);
}
+
+TEST(FunctionLikeTest, regexp_extract) { // Checks regexp_extract against a fixed table, once with a constant pattern column and once with an ordinary one.
+ std::string func_name = "regexp_extract";
+ // Each row is {{input string, regex pattern, group index}, expected result}.
+ DataSet data_set = { // per the expectations below, index 0 yields the whole match and index N the N-th capture group
+ {{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
std::string("x=([0-9]+)([a-z]+)"), (int64_t)0}, std::string("x=18abc")},
+ {{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
std::string("^x=([a-z]+)([0-9]+)"),(int64_t)0}, std::string("x=a3")},
+ {{std::string("x=a3&x=18abc&x=2&y=3&x=4"),
std::string("^x=([a-z]+)([0-9]+)"),(int64_t)1}, std::string("a")},
+
{{std::string("http://a.m.baidu.com/i41915173660.htm"),
std::string("i([0-9]+)"),(int64_t)0}, std::string("i41915173660")},
+
{{std::string("http://a.m.baidu.com/i41915173660.htm"),
std::string("i([0-9]+)"),(int64_t)1}, std::string("41915173660")},
+
+ {{std::string("hitdecisiondlist"),
std::string("(i)(.*?)(e)"),(int64_t)0}, std::string("itde")},
+ {{std::string("hitdecisiondlist"),
std::string("(i)(.*?)(e)"),(int64_t)1}, std::string("i")},
+ {{std::string("hitdecisiondlist"),
std::string("(i)(.*?)(e)"),(int64_t)2}, std::string("td")},
+ // rows with a NULL argument (pattern or input string) expect a NULL result
+ {{std::string("abc"), Null(), (int64_t)0}, Null()},
+ {{Null(), std::string("i([0-9]+)"), (int64_t)0},
Null()}};
+
+ // pattern is a constant value (its type is wrapped in Consted); each row is checked on its own as a one-row DataSet
+ InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted
{TypeIndex::String}, TypeIndex::Int64};
+ for (const auto& line : data_set) {
+ DataSet const_pattern_dataset = {line};
+ check_function<DataTypeString, true>(func_name,
const_pattern_input_types,
+ const_pattern_dataset);
+ }
+
+ // pattern is an ordinary (non-constant) column; the whole table is checked in a single call
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String ,
TypeIndex::Int64};
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+TEST(FunctionLikeTest, regexp_replace) { // Checks regexp_replace against a fixed table, once with a constant pattern column and once with an ordinary one.
+ std::string func_name = "regexp_replace";
+ // Each row is {{input string, regex pattern, replacement}, expected result}.
+ DataSet data_set = { // the rows with an empty pattern expect the replacement inserted before every character and at the end
+ {{std::string("2022-03-02"), std::string("-"),
std::string("")}, std::string("20220302")},
+ {{std::string("2022-03-02"),
std::string(""),std::string("s")}, std::string("s2s0s2s2s-s0s3s-s0s2s")},
+ {{std::string("100-200"),
std::string("(\\d+)"),std::string("doris")}, std::string("doris-doris")},
+ // NOTE(review): the pattern literal of the next row is split across two lines, apparently by mail line wrapping; presumably it was a single space - confirm against the repository.
+ {{std::string("a b c"), std::string("
"),std::string("-")}, std::string("a-b-c")},
+ {{std::string("a b c"),
std::string("(b)"),std::string("<\\1>")}, std::string("a <b> c")},
+ {{std::string("qwewe"),
std::string(""),std::string("true")},
std::string("trueqtruewtrueetruewtrueetrue")},
+ // rows with a NULL argument (replacement or input string) expect a NULL result
+ {{std::string("abc"), std::string("x=18abc"), Null()},
Null()},
+ {{Null(), std::string("i([0-9]+)"),
std::string("x=18abc")}, Null()}};
+
+ // pattern is a constant value (its type is wrapped in Consted); each row is checked on its own as a one-row DataSet
+ InputTypeSet const_pattern_input_types = {TypeIndex::String, Consted
{TypeIndex::String}, TypeIndex::String};
+ for (const auto& line : data_set) {
+ DataSet const_pattern_dataset = {line};
+ check_function<DataTypeString, true>(func_name,
const_pattern_input_types,
+ const_pattern_dataset);
+ }
+
+ // pattern is an ordinary (non-constant) column; the whole table is checked in a single call
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String ,
TypeIndex::String};
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
} // namespace doris::vectorized
int main(int argc, char** argv) {
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index e32d911..0a36af2 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -561,9 +561,54 @@ TEST(function_string_test, function_md5sum_test) {
}
}
+TEST(function_string_test, function_sm3sum_test) { // Checks sm3sum with one, two and three string arguments against fixed hex digests.
+ std::string func_name = "sm3sum";
+
+ { // one argument
+ InputTypeSet input_types = {TypeIndex::String};
+ DataSet data_set = {
+ {{std::string("asd你好")},
{std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}},
+ {{std::string("hello world")},
{std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}},
+ {{std::string("HELLO,!^%")},
{std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}},
+ {{std::string("")},
{std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}},
+ {{std::string(" ")},
{std::string("2ae1d69bb8483e5944310c877573b21d0a420c3bf4a2a91b1a8370d760ba67c5")}},
+ {{Null()}, {Null()}}, // NULL input expects NULL
+ {{std::string("MYtestSTR")},
{std::string("3155ae9f834cae035385fc15b69b6f2c051b91de943ea9a03ab8bfd497aef4c6")}},
+ {{std::string("ò&ø")},
{std::string("aa47ac31c85aa819d4cc80c932e7900fa26a3073a67aa7eb011bc2ba4924a066")}}};
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+ }
+
+ { // two arguments: the first two expected digests equal the one-argument digests of the joined strings
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+ DataSet data_set = {{{std::string("asd"), std::string("你好")},
+
{std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}},
+ {{std::string("hello "), std::string("world")},
+
{std::string("44f0061e69fa6fdfc290c494654a05dc0c053da7e5c52b84ef93a9d67d3fff88")}},
+ {{std::string("HELLO "), std::string(",!^%")}, // note the space after HELLO: the expected digest differs from the one-argument "HELLO,!^%" row
+
{std::string("1f5866e786ebac9ffed0dbd8f2586e3e99d1d05f7efe7c5915478b57b7423570")}},
+ {{Null(), std::string("HELLO")}, {Null()}}}; // a NULL among the arguments expects NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+ }
+
+ { // three arguments: expected digests again equal the one-argument digests of the joined strings
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String,
TypeIndex::String};
+ DataSet data_set = {{{std::string("a"), std::string("sd"),
std::string("你好")},
+
{std::string("0d6b9dfa8fe5708eb0dccfbaff4f2964abaaa976cc4445a7ecace49c0ceb31d3")}},
+ {{std::string(""), std::string(""),
std::string("")},
+
{std::string("1ab21d8355cfa17f8e61194831e81a8f22bec8c728fefb747ed035eb5082aa2b")}},
+ {{std::string("HEL"), std::string("LO,!"),
std::string("^%")},
+
{std::string("5fc6e38f40b31a659a59e1daba9b68263615f20c02037b419d9deb3509e6b5c6")}},
+ {{Null(), std::string("HELLO"), Null()},
{Null()}}}; // NULLs among the arguments expect NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+ }
+}
+
TEST(function_string_test, function_aes_encrypt_test) {
std::string func_name = "aes_encrypt";
-
+{
InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
const char* key = "doris";
@@ -590,10 +635,43 @@ TEST(function_string_test, function_aes_encrypt_test) {
check_function<DataTypeString, true>(func_name, input_types, data_set);
}
+{
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String,
TypeIndex::String, TypeIndex::String};
+ const char* iv = "0123456789abcdef";
+ const char* mode = "AES_256_ECB";
+ const char* key = "vectorized";
+ const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee",
""};
+ std::string r[5];
+
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16;
+ char p[cipher_len];
+ int iv_len = 32;
+ std::unique_ptr<char[]> init_vec;
+ init_vec.reset(new char[iv_len]);
+ std::memset(init_vec.get(), 0, strlen(iv) + 1);
+ memcpy(init_vec.get(), iv, strlen(iv));
+ int outlen = EncryptionUtil::encrypt(AES_256_ECB, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
init_vec.get(), true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen);
+ }
+
+ DataSet data_set = {{{std::string(src[0]), std::string(key),
std::string(iv), std::string(mode)}, r[0]},
+ {{std::string(src[1]), std::string(key),
std::string(iv), std::string(mode)}, r[1]},
+ {{std::string(src[2]), std::string(key),
std::string(iv), std::string(mode)}, r[2]},
+ {{std::string(src[3]), std::string(key),
std::string(iv), std::string(mode)}, r[3]},
+ {{std::string(src[4]), std::string(key),
std::string(iv), std::string(mode)}, r[4]},
+ {{std::string(src[5]), std::string(key),
std::string(iv), std::string(mode)}, Null()},
+ {{Null(), std::string(key), std::string(iv),
std::string(mode)}, Null()}};
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+}
TEST(function_string_test, function_aes_decrypt_test) {
std::string func_name = "aes_decrypt";
-
+{
InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
const char* key = "doris";
@@ -619,6 +697,165 @@ TEST(function_string_test, function_aes_decrypt_test) {
check_function<DataTypeString, true>(func_name, input_types, data_set);
}
+{
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String,
TypeIndex::String, TypeIndex::String};
+ const char* key = "vectorized";
+ const char* iv = "0123456789abcdef";
+ const char* mode = "AES_128_OFB";
+ const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"};
+
+ std::string r[5];
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16;
+ char p[cipher_len];
+ int iv_len = 32;
+ std::unique_ptr<char[]> init_vec;
+ init_vec.reset(new char[iv_len]);
+ std::memset(init_vec.get(), 0, strlen(iv) + 1);
+ memcpy(init_vec.get(), iv, strlen(iv));
+ int outlen = EncryptionUtil::encrypt(AES_128_OFB, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
init_vec.get(), true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen);
+ }
+ DataSet data_set = {{{r[0], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[0])},
+ {{r[1], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[1])},
+ {{r[2], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[2])},
+ {{r[3], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[3])},
+ {{r[4], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[4])},
+ {{Null(), std::string(key), std::string(iv),
std::string(mode)}, Null()}};
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+}
+
+TEST(function_string_test, function_sm4_encrypt_test) { // Checks sm4_encrypt in its two-argument (data, key) and four-argument (data, key, iv, mode) forms.
+ std::string func_name = "sm4_encrypt";
+{ // two-argument form; expected ciphertexts are produced with EncryptionUtil::encrypt using SM4_128_ECB
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+ const char* key = "doris";
+ const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee",
""};
+ std::string r[5]; // expected outputs for src[0..4] only; src[5] (the empty string) expects NULL below
+
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16; // output buffer is sized 16 bytes larger than the input
+ char p[cipher_len]; // NOTE(review): variable-length array, a compiler extension in C++
+
+ int outlen = EncryptionUtil::encrypt(SM4_128_ECB, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
NULL, true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen); // NOTE(review): outlen is used without checking for a negative value
+ }
+
+ DataSet data_set = {{{std::string(src[0]), std::string(key)}, r[0]},
+ {{std::string(src[1]), std::string(key)}, r[1]},
+ {{std::string(src[2]), std::string(key)}, r[2]},
+ {{std::string(src[3]), std::string(key)}, r[3]},
+ {{std::string(src[4]), std::string(key)}, r[4]},
+ {{std::string(src[5]), std::string(key)}, Null()}, // empty input expects NULL
+ {{Null(), std::string(key)}, Null()}}; // NULL input expects NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+{ // four-argument form; the mode string "SM4_128_CTR" is paired with EncryptionUtil::encrypt using SM4_128_CTR
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String,
TypeIndex::String, TypeIndex::String};
+
+ const char* key = "vectorized";
+ const char* iv = "0123456789abcdef";
+ const char* mode = "SM4_128_CTR";
+ const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee",
""};
+ std::string r[5]; // expected outputs for src[0..4] only; src[5] (the empty string) expects NULL below
+
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16; // output buffer is sized 16 bytes larger than the input
+ char p[cipher_len]; // NOTE(review): variable-length array, a compiler extension in C++
+ int iv_len = 32;
+ std::unique_ptr<char[]> init_vec;
+ init_vec.reset(new char[iv_len]);
+ std::memset(init_vec.get(), 0, strlen(iv) + 1); // NOTE(review): zeroes only strlen(iv) + 1 = 17 of the iv_len = 32 bytes; the tail of init_vec stays uninitialized
+ memcpy(init_vec.get(), iv, strlen(iv)); // copies the 16 iv characters over the zeroed prefix
+ int outlen = EncryptionUtil::encrypt(SM4_128_CTR, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
init_vec.get(), true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen); // NOTE(review): outlen is used without checking for a negative value
+ }
+
+ DataSet data_set = {{{std::string(src[0]), std::string(key),
std::string(iv), std::string(mode)}, r[0]},
+ {{std::string(src[1]), std::string(key),
std::string(iv), std::string(mode)}, r[1]},
+ {{std::string(src[2]), std::string(key),
std::string(iv), std::string(mode)}, r[2]},
+ {{std::string(src[3]), std::string(key),
std::string(iv), std::string(mode)}, r[3]},
+ {{std::string(src[4]), std::string(key),
std::string(iv), std::string(mode)}, r[4]},
+ {{std::string(src[5]), std::string(key),
std::string(iv), std::string(mode)}, Null()}, // empty input expects NULL
+ {{Null(), std::string(key), std::string(iv),
std::string(mode)}, Null()}}; // NULL input expects NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+}
+
+TEST(function_string_test, function_sm4_decrypt_test) { // Checks sm4_decrypt as a round trip: inputs are ciphertexts from EncryptionUtil::encrypt, expected outputs are the plaintexts.
+ std::string func_name = "sm4_decrypt";
+{ // two-argument form (data, key); ciphertexts are produced with SM4_128_ECB
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+ const char* key = "doris";
+ const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"};
+ std::string r[5]; // ciphertext of each src[i], used as the function input below
+
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16; // output buffer is sized 16 bytes larger than the input
+ char p[cipher_len]; // NOTE(review): variable-length array, a compiler extension in C++
+
+ int outlen = EncryptionUtil::encrypt(SM4_128_ECB, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
NULL, true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen); // NOTE(review): outlen is used without checking for a negative value
+ }
+
+ DataSet data_set = {{{r[0], std::string(key)}, std::string(src[0])},
+ {{r[1], std::string(key)}, std::string(src[1])},
+ {{r[2], std::string(key)}, std::string(src[2])},
+ {{r[3], std::string(key)}, std::string(src[3])},
+ {{r[4], std::string(key)}, std::string(src[4])},
+ {{Null(), std::string(key)}, Null()}}; // NULL input expects NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+{ // four-argument form (data, key, iv, mode); the mode string "SM4_128_OFB" is paired with ciphertexts produced with SM4_128_OFB
+ InputTypeSet input_types = {TypeIndex::String, TypeIndex::String,
TypeIndex::String, TypeIndex::String};
+
+ const char* key = "vectorized";
+ const char* iv = "0123456789abcdef";
+ const char* mode = "SM4_128_OFB";
+ const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"};
+ std::string r[5]; // ciphertext of each src[i], used as the function input below
+
+ for (int i = 0; i < 5; i++) {
+ int cipher_len = strlen(src[i]) + 16; // output buffer is sized 16 bytes larger than the input
+ char p[cipher_len]; // NOTE(review): variable-length array, a compiler extension in C++
+ int iv_len = 32;
+ std::unique_ptr<char[]> init_vec;
+ init_vec.reset(new char[iv_len]);
+ std::memset(init_vec.get(), 0, strlen(iv) + 1); // NOTE(review): zeroes only strlen(iv) + 1 = 17 of the iv_len = 32 bytes; the tail of init_vec stays uninitialized
+ memcpy(init_vec.get(), iv, strlen(iv)); // copies the 16 iv characters over the zeroed prefix
+ int outlen = EncryptionUtil::encrypt(SM4_128_OFB, (unsigned
char*)src[i], strlen(src[i]),
+ (unsigned char*)key, strlen(key),
init_vec.get(), true,
+ (unsigned char*)p);
+ r[i] = std::string(p, outlen); // NOTE(review): outlen is used without checking for a negative value
+ }
+
+ DataSet data_set = {{{r[0], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[0])},
+ {{r[1], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[1])},
+ {{r[2], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[2])},
+ {{r[3], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[3])},
+ {{r[4], std::string(key), std::string(iv),
std::string(mode)}, std::string(src[4])},
+ {{Null(), Null(), std::string(iv), std::string(mode)},
Null()}}; // NULL data and NULL key expect NULL
+
+ check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+}
TEST(function_string_test, function_parse_url_test) {
std::string func_name = "parse_url";
diff --git a/gensrc/script/doris_builtins_functions.py
b/gensrc/script/doris_builtins_functions.py
index 8bd6829..bf1a895 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -897,14 +897,14 @@ visible_functions = [
'_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf'
'15FunctionContextENS2_18FunctionStateScopeE',
'_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf'
- '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+ '15FunctionContextENS2_18FunctionStateScopeE', 'vec',
'ALWAYS_NULLABLE'],
[['regexp_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR'],
'_ZN5doris15StringFunctions14regexp_replaceEPN9doris_udf'
'15FunctionContextERKNS1_9StringValES6_S6_',
'_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf'
'15FunctionContextENS2_18FunctionStateScopeE',
'_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf'
- '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+ '15FunctionContextENS2_18FunctionStateScopeE', 'vec',
'ALWAYS_NULLABLE'],
[['concat'], 'VARCHAR', ['VARCHAR', '...'],
'_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', 'vec', ''],
@@ -1019,14 +1019,14 @@ visible_functions = [
'_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf'
'15FunctionContextENS2_18FunctionStateScopeE',
'_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf'
- '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+ '15FunctionContextENS2_18FunctionStateScopeE', 'vec',
'ALWAYS_NULLABLE'],
[['regexp_replace'], 'STRING', ['STRING', 'STRING', 'STRING'],
'_ZN5doris15StringFunctions14regexp_replaceEPN9doris_udf'
'15FunctionContextERKNS1_9StringValES6_S6_',
'_ZN5doris15StringFunctions14regexp_prepareEPN9doris_udf'
'15FunctionContextENS2_18FunctionStateScopeE',
'_ZN5doris15StringFunctions12regexp_closeEPN9doris_udf'
- '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+ '15FunctionContextENS2_18FunctionStateScopeE', 'vec',
'ALWAYS_NULLABLE'],
[['concat'], 'STRING', ['STRING', '...'],
'_ZN5doris15StringFunctions6concatEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', 'vec', ''],
@@ -1252,22 +1252,22 @@ visible_functions = [
'15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['aes_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
'_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['aes_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
'_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
- [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
- '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
- [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
- '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
- [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
- '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
- [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
- '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
+ '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
+ '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
+ '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
+ '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['from_base64'], 'VARCHAR', ['VARCHAR'],
'_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf'
'15FunctionContextERKNS1_9StringValE', '', '', 'vec',
'ALWAYS_NULLABLE'],
@@ -1276,25 +1276,25 @@ visible_functions = [
'15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['aes_decrypt'], 'STRING', ['STRING', 'STRING'],
'_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''],
+ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['aes_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
'_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['aes_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
'_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
- [['sm4_encrypt'], 'STRING', ['STRING', 'STRING'],
- '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
- [['sm4_decrypt'], 'STRING', ['STRING', 'STRING'],
- '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
- [['sm4_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
- '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
- [['sm4_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
- '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
- '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_encrypt'], 'STRING', ['STRING', 'STRING'],
+ '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_decrypt'], 'STRING', ['STRING', 'STRING'],
+ '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
+ '_ZN5doris19EncryptionFunctions11sm4_encryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
+ [['sm4_decrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
+ '_ZN5doris19EncryptionFunctions11sm4_decryptEPN9doris_udf'
+ '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', 'vec',
'ALWAYS_NULLABLE'],
[['from_base64'], 'STRING', ['STRING'],
'_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf'
'15FunctionContextERKNS1_9StringValE', '', '', 'vec',
'ALWAYS_NULLABLE'],
@@ -1316,13 +1316,13 @@ visible_functions = [
'_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', 'vec', ''],
[['sm3'], 'VARCHAR', ['VARCHAR'],
-
'_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', '', ''],
+
'_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', ''],
[['sm3sum'], 'VARCHAR', ['VARCHAR', '...'],
-
'_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
+
'_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', 'vec', ''],
[['sm3'], 'VARCHAR', ['STRING'],
-
'_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', '', ''],
+
'_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE',
'', '', 'vec', ''],
[['sm3sum'], 'VARCHAR', ['STRING', '...'],
-
'_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', '', ''],
+
'_ZN5doris19EncryptionFunctions6sm3sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
'', '', 'vec', ''],
# geo functions
[['ST_Point'], 'VARCHAR', ['DOUBLE', 'DOUBLE'],
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]