This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch vectorized
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/vectorized by this push:
new 5830710 [Vectorized][Feature] fix core dump when using function
override and function alias at the same time && support substr(str,int)
override (#7640)
5830710 is described below
commit 583071046f2464a8c6eb7bbe90bde9fb7223f674
Author: Pxl <[email protected]>
AuthorDate: Thu Jan 6 18:57:05 2022 +0800
[Vectorized][Feature] fix core dump when using function override and
function alias at the same time && support substr(str,int) override (#7640)
---
be/src/vec/functions/function_string.cpp | 19 +++--
be/src/vec/functions/function_string.h | 109 ++++++++++++++++++++-----
be/src/vec/functions/function_timestamp.cpp | 11 +--
be/src/vec/functions/simple_function_factory.h | 10 ++-
gensrc/script/doris_builtins_functions.py | 4 +-
5 files changed, 111 insertions(+), 42 deletions(-)
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
index 73e2413..34f1c6b 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -293,7 +293,7 @@ struct HexStringImpl {
dst_data_ptr++;
offset++;
} else {
- VStringFunctions::hex_encode(source, srclen,
reinterpret_cast<char *>(dst_data_ptr));
+ VStringFunctions::hex_encode(source, srclen,
reinterpret_cast<char*>(dst_data_ptr));
dst_data_ptr[srclen * 2] = '\0';
dst_data_ptr += (srclen * 2 + 1);
offset += (srclen * 2 + 1);
@@ -513,9 +513,9 @@ struct AesEncryptImpl {
int cipher_len = l_size + 16;
char p[cipher_len];
- int outlen =
- EncryptionUtil::encrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
- (unsigned char*)r_raw, r_size, NULL,
true, (unsigned char*)p);
+ int outlen = EncryptionUtil::encrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
+ (unsigned char*)r_raw,
r_size, NULL, true,
+ (unsigned char*)p);
if (outlen < 0) {
StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
} else {
@@ -553,9 +553,9 @@ struct AesDecryptImpl {
int cipher_len = l_size;
char p[cipher_len];
- int outlen =
- EncryptionUtil::decrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
- (unsigned char*)r_raw, r_size, NULL,
true, (unsigned char*)p);
+ int outlen = EncryptionUtil::decrypt(AES_128_ECB, (unsigned
char*)l_raw, l_size,
+ (unsigned char*)r_raw,
r_size, NULL, true,
+ (unsigned char*)p);
if (outlen < 0) {
StringOP::push_null_string(i, res_data, res_offsets,
null_map_data);
} else {
@@ -774,7 +774,8 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_function<FunctionLTrim>();
factory.register_function<FunctionRTrim>();
factory.register_function<FunctionTrim>();
- factory.register_function<FunctionSubstring>();
+ factory.register_function<FunctionSubstring<Substr3Imp>>();
+ factory.register_function<FunctionSubstring<Substr2Imp>>();
factory.register_function<FunctionLeft>();
factory.register_function<FunctionRight>();
factory.register_function<FunctionNullOrEmpty>();
@@ -794,7 +795,7 @@ void register_function_string(SimpleFunctionFactory& factory) {
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
- factory.register_alias(FunctionSubstring::name, "substr");
+ factory.register_alias(SubstringUtil::name, "substr");
factory.register_alias(FunctionToLower::name, "lcase");
factory.register_alias(FunctionStringMd5sum::name, "md5");
}
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index efeef41..3f3e538 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -88,25 +88,9 @@ struct StringOP {
}
};
-class FunctionSubstring : public IFunction {
-public:
+struct SubstringUtil {
static constexpr auto name = "substring";
- static FunctionPtr create() { return
std::make_shared<FunctionSubstring>(); }
- String get_name() const override { return name; }
- size_t get_number_of_arguments() const override { return 3; }
-
- DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
- return make_nullable(std::make_shared<DataTypeString>());
- }
-
- bool use_default_implementation_for_nulls() const override { return false;
}
- bool use_default_implementation_for_constants() const override { return
true; }
- Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
- size_t result, size_t input_rows_count) override {
- substring_execute(block, arguments, result, input_rows_count);
- return Status::OK();
- }
static void substring_execute(Block& block, const ColumnNumbers&
arguments, size_t result,
size_t input_rows_count) {
DCHECK_EQ(arguments.size(), 3);
@@ -201,12 +185,89 @@ private:
}
};
-class FunctionLeft : public FunctionSubstring {
+template <typename Impl>
+class FunctionSubstring : public IFunction {
+public:
+ static constexpr auto name = SubstringUtil::name;
+ String get_name() const override { return name; }
+ static FunctionPtr create() { return
std::make_shared<FunctionSubstring<Impl>>(); }
+
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+ DataTypes get_variadic_argument_types_impl() const override {
+ return Impl::get_variadic_argument_types();
+ }
+ size_t get_number_of_arguments() const override {
+ return get_variadic_argument_types_impl().size();
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+ bool use_default_implementation_for_constants() const override { return
true; }
+
+ Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ size_t result, size_t input_rows_count) override {
+ return Impl::execute_impl(context, block, arguments, result,
input_rows_count);
+ }
+};
+
+struct Substr3Imp {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeInt32>(),
+ std::make_shared<DataTypeInt32>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ SubstringUtil::substring_execute(block, arguments, result,
input_rows_count);
+ return Status::OK();
+ }
+};
+
+struct Substr2Imp {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeInt32>()};
+ }
+
+ static Status execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, size_t result,
+ size_t input_rows_count) {
+ auto params = ColumnInt32::create(input_rows_count);
+ auto& strlen_data = params->get_data();
+
+ auto str_col =
+
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+ if (auto* nullable = check_and_get_column<const
ColumnNullable>(*str_col)) {
+ str_col = nullable->get_nested_column_ptr();
+ }
+ auto& str_offset = assert_cast<const
ColumnString*>(str_col.get())->get_offsets();
+
+ for (int i = 0; i < input_rows_count; ++i) {
+ strlen_data[i] = str_offset[i] - str_offset[i - 1];
+ }
+
+ block.insert({std::move(params), std::make_shared<DataTypeInt32>(),
"strlen"});
+
+ ColumnNumbers temp_arguments = {arguments[0], arguments[1],
block.columns() - 1};
+
+ SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
+ return Status::OK();
+ }
+};
+
+class FunctionLeft : public IFunction {
public:
static constexpr auto name = "left";
static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+ bool use_default_implementation_for_constants() const override { return
true; }
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
@@ -219,17 +280,23 @@ public:
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = arguments[1];
- FunctionSubstring::substring_execute(block, temp_arguments, result,
input_rows_count);
+ SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
return Status::OK();
}
};
-class FunctionRight : public FunctionSubstring {
+class FunctionRight : public IFunction {
public:
static constexpr auto name = "right";
static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
+ return make_nullable(std::make_shared<DataTypeString>());
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false;
}
+ bool use_default_implementation_for_constants() const override { return
true; }
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
@@ -275,7 +342,7 @@ public:
temp_arguments[0] = arguments[0];
temp_arguments[1] = num_columns_without_result;
temp_arguments[2] = num_columns_without_result + 1;
- FunctionSubstring::substring_execute(block, temp_arguments, result,
input_rows_count);
+ SubstringUtil::substring_execute(block, temp_arguments, result,
input_rows_count);
return Status::OK();
}
};
diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp
index c7335c4..f5216e9 100644
--- a/be/src/vec/functions/function_timestamp.cpp
+++ b/be/src/vec/functions/function_timestamp.cpp
@@ -177,9 +177,7 @@ struct UnixTimeStampImpl {
};
struct UnixTimeStampDateImpl {
- static DataTypes get_variadic_argument_types() {
- return {std::make_shared<vectorized::DataTypeDate>()};
- }
+ static DataTypes get_variadic_argument_types() { return
{std::make_shared<DataTypeDate>()}; }
static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName&
arguments) {
return make_nullable(std::make_shared<DataTypeInt32>());
@@ -226,14 +224,13 @@ struct UnixTimeStampDateImpl {
struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl {
static DataTypes get_variadic_argument_types() {
- return {std::make_shared<vectorized::DataTypeDateTime>()};
+ return {std::make_shared<DataTypeDateTime>()};
}
};
struct UnixTimeStampStrImpl {
static DataTypes get_variadic_argument_types() {
- return {std::make_shared<vectorized::DataTypeString>(),
- std::make_shared<vectorized::DataTypeString>()};
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeString>()};
}
static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName&
arguments) {
@@ -294,8 +291,6 @@ public:
String get_name() const override { return name; }
- bool is_variadic() const override { return true; }
-
bool use_default_implementation_for_nulls() const override { return false;
}
size_t get_number_of_arguments() const override {
diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h
index 149ce88..c016d35 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -95,14 +95,19 @@ public:
}
void register_alias(const std::string& name, const std::string& alias) {
- function_creators[alias] = function_creators[name];
+ function_alias[alias] = name;
}
FunctionBasePtr get_function(const std::string& name, const
ColumnsWithTypeAndName& arguments,
const DataTypePtr& return_type) {
std::string key_str = name;
+
+ if (function_alias.count(name)) {
+ key_str = function_alias[name];
+ }
+
// if function is variadic, added types_str as key
- if (function_variadic_set.count(name)) {
+ if (function_variadic_set.count(key_str)) {
for (auto& arg : arguments) {
key_str.append(arg.type->is_nullable()
? reinterpret_cast<const
DataTypeNullable*>(arg.type.get())
@@ -123,6 +128,7 @@ public:
private:
FunctionCreators function_creators;
FunctionIsVariadic function_variadic_set;
+ std::unordered_map<std::string, std::string> function_alias;
template <typename Function>
static FunctionBuilderPtr createDefaultFunction() {
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 1093a89..fed25d6 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -825,7 +825,7 @@ visible_functions = [
# String builtin functions
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'],
'_ZN5doris15StringFunctions9substringEPN'
- '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '',
'', 'ALWAYS_NULLABLE'],
+ '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '',
'vec', 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT', 'INT'],
'_ZN5doris15StringFunctions9substringEPN'
'9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '',
'', 'vec', 'ALWAYS_NULLABLE'],
@@ -947,7 +947,7 @@ visible_functions = [
# Longtext function
[['substr', 'substring'], 'STRING', ['STRING', 'INT'],
'_ZN5doris15StringFunctions9substringEPN'
- '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '',
'', 'ALWAYS_NULLABLE'],
+ '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '',
'vec', 'ALWAYS_NULLABLE'],
[['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'],
'_ZN5doris15StringFunctions9substringEPN'
'9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '',
'', 'vec', 'ALWAYS_NULLABLE'],
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]