This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new a6d52f52317 [improve](function) add error msg if exceeded maximum default value in repeat function (#32219) (#32588)
a6d52f52317 is described below
commit a6d52f523176389c800b5a1978abb407bf8531b3
Author: zhangstar333 <[email protected]>
AuthorDate: Thu Mar 21 19:05:37 2024 +0800
[improve](function) add error msg if exceeded maximum default value in repeat function (#32219) (#32588)
---
be/src/vec/functions/function_string.h | 39 ++++++++++++++++------
be/test/vec/function/function_string_test.cpp | 21 +++++++-----
.../datatype_p0/string/test_string_basic.groovy | 5 ++-
.../max_msg_size_of_result_receiver.groovy | 14 ++++----
4 files changed, 53 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index c2dbed38d39..6f3eca6cc82 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -29,6 +29,9 @@
#include <cstdlib>
#include <memory>
#include <ostream>
+#include <random>
+#include <sstream>
+#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>
@@ -1174,6 +1177,14 @@ public:
static FunctionPtr create() { return
std::make_shared<FunctionStringRepeat>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
+ std::string error_msg(int default_value, int repeat_value) const {
+ auto error_msg = fmt::format(
+ "The second parameter of repeat function exceeded maximum default value, "
+ "default_value is {}, and now input is {} . you could try change default value "
+ "greater than value eg: set repeat_max_num = {}.",
+ default_value, repeat_value, repeat_value + 10);
+ return error_msg;
+ }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
return make_nullable(std::make_shared<DataTypeString>());
@@ -1191,9 +1202,10 @@ public:
if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0]))
{
if (auto* col2 =
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
- vector_vector(col1->get_chars(), col1->get_offsets(),
col2->get_data(),
- res->get_chars(), res->get_offsets(),
null_map->get_data(),
- context->state()->repeat_max_num());
+ RETURN_IF_ERROR(vector_vector(col1->get_chars(),
col1->get_offsets(),
+ col2->get_data(),
res->get_chars(),
+ res->get_offsets(),
null_map->get_data(),
+
context->state()->repeat_max_num()));
block.replace_by_position(
result, ColumnNullable::create(std::move(res),
std::move(null_map)));
return Status::OK();
@@ -1203,8 +1215,11 @@ public:
if constexpr (use_old_function) {
repeat = col2_const->get_int(0);
} else {
- repeat = std::min<int>(col2_const->get_int(0),
- context->state()->repeat_max_num());
+ repeat = col2_const->get_int(0);
+ if (repeat > context->state()->repeat_max_num()) {
+ return Status::InvalidArgument(
+ error_msg(context->state()->repeat_max_num(),
repeat));
+ }
}
if (repeat <= 0) {
null_map->get_data().resize_fill(input_rows_count, 0);
@@ -1223,10 +1238,10 @@ public:
argument_ptr[0]->get_name(),
argument_ptr[1]->get_name());
}
- void vector_vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
- const ColumnInt32::Container& repeats,
ColumnString::Chars& res_data,
- ColumnString::Offsets& res_offsets,
ColumnUInt8::Container& null_map,
- const int repeat_max_num) {
+ Status vector_vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
+ const ColumnInt32::Container& repeats,
ColumnString::Chars& res_data,
+ ColumnString::Offsets& res_offsets,
ColumnUInt8::Container& null_map,
+ const int repeat_max_num) const {
size_t input_row_size = offsets.size();
fmt::memory_buffer buffer;
@@ -1240,7 +1255,10 @@ public:
if constexpr (use_old_function) {
repeat = repeats[i];
} else {
- repeat = std::min<int>(repeats[i], repeat_max_num);
+ repeat = repeats[i];
+ if (repeat > repeat_max_num) {
+ return Status::InvalidArgument(error_msg(repeat_max_num,
repeat));
+ }
}
if (repeat <= 0) {
StringOP::push_empty_string(i, res_data, res_offsets);
@@ -1254,6 +1272,7 @@ public:
res_data, res_offsets);
}
}
+ return Status::OK();
}
// TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
index 03a580e5192..73e35d9a090 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -178,15 +178,20 @@ TEST(function_string_test, function_string_repeat_test) {
std::string func_name = "repeat";
InputTypeSet input_types = {TypeIndex::String, TypeIndex::Int32};
- DataSet data_set = {
- {{std::string("a"), 3}, std::string("aaa")},
- {{std::string("hel lo"), 2}, std::string("hel lohel lo")},
- {{std::string("hello word"), -1}, std::string("")},
- {{std::string(""), 1}, std::string("")},
- {{std::string("a"), 1073741825}, std::string("aaaaaaaaaa")}, // ut repeat max num 10
- {{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")},
- {{std::string("你"), 2}, std::string("你你")}};
+ DataSet data_set = {{{std::string("a"), 3}, std::string("aaa")},
+ {{std::string("hel lo"), 2}, std::string("hel lohel lo")},
+ {{std::string("hello word"), -1}, std::string("")},
+ {{std::string(""), 1}, std::string("")},
+ {{std::string("HELLO,!^%"), 2},
std::string("HELLO,!^%HELLO,!^%")},
+ {{std::string("你"), 2}, std::string("你你")}};
check_function<DataTypeString, true>(func_name, input_types, data_set);
+
+ {
+ DataSet data_set = {{{std::string("a"), 1073741825},
+ std::string("aaaaaaaaaa")}}; // ut repeat max num 10
+ Status st = check_function<DataTypeString, true>(func_name,
input_types, data_set, true);
+ EXPECT_NE(Status::OK(), st);
+ }
}
TEST(function_string_test, function_string_reverse_test) {
diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
index 2aa9f9e86e4..36fbddede2d 100644
--- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
+++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
@@ -129,7 +129,10 @@ suite("test_string_basic") {
(2, repeat("test1111", 131072))
"""
order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}"
-
+ test {
+ sql """SELECT repeat("test1111", 131073 + 100);"""
+ exception "repeat function exceeded maximum default value"
+ }
sql """drop table if exists test_string_cmp;"""
sql """
diff --git a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
index e7fead33d90..f9afdd8eadb 100644
--- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
+++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
@@ -27,13 +27,14 @@ suite("max_msg_size_of_result_receiver") {
ENGINE=OLAP DISTRIBUTED BY HASH(id)
PROPERTIES("replication_num"="1")
"""
-
+ sql """set repeat_max_num=100000;"""
+ sql """set max_msg_size_of_result_receiver=90000;""" // so the test of repeat("a", 80000) could pass, and repeat("a", 100000) will be failed
sql """
- INSERT INTO ${table_name} VALUES (104, repeat("a", ${MESSAGE_SIZE_BASE
* 104}))
+ INSERT INTO ${table_name} VALUES (104, repeat("a", 80000))
"""
sql """
- INSERT INTO ${table_name} VALUES (105, repeat("a", ${MESSAGE_SIZE_BASE
* 105}))
+ INSERT INTO ${table_name} VALUES (105, repeat("a", 100000))
"""
def with_exception = false
@@ -44,10 +45,9 @@ suite("max_msg_size_of_result_receiver") {
}
assertEquals(with_exception, false)
- try {
- sql "SELECT * FROM ${table_name} WHERE id = 105"
- } catch (Exception e) {
- assertTrue(e.getMessage().contains('MaxMessageSize reached, try increase max_msg_size_of_result_receiver'))
+ test {
+ sql """SELECT * FROM ${table_name} WHERE id = 105;"""
+ exception "MaxMessageSize reached, try increase max_msg_size_of_result_receiver"
}
try {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]