This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new abeaafee8fd [Chore](function) remove repeat max num (#37907)
abeaafee8fd is described below
commit abeaafee8fdbd783c416b6f5c7aef9bcf77892c8
Author: Pxl <[email protected]>
AuthorDate: Thu Jul 18 16:00:09 2024 +0800
[Chore](function) remove repeat max num (#37907)
## Proposed changes
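Remove the `repeat_max_num` session variable (FE `SessionVariable`, BE
`RuntimeState::repeat_max_num()`) and the limit checks that relied on it in
the `repeat` and lpad/rpad string functions. Result sizes in these functions
are now checked with `ColumnString::check_chars_length` instead. The thrift
field `TQueryOptions.repeat_max_num` is kept but marked as deprecated.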
---
be/src/runtime/runtime_state.h | 11 ----
be/src/vec/functions/function_string.h | 70 +++++++---------------
.../expression/rules/FoldConstantRuleOnBE.java | 1 -
.../java/org/apache/doris/qe/SessionVariable.java | 7 ---
.../org/apache/doris/qe/SessionVariablesTest.java | 2 +-
gensrc/thrift/PaloInternalService.thrift | 2 +-
.../sys/test_sys_string/test_sys_string_basic.py | 3 -
.../datatype_p0/string/test_string_basic.groovy | 7 +--
.../suites/query_p1/test_big_pad.groovy | 1 -
.../max_msg_size_of_result_receiver.groovy | 1 -
10 files changed, 25 insertions(+), 80 deletions(-)
diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index e89e7be66f5..1bfd4348d18 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -508,17 +508,6 @@ public:
: 0;
}
- int repeat_max_num() const {
-#ifndef BE_TEST
- if (!_query_options.__isset.repeat_max_num) {
- return 10000;
- }
- return _query_options.repeat_max_num;
-#else
- return 10;
-#endif
- }
-
int64_t external_sort_bytes_threshold() const {
if (_query_options.__isset.external_sort_bytes_threshold) {
return _query_options.external_sort_bytes_threshold;
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 46539f681dd..5e119e2146c 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -846,9 +846,8 @@ public:
}
}
}
- if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
- return Status::BufferAllocFailed("concat output is too large to
allocate");
- }
+
+ ColumnString::check_chars_length(res_reserve_size, 0);
res_data.resize(res_reserve_size);
@@ -1202,14 +1201,6 @@ public:
static FunctionPtr create() { return
std::make_shared<FunctionStringRepeat>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
- std::string error_msg(int default_value, int repeat_value) const {
- auto error_msg = fmt::format(
- "The second parameter of repeat function exceeded maximum
default value, "
- "default_value is {}, and now input is {} . you could try
change default value "
- "greater than value eg: set repeat_max_num = {}.",
- default_value, repeat_value, repeat_value + 10);
- return error_msg;
- }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
return make_nullable(std::make_shared<DataTypeString>());
@@ -1225,22 +1216,18 @@ public:
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
argument_ptr[1] = block.get_by_position(arguments[1]).column;
- if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0]))
{
- if (auto* col2 =
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
+ if (const auto* col1 =
check_and_get_column<ColumnString>(*argument_ptr[0])) {
+ if (const auto* col2 =
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
RETURN_IF_ERROR(vector_vector(col1->get_chars(),
col1->get_offsets(),
col2->get_data(),
res->get_chars(),
- res->get_offsets(),
null_map->get_data(),
-
context->state()->repeat_max_num()));
+ res->get_offsets(),
null_map->get_data()));
block.replace_by_position(
result, ColumnNullable::create(std::move(res),
std::move(null_map)));
return Status::OK();
- } else if (auto* col2_const =
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
+ } else if (const auto* col2_const =
+
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
int repeat = col2_const->get_int(0);
- if (repeat > context->state()->repeat_max_num()) {
- return Status::InvalidArgument(
- error_msg(context->state()->repeat_max_num(),
repeat));
- }
if (repeat <= 0) {
null_map->get_data().resize_fill(input_rows_count, 0);
res->insert_many_defaults(input_rows_count);
@@ -1260,8 +1247,8 @@ public:
Status vector_vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
const ColumnInt32::Container& repeats,
ColumnString::Chars& res_data,
- ColumnString::Offsets& res_offsets,
ColumnUInt8::Container& null_map,
- const int repeat_max_num) const {
+ ColumnString::Offsets& res_offsets,
+ ColumnUInt8::Container& null_map) const {
size_t input_row_size = offsets.size();
fmt::memory_buffer buffer;
@@ -1272,15 +1259,10 @@ public:
const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
size_t size = offsets[i] - offsets[i - 1];
int repeat = repeats[i];
- if (repeat > repeat_max_num) {
- return Status::InvalidArgument(error_msg(repeat_max_num,
repeat));
- }
-
if (repeat <= 0) {
StringOP::push_empty_string(i, res_data, res_offsets);
- } else if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
- StringOP::push_null_string(i, res_data, res_offsets, null_map);
} else {
+ ColumnString::check_chars_length(repeat * size +
res_data.size(), 0);
for (int j = 0; j < repeat; ++j) {
buffer.append(raw_str, raw_str + size);
}
@@ -1306,16 +1288,13 @@ public:
buffer.clear();
const char* raw_str = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
size_t size = offsets[i] - offsets[i - 1];
+ ColumnString::check_chars_length(repeat * size + res_data.size(),
0);
- if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
- StringOP::push_null_string(i, res_data, res_offsets, null_map);
- } else {
- for (int j = 0; j < repeat; ++j) {
- buffer.append(raw_str, raw_str + size);
- }
- StringOP::push_value_string(std::string_view(buffer.data(),
buffer.size()), i,
- res_data, res_offsets);
+ for (int j = 0; j < repeat; ++j) {
+ buffer.append(raw_str, raw_str + size);
}
+ StringOP::push_value_string(std::string_view(buffer.data(),
buffer.size()), i, res_data,
+ res_offsets);
}
}
};
@@ -1369,7 +1348,6 @@ public:
const bool str_const = col_const[0];
const bool len_const = col_const[1];
const bool pad_const = col_const[2];
- const int repeat_max_num = context->state()->repeat_max_num();
for (size_t i = 0; i < input_rows_count; ++i) {
str_index.clear();
pad_index.clear();
@@ -1404,15 +1382,6 @@ public:
res_chars, res_offsets);
continue;
}
- if (len > repeat_max_num) {
- return Status::InvalidArgument(
- " {} function the length argument is {} exceeded
maximum default "
- "value: {}."
- "if you really need this length, you could change
the session "
- "variable "
- "set repeat_max_num = xxx.",
- get_name(), len, repeat_max_num);
- }
// make compatible with mysql. return empty string if pad is
empty
if (pad_char_size == 0) {
@@ -1422,7 +1391,9 @@ public:
const int32_t pad_times = (len - str_char_size) /
pad_char_size;
const int32_t pad_remainder = (len - str_char_size) %
pad_char_size;
- buffer.reserve(str_len + (pad_times + 1) * pad_len);
+ size_t new_capacity = str_len + size_t(pad_times + 1) *
pad_len;
+ ColumnString::check_chars_length(new_capacity, 0);
+ buffer.reserve(new_capacity);
auto* buffer_data = buffer.data();
int32_t buffer_len = 0;
if constexpr (!Impl::is_lpad) {
@@ -2993,6 +2964,8 @@ private:
return str;
}
std::string result;
+ ColumnString::check_chars_length(
+ str.length() * (new_str.length() + 1) +
new_str.length(), 0);
result.reserve(str.length() * (new_str.length() + 1) +
new_str.length());
for (char c : str) {
result += new_str;
@@ -3211,6 +3184,7 @@ public:
auto& res_chars = col_res->get_chars();
res_offset.resize(input_rows_count);
// max pinyin size is 6, double of utf8 chinese word 3, add one char
to set '~'
+ ColumnString::check_chars_length(str_chars.size() * 2 +
input_rows_count, 0);
res_chars.resize(str_chars.size() * 2 + input_rows_count);
size_t in_len = 0, out_len = 0;
@@ -3493,7 +3467,7 @@ public:
if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
return Status::BufferAllocFailed("function char output is too
large to allocate");
}
-
+ ColumnString::check_chars_length(res_reserve_size, 0);
res_data.resize(res_reserve_size);
res_offset.resize(input_rows_count);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index f5a19553ed1..c7e9695cfd7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -277,7 +277,6 @@ public class FoldConstantRuleOnBE implements
ExpressionPatternRuleFactory {
}
TQueryOptions tQueryOptions = new TQueryOptions();
-
tQueryOptions.setRepeatMaxNum(context.getSessionVariable().repeatMaxNum);
tQueryOptions.setBeExecVersion(Config.be_exec_version);
TFoldConstantParams tParams = new TFoldConstantParams(paramMap,
queryGlobals);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 9a5caa5a7d1..4daa717f8bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -394,8 +394,6 @@ public class SessionVariable implements Serializable,
Writable {
// support unicode in label, table, column, common name check
public static final String ENABLE_UNICODE_NAME_SUPPORT =
"enable_unicode_name_support";
- public static final String REPEAT_MAX_NUM = "repeat_max_num";
-
public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";
public static final String ENABLE_TWO_PHASE_READ_OPT =
"enable_two_phase_read_opt";
@@ -1433,9 +1431,6 @@ public class SessionVariable implements Serializable,
Writable {
@VariableMgr.VarAttr(name = ENABLE_UNICODE_NAME_SUPPORT, needForward =
true)
public boolean enableUnicodeNameSupport = false;
- @VariableMgr.VarAttr(name = REPEAT_MAX_NUM, needForward = true)
- public int repeatMaxNum = 10000;
-
@VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
public long groupConcatMaxLen = 2147483646;
@@ -3538,8 +3533,6 @@ public class SessionVariable implements Serializable,
Writable {
tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold);
tResult.setPartitionedHashAggRowsThreshold(partitionedHashAggRowsThreshold);
- tResult.setRepeatMaxNum(repeatMaxNum);
-
tResult.setExternalSortBytesThreshold(externalSortBytesThreshold);
tResult.setExternalAggBytesThreshold(0); // disable for now
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
index bad7842e01e..05408d3b674 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
@@ -113,7 +113,7 @@ public class SessionVariablesTest extends TestWithFeService
{
Assert.assertNotEquals(sessionVar.isEnableBucketShuffleJoin(),
bucketShuffle);
// 4. set experimental for none experimental var
- sql = "set experimental_repeat_max_num=5";
+ sql = "set experimental_group_concat_max_len=5";
setStmt = (SetStmt) parseAndAnalyzeStmt(sql, connectContext);
SetExecutor setExecutor2 = new SetExecutor(connectContext, setStmt);
ExceptionChecker.expectThrowsWithMsg(DdlException.class, "Unknown
system variable",
diff --git a/gensrc/thrift/PaloInternalService.thrift
b/gensrc/thrift/PaloInternalService.thrift
index 0e0a87eae0b..2c6746034ac 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -193,7 +193,7 @@ struct TQueryOptions {
// non-pipelinex engine removed. always true.
57: optional bool enable_pipeline_engine = true
- 58: optional i32 repeat_max_num = 0
+ 58: optional i32 repeat_max_num = 0 // Deprecated
59: optional i64 external_sort_bytes_threshold = 0
diff --git a/pytest/sys/test_sys_string/test_sys_string_basic.py
b/pytest/sys/test_sys_string/test_sys_string_basic.py
index 48f8696a6cc..374154d758d 100644
--- a/pytest/sys/test_sys_string/test_sys_string_basic.py
+++ b/pytest/sys/test_sys_string/test_sys_string_basic.py
@@ -52,9 +52,6 @@ broker_info = palo_config.broker_info
def setup_module():
"""setup"""
client = common.get_client()
- ret = client.show_variables('repeat_max_num')
- if len(ret) == 1:
- client.set_variables('repeat_max_num', '200000', True)
def teardown_module():
diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
index 36fbddede2d..625239f7369 100644
--- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
+++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
@@ -119,8 +119,6 @@ suite("test_string_basic") {
CREATE TABLE IF NOT EXISTS ${tbName} (k1 VARCHAR(10) NULL, v1 STRING
NULL)
UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5
properties("replication_num" = "1")
"""
- // default repeat maximum is 10000
- sql """set repeat_max_num=131073"""
sql """
INSERT INTO ${tbName} VALUES
("", ""),
@@ -129,10 +127,7 @@ suite("test_string_basic") {
(2, repeat("test1111", 131072))
"""
order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}"
- test {
- sql """SELECT repeat("test1111", 131073 + 100);"""
- exception "repeat function exceeded maximum default value"
- }
+
sql """drop table if exists test_string_cmp;"""
sql """
diff --git a/regression-test/suites/query_p1/test_big_pad.groovy
b/regression-test/suites/query_p1/test_big_pad.groovy
index b96a380ac8b..9c781a5189c 100644
--- a/regression-test/suites/query_p1/test_big_pad.groovy
+++ b/regression-test/suites/query_p1/test_big_pad.groovy
@@ -34,7 +34,6 @@ suite("test_big_pad") {
sql "select rpad('a',15000,'asd');"
exception "rpad function the length argument is 15000 exceeded maximum
default value"
}
- sql """ set repeat_max_num = 2000000001 """ // default value is 10000
qt_sql_rpad"select length(rpad('a',15000,'asd'));"
sql "insert into d_table
values(1,2000000000,1,'a'),(1,2000000000,1,'a'),(1,2000000000,1,'a');"
diff --git
a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
index f9afdd8eadb..df550748a11 100644
--- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
+++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
@@ -27,7 +27,6 @@ suite("max_msg_size_of_result_receiver") {
ENGINE=OLAP DISTRIBUTED BY HASH(id)
PROPERTIES("replication_num"="1")
"""
- sql """set repeat_max_num=100000;"""
sql """set max_msg_size_of_result_receiver=90000;""" // so the test of
repeat("a", 80000) could pass, and repeat("a", 100000) will be failed
sql """
INSERT INTO ${table_name} VALUES (104, repeat("a", 80000))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]