This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new abeaafee8fd [Chore](function) remove repeat max num (#37907)
abeaafee8fd is described below

commit abeaafee8fdbd783c416b6f5c7aef9bcf77892c8
Author: Pxl <[email protected]>
AuthorDate: Thu Jul 18 16:00:09 2024 +0800

    [Chore](function) remove repeat max num (#37907)
    
    ## Proposed changes
    remove repeat max num
---
 be/src/runtime/runtime_state.h                     | 11 ----
 be/src/vec/functions/function_string.h             | 70 +++++++---------------
 .../expression/rules/FoldConstantRuleOnBE.java     |  1 -
 .../java/org/apache/doris/qe/SessionVariable.java  |  7 ---
 .../org/apache/doris/qe/SessionVariablesTest.java  |  2 +-
 gensrc/thrift/PaloInternalService.thrift           |  2 +-
 .../sys/test_sys_string/test_sys_string_basic.py   |  3 -
 .../datatype_p0/string/test_string_basic.groovy    |  7 +--
 .../suites/query_p1/test_big_pad.groovy            |  1 -
 .../max_msg_size_of_result_receiver.groovy         |  1 -
 10 files changed, 25 insertions(+), 80 deletions(-)

diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index e89e7be66f5..1bfd4348d18 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -508,17 +508,6 @@ public:
                        : 0;
     }
 
-    int repeat_max_num() const {
-#ifndef BE_TEST
-        if (!_query_options.__isset.repeat_max_num) {
-            return 10000;
-        }
-        return _query_options.repeat_max_num;
-#else
-        return 10;
-#endif
-    }
-
     int64_t external_sort_bytes_threshold() const {
         if (_query_options.__isset.external_sort_bytes_threshold) {
             return _query_options.external_sort_bytes_threshold;
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index 46539f681dd..5e119e2146c 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -846,9 +846,8 @@ public:
                 }
             }
         }
-        if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
-            return Status::BufferAllocFailed("concat output is too large to 
allocate");
-        }
+
+        ColumnString::check_chars_length(res_reserve_size, 0);
 
         res_data.resize(res_reserve_size);
 
@@ -1202,14 +1201,6 @@ public:
     static FunctionPtr create() { return 
std::make_shared<FunctionStringRepeat>(); }
     String get_name() const override { return name; }
     size_t get_number_of_arguments() const override { return 2; }
-    std::string error_msg(int default_value, int repeat_value) const {
-        auto error_msg = fmt::format(
-                "The second parameter of repeat function exceeded maximum 
default value, "
-                "default_value is {}, and now input is {} . you could try 
change default value "
-                "greater than value eg: set repeat_max_num = {}.",
-                default_value, repeat_value, repeat_value + 10);
-        return error_msg;
-    }
 
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         return make_nullable(std::make_shared<DataTypeString>());
@@ -1225,22 +1216,18 @@ public:
                 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
         argument_ptr[1] = block.get_by_position(arguments[1]).column;
 
-        if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) 
{
-            if (auto* col2 = 
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
+        if (const auto* col1 = 
check_and_get_column<ColumnString>(*argument_ptr[0])) {
+            if (const auto* col2 = 
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
                 RETURN_IF_ERROR(vector_vector(col1->get_chars(), 
col1->get_offsets(),
                                               col2->get_data(), 
res->get_chars(),
-                                              res->get_offsets(), 
null_map->get_data(),
-                                              
context->state()->repeat_max_num()));
+                                              res->get_offsets(), 
null_map->get_data()));
                 block.replace_by_position(
                         result, ColumnNullable::create(std::move(res), 
std::move(null_map)));
                 return Status::OK();
-            } else if (auto* col2_const = 
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
+            } else if (const auto* col2_const =
+                               
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
                 
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
                 int repeat = col2_const->get_int(0);
-                if (repeat > context->state()->repeat_max_num()) {
-                    return Status::InvalidArgument(
-                            error_msg(context->state()->repeat_max_num(), 
repeat));
-                }
                 if (repeat <= 0) {
                     null_map->get_data().resize_fill(input_rows_count, 0);
                     res->insert_many_defaults(input_rows_count);
@@ -1260,8 +1247,8 @@ public:
 
     Status vector_vector(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,
                          const ColumnInt32::Container& repeats, 
ColumnString::Chars& res_data,
-                         ColumnString::Offsets& res_offsets, 
ColumnUInt8::Container& null_map,
-                         const int repeat_max_num) const {
+                         ColumnString::Offsets& res_offsets,
+                         ColumnUInt8::Container& null_map) const {
         size_t input_row_size = offsets.size();
 
         fmt::memory_buffer buffer;
@@ -1272,15 +1259,10 @@ public:
             const char* raw_str = reinterpret_cast<const 
char*>(&data[offsets[i - 1]]);
             size_t size = offsets[i] - offsets[i - 1];
             int repeat = repeats[i];
-            if (repeat > repeat_max_num) {
-                return Status::InvalidArgument(error_msg(repeat_max_num, 
repeat));
-            }
-
             if (repeat <= 0) {
                 StringOP::push_empty_string(i, res_data, res_offsets);
-            } else if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
-                StringOP::push_null_string(i, res_data, res_offsets, null_map);
             } else {
+                ColumnString::check_chars_length(repeat * size + 
res_data.size(), 0);
                 for (int j = 0; j < repeat; ++j) {
                     buffer.append(raw_str, raw_str + size);
                 }
@@ -1306,16 +1288,13 @@ public:
             buffer.clear();
             const char* raw_str = reinterpret_cast<const 
char*>(&data[offsets[i - 1]]);
             size_t size = offsets[i] - offsets[i - 1];
+            ColumnString::check_chars_length(repeat * size + res_data.size(), 
0);
 
-            if (repeat * size > DEFAULT_MAX_STRING_SIZE) {
-                StringOP::push_null_string(i, res_data, res_offsets, null_map);
-            } else {
-                for (int j = 0; j < repeat; ++j) {
-                    buffer.append(raw_str, raw_str + size);
-                }
-                StringOP::push_value_string(std::string_view(buffer.data(), 
buffer.size()), i,
-                                            res_data, res_offsets);
+            for (int j = 0; j < repeat; ++j) {
+                buffer.append(raw_str, raw_str + size);
             }
+            StringOP::push_value_string(std::string_view(buffer.data(), 
buffer.size()), i, res_data,
+                                        res_offsets);
         }
     }
 };
@@ -1369,7 +1348,6 @@ public:
         const bool str_const = col_const[0];
         const bool len_const = col_const[1];
         const bool pad_const = col_const[2];
-        const int repeat_max_num = context->state()->repeat_max_num();
         for (size_t i = 0; i < input_rows_count; ++i) {
             str_index.clear();
             pad_index.clear();
@@ -1404,15 +1382,6 @@ public:
                                                 res_chars, res_offsets);
                     continue;
                 }
-                if (len > repeat_max_num) {
-                    return Status::InvalidArgument(
-                            " {} function the length argument is {} exceeded 
maximum default "
-                            "value: {}."
-                            "if you really need this length, you could change 
the session "
-                            "variable "
-                            "set repeat_max_num = xxx.",
-                            get_name(), len, repeat_max_num);
-                }
 
                 // make compatible with mysql. return empty string if pad is 
empty
                 if (pad_char_size == 0) {
@@ -1422,7 +1391,9 @@ public:
 
                 const int32_t pad_times = (len - str_char_size) / 
pad_char_size;
                 const int32_t pad_remainder = (len - str_char_size) % 
pad_char_size;
-                buffer.reserve(str_len + (pad_times + 1) * pad_len);
+                size_t new_capacity = str_len + size_t(pad_times + 1) * 
pad_len;
+                ColumnString::check_chars_length(new_capacity, 0);
+                buffer.reserve(new_capacity);
                 auto* buffer_data = buffer.data();
                 int32_t buffer_len = 0;
                 if constexpr (!Impl::is_lpad) {
@@ -2993,6 +2964,8 @@ private:
                     return str;
                 }
                 std::string result;
+                ColumnString::check_chars_length(
+                        str.length() * (new_str.length() + 1) + 
new_str.length(), 0);
                 result.reserve(str.length() * (new_str.length() + 1) + 
new_str.length());
                 for (char c : str) {
                     result += new_str;
@@ -3211,6 +3184,7 @@ public:
         auto& res_chars = col_res->get_chars();
         res_offset.resize(input_rows_count);
         // max pinyin size is 6, double of utf8 chinese word 3, add one char 
to set '~'
+        ColumnString::check_chars_length(str_chars.size() * 2 + 
input_rows_count, 0);
         res_chars.resize(str_chars.size() * 2 + input_rows_count);
 
         size_t in_len = 0, out_len = 0;
@@ -3493,7 +3467,7 @@ public:
         if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
             return Status::BufferAllocFailed("function char output is too 
large to allocate");
         }
-
+        ColumnString::check_chars_length(res_reserve_size, 0);
         res_data.resize(res_reserve_size);
         res_offset.resize(input_rows_count);
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
index f5a19553ed1..c7e9695cfd7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnBE.java
@@ -277,7 +277,6 @@ public class FoldConstantRuleOnBE implements ExpressionPatternRuleFactory {
             }
 
             TQueryOptions tQueryOptions = new TQueryOptions();
-            
tQueryOptions.setRepeatMaxNum(context.getSessionVariable().repeatMaxNum);
             tQueryOptions.setBeExecVersion(Config.be_exec_version);
 
             TFoldConstantParams tParams = new TFoldConstantParams(paramMap, 
queryGlobals);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 9a5caa5a7d1..4daa717f8bc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -394,8 +394,6 @@ public class SessionVariable implements Serializable, Writable {
     // support unicode in label, table, column, common name check
     public static final String ENABLE_UNICODE_NAME_SUPPORT = 
"enable_unicode_name_support";
 
-    public static final String REPEAT_MAX_NUM = "repeat_max_num";
-
     public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";
 
     public static final String ENABLE_TWO_PHASE_READ_OPT = 
"enable_two_phase_read_opt";
@@ -1433,9 +1431,6 @@ public class SessionVariable implements Serializable, Writable {
     @VariableMgr.VarAttr(name = ENABLE_UNICODE_NAME_SUPPORT, needForward = 
true)
     public boolean enableUnicodeNameSupport = false;
 
-    @VariableMgr.VarAttr(name = REPEAT_MAX_NUM, needForward = true)
-    public int repeatMaxNum = 10000;
-
     @VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
     public long groupConcatMaxLen = 2147483646;
 
@@ -3538,8 +3533,6 @@ public class SessionVariable implements Serializable, Writable {
         
tResult.setPartitionedHashJoinRowsThreshold(partitionedHashJoinRowsThreshold);
         
tResult.setPartitionedHashAggRowsThreshold(partitionedHashAggRowsThreshold);
 
-        tResult.setRepeatMaxNum(repeatMaxNum);
-
         tResult.setExternalSortBytesThreshold(externalSortBytesThreshold);
 
         tResult.setExternalAggBytesThreshold(0); // disable for now
diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
index bad7842e01e..05408d3b674 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/SessionVariablesTest.java
@@ -113,7 +113,7 @@ public class SessionVariablesTest extends TestWithFeService {
         Assert.assertNotEquals(sessionVar.isEnableBucketShuffleJoin(), 
bucketShuffle);
 
         // 4. set experimental for none experimental var
-        sql = "set experimental_repeat_max_num=5";
+        sql = "set experimental_group_concat_max_len=5";
         setStmt = (SetStmt) parseAndAnalyzeStmt(sql, connectContext);
         SetExecutor setExecutor2 = new SetExecutor(connectContext, setStmt);
         ExceptionChecker.expectThrowsWithMsg(DdlException.class, "Unknown 
system variable",
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index 0e0a87eae0b..2c6746034ac 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -193,7 +193,7 @@ struct TQueryOptions {
   // non-pipelinex engine removed. always true.
   57: optional bool enable_pipeline_engine = true
 
-  58: optional i32 repeat_max_num = 0
+  58: optional i32 repeat_max_num = 0 // Deprecated
 
   59: optional i64 external_sort_bytes_threshold = 0
 
diff --git a/pytest/sys/test_sys_string/test_sys_string_basic.py b/pytest/sys/test_sys_string/test_sys_string_basic.py
index 48f8696a6cc..374154d758d 100644
--- a/pytest/sys/test_sys_string/test_sys_string_basic.py
+++ b/pytest/sys/test_sys_string/test_sys_string_basic.py
@@ -52,9 +52,6 @@ broker_info = palo_config.broker_info
 def setup_module():
     """setup"""
     client = common.get_client()
-    ret = client.show_variables('repeat_max_num')
-    if len(ret) == 1:
-        client.set_variables('repeat_max_num', '200000', True)
 
 
 def teardown_module():
diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
index 36fbddede2d..625239f7369 100644
--- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
+++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
@@ -119,8 +119,6 @@ suite("test_string_basic") {
         CREATE TABLE IF NOT EXISTS ${tbName} (k1 VARCHAR(10) NULL, v1 STRING 
NULL) 
         UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 
properties("replication_num" = "1")
         """
-    // default repeat maximum is 10000
-    sql """set repeat_max_num=131073"""
     sql """
         INSERT INTO ${tbName} VALUES
          ("", ""),
@@ -129,10 +127,7 @@ suite("test_string_basic") {
          (2, repeat("test1111", 131072))
         """
     order_qt_select_str_tb "select k1, md5(v1), length(v1) from ${tbName}"
-    test {
-        sql """SELECT repeat("test1111", 131073 + 100);"""
-        exception "repeat function exceeded maximum default value"
-    }
+
     sql """drop table if exists test_string_cmp;"""
 
     sql """
diff --git a/regression-test/suites/query_p1/test_big_pad.groovy b/regression-test/suites/query_p1/test_big_pad.groovy
index b96a380ac8b..9c781a5189c 100644
--- a/regression-test/suites/query_p1/test_big_pad.groovy
+++ b/regression-test/suites/query_p1/test_big_pad.groovy
@@ -34,7 +34,6 @@ suite("test_big_pad") {
         sql "select rpad('a',15000,'asd');" 
         exception "rpad function the length argument is 15000 exceeded maximum 
default value"
     }
-    sql """ set repeat_max_num = 2000000001 """ // default value is 10000
     qt_sql_rpad"select length(rpad('a',15000,'asd'));"
 
     sql "insert into d_table 
values(1,2000000000,1,'a'),(1,2000000000,1,'a'),(1,2000000000,1,'a');"
diff --git a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
index f9afdd8eadb..df550748a11 100644
--- a/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
+++ b/regression-test/suites/variable_p0/max_msg_size_of_result_receiver.groovy
@@ -27,7 +27,6 @@ suite("max_msg_size_of_result_receiver") {
         ENGINE=OLAP DISTRIBUTED BY HASH(id)
         PROPERTIES("replication_num"="1")
     """
-    sql """set repeat_max_num=100000;"""
     sql """set max_msg_size_of_result_receiver=90000;""" // so the test of 
repeat("a", 80000) could pass, and repeat("a", 100000) will be failed
     sql """
         INSERT INTO ${table_name} VALUES (104, repeat("a", 80000))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to