This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 09cd0cc6c3e [fix](load) avoid inserting too large string with type of agg_state (#36117)
09cd0cc6c3e is described below
commit 09cd0cc6c3ecdf6a68ee04852149ab3103cbd0da
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Jun 17 09:55:15 2024 +0800
[fix](load) avoid inserting too large string with type of agg_state (#36117)
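## Proposed changes
Factor the string-length validation that `OlapTableBlockConvertor::_validate_column` applies to CHAR/VARCHAR/STRING columns into a shared `string_column_checker` lambda, and also run it for `TYPE_AGG_STATE` columns whose underlying column is a `ColumnString`, so that agg_state values are checked against the same length limit. Adds the regression test `test_insert_limit`.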
---
be/src/vec/sink/vtablet_block_convertor.cpp | 25 +++++++---
.../data/query_p1/test_insert_limit.out | 4 ++
.../suites/query_p1/test_insert_limit.groovy | 58 ++++++++++++++++++++++
3 files changed, 80 insertions(+), 7 deletions(-)
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp
index d93a654728d..7f7f4c76008 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -209,13 +209,7 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
return !_filter_map[row] && (null_map == nullptr || null_map[j] == 0);
};
- switch (type.type) {
- case TYPE_CHAR:
- case TYPE_VARCHAR:
- case TYPE_STRING: {
- const auto column_string =
- assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
-
+ auto string_column_checker = [&](const ColumnString* column_string) {
size_t limit = config::string_type_length_soft_limit_bytes;
// when type.len is negative, std::min will return overflow value, so we need to check it
if (type.len > 0) {
@@ -257,6 +251,16 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
}
}
}
+ return Status::OK();
+ };
+
+ switch (type.type) {
+ case TYPE_CHAR:
+ case TYPE_VARCHAR:
+ case TYPE_STRING: {
+ const auto column_string =
+ assert_cast<const vectorized::ColumnString*>(real_column_ptr.get());
+ RETURN_IF_ERROR(string_column_checker(column_string));
break;
}
case TYPE_JSONB: {
@@ -418,6 +422,13 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type
}
break;
}
+ case TYPE_AGG_STATE: {
+ auto* column_string = vectorized::check_and_get_column<ColumnString>(*real_column_ptr);
+ if (column_string) {
+ RETURN_IF_ERROR(string_column_checker(column_string));
+ }
+ break;
+ }
default:
break;
}
diff --git a/regression-test/data/query_p1/test_insert_limit.out b/regression-test/data/query_p1/test_insert_limit.out
new file mode 100644
index 00000000000..b3620441e4f
--- /dev/null
+++ b/regression-test/data/query_p1/test_insert_limit.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1 1310728
+
diff --git a/regression-test/suites/query_p1/test_insert_limit.groovy b/regression-test/suites/query_p1/test_insert_limit.groovy
new file mode 100644
index 00000000000..d2d11b1a28d
--- /dev/null
+++ b/regression-test/suites/query_p1/test_insert_limit.groovy
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_insert_limit") {
+ sql "set enable_agg_state=true"
+ sql "DROP TABLE IF EXISTS `tbl_test_insert_limit`"
+ sql """
+ create table `tbl_test_insert_limit`(
+ k1 int null,
+ k2 agg_state<group_concat(string)> generic
+ )
+ aggregate key (k1)
+ distributed BY hash(k1) buckets 3
+ properties("replication_num" = "1");
+ """
+
+ sql """
+ insert into `tbl_test_insert_limit` values(1, group_concat_state('abcd'));
+ """
+
+ def error = ""
+ for (i in 1..24) {
+ test {
+ sql " insert into `tbl_test_insert_limit` select * from tbl_test_insert_limit; "
+ check{result, exception, startTime, endTime ->
+ if (exception != null) {
+ error = exception
+ }
+ }
+ }
+
+ if (error != "") {
+ break
+ }
+ }
+
+ assertTrue(error != "")
+
+ qt_select """
+ select k1, length(k2) from `tbl_test_insert_limit`;
+ """
+
+ sql "DROP TABLE IF EXISTS `tbl_test_insert_limit`"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]