This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 121c2002868 [fix](array_range) fix array_range func for large param which should return error (#38284)
121c2002868 is described below
commit 121c20028682e44aef875acd21acb00b5a1495e8
Author: amory <[email protected]>
AuthorDate: Sat Jul 27 18:08:16 2024 +0800
    [fix](array_range) fix array_range func for large param which should return error (#38284)

    If array_range is called with a very large size, materializing the result can
    make the BE run out of memory, so guard against this with max_array_size_as_field
    and return an error instead.
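    For context on the fix: before materializing a range, the patched code estimates how
    many elements would be produced, using 128-bit intermediates so the arithmetic cannot
    overflow, and returns Status::InvalidArgument once the estimate exceeds
    max_array_size_as_field. Below is a minimal standalone sketch of that idea; the
    constant value and the helper name are illustrative only, and the actual patch performs
    the check inline in RangeImplUtil::vector (with a slightly different expression, plus a
    simple loop counter for the date/time case).

        #include <cstdint>
        #include <optional>

        // Illustrative constant mirroring max_array_size_as_field in the BE
        // (the regression tests below expect the limit 1000000).
        constexpr int64_t kMaxArraySizeAsField = 1000000;

        // Hypothetical helper: estimate how many elements range(start, end, step)
        // would produce, using 128-bit intermediates so the subtraction/division
        // cannot overflow for extreme inputs. Returns std::nullopt when the estimate
        // exceeds the limit; a caller would turn that into an InvalidArgument status.
        std::optional<int64_t> checked_range_size(int64_t start, int64_t end, int32_t step) {
            if (step <= 0 || start >= end) {
                return 0; // empty or invalid range: nothing to materialize
            }
            __int128_t count = (static_cast<__int128_t>(end) - start + step - 1) /
                               step; // ceil((end - start) / step)
            if (count > kMaxArraySizeAsField) {
                return std::nullopt;
            }
            return static_cast<int64_t>(count);
        }

    A caller would translate std::nullopt into Status::InvalidArgument and propagate it
    with RETURN_IF_ERROR, as the patch does, so the query fails fast instead of exhausting
    BE memory.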
---
.../vec/functions/array/function_array_range.cpp | 31 ++++++++++++++++------
.../scalar_function/Array.groovy | 29 ++++++++++++++++++++
.../array_functions/test_array_functions.groovy | 22 +++++++++++++++
3 files changed, 74 insertions(+), 8 deletions(-)
diff --git a/be/src/vec/functions/array/function_array_range.cpp b/be/src/vec/functions/array/function_array_range.cpp
index a84b94d8cfc..1a5dd08aaac 100644
--- a/be/src/vec/functions/array/function_array_range.cpp
+++ b/be/src/vec/functions/array/function_array_range.cpp
@@ -168,9 +168,9 @@ struct RangeImplUtil {
dest_nested_column->reserve(input_rows_count);
dest_nested_null_map.reserve(input_rows_count);
-        vector(start_column->get_data(), end_column->get_data(), step_column->get_data(),
-               args_null_map->get_data(), nested_column->get_data(), dest_nested_null_map,
-               dest_offsets);
+        RETURN_IF_ERROR(vector(start_column->get_data(), end_column->get_data(),
+                               step_column->get_data(), args_null_map->get_data(),
+                               nested_column->get_data(), dest_nested_null_map, dest_offsets));
         block.get_by_position(result).column =
                 ColumnNullable::create(std::move(dest_array_column_ptr), std::move(args_null_map));
@@ -178,11 +178,12 @@ struct RangeImplUtil {
}
private:
-    static void vector(const PaddedPODArray<SourceDataType>& start,
-                       const PaddedPODArray<SourceDataType>& end, const PaddedPODArray<Int32>& step,
-                       NullMap& args_null_map, PaddedPODArray<SourceDataType>& nested_column,
-                       PaddedPODArray<UInt8>& dest_nested_null_map,
-                       ColumnArray::Offsets64& dest_offsets) {
+    static Status vector(const PaddedPODArray<SourceDataType>& start,
+                         const PaddedPODArray<SourceDataType>& end,
+                         const PaddedPODArray<Int32>& step, NullMap& args_null_map,
+                         PaddedPODArray<SourceDataType>& nested_column,
+                         PaddedPODArray<UInt8>& dest_nested_null_map,
+                         ColumnArray::Offsets64& dest_offsets) {
int rows = start.size();
for (auto row = 0; row < rows; ++row) {
auto idx = start[row];
@@ -195,6 +196,13 @@ private:
dest_offsets.push_back(dest_offsets.back());
continue;
} else {
+                if (idx < end_row && step_row > 0 &&
+                    ((static_cast<__int128_t>(end_row) - static_cast<__int128_t>(step_row) -
+                      1) / static_cast<__int128_t>(step_row) +
+                     1) > max_array_size_as_field) {
+                    return Status::InvalidArgument("Array size exceeds the limit {}",
+                                                   max_array_size_as_field);
+                }
int offset = dest_offsets.back();
while (idx < end[row]) {
nested_column.push_back(idx);
@@ -219,11 +227,17 @@ private:
                     using UNIT = std::conditional_t<std::is_same_v<TimeUnitOrVoid, void>,
                                                     std::integral_constant<TimeUnit, TimeUnit::DAY>,
                                                     TimeUnitOrVoid>;
+                    int move = 0;
                     while (doris::datetime_diff<UNIT::value, DateTimeV2ValueType,
                                                 DateTimeV2ValueType>(idx, end_row) > 0) {
+                        if (move > max_array_size_as_field) {
+                            return Status::InvalidArgument("Array size exceeds the limit {}",
+                                                           max_array_size_as_field);
+                        }
                         nested_column.push_back(idx);
                         dest_nested_null_map.push_back(0);
                         offset++;
+                        move++;
                         idx = doris::vectorized::date_time_add<
                                 UNIT::value, DateV2Value<DateTimeV2ValueType>,
                                 DateV2Value<DateTimeV2ValueType>, DateTimeV2>(idx, step_row,
@@ -233,6 +247,7 @@ private:
}
}
}
+ return Status::OK();
}
};
diff --git a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
index 5957ced51af..ef3813d6deb 100644
--- a/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
+++ b/regression-test/suites/nereids_function_p0/scalar_function/Array.groovy
@@ -623,6 +623,11 @@ suite("nereids_scalar_fn_Array") {
     order_qt_sql_array_range_two_param_notnull "select array_range(kint, 1000) from fn_test_not_nullable order by id"
     order_qt_sql_array_range_three_param "select array_range(kint, 10000, ktint) from fn_test order by id"
     order_qt_sql_array_range_three_param_notnull "select array_range(kint, 10000, ktint) from fn_test_not_nullable order by id"
+    // make a large size of array element, expect to throw error
+    test {
+        sql "select array_range(kint, 1000000000) from fn_test"
+        exception ('Array size exceeds the limit 1000000')
+    }

     // array_remove
     order_qt_sql_array_remove_Double "select array_remove(kadbl, kdbl) from fn_test"
@@ -1276,6 +1281,30 @@ suite("nereids_scalar_fn_Array") {
     qt_sequence_datetime_hour """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint-3 hour), interval kint hour) from fn_test order by kdtmv2s1;"""
     qt_sequence_datetime_minute """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1 minute), interval kint minute) from fn_test order by kdtmv2s1;"""
     qt_sequence_datetime_second """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint second), interval kint-1 second) from fn_test order by kdtmv2s1;"""
+    // make large error size
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1000 year), interval kint hour)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
+
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+10000 month), interval kint hour)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }
+
+    test {
+        sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1000001 day), interval kint day)) from fn_test order by kdtmv2s1;"
+        check{result, exception, startTime, endTime ->
+            assertTrue(exception != null)
+            logger.info(exception.message)
+        }
+    }

     // with array empty
     qt_array_empty_fe """select array()"""
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index ac12b1ffccb..96bca3eb4ff 100644
--- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -421,4 +421,26 @@ suite("test_array_functions") {
qt_const_select "select sequence(cast('2022-35-38 12:00:10' as
datetimev2(0)), cast('2022-05-18 22:00:30' as datetimev2(0))); "
qt_const_select "select sequence(1, 10, 0); "
qt_const_select "select sequence(cast('2022-05-15 12:00:00' as
datetimev2(0)), cast('2022-05-17 12:00:00' as datetimev2(0)), interval 0 day); "
+ // test large size of array
+ test {
+ sql """ select sequence(cast('2022-05-01 12:00:00' as datetimev2(0)),
cast('2022-05-17 12:00:00' as datetimev2(0)), interval 10000000000 week); """
+ check{result, exception, startTime, endTime ->
+ assertTrue(exception != null)
+ logger.info(exception.message)
+ }
+ }
+ test {
+ sql """ select sequence(1, 10000000000); """
+ check{result, exception, startTime, endTime ->
+ assertTrue(exception != null)
+ logger.info(exception.message)
+ }
+ }
+ test {
+ sql """ select sequence(1, 10000000000, 2); """
+ check{result, exception, startTime, endTime ->
+ assertTrue(exception != null)
+ logger.info(exception.message)
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]