This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2438d784e80 [Exec](performance) speed up array element at performance
(#56880)
2438d784e80 is described below
commit 2438d784e80e0dd539d9c1fe962af889be8d42a1
Author: HappenLee <[email protected]>
AuthorDate: Tue Mar 31 15:23:37 2026 +0800
[Exec](performance) speed up array element at performance (#56880)
before
```
MySQL [operator]> select sum([1,2][UserID % 2 +1]) from array_index_table;
+---------------------------+
| sum([1,2][UserID % 2 +1]) |
+---------------------------+
| 150000000 |
+---------------------------+
1 row in set (4.231 sec)
```
after
```
MySQL [operator]> select sum([1,2][UserID % 2 +1]) from array_index_table;
+---------------------------+
| sum([1,2][UserID % 2 +1]) |
+---------------------------+
| 150000000 |
+---------------------------+
1 row in set (3.851 sec)
```
---
.../exprs/function/array/function_array_element.h | 125 ++++++++++++---------
1 file changed, 69 insertions(+), 56 deletions(-)
diff --git a/be/src/exprs/function/array/function_array_element.h
b/be/src/exprs/function/array/function_array_element.h
index 6cc26f4e669..8449019e2a5 100644
--- a/be/src/exprs/function/array/function_array_element.h
+++ b/be/src/exprs/function/array/function_array_element.h
@@ -64,8 +64,7 @@ namespace doris {
class FunctionArrayElement : public IFunction {
public:
- /// The count of items in the map may exceed 128(Int8).
- using MapIndiceDataType = DataTypeInt16;
+ using MapIndiceDataType = DataTypeInt64;
static constexpr auto name = "element_at";
static FunctionPtr create() { return
std::make_shared<FunctionArrayElement>(); }
@@ -171,24 +170,25 @@ private:
return matched_indices;
}
- template <typename ColumnType>
+ template <typename ColumnType, typename IndexColumnType>
ColumnPtr _execute_number(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
const auto& nested_data = reinterpret_cast<const
ColumnType&>(nested_column).get_data();
+ const auto& index_data = assert_cast<const
IndexColumnType&>(indices).get_data();
auto dst_column = nested_column.clone_empty();
auto& dst_data = reinterpret_cast<ColumnType&>(*dst_column).get_data();
dst_data.resize(offsets.size());
- // process
for (size_t row = 0; row < offsets.size(); ++row) {
- size_t off = offsets[row - 1];
+ size_t off = row == 0 ? 0 : offsets[row - 1];
size_t len = offsets[row] - off;
- auto index = indices.get_int(row);
- // array is nullable
+ size_t idx = index_check_const(row, is_const_index);
+ auto index =
+ (idx_null_map && idx_null_map[idx]) ? 0 :
static_cast<Int64>(index_data[idx]);
bool null_flag = bool(arr_null_map && arr_null_map[row]);
- // calc index in nested column
if (!null_flag && index > 0 && index <= len) {
index += off - 1;
} else if (!null_flag && index < 0 && -index <= len) {
@@ -196,30 +196,25 @@ private:
} else {
null_flag = true;
}
- // nested column nullable check
if (!null_flag && nested_null_map && nested_null_map[index]) {
null_flag = true;
}
- // actual data copy
- if (null_flag) {
- dst_null_map[row] = true;
- dst_data[row] = typename ColumnType::value_type();
- } else {
- DCHECK(index >= 0 && index < nested_data.size());
- dst_null_map[row] = false;
- dst_data[row] = nested_data[index];
- }
+ dst_null_map[row] = null_flag;
+ dst_data[row] = !null_flag ? nested_data[index] : typename
ColumnType::value_type();
}
return dst_column;
}
+ template <typename IndexColumnType>
ColumnPtr _execute_string(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
const auto& src_str_offs =
reinterpret_cast<const
ColumnString&>(nested_column).get_offsets();
const auto& src_str_chars =
reinterpret_cast<const
ColumnString&>(nested_column).get_chars();
+ const auto& index_data = assert_cast<const
IndexColumnType&>(indices).get_data();
// prepare return data
auto dst_column = ColumnString::create();
@@ -228,14 +223,13 @@ private:
auto& dst_str_chars = dst_column->get_chars();
dst_str_chars.reserve(src_str_chars.size());
- // process
for (size_t row = 0; row < offsets.size(); ++row) {
- size_t off = offsets[row - 1];
+ size_t off = row == 0 ? 0 : offsets[row - 1];
size_t len = offsets[row] - off;
- auto index = indices.get_int(row);
- // array is nullable
+ size_t idx = index_check_const(row, is_const_index);
+ auto index =
+ (idx_null_map && idx_null_map[idx]) ? 0 :
static_cast<Int64>(index_data[idx]);
bool null_flag = bool(arr_null_map && arr_null_map[row]);
- // calc index in nested column
if (!null_flag && index > 0 && index <= len) {
index += off - 1;
} else if (!null_flag && index < 0 && -index <= len) {
@@ -243,24 +237,21 @@ private:
} else {
null_flag = true;
}
- // nested column nullable check
if (!null_flag && nested_null_map && nested_null_map[index]) {
null_flag = true;
}
- // actual string copy
if (!null_flag) {
- DCHECK(index >= 0 && index < src_str_offs.size());
dst_null_map[row] = false;
auto element_size = src_str_offs[index] - src_str_offs[index -
1];
- dst_str_offs[row] = dst_str_offs[row - 1] + element_size;
+ dst_str_offs[row] = (row == 0 ? 0 : dst_str_offs[row - 1]) +
element_size;
auto src_string_pos = src_str_offs[index - 1];
- auto dst_string_pos = dst_str_offs[row - 1];
+ auto dst_string_pos = row == 0 ? 0 : dst_str_offs[row - 1];
dst_str_chars.resize(dst_string_pos + element_size);
memcpy(&dst_str_chars[dst_string_pos],
&src_str_chars[src_string_pos],
element_size);
} else {
dst_null_map[row] = true;
- dst_str_offs[row] = dst_str_offs[row - 1];
+ dst_str_offs[row] = row == 0 ? 0 : dst_str_offs[row - 1];
}
}
return dst_column;
@@ -290,21 +281,23 @@ private:
return _execute_nullable(args, input_rows_count, src_null_map,
dst_null_map);
}
+ template <typename IndexColumnType>
ColumnPtr _execute_common(const ColumnArray::Offsets64& offsets, const
IColumn& nested_column,
const UInt8* arr_null_map, const IColumn&
indices,
- const UInt8* nested_null_map, UInt8*
dst_null_map) const {
- // prepare return data
+ const UInt8* nested_null_map, UInt8*
dst_null_map,
+ const UInt8* idx_null_map, bool is_const_index)
const {
+ const auto& index_data = assert_cast<const
IndexColumnType&>(indices).get_data();
+
auto dst_column = nested_column.clone_empty();
dst_column->reserve(offsets.size());
- // process
for (size_t row = 0; row < offsets.size(); ++row) {
- size_t off = offsets[row - 1];
+ size_t off = row == 0 ? 0 : offsets[row - 1];
size_t len = offsets[row] - off;
- auto index = indices.get_int(row);
- // array is nullable
+ size_t idx = index_check_const(row, is_const_index);
+ auto index =
+ (idx_null_map && idx_null_map[idx]) ? 0 :
static_cast<Int64>(index_data[idx]);
bool null_flag = bool(arr_null_map && arr_null_map[row]);
- // calc index in nested column
if (!null_flag && index > 0 && index <= len) {
index += off - 1;
} else if (!null_flag && index < 0 && -index <= len) {
@@ -312,11 +305,9 @@ private:
} else {
null_flag = true;
}
- // nested column nullable check
if (!null_flag && nested_null_map && nested_null_map[index]) {
null_flag = true;
}
- // actual data copy
if (!null_flag) {
dst_null_map[row] = false;
dst_column->insert_from(nested_column, index);
@@ -350,25 +341,47 @@ private:
auto left_element_type = remove_nullable(
assert_cast<const
DataTypeArray&>(*remove_nullable(arguments[0].type))
.get_nested_type());
- // because we impl use_default_implementation_for_nulls
- // we should handle array index column by-self, and array index should
not be nullable.
- auto idx_col = remove_nullable(arguments[1].column);
- // we should dispatch branch according to data type rather than column
type
-
- auto call = [&](const auto& type) -> bool {
- using DispatchType = std::decay_t<decltype(type)>;
- res = _execute_number<typename DispatchType::ColumnType>(
- offsets, *nested_column, src_null_map, *idx_col,
nested_null_map, dst_null_map);
+ const UInt8* idx_null_map = nullptr;
+ auto idx_col_with_const = unpack_if_const(arguments[1].column);
+ if (idx_col_with_const.first->is_nullable()) {
+ const auto& idx_null_column =
+ reinterpret_cast<const
ColumnNullable&>(*idx_col_with_const.first);
+ idx_null_map =
idx_null_column.get_null_map_column().get_data().data();
+ }
+ auto idx_col_raw = remove_nullable(idx_col_with_const.first);
+ bool is_const_index = idx_col_with_const.second;
+
+ PrimitiveType idx_ptype =
remove_nullable(arguments[1].type)->get_primitive_type();
+
+ // Outer dispatch on index column type (Int8/Int16/Int32/Int64),
+ // inner dispatch on nested data column type.
+ auto idx_dispatch = [&](const auto& idx_type) -> bool {
+ using IdxDispatchType = std::decay_t<decltype(idx_type)>;
+ using IndexColumnType = typename IdxDispatchType::ColumnType;
+
+ auto data_call = [&](const auto& data_type) -> bool {
+ using DataDispatchType = std::decay_t<decltype(data_type)>;
+ res = _execute_number<typename DataDispatchType::ColumnType,
IndexColumnType>(
+ offsets, *nested_column, src_null_map, *idx_col_raw,
nested_null_map,
+ dst_null_map, idx_null_map, is_const_index);
+ return true;
+ };
+
+ if (is_string_type(left_element_type->get_primitive_type())) {
+ res = _execute_string<IndexColumnType>(offsets,
*nested_column, src_null_map,
+ *idx_col_raw,
nested_null_map, dst_null_map,
+ idx_null_map,
is_const_index);
+ } else if
(!dispatch_switch_scalar(left_element_type->get_primitive_type(),
+ data_call)) {
+ res = _execute_common<IndexColumnType>(offsets,
*nested_column, src_null_map,
+ *idx_col_raw,
nested_null_map, dst_null_map,
+ idx_null_map,
is_const_index);
+ }
return true;
};
- if (is_string_type(left_element_type->get_primitive_type())) {
- res = _execute_string(offsets, *nested_column, src_null_map,
*idx_col, nested_null_map,
- dst_null_map);
- } else if
(!dispatch_switch_scalar(left_element_type->get_primitive_type(), call)) {
- res = _execute_common(offsets, *nested_column, src_null_map,
*idx_col, nested_null_map,
- dst_null_map);
- }
+ bool dispatched = dispatch_switch_int(idx_ptype, idx_dispatch);
+ DCHECK(dispatched) << "Unsupported index column type for element_at: "
<< idx_ptype;
return res;
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]