This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new aaae1497cd [Refactor](function) opt the exec of function with null
column (#16256)
aaae1497cd is described below
commit aaae1497cdc04fc4fb0afba40c01c24e51ff90e8
Author: HappenLee <[email protected]>
AuthorDate: Wed Feb 1 15:56:31 2023 +0800
[Refactor](function) opt the exec of function with null column (#16256)
---
be/src/vec/exprs/vectorized_fn_call.cpp | 2 +
be/src/vec/functions/function.cpp | 14 ++--
be/src/vec/functions/function_cast.h | 11 ++-
be/src/vec/functions/function_helpers.cpp | 123 ++++++++++++++----------------
be/src/vec/functions/function_helpers.h | 26 +++----
5 files changed, 86 insertions(+), 90 deletions(-)
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp
b/be/src/vec/exprs/vectorized_fn_call.cpp
index d8f614c570..021eac15e9 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -41,6 +41,8 @@ doris::Status VectorizedFnCall::prepare(doris::RuntimeState*
state,
argument_template.reserve(_children.size());
std::vector<std::string_view> child_expr_name;
for (auto child : _children) {
+ // TODO: rethink whether we really need to create a column here; maybe a nullptr
+ // is enough just to get the function
auto column = child->data_type()->create_column();
argument_template.emplace_back(std::move(column), child->data_type(),
child->expr_name());
child_expr_name.emplace_back(child->expr_name());
diff --git a/be/src/vec/functions/function.cpp
b/be/src/vec/functions/function.cpp
index 41f3141c06..662a2a58af 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -217,11 +217,12 @@ Status
PreparedFunctionImpl::default_implementation_for_nulls(
}
if (null_presence.has_nullable) {
- Block temporary_block = create_block_with_nested_columns(block, args,
result);
+ auto [temporary_block, new_args, new_result] =
+ create_block_with_nested_columns(block, args, result);
RETURN_IF_ERROR(execute_without_low_cardinality_columns(
- context, temporary_block, args, result,
temporary_block.rows(), dry_run));
+ context, temporary_block, new_args, new_result,
temporary_block.rows(), dry_run));
block.get_by_position(result).column =
-
wrap_in_nullable(temporary_block.get_by_position(result).column, block, args,
+
wrap_in_nullable(temporary_block.get_by_position(new_result).column, block,
args,
result, input_rows_count);
*executed = true;
return Status::OK();
@@ -295,10 +296,9 @@ DataTypePtr
FunctionBuilderImpl::get_return_type_without_low_cardinality(
}
if (null_presence.has_nullable) {
ColumnNumbers numbers(arguments.size());
- for (size_t i = 0; i < arguments.size(); i++) {
- numbers[i] = i;
- }
- Block nested_block =
create_block_with_nested_columns(Block(arguments), numbers);
+ std::iota(numbers.begin(), numbers.end(), 0);
+ auto [nested_block, _] =
+ create_block_with_nested_columns(Block(arguments),
numbers, false);
auto return_type = get_return_type_impl(
ColumnsWithTypeAndName(nested_block.begin(),
nested_block.end()));
return make_nullable(return_type);
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index e3baaecdd2..a6817134ea 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1592,7 +1592,9 @@ private:
Block tmp_block;
size_t tmp_res_index = 0;
if (source_is_nullable) {
- tmp_block =
create_block_with_nested_columns_only_args(block, arguments);
+ auto [t_block, tmp_args] =
+ create_block_with_nested_columns(block, arguments,
true);
+ tmp_block = std::move(t_block);
tmp_res_index = tmp_block.columns();
tmp_block.insert({nullptr, nested_type, ""});
@@ -1624,7 +1626,8 @@ private:
return [wrapper, skip_not_null_check](FunctionContext* context,
Block& block,
const ColumnNumbers&
arguments,
const size_t result, size_t
input_rows_count) {
- Block tmp_block = create_block_with_nested_columns(block,
arguments, result);
+ auto [tmp_block, tmp_args, tmp_res] =
+ create_block_with_nested_columns(block, arguments,
result);
/// Check that all values are not-NULL.
/// Check can be skipped in case if LowCardinality dictionary
is transformed.
@@ -1640,8 +1643,8 @@ private:
}
}
- RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, result,
input_rows_count));
- block.get_by_position(result).column =
tmp_block.get_by_position(result).column;
+ RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res,
input_rows_count));
+ block.get_by_position(result).column =
tmp_block.get_by_position(tmp_res).column;
return Status::OK();
};
} else {
diff --git a/be/src/vec/functions/function_helpers.cpp
b/be/src/vec/functions/function_helpers.cpp
index fcfdd4b3a2..c77f3c5ab7 100644
--- a/be/src/vec/functions/function_helpers.cpp
+++ b/be/src/vec/functions/function_helpers.cpp
@@ -20,88 +20,83 @@
#include "vec/functions/function_helpers.h"
+#include "common/consts.h"
#include "vec/columns/column_nullable.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/functions/function.h"
namespace doris::vectorized {
-Block create_block_with_nested_columns_only_args(const Block& block, const
ColumnNumbers& args) {
- std::set<size_t> args_set(args.begin(), args.end());
+std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block&
block,
+ const
ColumnNumbers& args,
+ const bool
need_check_same) {
Block res;
+ ColumnNumbers res_args(args.size());
+
+ // Build the temp block from the args columns only; if args[i] == args[j]
+ // (and need_check_same is set), keep just one copy and reuse its position
+ for (size_t i = 0; i < args.size(); ++i) {
+ bool is_in_res = false;
+ size_t pre_loc = 0;
+
+ if (need_check_same) {
+ for (int j = 0; j < i; ++j) {
+ if (args[j] == args[i]) {
+ is_in_res = true;
+ pre_loc = res_args[j];
+ break;
+ }
+ }
+ }
- for (auto i : args_set) {
- const auto& col = block.get_by_position(i);
-
- if (col.type->is_nullable()) {
- const DataTypePtr& nested_type =
- static_cast<const
DataTypeNullable&>(*col.type).get_nested_type();
-
- if (!col.column) {
- res.insert({nullptr, nested_type, col.name});
- } else if (auto* nullable =
check_and_get_column<ColumnNullable>(*col.column)) {
- const auto& nested_col = nullable->get_nested_column_ptr();
- res.insert({nested_col, nested_type, col.name});
- } else if (auto* const_column =
check_and_get_column<ColumnConst>(*col.column)) {
- const auto& nested_col =
-
check_and_get_column<ColumnNullable>(const_column->get_data_column())
- ->get_nested_column_ptr();
- res.insert({ColumnConst::create(nested_col,
col.column->size()), nested_type,
- col.name});
+ if (!is_in_res) {
+ const auto& col = block.get_by_position(args[i]);
+ if (col.type->is_nullable()) {
+ const DataTypePtr& nested_type =
+ static_cast<const
DataTypeNullable&>(*col.type).get_nested_type();
+
+ if (!col.column) {
+ res.insert({nullptr, nested_type, col.name});
+ } else if (auto* nullable =
check_and_get_column<ColumnNullable>(*col.column)) {
+ const auto& nested_col = nullable->get_nested_column_ptr();
+ res.insert({nested_col, nested_type, col.name});
+ } else if (auto* const_column =
check_and_get_column<ColumnConst>(*col.column)) {
+ const auto& nested_col =
+
check_and_get_column<ColumnNullable>(const_column->get_data_column())
+ ->get_nested_column_ptr();
+ res.insert({ColumnConst::create(nested_col,
col.column->size()), nested_type,
+ col.name});
+ } else {
+ LOG(FATAL) << "Illegal column for DataTypeNullable";
+ }
} else {
- LOG(FATAL) << "Illegal column for DataTypeNullable";
+ res.insert(col);
}
+
+ res_args[i] = res.columns() - 1;
} else {
- res.insert(col);
+ res_args[i] = pre_loc;
}
}
- return res;
-}
-
-static Block create_block_with_nested_columns_impl(const Block& block,
- const
std::unordered_set<size_t>& args) {
- Block res;
- size_t columns = block.columns();
-
- for (size_t i = 0; i < columns; ++i) {
- const auto& col = block.get_by_position(i);
-
- if (args.count(i) && col.type->is_nullable()) {
- const DataTypePtr& nested_type =
- static_cast<const
DataTypeNullable&>(*col.type).get_nested_type();
-
- if (!col.column) {
- res.insert({nullptr, nested_type, col.name});
- } else if (auto* nullable =
check_and_get_column<ColumnNullable>(*col.column)) {
- const auto& nested_col = nullable->get_nested_column_ptr();
- res.insert({nested_col, nested_type, col.name});
- } else if (auto* const_column =
check_and_get_column<ColumnConst>(*col.column)) {
- const auto& nested_col =
-
check_and_get_column<ColumnNullable>(const_column->get_data_column())
- ->get_nested_column_ptr();
- res.insert({ColumnConst::create(nested_col,
col.column->size()), nested_type,
- col.name});
- } else {
- LOG(FATAL) << "Illegal column for DataTypeNullable";
- }
- } else
- res.insert(col);
+ // TODO: only support match function, rethink the logic
+ for (const auto& ctn : block) {
+ if (ctn.name.size() > BeConsts::BLOCK_TEMP_COLUMN_PREFIX.size() &&
+ starts_with(ctn.name, BeConsts::BLOCK_TEMP_COLUMN_PREFIX)) {
+ res.insert(ctn);
+ }
}
- return res;
-}
-
-Block create_block_with_nested_columns(const Block& block, const
ColumnNumbers& args) {
- std::unordered_set<size_t> args_set(args.begin(), args.end());
- return create_block_with_nested_columns_impl(block, args_set);
+ return {res, res_args};
}
-Block create_block_with_nested_columns(const Block& block, const
ColumnNumbers& args,
- size_t result) {
- std::unordered_set<size_t> args_set(args.begin(), args.end());
- args_set.insert(result);
- return create_block_with_nested_columns_impl(block, args_set);
+std::tuple<Block, ColumnNumbers, size_t>
create_block_with_nested_columns(const Block& block,
+
const ColumnNumbers& args,
+
size_t result) {
+ auto [res, res_args] = create_block_with_nested_columns(block, args, true);
+ // insert result column in temp block
+ res.insert(block.get_by_position(result));
+ return {res, res_args, res.columns() - 1};
}
void validate_argument_type(const IFunction& func, const DataTypes& arguments,
diff --git a/be/src/vec/functions/function_helpers.h
b/be/src/vec/functions/function_helpers.h
index 777bd415dc..474ac435bf 100644
--- a/be/src/vec/functions/function_helpers.h
+++ b/be/src/vec/functions/function_helpers.h
@@ -86,21 +86,17 @@ inline std::enable_if_t<IsDecimalNumber<T>, Field>
to_field(const T& x, UInt32 s
Columns convert_const_tuple_to_constant_elements(const ColumnConst& column);
-/// Returns the copy of a given block in which each column specified in
-/// the "arguments" parameter is replaced with its respective nested
-/// column if it is nullable.
-Block create_block_with_nested_columns(const Block& block, const
ColumnNumbers& args);
-
-/// Similar function as above. Additionally transform the result type if
needed.
-Block create_block_with_nested_columns(const Block& block, const
ColumnNumbers& args,
- size_t result);
-
-/// Returns the copy of a given block in only args column specified in
-/// the "arguments" parameter is replaced with its respective nested
-/// column if it is nullable.
-/// TODO: the old funciton `create_block_with_nested_columns` have performance
problem, replace all
-/// by the function and delete old one.
-Block create_block_with_nested_columns_only_args(const Block& block, const
ColumnNumbers& args);
+/// Returns a temporary block built from the columns listed in the "arguments"
+/// parameter, plus the remapped argument positions in the same order as args.
+/// Each nullable argument column is replaced with its respective nested column.
+std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block&
block,
+ const
ColumnNumbers& args,
+ const bool
need_check_same);
+
+// Same as above, and additionally returns the position of the result column in the temporary block as the last tuple element
+std::tuple<Block, ColumnNumbers, size_t>
create_block_with_nested_columns(const Block& block,
+
const ColumnNumbers& args,
+
size_t result);
/// Checks argument type at specified index with predicate.
/// throws if there is no argument at specified index or if predicate returns
false.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]