This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1244eed1cd [Opt](exec) opt the dispose nullable column logic (#17192)
1244eed1cd is described below
commit 1244eed1cd88bc49e915039e8a559ab92353ebd1
Author: HappenLee <[email protected]>
AuthorDate: Wed Mar 1 23:25:40 2023 +0800
[Opt](exec) opt the dispose nullable column logic (#17192)
---
be/src/vec/core/block.cpp | 36 +++++++++++--------
be/src/vec/core/block.h | 6 ++--
.../exec/format/parquet/vparquet_group_reader.cpp | 4 +--
be/src/vec/exec/join/vnested_loop_join_node.cpp | 4 +--
be/src/vec/functions/function.cpp | 40 ++++++++++++++--------
5 files changed, 54 insertions(+), 36 deletions(-)
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index ec84a19b7b..4ac86f213a 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -177,15 +177,26 @@ void Block::erase(const std::set<size_t>& positions) {
}
}
-void Block::erase(size_t position) {
- if (data.empty()) {
- LOG(FATAL) << "Block is empty";
+void Block::erase_tail(size_t start) {
+ DCHECK(start <= data.size()) << fmt::format(
+ "Position out of bound in Block::erase(), max position = {}",
data.size());
+ data.erase(data.begin() + start, data.end());
+ for (auto it = index_by_name.begin(); it != index_by_name.end();) {
+ if (it->second >= start) {
+ index_by_name.erase(it++);
+ } else {
+ ++it;
+ }
}
-
- if (position >= data.size()) {
- LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}",
- data.size() - 1);
+ if (start < row_same_bit.size()) {
+ row_same_bit.erase(row_same_bit.begin() + start, row_same_bit.end());
}
+}
+
+void Block::erase(size_t position) {
+ DCHECK(!data.empty()) << "Block is empty";
+ DCHECK(position < data.size()) << fmt::format(
+ "Position out of bound in Block::erase(), max position = {}",
data.size() - 1);
erase_impl(position);
}
@@ -700,21 +711,16 @@ void Block::append_block_by_selector(MutableBlock* dst,
const IColumn::Selector&
Status Block::filter_block(Block* block, const std::vector<uint32_t>&
columns_to_filter,
int filter_column_id, int column_to_keep) {
- ColumnPtr filter_column = block->get_by_position(filter_column_id).column;
+ const auto& filter_column =
block->get_by_position(filter_column_id).column;
if (auto* nullable_column =
check_and_get_column<ColumnNullable>(*filter_column)) {
- ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
+ const auto& nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1
? nested_column->assume_mutable()
: nested_column->clone_resized(nested_column->size());
- ColumnUInt8* concrete_column =
typeid_cast<ColumnUInt8*>(mutable_holder.get());
- if (!concrete_column) {
- return Status::InvalidArgument(
- "Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).",
- filter_column->get_name());
- }
+ ColumnUInt8* concrete_column =
assert_cast<ColumnUInt8*>(mutable_holder.get());
auto* __restrict null_map =
nullable_column->get_null_map_data().data();
IColumn::Filter& filter = concrete_column->get_data();
auto* __restrict filter_data = filter.data();
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index a57ca2c543..ba1809f19a 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -100,6 +100,8 @@ public:
void insert_unique(ColumnWithTypeAndName&& elem);
/// remove the column at the specified position
void erase(size_t position);
+ /// remove the column at the [start, end)
+ void erase_tail(size_t start);
/// remove the columns at the specified positions
void erase(const std::set<size_t>& positions);
/// remove the column with the specified name
@@ -290,9 +292,7 @@ public:
static Status filter_block(Block* block, int filter_column_id, int
column_to_keep);
static void erase_useless_column(Block* block, int column_to_keep) {
- for (int i = block->columns() - 1; i >= column_to_keep; --i) {
- block->erase(i);
- }
+ block->erase_tail(column_to_keep);
}
// serialize block to PBlock
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index b5a3bf37b6..71f77f3735 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -256,7 +256,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t
batch_size, size_t* re
// generated from next batch, so the filter column is removed
ahead.
DCHECK_EQ(block->rows(), 0);
} else {
- ColumnPtr& filter_column =
block->get_by_position(filter_column_id).column;
+ const auto& filter_column =
block->get_by_position(filter_column_id).column;
RETURN_IF_ERROR(_filter_block(block, filter_column,
origin_column_num,
_lazy_read_ctx.all_predicate_col_ids));
}
@@ -496,7 +496,7 @@ Status RowGroupReader::_build_pos_delete_filter(size_t
read_rows) {
Status RowGroupReader::_filter_block(Block* block, const ColumnPtr&
filter_column,
int column_to_keep, std::vector<uint32_t>
columns_to_filter) {
if (auto* nullable_column =
check_and_get_column<ColumnNullable>(*filter_column)) {
- const ColumnPtr& nested_column =
nullable_column->get_nested_column_ptr();
+ const auto& nested_column = nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1
diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp
index 8ba770c106..2555cea209 100644
--- a/be/src/vec/exec/join/vnested_loop_join_node.cpp
+++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp
@@ -525,9 +525,9 @@ Status
VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block,
DCHECK((*_vjoin_conjunct_ptr) != nullptr);
int result_column_id = -1;
RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block,
&result_column_id));
- ColumnPtr filter_column =
block->get_by_position(result_column_id).column;
+ const auto& filter_column =
block->get_by_position(result_column_id).column;
if (auto* nullable_column =
check_and_get_column<ColumnNullable>(*filter_column)) {
- ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
+ const auto& nested_column =
nullable_column->get_nested_column_ptr();
MutableColumnPtr mutable_holder =
nested_column->use_count() == 1
diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp
index 662a2a58af..e7c6871690 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -37,35 +37,40 @@ namespace doris::vectorized {
ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const
ColumnNumbers& args,
size_t result, size_t input_rows_count) {
ColumnPtr result_null_map_column;
-
/// If result is already nullable.
ColumnPtr src_not_nullable = src;
+ MutableColumnPtr mutable_result_null_map_column;
- if (src->only_null())
- return src;
- else if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
+ if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
src_not_nullable = nullable->get_nested_column_ptr();
result_null_map_column = nullable->get_null_map_column_ptr();
}
for (const auto& arg : args) {
const ColumnWithTypeAndName& elem = block.get_by_position(arg);
- if (!elem.type->is_nullable()) continue;
+ if (!elem.type->is_nullable()) {
+ continue;
+ }
+ bool is_const = is_column_const(*elem.column);
/// Const Nullable that are NULL.
- if (elem.column->only_null())
+ if (is_const && assert_cast<const
ColumnConst*>(elem.column.get())->only_null()) {
return
block.get_by_position(result).type->create_column_const(input_rows_count,
Null());
+ }
+ if (is_const) {
+ continue;
+ }
- if (is_column_const(*elem.column)) continue;
-
- if (auto* nullable =
check_and_get_column<ColumnNullable>(*elem.column)) {
+ if (auto* nullable = assert_cast<const
ColumnNullable*>(elem.column.get())) {
const ColumnPtr& null_map_column =
nullable->get_null_map_column_ptr();
if (!result_null_map_column) {
- result_null_map_column =
null_map_column->clone_resized(null_map_column->size());
+ result_null_map_column =
null_map_column->clone_resized(input_rows_count);
} else {
- MutableColumnPtr mutable_result_null_map_column =
- (*std::move(result_null_map_column)).assume_mutable();
+ if (!mutable_result_null_map_column) {
+ mutable_result_null_map_column =
+
(*std::move(result_null_map_column)).assume_mutable();
+ }
NullMap& result_null_map =
assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data();
@@ -73,12 +78,19 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const
Block& block, const Colum
assert_cast<const
ColumnUInt8&>(*null_map_column).get_data();
VectorizedUtils::update_null_map(result_null_map,
src_null_map);
- result_null_map_column =
std::move(mutable_result_null_map_column);
}
}
}
- if (!result_null_map_column) return make_nullable(src);
+ if (!result_null_map_column) {
+ if (is_column_const(*src)) {
+ return ColumnConst::create(
+ make_nullable(assert_cast<const
ColumnConst&>(*src).get_data_column_ptr(),
+ false),
+ input_rows_count);
+ }
+ return ColumnNullable::create(src,
ColumnUInt8::create(input_rows_count, 0));
+ }
return
ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(),
result_null_map_column);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]