This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1244eed1cd [Opt](exec) opt the dispose nullable column logic (#17192)
1244eed1cd is described below

commit 1244eed1cd88bc49e915039e8a559ab92353ebd1
Author: HappenLee <[email protected]>
AuthorDate: Wed Mar 1 23:25:40 2023 +0800

    [Opt](exec) opt the dispose nullable column logic (#17192)
---
 be/src/vec/core/block.cpp                          | 36 +++++++++++--------
 be/src/vec/core/block.h                            |  6 ++--
 .../exec/format/parquet/vparquet_group_reader.cpp  |  4 +--
 be/src/vec/exec/join/vnested_loop_join_node.cpp    |  4 +--
 be/src/vec/functions/function.cpp                  | 40 ++++++++++++++--------
 5 files changed, 54 insertions(+), 36 deletions(-)

diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index ec84a19b7b..4ac86f213a 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -177,15 +177,26 @@ void Block::erase(const std::set<size_t>& positions) {
     }
 }
 
-void Block::erase(size_t position) {
-    if (data.empty()) {
-        LOG(FATAL) << "Block is empty";
+void Block::erase_tail(size_t start) {
+    DCHECK(start <= data.size()) << fmt::format(
+            "Position out of bound in Block::erase(), max position = {}", data.size());
+    data.erase(data.begin() + start, data.end());
+    for (auto it = index_by_name.begin(); it != index_by_name.end();) {
+        if (it->second >= start) {
+            index_by_name.erase(it++);
+        } else {
+            ++it;
+        }
     }
-
-    if (position >= data.size()) {
-        LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}",
-                                  data.size() - 1);
+    if (start < row_same_bit.size()) {
+        row_same_bit.erase(row_same_bit.begin() + start, row_same_bit.end());
     }
+}
+
+void Block::erase(size_t position) {
+    DCHECK(!data.empty()) << "Block is empty";
+    DCHECK(position < data.size()) << fmt::format(
+            "Position out of bound in Block::erase(), max position = {}", data.size() - 1);
 
     erase_impl(position);
 }
@@ -700,21 +711,16 @@ void Block::append_block_by_selector(MutableBlock* dst, const IColumn::Selector&
 
 Status Block::filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter,
                            int filter_column_id, int column_to_keep) {
-    ColumnPtr filter_column = block->get_by_position(filter_column_id).column;
+    const auto& filter_column = block->get_by_position(filter_column_id).column;
     if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
-        ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
+        const auto& nested_column = nullable_column->get_nested_column_ptr();
 
         MutableColumnPtr mutable_holder =
                 nested_column->use_count() == 1
                         ? nested_column->assume_mutable()
                         : nested_column->clone_resized(nested_column->size());
 
-        ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get());
-        if (!concrete_column) {
-            return Status::InvalidArgument(
-                    "Illegal type {} of column for filter. Must be UInt8 or Nullable(UInt8).",
-                    filter_column->get_name());
-        }
+        ColumnUInt8* concrete_column = assert_cast<ColumnUInt8*>(mutable_holder.get());
         auto* __restrict null_map = nullable_column->get_null_map_data().data();
         IColumn::Filter& filter = concrete_column->get_data();
         auto* __restrict filter_data = filter.data();
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index a57ca2c543..ba1809f19a 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -100,6 +100,8 @@ public:
     void insert_unique(ColumnWithTypeAndName&& elem);
     /// remove the column at the specified position
     void erase(size_t position);
+    /// remove the column at the [start, end)
+    void erase_tail(size_t start);
     /// remove the columns at the specified positions
     void erase(const std::set<size_t>& positions);
     /// remove the column with the specified name
@@ -290,9 +292,7 @@ public:
     static Status filter_block(Block* block, int filter_column_id, int column_to_keep);
 
     static void erase_useless_column(Block* block, int column_to_keep) {
-        for (int i = block->columns() - 1; i >= column_to_keep; --i) {
-            block->erase(i);
-        }
+        block->erase_tail(column_to_keep);
     }
 
     // serialize block to PBlock
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index b5a3bf37b6..71f77f3735 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -256,7 +256,7 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
             // generated from next batch, so the filter column is removed ahead.
             DCHECK_EQ(block->rows(), 0);
         } else {
-            ColumnPtr& filter_column = block->get_by_position(filter_column_id).column;
+            const auto& filter_column = block->get_by_position(filter_column_id).column;
             RETURN_IF_ERROR(_filter_block(block, filter_column, origin_column_num,
                                           _lazy_read_ctx.all_predicate_col_ids));
         }
@@ -496,7 +496,7 @@ Status RowGroupReader::_build_pos_delete_filter(size_t read_rows) {
 Status RowGroupReader::_filter_block(Block* block, const ColumnPtr& filter_column,
                                      int column_to_keep, std::vector<uint32_t> columns_to_filter) {
     if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
-        const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
+        const auto& nested_column = nullable_column->get_nested_column_ptr();
 
         MutableColumnPtr mutable_holder =
                 nested_column->use_count() == 1
diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp
index 8ba770c106..2555cea209 100644
--- a/be/src/vec/exec/join/vnested_loop_join_node.cpp
+++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp
@@ -525,9 +525,9 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags(Block* block,
         DCHECK((*_vjoin_conjunct_ptr) != nullptr);
         int result_column_id = -1;
         RETURN_IF_ERROR((*_vjoin_conjunct_ptr)->execute(block, &result_column_id));
-        ColumnPtr filter_column = block->get_by_position(result_column_id).column;
+        const auto& filter_column = block->get_by_position(result_column_id).column;
         if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
-            ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
+            const auto& nested_column = nullable_column->get_nested_column_ptr();
 
             MutableColumnPtr mutable_holder =
                     nested_column->use_count() == 1
diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp
index 662a2a58af..e7c6871690 100644
--- a/be/src/vec/functions/function.cpp
+++ b/be/src/vec/functions/function.cpp
@@ -37,35 +37,40 @@ namespace doris::vectorized {
 ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
                            size_t result, size_t input_rows_count) {
     ColumnPtr result_null_map_column;
-
     /// If result is already nullable.
     ColumnPtr src_not_nullable = src;
+    MutableColumnPtr mutable_result_null_map_column;
 
-    if (src->only_null())
-        return src;
-    else if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
+    if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
         src_not_nullable = nullable->get_nested_column_ptr();
         result_null_map_column = nullable->get_null_map_column_ptr();
     }
 
     for (const auto& arg : args) {
         const ColumnWithTypeAndName& elem = block.get_by_position(arg);
-        if (!elem.type->is_nullable()) continue;
+        if (!elem.type->is_nullable()) {
+            continue;
+        }
 
+        bool is_const = is_column_const(*elem.column);
         /// Const Nullable that are NULL.
-        if (elem.column->only_null())
+        if (is_const && assert_cast<const ColumnConst*>(elem.column.get())->only_null()) {
             return block.get_by_position(result).type->create_column_const(input_rows_count,
                                                                            Null());
+        }
+        if (is_const) {
+            continue;
+        }
 
-        if (is_column_const(*elem.column)) continue;
-
-        if (auto* nullable = check_and_get_column<ColumnNullable>(*elem.column)) {
+        if (auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get())) {
             const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr();
             if (!result_null_map_column) {
-                result_null_map_column = null_map_column->clone_resized(null_map_column->size());
+                result_null_map_column = null_map_column->clone_resized(input_rows_count);
             } else {
-                MutableColumnPtr mutable_result_null_map_column =
-                        (*std::move(result_null_map_column)).assume_mutable();
+                if (!mutable_result_null_map_column) {
+                    mutable_result_null_map_column =
+                            (*std::move(result_null_map_column)).assume_mutable();
+                }
 
                 NullMap& result_null_map =
                         assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data();
@@ -73,12 +78,19 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum
                         assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
 
                 VectorizedUtils::update_null_map(result_null_map, src_null_map);
-                result_null_map_column = std::move(mutable_result_null_map_column);
             }
         }
     }
 
-    if (!result_null_map_column) return make_nullable(src);
+    if (!result_null_map_column) {
+        if (is_column_const(*src)) {
+            return ColumnConst::create(
+                    make_nullable(assert_cast<const ColumnConst&>(*src).get_data_column_ptr(),
+                                  false),
+                    input_rows_count);
+        }
+        return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0));
+    }
 
     return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(),
                                   result_null_map_column);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to