This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 8c237e82a3c [Bug](exec) fix intersections/differences bug (#34675)
8c237e82a3c is described below

commit 8c237e82a3c7329dbc2a06380c7d9e3a1d112cd2
Author: HappenLee <[email protected]>
AuthorDate: Sat May 11 11:27:56 2024 +0800

    [Bug](exec) fix intersections/differences bug (#34675)
---
 be/src/pipeline/exec/set_sink_operator.cpp         | 27 ++++++-------
 be/src/pipeline/exec/set_source_operator.cpp       |  9 +----
 be/src/pipeline/pipeline_x/dependency.h            | 12 +++++-
 be/src/vec/exec/vset_operation_node.cpp            | 47 ++++++++--------------
 .../data/correctness_p0/test_set_operation.out     |  1 +
 .../correctness_p0/test_set_operation.groovy       |  2 +
 6 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/be/src/pipeline/exec/set_sink_operator.cpp 
b/be/src/pipeline/exec/set_sink_operator.cpp
index 2042e3eb1a1..b5774276fdf 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -140,19 +140,13 @@ Status 
SetSinkOperatorX<is_intersect>::_extract_build_column(
 
         block.get_by_position(result_col_id).column =
                 
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
-        const auto* column = block.get_by_position(result_col_id).column.get();
-
-        if (const auto* nullable = 
check_and_get_column<vectorized::ColumnNullable>(*column)) {
-            const auto& col_nested = nullable->get_nested_column();
-            if (local_state._shared_state->build_not_ignore_null[i]) {
-                raw_ptrs[i] = nullable;
-            } else {
-                raw_ptrs[i] = &col_nested;
-            }
-
-        } else {
-            raw_ptrs[i] = column;
+        if (local_state._shared_state->build_not_ignore_null[i]) {
+            block.get_by_position(result_col_id).column =
+                    make_nullable(block.get_by_position(result_col_id).column);
         }
+
+        const auto* column = block.get_by_position(result_col_id).column.get();
+        raw_ptrs[i] = column;
         DCHECK_GE(result_col_id, 0);
         local_state._shared_state->build_col_idx.insert({result_col_id, i});
     }
@@ -191,11 +185,14 @@ Status 
SetSinkLocalState<is_intersect>::open(RuntimeState* state) {
     auto& parent = _parent->cast<Parent>();
     DCHECK(parent._cur_child_id == 0);
     auto& child_exprs_lists = _shared_state->child_exprs_lists;
-
+    
_shared_state->build_not_ignore_null.resize(child_exprs_lists[parent._cur_child_id].size());
     _shared_state->hash_table_variants = 
std::make_unique<vectorized::SetHashTableVariants>();
 
-    for (const auto& ctx : child_exprs_lists[parent._cur_child_id]) {
-        
_shared_state->build_not_ignore_null.push_back(ctx->root()->is_nullable());
+    for (const auto& ctl : child_exprs_lists) {
+        for (int i = 0; i < ctl.size(); ++i) {
+            _shared_state->build_not_ignore_null[i] =
+                    _shared_state->build_not_ignore_null[i] || 
ctl[i]->root()->is_nullable();
+        }
     }
     _shared_state->hash_table_init();
     return Status::OK();
diff --git a/be/src/pipeline/exec/set_source_operator.cpp 
b/be/src/pipeline/exec/set_source_operator.cpp
index 88d38d325af..61fd428b492 100644
--- a/be/src/pipeline/exec/set_source_operator.cpp
+++ b/be/src/pipeline/exec/set_source_operator.cpp
@@ -177,14 +177,7 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns(
     auto it = value.begin();
     for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) {
         auto& column = *build_block.get_by_position(idx->first).column;
-        if (local_state._mutable_cols[idx->second]->is_nullable() ^ 
column.is_nullable()) {
-            DCHECK(local_state._mutable_cols[idx->second]->is_nullable());
-            
((vectorized::ColumnNullable*)(local_state._mutable_cols[idx->second].get()))
-                    ->insert_from_not_nullable(column, it->row_num);
-
-        } else {
-            local_state._mutable_cols[idx->second]->insert_from(column, 
it->row_num);
-        }
+        local_state._mutable_cols[idx->second]->insert_from(column, 
it->row_num);
     }
     block_size++;
 }
diff --git a/be/src/pipeline/pipeline_x/dependency.h 
b/be/src/pipeline/pipeline_x/dependency.h
index 19099b32f8a..693bde10f36 100644
--- a/be/src/pipeline/pipeline_x/dependency.h
+++ b/be/src/pipeline/pipeline_x/dependency.h
@@ -689,8 +689,18 @@ public:
             }
             return;
         }
+
+        // here need to change type to nullable, because some case eg:
+        // (select 0) intersect (select null) the build side hash table should 
not
+        // ignore null value.
+        std::vector<DataTypePtr> data_types;
+        for (const auto& ctx : child_exprs_lists[0]) {
+            data_types.emplace_back(build_not_ignore_null[0]
+                                            ? 
make_nullable(ctx->root()->data_type())
+                                            : ctx->root()->data_type());
+        }
         if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, 
RowRefListWithFlags>(
-                    *hash_table_variants, child_exprs_lists[0])) {
+                    *hash_table_variants, data_types)) {
             hash_table_variants->emplace<SetSerializedHashTableContext>();
         }
     }
diff --git a/be/src/vec/exec/vset_operation_node.cpp 
b/be/src/vec/exec/vset_operation_node.cpp
index c4802bee4cf..209e755636e 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -155,21 +155,20 @@ Status 
VSetOperationNode<is_intersect>::prepare(RuntimeState* state) {
     auto column_nums = _child_expr_lists[0].size();
     DCHECK_EQ(output_data_types.size(), column_nums)
             << output_data_types.size() << " " << column_nums;
-    // the nullable is not depend on child, it's should use _row_descriptor 
from FE plan
-    // some case all not nullable column from children, but maybe need output 
nullable.
-    vector<bool> nullable_flags(column_nums, false);
-    for (int i = 0; i < column_nums; ++i) {
-        nullable_flags[i] = output_data_types[i]->is_nullable();
-    }
-
     for (int i = 0; i < _child_expr_lists.size(); ++i) {
         RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, 
child(i)->row_desc()));
     }
     for (int i = 0; i < _child_expr_lists[0].size(); ++i) {
         const auto& ctx = _child_expr_lists[0][i];
-        _build_not_ignore_null.push_back(ctx->root()->is_nullable());
-        _left_table_data_types.push_back(nullable_flags[i] ? 
make_nullable(ctx->root()->data_type())
-                                                           : 
ctx->root()->data_type());
+        // the nullable is not depend on child, it's should use 
_row_descriptor from FE plan
+        // some case all not nullable column from children, but maybe need 
output nullable.
+        if (output_data_types[i]->is_nullable()) {
+            _build_not_ignore_null.push_back(true);
+            
_left_table_data_types.push_back(make_nullable(ctx->root()->data_type()));
+        } else {
+            _build_not_ignore_null.push_back(false);
+            _left_table_data_types.push_back(ctx->root()->data_type());
+        }
     }
     hash_table_init();
 
@@ -213,7 +212,7 @@ void VSetOperationNode<is_intersect>::hash_table_init() {
         return;
     }
     if (!try_get_hash_map_context_fixed<NormalHashMap, HashCRC32, 
RowRefListWithFlags>(
-                *_hash_table_variants, _child_expr_lists[0])) {
+                *_hash_table_variants, _left_table_data_types)) {
         _hash_table_variants->emplace<SetSerializedHashTableContext>();
     }
 }
@@ -337,14 +336,7 @@ void 
VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va
     auto it = value.begin();
     for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); 
++idx) {
         const auto& column = *_build_block.get_by_position(idx->first).column;
-        if (_mutable_cols[idx->second]->is_nullable() ^ column.is_nullable()) {
-            DCHECK(_mutable_cols[idx->second]->is_nullable());
-            ((ColumnNullable*)(_mutable_cols[idx->second].get()))
-                    ->insert_from_not_nullable(column, it->row_num);
-
-        } else {
-            _mutable_cols[idx->second]->insert_from(column, it->row_num);
-        }
+        _mutable_cols[idx->second]->insert_from(column, it->row_num);
     }
     block_size++;
 }
@@ -427,19 +419,12 @@ Status 
VSetOperationNode<is_intersect>::extract_build_column(Block& block,
 
         block.get_by_position(result_col_id).column =
                 
block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
-        const auto* column = block.get_by_position(result_col_id).column.get();
-
-        if (const auto* nullable = 
check_and_get_column<ColumnNullable>(*column)) {
-            const auto& col_nested = nullable->get_nested_column();
-            if (_build_not_ignore_null[i]) {
-                raw_ptrs[i] = nullable;
-            } else {
-                raw_ptrs[i] = &col_nested;
-            }
-
-        } else {
-            raw_ptrs[i] = column;
+        if (_build_not_ignore_null[i]) {
+            block.get_by_position(result_col_id).column =
+                    make_nullable(block.get_by_position(result_col_id).column);
         }
+        const auto* column = block.get_by_position(result_col_id).column.get();
+        raw_ptrs[i] = column;
         DCHECK_GE(result_col_id, 0);
         _build_col_idx.insert({result_col_id, i});
     }
diff --git a/regression-test/data/correctness_p0/test_set_operation.out 
b/regression-test/data/correctness_p0/test_set_operation.out
index f6d9f32d4cc..09fa8314065 100644
--- a/regression-test/data/correctness_p0/test_set_operation.out
+++ b/regression-test/data/correctness_p0/test_set_operation.out
@@ -10,3 +10,4 @@ bbbb
 aaaa
 bbbb
 
+-- !select1 --
diff --git a/regression-test/suites/correctness_p0/test_set_operation.groovy 
b/regression-test/suites/correctness_p0/test_set_operation.groovy
index e3e28a9e3a4..5ee6348a037 100644
--- a/regression-test/suites/correctness_p0/test_set_operation.groovy
+++ b/regression-test/suites/correctness_p0/test_set_operation.groovy
@@ -125,4 +125,6 @@ suite("test_set_operation") {
     """
     qt_select1  """  SELECT DISTINCT * FROM((SELECT sku_code FROM test_B) 
INTERSECT (SELECT sku_code FROM test_B) UNION (SELECT sku_code FROM test_A)) as 
t order by 1; """
 
+    qt_select1  """ (select 0) intersect (select null); """
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to