This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fb9363f04210e6a082da236ac5bce2d25ce75bb5
Author: Jerry Hu <[email protected]>
AuthorDate: Thu May 30 10:33:13 2024 +0800

    [fix](set) incorrect result of set operator (#35607)
    
    If there are duplicated expressions in the select list, the result will
    be incorrect.
    
    ## Proposed changes
    
    Issue Number: close #28438
    
    <!--Describe your changes.-->
---
 be/src/pipeline/exec/set_sink_operator.cpp                   |  2 +-
 be/src/pipeline/exec/set_source_operator.cpp                 |  4 ++--
 be/src/pipeline/pipeline_x/dependency.h                      |  4 ++--
 be/src/vec/exec/vset_operation_node.cpp                      |  6 +++---
 be/src/vec/exec/vset_operation_node.h                        |  4 ++--
 regression-test/data/query_p0/operator/test_set_operator.out | 12 ++++++++++++
 .../suites/query_p0/operator/test_set_operator.groovy        |  8 ++++++++
 7 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/be/src/pipeline/exec/set_sink_operator.cpp 
b/be/src/pipeline/exec/set_sink_operator.cpp
index b5774276fdf..aceeac25596 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -148,7 +148,7 @@ Status 
SetSinkOperatorX<is_intersect>::_extract_build_column(
         const auto* column = block.get_by_position(result_col_id).column.get();
         raw_ptrs[i] = column;
         DCHECK_GE(result_col_id, 0);
-        local_state._shared_state->build_col_idx.insert({result_col_id, i});
+        local_state._shared_state->build_col_idx.insert({i, result_col_id});
     }
     return Status::OK();
 }
diff --git a/be/src/pipeline/exec/set_source_operator.cpp 
b/be/src/pipeline/exec/set_source_operator.cpp
index 61fd428b492..0b3ec80211d 100644
--- a/be/src/pipeline/exec/set_source_operator.cpp
+++ b/be/src/pipeline/exec/set_source_operator.cpp
@@ -176,8 +176,8 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns(
 
     auto it = value.begin();
     for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) {
-        auto& column = *build_block.get_by_position(idx->first).column;
-        local_state._mutable_cols[idx->second]->insert_from(column, 
it->row_num);
+        auto& column = *build_block.get_by_position(idx->second).column;
+        local_state._mutable_cols[idx->first]->insert_from(column, 
it->row_num);
     }
     block_size++;
 }
diff --git a/be/src/pipeline/pipeline_x/dependency.h 
b/be/src/pipeline/pipeline_x/dependency.h
index 1d14c66a5bc..72763d03c8f 100644
--- a/be/src/pipeline/pipeline_x/dependency.h
+++ b/be/src/pipeline/pipeline_x/dependency.h
@@ -634,8 +634,8 @@ public:
     vectorized::Block build_block; // build to source
     //record element size in hashtable
     int64_t valid_element_in_hash_tbl = 0;
-    //first:column_id, could point to origin column or cast column
-    //second:idx mapped to column types
+    //first: idx mapped to column types
+    //second: column_id, could point to origin column or cast column
     std::unordered_map<int, int> build_col_idx;
 
     //// shared static states (shared, decided in prepare/open...)
diff --git a/be/src/vec/exec/vset_operation_node.cpp 
b/be/src/vec/exec/vset_operation_node.cpp
index 209e755636e..294ac58482a 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -335,8 +335,8 @@ void 
VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va
                                                          int& block_size) {
     auto it = value.begin();
     for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); 
++idx) {
-        const auto& column = *_build_block.get_by_position(idx->first).column;
-        _mutable_cols[idx->second]->insert_from(column, it->row_num);
+        const auto& column = *_build_block.get_by_position(idx->second).column;
+        _mutable_cols[idx->first]->insert_from(column, it->row_num);
     }
     block_size++;
 }
@@ -426,7 +426,7 @@ Status 
VSetOperationNode<is_intersect>::extract_build_column(Block& block,
         const auto* column = block.get_by_position(result_col_id).column.get();
         raw_ptrs[i] = column;
         DCHECK_GE(result_col_id, 0);
-        _build_col_idx.insert({result_col_id, i});
+        _build_col_idx.insert({i, result_col_id});
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exec/vset_operation_node.h 
b/be/src/vec/exec/vset_operation_node.h
index ce5a8eb1dbc..f14d1515c5a 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -112,8 +112,8 @@ private:
     std::vector<VExprContextSPtrs> _child_expr_lists;
     //record build column type
     DataTypes _left_table_data_types;
-    //first:column_id, could point to origin column or cast column
-    //second:idx mapped to column types
+    //first: idx mapped to column types
+    //second: column_id, could point to origin column or cast column
     std::unordered_map<int, int> _build_col_idx;
     //record insert column id during probe
     std::vector<uint16_t> _probe_column_inserted_id;
diff --git a/regression-test/data/query_p0/operator/test_set_operator.out 
b/regression-test/data/query_p0/operator/test_set_operator.out
index 1d8bc5ef93e..48eb4a0c9ba 100644
--- a/regression-test/data/query_p0/operator/test_set_operator.out
+++ b/regression-test/data/query_p0/operator/test_set_operator.out
@@ -13,3 +13,15 @@
 9
 9
 
+-- !select_minus --
+3      3
+4      4
+5      5
+7      7
+
+-- !select_except --
+3      3
+4      4
+5      5
+7      7
+
diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy 
b/regression-test/suites/query_p0/operator/test_set_operator.groovy
index 1bc9cc29e4c..7d6219585e4 100644
--- a/regression-test/suites/query_p0/operator/test_set_operator.groovy
+++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy
@@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") {
             t3 
             on t2.col1=t3.col1;
     """
+
+    order_qt_select_minus """
+        select col1, col1 from t1 minus select col1, col1 from t2;
+    """
+
+    order_qt_select_except """
+        select col1, col1 from t1 except select col1, col1 from t2;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to