This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ec6abb6b432 [fix](set) incorrect result of set operator (#35607)
ec6abb6b432 is described below
commit ec6abb6b4328268cd1b167d0c43b4a1ba6cc2efa
Author: Jerry Hu <[email protected]>
AuthorDate: Thu May 30 10:33:13 2024 +0800
[fix](set) incorrect result of set operator (#35607)
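If there are duplicated expressions in the select list of a set
operation (e.g. `select col1, col1 from t1 except select col1, col1
from t2`), the result will be incorrect. This commit fixes that by
keying the build-column map (`build_col_idx`) by the output column
index instead of by the build-block column id, so that every output
column keeps its own entry in the map.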
## Proposed changes
Issue Number: close #28438
<!--Describe your changes.-->
---
be/src/pipeline/dependency.h | 4 ++--
be/src/pipeline/exec/set_sink_operator.cpp | 2 +-
be/src/pipeline/exec/set_source_operator.cpp | 4 ++--
be/src/vec/exec/vset_operation_node.cpp | 6 +++---
be/src/vec/exec/vset_operation_node.h | 4 ++--
regression-test/data/query_p0/operator/test_set_operator.out | 12 ++++++++++++
.../suites/query_p0/operator/test_set_operator.groovy | 8 ++++++++
7 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h
index cdc0eec3933..d7084f85d5d 100644
--- a/be/src/pipeline/dependency.h
+++ b/be/src/pipeline/dependency.h
@@ -609,8 +609,8 @@ public:
vectorized::Block build_block; // build to source
//record element size in hashtable
int64_t valid_element_in_hash_tbl = 0;
- //first:column_id, could point to origin column or cast column
- //second:idx mapped to column types
+ //first: idx mapped to column types
+ //second: column_id, could point to origin column or cast column
std::unordered_map<int, int> build_col_idx;
//// shared static states (shared, decided in prepare/open...)
diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp
index be44d4dbf7d..ff6bdfed1a3 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -137,7 +137,7 @@ Status
SetSinkOperatorX<is_intersect>::_extract_build_column(
raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
DCHECK_GE(result_col_id, 0);
- local_state._shared_state->build_col_idx.insert({result_col_id, i});
+ local_state._shared_state->build_col_idx.insert({i, result_col_id});
}
return Status::OK();
}
diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp
index 0f118a7818e..0994350430b 100644
--- a/be/src/pipeline/exec/set_source_operator.cpp
+++ b/be/src/pipeline/exec/set_source_operator.cpp
@@ -151,8 +151,8 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns(
auto it = value.begin();
for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) {
- auto& column = *build_block.get_by_position(idx->first).column;
- local_state._mutable_cols[idx->second]->insert_from(column,
it->row_num);
+ auto& column = *build_block.get_by_position(idx->second).column;
+ local_state._mutable_cols[idx->first]->insert_from(column,
it->row_num);
}
block_size++;
}
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
index c207fb18f05..2b2573d83bc 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -331,8 +331,8 @@ void
VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va
int& block_size) {
auto it = value.begin();
for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end();
++idx) {
- const auto& column = *_build_block.get_by_position(idx->first).column;
- _mutable_cols[idx->second]->insert_from(column, it->row_num);
+ const auto& column = *_build_block.get_by_position(idx->second).column;
+ _mutable_cols[idx->first]->insert_from(column, it->row_num);
}
block_size++;
}
@@ -434,7 +434,7 @@ Status
VSetOperationNode<is_intersect>::extract_build_column(Block& block, Colum
}
raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
DCHECK_GE(result_col_id, 0);
- _build_col_idx.insert({result_col_id, i});
+ _build_col_idx.insert({i, result_col_id});
}
return Status::OK();
}
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 508f8073689..9f3ba8fba36 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -112,8 +112,8 @@ private:
std::vector<VExprContextSPtrs> _child_expr_lists;
//record build column type
DataTypes _left_table_data_types;
- //first:column_id, could point to origin column or cast column
- //second:idx mapped to column types
+ //first: idx mapped to column types
+ //second: column_id, could point to origin column or cast column
std::unordered_map<int, int> _build_col_idx;
//record insert column id during probe
std::vector<uint16_t> _probe_column_inserted_id;
diff --git a/regression-test/data/query_p0/operator/test_set_operator.out b/regression-test/data/query_p0/operator/test_set_operator.out
index 1d8bc5ef93e..48eb4a0c9ba 100644
--- a/regression-test/data/query_p0/operator/test_set_operator.out
+++ b/regression-test/data/query_p0/operator/test_set_operator.out
@@ -13,3 +13,15 @@
9
9
+-- !select_minus --
+3 3
+4 4
+5 5
+7 7
+
+-- !select_except --
+3 3
+4 4
+5 5
+7 7
+
diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy
index 1bc9cc29e4c..7d6219585e4 100644
--- a/regression-test/suites/query_p0/operator/test_set_operator.groovy
+++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy
@@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") {
t3
on t2.col1=t3.col1;
"""
+
+ order_qt_select_minus """
+ select col1, col1 from t1 minus select col1, col1 from t2;
+ """
+
+ order_qt_select_except """
+ select col1, col1 from t1 except select col1, col1 from t2;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]