This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new e967d41c249 branch-4.0: [bug](topn) fix partition topn data
distribution should be hash shuffle in two phase #56542 (#56612)
e967d41c249 is described below
commit e967d41c249227b8629da390845d990fc9dad103
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Sep 29 11:20:03 2025 +0800
branch-4.0: [bug](topn) fix partition topn data distribution should be hash
shuffle in two phase #56542 (#56612)
Cherry-picked from #56542
Co-authored-by: zhangstar333 <[email protected]>
---
.../pipeline/exec/partition_sort_sink_operator.cpp | 4 +-
.../pipeline/exec/partition_sort_sink_operator.h | 6 +-
.../spill/q67_test_data_distribution.out | 103 +++++++++++++++++++++
.../spill/q67_test_data_distribution.groovy | 69 ++++++++++++++
4 files changed, 179 insertions(+), 3 deletions(-)
diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
index cc759ad0547..5d9c60d9201 100644
--- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp
+++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
@@ -76,7 +76,9 @@
PartitionSortSinkOperatorX::PartitionSortSinkOperatorX(ObjectPool* pool, int ope
_topn_phase(tnode.partition_sort_node.ptopn_phase),
_has_global_limit(tnode.partition_sort_node.has_global_limit),
_top_n_algorithm(tnode.partition_sort_node.top_n_algorithm),
- _partition_inner_limit(tnode.partition_sort_node.partition_inner_limit) {}
+ _partition_inner_limit(tnode.partition_sort_node.partition_inner_limit),
+ _distribute_exprs(tnode.__isset.distribute_expr_lists ? tnode.distribute_expr_lists[0]
+ : std::vector<TExpr> {}) {}
Status PartitionSortSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) {
RETURN_IF_ERROR(DataSinkOperatorX::init(tnode, state));
diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.h b/be/src/pipeline/exec/partition_sort_sink_operator.h
index aae733460c6..b6cd7576e98 100644
--- a/be/src/pipeline/exec/partition_sort_sink_operator.h
+++ b/be/src/pipeline/exec/partition_sort_sink_operator.h
@@ -81,7 +81,8 @@ public:
_partition_exprs_num(partition_exprs_num),
_topn_phase(TPartTopNPhase::ONE_PHASE_GLOBAL),
_has_global_limit(has_global_limit),
- _partition_inner_limit(partition_inner_limit) {}
+ _partition_inner_limit(partition_inner_limit),
+ _distribute_exprs({}) {}
#endif
Status init(const TDataSink& tsink) override {
@@ -95,7 +96,7 @@ public:
Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos) override;
DataDistribution required_data_distribution() const override {
if (_topn_phase == TPartTopNPhase::TWO_PHASE_GLOBAL) {
- return DataSinkOperatorX<PartitionSortSinkLocalState>::required_data_distribution();
+ return DataDistribution(ExchangeType::HASH_SHUFFLE, _distribute_exprs);
}
return {ExchangeType::PASSTHROUGH};
}
@@ -114,6 +115,7 @@ private:
const int64_t _partition_inner_limit = 0;
vectorized::VExprContextSPtrs _partition_expr_ctxs;
+ const std::vector<TExpr> _distribute_exprs;
// Expressions and parameters used for build _sort_description
vectorized::VSortExecExprs _vsort_exec_exprs;
std::vector<bool> _is_asc_order;
diff --git
a/regression-test/data/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.out
b/regression-test/data/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.out
new file mode 100644
index 00000000000..c4cb47b666a
--- /dev/null
+++
b/regression-test/data/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.out
@@ -0,0 +1,103 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !q67 --
+\N \N \N \N \N \N \N \N 3113996.92
2
+\N \N \N \N \N \N \N \N 1019789218.69
1
+\N \N \N \N \N \N \N 1628997.00
3
+\N \N \N \N \N \N 596191.74
4
+\N \N \N \N \N 102328.59
62
+\N 2000 \N \N \N 102328.59
62
+\N ablepriablebarought \N \N \N \N
117683.53 38
+\N ablepriablebarought 2000 \N \N \N
117683.53 38
+\N antiationeinganti \N \N \N \N
142234.21 15
+\N antiationeinganti 2000 \N \N \N
142234.21 15
+\N barationableeing \N \N \N \N
133715.91 25
+\N barationableeing 2000 \N \N \N
133715.91 25
+\N barationableeing 2000 4 \N \N
66366.69 100
+\N eingcallyoughteing \N \N \N \N
100229.50 67
+\N eingcallyoughteing 2000 \N \N \N
100229.50 67
+\N brandmaxi #2 \N \N \N \N \N
102830.03 59
+\N brandmaxi #2 oughteingought \N \N \N \N
102830.03 59
+\N brandmaxi #2 oughteingought 2000 \N \N \N
102830.03 59
+\N corpunivamalg #3 \N \N \N \N \N
87301.37 80
+\N corpunivamalg #3 eseantin stationought \N \N
\N \N 87301.37 80
+\N corpunivamalg #3 eseantin stationought 2000 \N
\N \N 87301.37 80
+\N edu packamalg #2 \N \N \N \N \N
106947.83 48
+\N edu packamalg #2 oughtn stn stese \N \N
\N \N 106947.83 48
+\N edu packamalg #2 oughtn stn stese 2000 \N
\N \N 106947.83 48
+\N edu packamalgamalg #17 \N \N \N \N \N
91126.70 77
+\N edu packamalgamalg #17 \N \N \N \N
91126.70 77
+\N edu packamalgamalg #17 2000 \N \N \N
91126.70 77
+\N edu packexporti #1 \N \N \N \N \N
163163.84 6
+\N edu packexporti #1 \N \N \N \N
163163.84 6
+\N edu packexporti #1 2000 \N \N \N
163163.84 6
+\N edu packexporti #1 2000 4 \N \N
68406.85 97
+\N exportischolar #2 \N \N \N \N \N
121562.94 31
+\N exportischolar #2 prieingeseought \N \N \N
\N 121562.94 31
+\N exportischolar #2 prieingeseought 2000 \N \N
\N 121562.94 31
+\N importoedu pack #1 \N \N \N \N \N
154984.03 9
+\N importoedu pack #1 \N \N \N \N
154984.03 9
+\N importoedu pack #1 2000 \N \N \N
154984.03 9
+\N importoedu pack #1 2000 4 \N \N
73781.23 90
+\N namelesscorp #1 \N \N \N \N \N
148627.34 12
+\N namelesscorp #1 \N \N \N \N
148627.34 12
+\N namelesscorp #1 2000 \N \N \N
148627.34 12
+\N namelesscorp #1 2000 4 \N \N
67371.60 98
+\N archery \N \N \N \N \N \N 110088.99
44
+\N archery amalgmaxi #6 \N \N \N \N \N
110088.99 44
+\N archery amalgmaxi #6 antioughtn stought \N \N \N
\N 110088.99 44
+\N archery amalgmaxi #6 antioughtn stought 2000 \N \N
\N 110088.99 44
+\N baseball \N \N \N \N \N \N
93607.10 73
+\N baseball \N \N \N \N \N
93607.10 73
+\N baseball \N \N \N \N
93607.10 73
+\N baseball 2000 \N \N \N
93607.10 73
+\N dresses \N \N \N \N \N \N 138018.80
17
+\N dresses \N \N \N \N \N 138018.80
17
+\N dresses antieseoughtcally \N \N \N \N
138018.80 17
+\N dresses antieseoughtcally 2000 \N \N \N
138018.80 17
+\N dresses antieseoughtcally 2000 4 \N \N
69138.80 96
+\N flatware \N \N \N \N \N \N
74808.21 86
+\N flatware \N \N \N \N \N
74808.21 86
+\N flatware oughteingationn st \N \N \N
\N 74808.21 86
+\N flatware oughteingationn st 2000 \N \N
\N 74808.21 86
+\N glassware \N \N \N \N \N \N
98111.30 69
+\N glassware \N \N \N \N \N
98111.30 69
+\N glassware \N \N \N \N
98111.30 69
+\N glassware 2000 \N \N \N
98111.30 69
+\N outdoor \N \N \N \N \N \N 115448.60
40
+\N outdoor namelessnameless #3 \N \N \N \N \N
115448.60 40
+\N outdoor namelessnameless #3 \N \N \N \N
115448.60 40
+\N outdoor namelessnameless #3 2000 \N \N \N
115448.60 40
+\N pants \N \N \N \N \N \N 135855.95
21
+\N pants exportiimporto #2 \N \N \N \N \N
135855.95 21
+\N pants exportiimporto #2 antibarableableought \N \N
\N \N 135855.95 21
+\N pants exportiimporto #2 antibarableableought 2000 \N
\N \N 135855.95 21
+\N pants exportiimporto #2 antibarableableought 2000 4
\N \N 66808.00 99
+\N scanners \N \N \N \N \N \N
118366.60 34
+\N scanners namelessunivamalg #10 \N \N \N \N
\N 118366.60 34
+\N scanners namelessunivamalg #10 n stbaresepri \N \N
\N \N 118366.60 34
+\N scanners namelessunivamalg #10 n stbaresepri 2000 \N
\N \N 118366.60 34
+\N scanners namelessunivamalg #10 n stbaresepri 2000 4
\N \N 70357.97 95
+\N semi-precious \N \N \N \N \N \N
105040.42 51
+\N semi-precious amalgbrand #4 \N \N \N \N \N
105040.42 51
+\N semi-precious amalgbrand #4 ationeseoughtanti \N \N
\N \N 105040.42 51
+\N semi-precious amalgbrand #4 ationeseoughtanti 2000 \N
\N \N 105040.42 51
+\N sports-apparel \N \N \N \N \N \N
104579.05 55
+\N sports-apparel \N \N \N \N \N
104579.05 55
+\N sports-apparel \N \N \N \N
104579.05 55
+\N sports-apparel 2000 \N \N \N
104579.05 55
+\N swimwear \N \N \N \N \N \N
132397.04 27
+\N swimwear edu packamalg #2 \N \N \N \N
\N 132397.04 27
+\N swimwear edu packamalg #2 antieingoughtcally \N
\N \N \N 132397.04 27
+\N swimwear edu packamalg #2 antieingoughtcally 2000
\N \N \N 132397.04 27
+\N tennis \N \N \N \N \N \N 73252.58
91
+\N tennis \N \N \N \N \N 73252.58
91
+\N tennis barpribaranti \N \N \N \N
73252.58 91
+\N tennis barpribaranti 2000 \N \N \N
73252.58 91
+\N womens \N \N \N \N \N \N 180234.29
5
+\N womens \N \N \N \N \N 78649.41
83
+\N womens \N \N \N \N 78649.41
83
+\N womens 2000 \N \N \N 78649.41
83
+\N womens amalgedu pack #2 \N \N \N \N \N
101584.88 64
+\N womens amalgedu pack #2 \N \N \N \N
101584.88 64
+\N womens amalgedu pack #2 2000 \N \N \N
101584.88 64
+
diff --git
a/regression-test/suites/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.groovy
b/regression-test/suites/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.groovy
new file mode 100644
index 00000000000..22898b1fb49
--- /dev/null
+++
b/regression-test/suites/tpcds_sf1_unique_p1/spill/q67_test_data_distribution.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("q67_test_data_distribution") {
+ sql """
+ set experimental_use_serial_exchange = "true";
+ set enable_partition_topn=true;
+ """
+
+ sql """
+ use regression_test_tpcds_sf1_unique_p1;
+ """
+ qt_q67 """
+SELECT *
+FROM
+ (
+ SELECT
+ i_category
+ , i_class
+ , i_brand
+ , i_product_name
+ , d_year
+ , d_qoy
+ , d_moy
+ , s_store_id
+ , sumsales
+ , rank() OVER (PARTITION BY i_category ORDER BY sumsales DESC) rk
+ FROM
+ (
+ SELECT
+ i_category
+ , i_class
+ , i_brand
+ , i_product_name
+ , d_year
+ , d_qoy
+ , d_moy
+ , s_store_id
+ , sum(COALESCE((ss_sales_price * ss_quantity), 0)) sumsales
+ FROM
+ store_sales
+ , date_dim
+ , store
+ , item
+ WHERE (ss_sold_date_sk = d_date_sk)
+ AND (ss_item_sk = i_item_sk)
+ AND (ss_store_sk = s_store_sk)
+ AND (d_month_seq BETWEEN 1200 AND (1200 + 11))
+ GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy, s_store_id)
+ ) dw1
+) dw2
+WHERE (rk <= 100)
+ORDER BY i_category ASC, i_class ASC, i_brand ASC, i_product_name ASC, d_year ASC, d_qoy ASC, d_moy ASC, s_store_id ASC, sumsales ASC, rk ASC
+LIMIT 100
+"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]