This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7630fe7b7b [bug](node)fix dense_rank function in partition sort node
return wrong rows (#24727)
7630fe7b7b is described below
commit 7630fe7b7b583ff240b4abeaf73388ba9a2ba9af
Author: zhangstar333 <[email protected]>
AuthorDate: Thu Sep 21 19:13:30 2023 +0800
[bug](node)fix dense_rank function in partition sort node return wrong rows
(#24727)
---
be/src/vec/common/sort/partition_sorter.cpp | 39 +++++++------
be/src/vec/common/sort/partition_sorter.h | 1 +
.../test_select_stddev_variance_window.out | 68 ++++++++++++++++++++++
.../test_select_stddev_variance_window.groovy | 24 ++++++++
4 files changed, 114 insertions(+), 18 deletions(-)
diff --git a/be/src/vec/common/sort/partition_sorter.cpp
b/be/src/vec/common/sort/partition_sorter.cpp
index 1bffb5ed76..083c676ba8 100644
--- a/be/src/vec/common/sort/partition_sorter.cpp
+++ b/be/src/vec/common/sort/partition_sorter.cpp
@@ -101,12 +101,10 @@ Status PartitionSorter::partition_sort_read(Block*
output_block, bool* eos, int
auto& priority_queue = _state->get_priority_queue();
bool get_enough_data = false;
- bool first_compare_row = false;
while (!priority_queue.empty()) {
auto current = priority_queue.top();
priority_queue.pop();
if (UNLIKELY(_previous_row->impl == nullptr)) {
- first_compare_row = true;
*_previous_row = current;
}
@@ -125,34 +123,39 @@ Status PartitionSorter::partition_sort_read(Block*
output_block, bool* eos, int
break;
}
case TopNAlgorithm::DENSE_RANK: {
+ // dense_rank(): 1,1,1,2,2,2,2,.......,2,3,3,3. If the SQL filter is "where rk
< 3", every row ranked 1 or 2 must be output.
//3 dense_rank() maybe need distinct rows of partition_inner_limit
- if ((current_output_rows + _output_total_rows) <
_partition_inner_limit) {
- for (size_t i = 0; i < num_columns; ++i) {
- merged_columns[i]->insert_from(*current->all_columns[i],
current->pos);
- }
- } else {
+ //3.1 _has_global_limit = true: check whether (current_output_rows +
_output_total_rows) >= _partition_inner_limit
+ //3.2 _has_global_limit = false: check the number of distinct rows output
so far (_output_distinct_rows), not _output_total_rows
+ if (_has_global_limit &&
+ (current_output_rows + _output_total_rows) >=
_partition_inner_limit) {
get_enough_data = true;
+ break;
}
if (_has_global_limit) {
current_output_rows++;
} else {
- //when it's first comes, the rows are same no need compare
- if (first_compare_row) {
- current_output_rows++;
- first_compare_row = false;
- } else {
- // not the first comes, so need compare those, when is
distinct row
- // so could current_output_rows++
- bool cmp_res = _previous_row->compare_two_rows(current);
- if (cmp_res == false) { // distinct row
- current_output_rows++;
- *_previous_row = current;
+ bool cmp_res = _previous_row->compare_two_rows(current);
+ //get a distinct row
+ if (cmp_res == false) {
+ _output_distinct_rows++; //need rows++ firstly
+ if (_output_distinct_rows >= _partition_inner_limit) {
+ get_enough_data = true;
+ break;
}
+ *_previous_row = current;
}
}
+ for (size_t i = 0; i < num_columns; ++i) {
+ merged_columns[i]->insert_from(*current->all_columns[i],
current->pos);
+ }
break;
}
case TopNAlgorithm::RANK: {
+ // rank(): 1,1,1,4,5,6,6,6.....,6,100,101. If the SQL filter is "where rk < 7",
every row ranked 1,1,1,4,5,6,6,....6 must be output.
+ //2 rank() may need a check each time a distinct row is encountered
+ //2.1 _has_global_limit = true: check whether (current_output_rows +
_output_total_rows) >= _partition_inner_limit
+ //2.2 _has_global_limit = false: when cmp_res indicates a
distinct row, check the number of rows output so far
if (_has_global_limit &&
(current_output_rows + _output_total_rows) >=
_partition_inner_limit) {
get_enough_data = true;
diff --git a/be/src/vec/common/sort/partition_sorter.h
b/be/src/vec/common/sort/partition_sorter.h
index ff17ac2115..ca0cd5c493 100644
--- a/be/src/vec/common/sort/partition_sorter.h
+++ b/be/src/vec/common/sort/partition_sorter.h
@@ -99,6 +99,7 @@ private:
std::unique_ptr<MergeSorterState> _state;
const RowDescriptor& _row_desc;
int64 _output_total_rows = 0;
+ int64 _output_distinct_rows = 0;
bool _has_global_limit = false;
int _partition_inner_limit = 0;
TopNAlgorithm::type _top_n_algorithm = TopNAlgorithm::type::ROW_NUMBER;
diff --git
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
index cb3e3e4d64..d8542dca56 100644
---
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
+++
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
@@ -1019,3 +1019,71 @@
14 1987.5
15 1989.0
+-- !sql_row_number_1 --
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1992 true
+1 32767 false
+
+-- !sql_rank_1 --
+1 -32767 false
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1991 false
+1 1992 true
+1 32767 false
+
+-- !sql_dense_rank_1 --
+1 -32767 false
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1991 false
+1 1992 true
+1 32767 false
+
+-- !sql_row_number --
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1992 true
+1 32767 false
+
+-- !sql_rank --
+1 -32767 false
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1991 false
+1 1992 true
+1 32767 false
+
+-- !sql_dense_rank --
+1 -32767 false
+1 -32767 false
+1 255 false
+1 1985 true
+1 1986 false
+1 1989 false
+1 1991 false
+1 1991 false
+1 1992 true
+1 32767 false
+
diff --git
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
index 7cbad40a61..7ec02d90ae 100644
---
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
+++
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
@@ -147,6 +147,30 @@ suite("test_select_stddev_variance_window") {
qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and current row) from
${tableName} order by k1;"
qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and unbounded following)
from ${tableName} order by k1;"
qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1) from ${tableName} order by k1;"
+
+ sql "set experimental_enable_nereids_planner = false;"
+
+ qt_sql_row_number_1 """
+ select * from (select row_number() over(partition by k2 order by k6)
as rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
+ qt_sql_rank_1 """
+ select * from (select rank() over(partition by k2 order by k6) as
rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
+ qt_sql_dense_rank_1 """
+ select * from (select dense_rank() over(partition by k2 order by k6)
as rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
+
+ sql "set experimental_enable_nereids_planner = true;"
+
+ qt_sql_row_number """
+ select * from (select row_number() over(partition by k2 order by k6)
as rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
+ qt_sql_rank """
+ select * from (select rank() over(partition by k2 order by k6) as
rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
+ qt_sql_dense_rank """
+ select * from (select dense_rank() over(partition by k2 order by k6)
as rk,k2,k6 from ${tableName}) as t where rk = 1 order by 1,2,3;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]