This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1e65c24455e [regression](limit) Add group by limit regression test
case (#37940)
1e65c24455e is described below
commit 1e65c24455ebdf9570cd5f97a8bac1b0956a2f14
Author: HappenLee <[email protected]>
AuthorDate: Fri Jul 19 09:54:55 2024 +0800
[regression](limit) Add group by limit regression test case (#37940)
Add group by limit regression test case
---
be/src/common/config.cpp | 2 +
be/src/common/config.h | 4 ++
be/src/pipeline/exec/aggregation_sink_operator.cpp | 3 +-
.../data/query_p0/limit/test_group_by_limit.out | 66 ++++++++++++++++++++++
.../query_p0/limit/test_group_by_limit.groovy | 64 +++++++++++++++++++++
5 files changed, 138 insertions(+), 1 deletion(-)
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 3e9203987c2..b152111011e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1343,6 +1343,8 @@ DEFINE_mBool(ignore_not_found_file_in_external_table,
"true");
DEFINE_mBool(enable_hdfs_mem_limiter, "true");
+DEFINE_mInt16(topn_agg_limit_multiplier, "2");
+
// clang-format off
#ifdef BE_TEST
// test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 1ce9c66939c..f4ed1decaa0 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1435,6 +1435,10 @@ DECLARE_mBool(ignore_not_found_file_in_external_table);
DECLARE_mBool(enable_hdfs_mem_limiter);
+// Multiplier applied to the query limit: once the agg hash table size reaches
+// limit * topn_agg_limit_multiplier, the sort-limit agg optimization is applied
+DECLARE_mInt16(topn_agg_limit_multiplier);
+
#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp
b/be/src/pipeline/exec/aggregation_sink_operator.cpp
index 79ca07281d9..f3a6942c33f 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp
@@ -503,7 +503,8 @@ Status
AggSinkLocalState::_execute_with_serialized_key_helper(vectorized::Block*
_shared_state->reach_limit =
hash_table_size >=
(_shared_state->do_sort_limit
- ? Base::_parent->template
cast<AggSinkOperatorX>()._limit * 5
+ ? Base::_parent->template
cast<AggSinkOperatorX>()._limit *
+ config::topn_agg_limit_multiplier
: Base::_parent->template
cast<AggSinkOperatorX>()._limit);
if (_shared_state->reach_limit &&
_shared_state->do_sort_limit) {
_shared_state->build_limit_heap(hash_table_size);
diff --git a/regression-test/data/query_p0/limit/test_group_by_limit.out
b/regression-test/data/query_p0/limit/test_group_by_limit.out
new file mode 100644
index 00000000000..d9ac2a2481a
--- /dev/null
+++ b/regression-test/data/query_p0/limit/test_group_by_limit.out
@@ -0,0 +1,66 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+253967024 8491 AIR
+259556658 8641 FOB
+260402265 8669 MAIL
+
+-- !select --
+449872500 15000 1
+386605746 12900 2
+320758616 10717 3
+
+-- !select --
+198674527 6588 0.0
+198679731 6563 0.01
+198501055 6622 0.02
+
+-- !select --
+27137 1 1992-02-02
+45697 1 1992-02-04
+114452 5 1992-02-05
+
+-- !select --
+27137 1 1992-02-02T00:00
+45697 1 1992-02-04T00:00
+114452 5 1992-02-05T00:00
+
+-- !select --
+139015016 4632 1
+130287219 4313 2
+162309750 5334 3
+
+-- !select --
+64774969 2166 AIR 1
+54166166 1804 AIR 2
+45538267 1532 AIR 3
+
+-- !select --
+6882631 228 AIR 1 0.0
+6756423 228 AIR 1 0.01
+7920028 254 AIR 1 0.02
+
+-- !select --
+7618 1 AIR 1 0.0 1992-02-06
+2210 1 AIR 1 0.0 1992-03-24
+16807 1 AIR 1 0.0 1992-03-29
+
+-- !select --
+6882631 228 AIR 1 0.0
+6756423 228 AIR 1 0.01
+7920028 254 AIR 1 0.02
+
+-- !select --
+6882631 228 AIR 1 0.0
+6756423 228 AIR 1 0.01
+7920028 254 AIR 1 0.02
+
+-- !select --
+7707018 238 TRUCK 1 0.0
+7467045 233 TRUCK 1 0.01
+6927206 245 TRUCK 1 0.02
+
+-- !select --
+7661562 249 TRUCK 1 0.08
+6673139 228 TRUCK 1 0.07
+8333862 265 TRUCK 1 0.06
+
diff --git a/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
new file mode 100644
index 00000000000..271619c4a93
--- /dev/null
+++ b/regression-test/suites/query_p0/limit/test_group_by_limit.groovy
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_limit", "query") {
+
+sql 'set enable_agg_spill=false'
+
+sql 'set enable_force_spill=false'
+
+sql 'set topn_opt_limit_threshold=10'
+
+
+// different types
+qt_select """ select sum(orderkey), count(partkey), shipmode from
tpch_tiny_lineitem group by shipmode limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), linenumber from
tpch_tiny_lineitem group by linenumber limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), tax from
tpch_tiny_lineitem group by tax limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), commitdate from
tpch_tiny_lineitem group by commitdate limit 3; """
+
+
+// group by functions
+qt_select """ select sum(orderkey), count(partkey), cast(commitdate as
datetime) from tpch_tiny_lineitem group by cast(commitdate as datetime) limit
3; """
+
+qt_select """ select sum(orderkey), count(partkey), month(commitdate) from
tpch_tiny_lineitem group by month(commitdate) limit 3; """
+
+
+// multi column
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber from
tpch_tiny_lineitem group by shipmode, linenumber limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax , commitdate from tpch_tiny_lineitem group by shipmode, linenumber, tax,
commitdate limit 3; """
+
+
+// group by + order by
+
+// group by columns eq order by columns
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode, linenumber, tax limit 3; """
+
+// group by columns contains order by columns
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode limit 3; """
+
+// desc order by column
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax limit 3; """
+
+qt_select """ select sum(orderkey), count(partkey), shipmode, linenumber ,
tax from tpch_tiny_lineitem group by shipmode, linenumber, tax order by
shipmode desc, linenumber, tax desc limit 3; """
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]