This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new aca1d3f49c5 [opt](nereids)set topn lazy materialization threshold to
avoid useless lazy materialization (#55240)
aca1d3f49c5 is described below
commit aca1d3f49c50ab421e8b5226392aea1bb24037ca
Author: minghong <[email protected]>
AuthorDate: Wed Aug 27 09:17:12 2025 +0800
[opt](nereids)set topn lazy materialization threshold to avoid useless lazy
materialization (#55240)
### What problem does this PR solve?
If the TopN limit is huge, lazy materialization is useless.
In this PR, SessionVariable.enableTopnLazyMaterialization is replaced by
topNLazyMaterializationThreshold.
When topNLazyMaterializationThreshold < topn.limit + topn.offset, lazy
materialization is disabled. A non-positive threshold (e.g. -1) disables
lazy materialization entirely.
---
.../nereids/processor/post/PlanPostProcessors.java | 2 +-
.../processor/post/materialize/LazyMaterializeTopN.java | 4 ++++
.../rules/rewrite/DeferMaterializeTopNResult.java | 2 +-
.../main/java/org/apache/doris/qe/SessionVariable.java | 17 +++++++++++++++--
.../hive/test_hive_topn_lazy_mat.groovy | 4 ++--
.../defer_materialize_topn/lazy_materialize_topn.groovy | 2 +-
regression-test/suites/query_p0/sort/sort.groovy | 2 +-
.../suites/query_p0/sort/topn_2pr_rule.groovy | 2 +-
.../suites/query_p0/sort_spill/sort_spill.groovy | 2 +-
.../suites/query_p0/topn_lazy/topn_lazy.groovy | 7 ++++++-
10 files changed, 33 insertions(+), 11 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
index 93c53fe5560..7e28e42c541 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
@@ -65,7 +65,7 @@ public class PlanPostProcessors {
Builder<PlanPostProcessor> builder = ImmutableList.builder();
builder.add(new PushDownFilterThroughProject());
builder.add(new RemoveUselessProjectPostProcessor());
- if
(cascadesContext.getConnectContext().getSessionVariable().enableTopnLazyMaterialization)
{
+ if
(cascadesContext.getConnectContext().getSessionVariable().enableTopnLazyMaterialization())
{
// LazyMaterializeTopN should run before MergeProjectPostProcessor
builder.add(new LazyMaterializeTopN());
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
index d473bbaa3ed..8c971364155 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
@@ -32,6 +32,7 @@ import
org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation;
import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize;
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
+import org.apache.doris.qe.SessionVariable;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
@@ -66,6 +67,9 @@ public class LazyMaterializeTopN extends PlanPostProcessor {
if (hasMaterialized) {
return topN;
}
+ if (SessionVariable.getTopNLazyMaterializationThreshold() <
topN.getLimit() + topN.getOffset()) {
+ return topN;
+ }
/*
topn(output=[x] orderkey=[b])
->project(a as x)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
index 3e9a9a74433..c93023a8e1c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
@@ -261,7 +261,7 @@ public class DeferMaterializeTopNResult implements
RewriteRuleFactory {
private Plan deferMaterialize(LogicalResultSink<? extends Plan>
logicalResultSink,
LogicalTopN<? extends Plan> logicalTopN, Optional<LogicalProject<?
extends Plan>> logicalProject,
Optional<LogicalFilter<? extends Plan>> logicalFilter,
LogicalOlapScan logicalOlapScan) {
- if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableTopnLazyMaterialization) {
+ if (ConnectContext.get() != null &&
ConnectContext.get().getSessionVariable().enableTopnLazyMaterialization()) {
return null;
}
IdGenerator<ExprId> exprIdGenerator =
StatementScopeIdGenerator.getExprIdGenerator();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index a671fdd5d12..eb4474862d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1401,10 +1401,23 @@ public class SessionVariable implements Serializable,
Writable {
@VariableMgr.VarAttr(name = USE_RF_DEFAULT)
public boolean useRuntimeFilterDefaultSize = false;
- @VariableMgr.VarAttr(name = "enable_topn_lazy_materialization",
needForward = true,
+ @VariableMgr.VarAttr(name = "topn_lazy_materialization_threshold",
needForward = true,
fuzzy = false,
varType = VariableAnnotation.EXPERIMENTAL)
- public boolean enableTopnLazyMaterialization = true;
+ public int topNLazyMaterializationThreshold = 512 * 1024;
+
+ public boolean enableTopnLazyMaterialization() {
+ return ConnectContext.get() != null
+ &&
ConnectContext.get().getSessionVariable().topNLazyMaterializationThreshold > 0;
+ }
+
+ public static int getTopNLazyMaterializationThreshold() {
+ if (ConnectContext.get() != null) {
+ return
ConnectContext.get().getSessionVariable().topNLazyMaterializationThreshold;
+ } else {
+ return
VariableMgr.getDefaultSessionVariable().topNLazyMaterializationThreshold;
+ }
+ }
@VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT,
needForward = true)
private boolean disableInvertedIndexV1ForVaraint = true;
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
index de8677ff329..1d603fd4e92 100644
---
a/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
+++
b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
@@ -160,7 +160,7 @@ suite("test_hive_topn_lazy_mat",
"p0,external,hive,external_docker,external_dock
sql """
- set enable_topn_lazy_materialization=true;
+ set topn_lazy_materialization_threshold=1024;
set runtime_filter_mode=GLOBAL;
set TOPN_FILTER_RATIO=0.5;
set disable_join_reorder=true;
@@ -199,7 +199,7 @@ suite("test_hive_topn_lazy_mat",
"p0,external,hive,external_docker,external_dock
runTopNLazyMatTests()
- sql """ set enable_topn_lazy_materialization=false; """
+ sql """ set topn_lazy_materialization_threshold=-1; """
runTopNLazyMatTests()
diff --git
a/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
index 0488e707f8d..de8fd90bbfa 100644
---
a/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
+++
b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
@@ -18,7 +18,7 @@ suite("lazy_materialize_topn") {
sql """
set enable_two_phase_read_opt = true;
set topn_opt_limit_threshold = 1000;
- set enable_topn_lazy_materialization = false;
+ set topn_lazy_materialization_threshold = -1;
"""
sql """
diff --git a/regression-test/suites/query_p0/sort/sort.groovy
b/regression-test/suites/query_p0/sort/sort.groovy
index eb001b3e0a5..ecd8581ebbc 100644
--- a/regression-test/suites/query_p0/sort/sort.groovy
+++ b/regression-test/suites/query_p0/sort/sort.groovy
@@ -21,7 +21,7 @@
suite("sort") {
// this case is used to test defer materialze, and hence turn
topn_lazy_materialization off
- sql """set enable_topn_lazy_materialization=false;"""
+ sql """set topn_lazy_materialization_threshold=-1;"""
qt_sort_string_single_column """ select * from ( select '汇总' as a union
all select '2022-01-01' as a ) a order by 1 """
qt_sort_string_multiple_columns """ select * from ( select '汇总' as a,1 as
b union all select '2022-01-01' as a,1 as b ) a order by 1,2 """
qt_sort_string_on_fe """ select '汇总' > '2022-01-01' """
diff --git a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
index 9fe4448a969..4a97b87efe4 100644
--- a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
+++ b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
@@ -19,7 +19,7 @@ suite("topn_2pr_rule") {
sql """set topn_opt_limit_threshold = 1024"""
sql """set enable_two_phase_read_opt= true"""
// this case is used to test defer materialze, and hence turn
topn_lazy_materialization off
- sql """set enable_topn_lazy_materialization=false;"""
+ sql """set topn_lazy_materialization_threshold=-1;"""
def create_table = { table_name, key_type="DUPLICATE" ->
sql "DROP TABLE IF EXISTS ${table_name}"
diff --git a/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
b/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
index 13f7cfa6fa9..a7ce6e37a69 100644
--- a/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
+++ b/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
@@ -32,7 +32,7 @@ suite("sort_spill") {
sql """ set parallel_pipeline_task_num = 2; """
sql """ set batch_size = 100; """
sql """ set enable_force_spill=true; """
- sql """ set enable_topn_lazy_materialization=false;"""
+ sql """ set topn_lazy_materialization_threshold=-1;"""
sql """ set enable_reserve_memory=true; """
sql """ set force_sort_algorithm = "full"; """
sql """ set enable_parallel_result_sink=true; """
diff --git a/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
index 1c56a015481..4f5c6b9a2e6 100644
--- a/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
+++ b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
@@ -17,7 +17,7 @@
suite("topn_lazy") {
sql """
- set enable_topn_lazy_materialization=true;
+ set topn_lazy_materialization_threshold=1024;
set runtime_filter_mode=GLOBAL;
set TOPN_FILTER_RATIO=0.5;
set disable_join_reorder=true;
@@ -32,6 +32,11 @@ suite("topn_lazy") {
contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder]")
}
+ // no topn lazy since huge limit
+ explain {
+ sql "select lo_suppkey, lo_commitdate from lineorder where
lo_orderkey>100 order by lo_orderkey limit 1025;"
+ notContains("VMaterializeNode")
+ }
// single table select some slots
explain {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]