This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new aca1d3f49c5 [opt](nereids)set topn lazy materialization threshold to 
avoid useless lazy materialization (#55240)
aca1d3f49c5 is described below

commit aca1d3f49c50ab421e8b5226392aea1bb24037ca
Author: minghong <[email protected]>
AuthorDate: Wed Aug 27 09:17:12 2025 +0800

    [opt](nereids)set topn lazy materialization threshold to avoid useless lazy 
materialization (#55240)
    
    ### What problem does this PR solve?
    If the topn limit is huge, lazy materialization is useless.
    In this PR, SessionVariable.enableTopnLazyMaterialization is replaced by
    topNLazyMaterializationThreshold:
    when topNLazyMaterializationThreshold < topn.limit + topn.offset, lazy
    materialization is disabled. A threshold <= 0 disables it entirely.
---
 .../nereids/processor/post/PlanPostProcessors.java      |  2 +-
 .../processor/post/materialize/LazyMaterializeTopN.java |  4 ++++
 .../rules/rewrite/DeferMaterializeTopNResult.java       |  2 +-
 .../main/java/org/apache/doris/qe/SessionVariable.java  | 17 +++++++++++++++--
 .../hive/test_hive_topn_lazy_mat.groovy                 |  4 ++--
 .../defer_materialize_topn/lazy_materialize_topn.groovy |  2 +-
 regression-test/suites/query_p0/sort/sort.groovy        |  2 +-
 .../suites/query_p0/sort/topn_2pr_rule.groovy           |  2 +-
 .../suites/query_p0/sort_spill/sort_spill.groovy        |  2 +-
 .../suites/query_p0/topn_lazy/topn_lazy.groovy          |  7 ++++++-
 10 files changed, 33 insertions(+), 11 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
index 93c53fe5560..7e28e42c541 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/PlanPostProcessors.java
@@ -65,7 +65,7 @@ public class PlanPostProcessors {
         Builder<PlanPostProcessor> builder = ImmutableList.builder();
         builder.add(new PushDownFilterThroughProject());
         builder.add(new RemoveUselessProjectPostProcessor());
-        if 
(cascadesContext.getConnectContext().getSessionVariable().enableTopnLazyMaterialization)
 {
+        if 
(cascadesContext.getConnectContext().getSessionVariable().enableTopnLazyMaterialization())
 {
             // LazyMaterializeTopN should run before MergeProjectPostProcessor
             builder.add(new LazyMaterializeTopN());
         }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
index d473bbaa3ed..8c971364155 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/materialize/LazyMaterializeTopN.java
@@ -32,6 +32,7 @@ import 
org.apache.doris.nereids.trees.plans.physical.PhysicalCatalogRelation;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalLazyMaterialize;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
+import org.apache.doris.qe.SessionVariable;
 
 import com.google.common.collect.BiMap;
 import com.google.common.collect.HashBiMap;
@@ -66,6 +67,9 @@ public class LazyMaterializeTopN extends PlanPostProcessor {
         if (hasMaterialized) {
             return topN;
         }
+        if (SessionVariable.getTopNLazyMaterializationThreshold() < 
topN.getLimit() + topN.getOffset()) {
+            return topN;
+        }
         /*
          topn(output=[x] orderkey=[b])
              ->project(a as x)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
index 3e9a9a74433..c93023a8e1c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DeferMaterializeTopNResult.java
@@ -261,7 +261,7 @@ public class DeferMaterializeTopNResult implements 
RewriteRuleFactory {
     private Plan deferMaterialize(LogicalResultSink<? extends Plan> 
logicalResultSink,
             LogicalTopN<? extends Plan> logicalTopN, Optional<LogicalProject<? 
extends Plan>> logicalProject,
             Optional<LogicalFilter<? extends Plan>> logicalFilter, 
LogicalOlapScan logicalOlapScan) {
-        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableTopnLazyMaterialization) {
+        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableTopnLazyMaterialization()) {
             return null;
         }
         IdGenerator<ExprId> exprIdGenerator = 
StatementScopeIdGenerator.getExprIdGenerator();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index a671fdd5d12..eb4474862d4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1401,10 +1401,23 @@ public class SessionVariable implements Serializable, 
Writable {
     @VariableMgr.VarAttr(name = USE_RF_DEFAULT)
     public boolean useRuntimeFilterDefaultSize = false;
 
-    @VariableMgr.VarAttr(name = "enable_topn_lazy_materialization", 
needForward = true,
+    @VariableMgr.VarAttr(name = "topn_lazy_materialization_threshold", 
needForward = true,
             fuzzy = false,
             varType = VariableAnnotation.EXPERIMENTAL)
-    public boolean enableTopnLazyMaterialization = true;
+    public int topNLazyMaterializationThreshold = 512 * 1024;
+
+    public boolean enableTopnLazyMaterialization() {
+        return ConnectContext.get() != null
+                && 
ConnectContext.get().getSessionVariable().topNLazyMaterializationThreshold > 0;
+    }
+
+    public static int getTopNLazyMaterializationThreshold() {
+        if (ConnectContext.get() != null) {
+            return 
ConnectContext.get().getSessionVariable().topNLazyMaterializationThreshold;
+        } else {
+            return 
VariableMgr.getDefaultSessionVariable().topNLazyMaterializationThreshold;
+        }
+    }
 
     @VariableMgr.VarAttr(name = DISABLE_INVERTED_INDEX_V1_FOR_VARIANT, 
needForward = true)
     private boolean disableInvertedIndexV1ForVaraint = true;
diff --git 
a/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy 
b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
index de8677ff329..1d603fd4e92 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_hive_topn_lazy_mat.groovy
@@ -160,7 +160,7 @@ suite("test_hive_topn_lazy_mat", 
"p0,external,hive,external_docker,external_dock
 
 
         sql """
-        set enable_topn_lazy_materialization=true;
+        set topn_lazy_materialization_threshold=1024;
         set runtime_filter_mode=GLOBAL;
         set TOPN_FILTER_RATIO=0.5;
         set disable_join_reorder=true;
@@ -199,7 +199,7 @@ suite("test_hive_topn_lazy_mat", 
"p0,external,hive,external_docker,external_dock
         runTopNLazyMatTests()
 
 
-        sql """ set enable_topn_lazy_materialization=false; """
+        sql """ set topn_lazy_materialization_threshold=-1; """
         runTopNLazyMatTests()
 
 
diff --git 
a/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
 
b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
index 0488e707f8d..de8fd90bbfa 100644
--- 
a/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/defer_materialize_topn/lazy_materialize_topn.groovy
@@ -18,7 +18,7 @@ suite("lazy_materialize_topn") {
     sql """
         set enable_two_phase_read_opt = true;
         set topn_opt_limit_threshold = 1000;
-        set enable_topn_lazy_materialization = false;
+        set topn_lazy_materialization_threshold = -1;
     """
 
     sql """
diff --git a/regression-test/suites/query_p0/sort/sort.groovy 
b/regression-test/suites/query_p0/sort/sort.groovy
index eb001b3e0a5..ecd8581ebbc 100644
--- a/regression-test/suites/query_p0/sort/sort.groovy
+++ b/regression-test/suites/query_p0/sort/sort.groovy
@@ -21,7 +21,7 @@
 
 suite("sort") {
     // this case is used to test defer materialze, and hence turn 
topn_lazy_materialization off
-    sql """set enable_topn_lazy_materialization=false;"""
+    sql """set topn_lazy_materialization_threshold=-1;"""
     qt_sort_string_single_column """ select * from ( select '汇总' as a union 
all select '2022-01-01' as a ) a order by 1 """
     qt_sort_string_multiple_columns """ select * from ( select '汇总' as a,1 as 
b union all select '2022-01-01' as a,1 as b ) a order by 1,2 """
     qt_sort_string_on_fe """ select '汇总' > '2022-01-01' """
diff --git a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy 
b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
index 9fe4448a969..4a97b87efe4 100644
--- a/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
+++ b/regression-test/suites/query_p0/sort/topn_2pr_rule.groovy
@@ -19,7 +19,7 @@ suite("topn_2pr_rule") {
     sql """set topn_opt_limit_threshold = 1024"""
     sql """set enable_two_phase_read_opt= true"""
     // this case is used to test defer materialze, and hence turn 
topn_lazy_materialization off
-    sql """set enable_topn_lazy_materialization=false;"""
+    sql """set topn_lazy_materialization_threshold=-1;"""
 
     def create_table = { table_name, key_type="DUPLICATE" ->
         sql "DROP TABLE IF EXISTS ${table_name}"
diff --git a/regression-test/suites/query_p0/sort_spill/sort_spill.groovy 
b/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
index 13f7cfa6fa9..a7ce6e37a69 100644
--- a/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
+++ b/regression-test/suites/query_p0/sort_spill/sort_spill.groovy
@@ -32,7 +32,7 @@ suite("sort_spill") {
     sql """ set parallel_pipeline_task_num = 2; """
     sql """ set batch_size = 100; """
     sql """ set enable_force_spill=true; """
-    sql """ set enable_topn_lazy_materialization=false;"""
+    sql """ set topn_lazy_materialization_threshold=-1;"""
     sql """ set enable_reserve_memory=true; """
     sql """ set force_sort_algorithm = "full"; """
     sql """ set enable_parallel_result_sink=true; """
diff --git a/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy 
b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
index 1c56a015481..4f5c6b9a2e6 100644
--- a/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
+++ b/regression-test/suites/query_p0/topn_lazy/topn_lazy.groovy
@@ -17,7 +17,7 @@
 
 suite("topn_lazy") {
     sql """
-        set enable_topn_lazy_materialization=true;
+        set topn_lazy_materialization_threshold=1024;
         set runtime_filter_mode=GLOBAL;
         set TOPN_FILTER_RATIO=0.5;
         set disable_join_reorder=true;
@@ -32,6 +32,11 @@ suite("topn_lazy") {
         contains("row_ids: [__DORIS_GLOBAL_ROWID_COL__lineorder]")
     }
 
+    // no topn lazy since huge limit
+    explain {
+        sql "select lo_suppkey, lo_commitdate from lineorder where 
lo_orderkey>100 order by lo_orderkey  limit 1025;"
+        notContains("VMaterializeNode")
+    }
 
     // single table select some slots
     explain {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to