This is an automated email from the ASF dual-hosted git repository.

morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 513bf29622c [fix](fe) Add MergeProjectable after ColumnPruning in 
DPHyp join reorder to merge consecutive projects (#64409)
513bf29622c is described below

commit 513bf29622cbbe2a89fb7acf6d061585970ad450
Author: starocean999 <[email protected]>
AuthorDate: Tue Jun 16 14:56:17 2026 +0800

    [fix](fe) Add MergeProjectable after ColumnPruning in DPHyp join reorder to 
merge consecutive projects (#64409)
    
    Related PR: (https://github.com/apache/doris/pull/61146)
    
    After DPHyp join reorder, the ColumnPruning rule may produce consecutive
    Project nodes in the plan tree. Subsequent optimization rules expect
    normalized plan shapes and may not handle chains of consecutive projects
    correctly, leading to plan corruption or incorrect results.
    
    This is because the DPHyp reorder path runs a separate rewrite pipeline
    (pushDownRewrite → columnPrune) on the reordered plan before
    re-inserting it into the memo. The main rewrite pipeline includes
    MergeProjectable in its standard rule sequence; in the DPHyp
    post-reorder pipeline, MergeProjectable ran only inside the
    pushDownRewrite batch, i.e. before ColumnPruning, so consecutive
    projects introduced by column pruning were never merged.
    
    Fix: Move MergeProjectable out of the pushDownRewrite batch and run it
    as its own top-down job after ColumnPruning in the DPHyp rewrite
    pipeline within Optimizer.dpHypOptimize(). This ensures that any
    consecutive Project nodes generated by column pruning are merged into a
    single project, maintaining a normalized plan shape for downstream
    rules.
---
 .../doris/nereids/jobs/executor/Optimizer.java     |   8 +-
 .../suites/statistics/test_hot_value.groovy        | 109 ++++++++++++++++++++-
 2 files changed, 112 insertions(+), 5 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
index b66a6a30c60..c6734460c49 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
@@ -142,17 +142,17 @@ public class Optimizer {
         // 2) run PushDownExpressionsInHashCondition as a plan rewrite on a 
temporary context
         org.apache.doris.nereids.CascadesContext tempCtx = 
CascadesContext.newCurrentTreeContext(cascadesContext);
         tempCtx.setRewritePlan(plan);
-        RewriteJob pushDownRewrite = AbstractBatchJobExecutor.topDown(new 
PushDownExpressionsInHashCondition(),
-                new MergeProjectable());
+        RewriteJob pushDownRewrite = AbstractBatchJobExecutor.topDown(new 
PushDownExpressionsInHashCondition());
         RewriteJob columnPrune = 
AbstractBatchJobExecutor.custom(RuleType.COLUMN_PRUNING, ColumnPruning::new);
+        RewriteJob mergeProjects = AbstractBatchJobExecutor.topDown(new 
MergeProjectable());
         RewriteJob adjustNullable = 
AbstractBatchJobExecutor.custom(RuleType.ADJUST_NULLABLE,
                 () -> new AdjustNullable(false));
         RewriteJob checkAfterRewrite = AbstractBatchJobExecutor.bottomUp(new 
CheckAfterRewrite());
         AbstractBatchJobExecutor executor = new 
AbstractBatchJobExecutor(tempCtx) {
             @Override
             public 
java.util.List<org.apache.doris.nereids.jobs.rewrite.RewriteJob> getJobs() {
-                return 
com.google.common.collect.ImmutableList.of(pushDownRewrite, columnPrune, 
adjustNullable,
-                        checkAfterRewrite);
+                return 
com.google.common.collect.ImmutableList.of(pushDownRewrite, columnPrune, 
mergeProjects,
+                        adjustNullable, checkAfterRewrite);
             }
         };
         boolean oldFeDebugValue = 
tempCtx.getStatementContext().getConnectContext().getSessionVariable().feDebug;
diff --git a/regression-test/suites/statistics/test_hot_value.groovy 
b/regression-test/suites/statistics/test_hot_value.groovy
index 8b62ff715ec..1ac3407bd4a 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -211,5 +211,112 @@ suite("test_hot_value") {
     assertEquals(1, result.size())
     assertEquals("' : ;a':1.0", result[0][17])
 
-    sql """drop database if exists test_hot_value"""
+    sql """set enable_dphyp_optimizer=true"""
+    sql """
+        WITH cte1 AS (
+        SELECT
+            MAX(t0.`col_value`) as `col_value`,
+            COUNT(1) as `count`,
+            SUM(`len`) as `column_length`
+        FROM
+            (
+                SELECT
+                    xxhash_64(SUBSTRING(CAST(`value1` AS STRING), 1, 1024)) AS 
`hash_value`,
+                    `value1` AS `col_value`,
+                    LENGTH(`value1`) as `len`,
+                    assert_true(
+                        `value1` IS NULL
+                        OR LENGTH(`value1`) <= 1024,
+                        'ANALYZE_SKIP_LONG_STRING_COLUMN'
+                    ) AS `__lc`
+                FROM
+                    `internal`.`test_hot_value`.`test1`
+                limit
+                    400
+            ) as `t0`
+        GROUP BY
+            `t0`.`hash_value`
+    ),
+    cte2 AS (
+        SELECT
+            CONCAT('1781162157676', '-', '-1', '-', 'value1') AS `id`,
+            0 AS `catalog_id`,
+            1781162157674 AS `db_id`,
+            1781162157676 AS `tbl_id`,
+            -1 AS `idx_id`,
+            'value1' AS `col_id`,
+            NULL AS `part_id`,
+            10000 AS `row_count`,
+            SUM(`t1`.`count`) * COUNT(`t1`.`col_value`) / (
+                SUM(`t1`.`count`) - SUM(
+                    IF(
+                        `t1`.`count` = 1
+                        and `t1`.`col_value` is not null,
+                        1,
+                        0
+                    )
+                ) + SUM(
+                    IF(
+                        `t1`.`count` = 1
+                        and `t1`.`col_value` is not null,
+                        1,
+                        0
+                    )
+                ) * SUM(`t1`.`count`) / 10000
+            ) as `ndv`,
+            IFNULL(
+                SUM(IF(`t1`.`col_value` IS NULL, `t1`.`count`, 0)),
+                0
+            ) * 25.0 as `null_count`,
+            SUBSTRING(CAST('0' AS STRING), 1, 1024) AS `min`,
+            SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `max`,
+            SUM(`column_length`) * 25.0 AS `data_size`,
+            NOW()
+        FROM
+            cte1 t1
+    ),
+    cte3 AS (
+        SELECT
+            IFNULL(
+                GROUP_CONCAT(
+                    CONCAT(
+                        REPLACE(REPLACE(t2.`col_value`, ":", "\\:"), ";", 
"\\;"),
+                        " :",
+                        ROUND(
+                            t2.`count` / (
+                                SELECT
+                                    SUM(`count`)
+                                FROM
+                                    cte1
+                                WHERE
+                                    `col_value` IS NOT NULL
+                            ),
+                            2
+                        )
+                    ),
+                    " ;"
+                ),
+                ''
+            ) as `hot_value`
+        FROM
+            (
+                SELECT
+                    `col_value`,
+                    `count`
+                FROM
+                    cte1
+                WHERE
+                    `col_value` IS NOT NULL
+                ORDER BY
+                    `count` DESC
+                LIMIT
+                    10
+            ) t2
+    )
+    SELECT
+        *
+    FROM
+        cte2
+        CROSS JOIN cte3;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to