This is an automated email from the ASF dual-hosted git repository.
morrySnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 513bf29622c [fix](fe) Add MergeProjectable after ColumnPruning in
DPHyp join reorder to merge consecutive projects (#64409)
513bf29622c is described below
commit 513bf29622cbbe2a89fb7acf6d061585970ad450
Author: starocean999 <[email protected]>
AuthorDate: Tue Jun 16 14:56:17 2026 +0800
[fix](fe) Add MergeProjectable after ColumnPruning in DPHyp join reorder to
merge consecutive projects (#64409)
Related PR: https://github.com/apache/doris/pull/61146
After DPHyp join reorder, the ColumnPruning rule may produce consecutive
Project nodes in the plan tree. Subsequent optimization rules expect
normalized plan shapes and may not handle chains of consecutive projects
correctly, leading to plan corruption or incorrect results.
This is because the DPHyp reorder path runs a separate rewrite pipeline
(pushDownRewrite → columnPrune) on the reordered plan before
re-inserting it into the memo. Unlike the main rewrite pipeline, which
includes MergeProjectable in its standard rule sequence, the DPHyp
post-reorder pipeline ran MergeProjectable only inside pushDownRewrite,
i.e. before ColumnPruning, so it had no merge step after column
pruning to clean up the projects that pruning introduces.
Fix: Add MergeProjectable after ColumnPruning in the DPHyp rewrite
pipeline within Optimizer.dpHypOptimize(). This ensures that any
consecutive Project nodes generated by column pruning are merged into a
single project, maintaining a normalized plan shape for downstream
rules.
---
.../doris/nereids/jobs/executor/Optimizer.java | 8 +-
.../suites/statistics/test_hot_value.groovy | 109 ++++++++++++++++++++-
2 files changed, 112 insertions(+), 5 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
index b66a6a30c60..c6734460c49 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
@@ -142,17 +142,17 @@ public class Optimizer {
// 2) run PushDownExpressionsInHashCondition as a plan rewrite on a
temporary context
org.apache.doris.nereids.CascadesContext tempCtx =
CascadesContext.newCurrentTreeContext(cascadesContext);
tempCtx.setRewritePlan(plan);
- RewriteJob pushDownRewrite = AbstractBatchJobExecutor.topDown(new
PushDownExpressionsInHashCondition(),
- new MergeProjectable());
+ RewriteJob pushDownRewrite = AbstractBatchJobExecutor.topDown(new
PushDownExpressionsInHashCondition());
RewriteJob columnPrune =
AbstractBatchJobExecutor.custom(RuleType.COLUMN_PRUNING, ColumnPruning::new);
+ RewriteJob mergeProjects = AbstractBatchJobExecutor.topDown(new
MergeProjectable());
RewriteJob adjustNullable =
AbstractBatchJobExecutor.custom(RuleType.ADJUST_NULLABLE,
() -> new AdjustNullable(false));
RewriteJob checkAfterRewrite = AbstractBatchJobExecutor.bottomUp(new
CheckAfterRewrite());
AbstractBatchJobExecutor executor = new
AbstractBatchJobExecutor(tempCtx) {
@Override
public
java.util.List<org.apache.doris.nereids.jobs.rewrite.RewriteJob> getJobs() {
- return
com.google.common.collect.ImmutableList.of(pushDownRewrite, columnPrune,
adjustNullable,
- checkAfterRewrite);
+ return
com.google.common.collect.ImmutableList.of(pushDownRewrite, columnPrune,
mergeProjects,
+ adjustNullable, checkAfterRewrite);
}
};
boolean oldFeDebugValue =
tempCtx.getStatementContext().getConnectContext().getSessionVariable().feDebug;
diff --git a/regression-test/suites/statistics/test_hot_value.groovy
b/regression-test/suites/statistics/test_hot_value.groovy
index 8b62ff715ec..1ac3407bd4a 100644
--- a/regression-test/suites/statistics/test_hot_value.groovy
+++ b/regression-test/suites/statistics/test_hot_value.groovy
@@ -211,5 +211,112 @@ suite("test_hot_value") {
assertEquals(1, result.size())
assertEquals("' : ;a':1.0", result[0][17])
- sql """drop database if exists test_hot_value"""
+ sql """set enable_dphyp_optimizer=true"""
+ sql """
+ WITH cte1 AS (
+ SELECT
+ MAX(t0.`col_value`) as `col_value`,
+ COUNT(1) as `count`,
+ SUM(`len`) as `column_length`
+ FROM
+ (
+ SELECT
+ xxhash_64(SUBSTRING(CAST(`value1` AS STRING), 1, 1024)) AS
`hash_value`,
+ `value1` AS `col_value`,
+ LENGTH(`value1`) as `len`,
+ assert_true(
+ `value1` IS NULL
+ OR LENGTH(`value1`) <= 1024,
+ 'ANALYZE_SKIP_LONG_STRING_COLUMN'
+ ) AS `__lc`
+ FROM
+ `internal`.`test_hot_value`.`test1`
+ limit
+ 400
+ ) as `t0`
+ GROUP BY
+ `t0`.`hash_value`
+ ),
+ cte2 AS (
+ SELECT
+ CONCAT('1781162157676', '-', '-1', '-', 'value1') AS `id`,
+ 0 AS `catalog_id`,
+ 1781162157674 AS `db_id`,
+ 1781162157676 AS `tbl_id`,
+ -1 AS `idx_id`,
+ 'value1' AS `col_id`,
+ NULL AS `part_id`,
+ 10000 AS `row_count`,
+ SUM(`t1`.`count`) * COUNT(`t1`.`col_value`) / (
+ SUM(`t1`.`count`) - SUM(
+ IF(
+ `t1`.`count` = 1
+ and `t1`.`col_value` is not null,
+ 1,
+ 0
+ )
+ ) + SUM(
+ IF(
+ `t1`.`count` = 1
+ and `t1`.`col_value` is not null,
+ 1,
+ 0
+ )
+ ) * SUM(`t1`.`count`) / 10000
+ ) as `ndv`,
+ IFNULL(
+ SUM(IF(`t1`.`col_value` IS NULL, `t1`.`count`, 0)),
+ 0
+ ) * 25.0 as `null_count`,
+ SUBSTRING(CAST('0' AS STRING), 1, 1024) AS `min`,
+ SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `max`,
+ SUM(`column_length`) * 25.0 AS `data_size`,
+ NOW()
+ FROM
+ cte1 t1
+ ),
+ cte3 AS (
+ SELECT
+ IFNULL(
+ GROUP_CONCAT(
+ CONCAT(
+ REPLACE(REPLACE(t2.`col_value`, ":", "\\:"), ";",
"\\;"),
+ " :",
+ ROUND(
+ t2.`count` / (
+ SELECT
+ SUM(`count`)
+ FROM
+ cte1
+ WHERE
+ `col_value` IS NOT NULL
+ ),
+ 2
+ )
+ ),
+ " ;"
+ ),
+ ''
+ ) as `hot_value`
+ FROM
+ (
+ SELECT
+ `col_value`,
+ `count`
+ FROM
+ cte1
+ WHERE
+ `col_value` IS NOT NULL
+ ORDER BY
+ `count` DESC
+ LIMIT
+ 10
+ ) t2
+ )
+ SELECT
+ *
+ FROM
+ cte2
+ CROSS JOIN cte3;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]