This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 75801240671 branch-4.1 [fix](nereids)Fix CTE consumer stats derivation
when producer stats are not precomputed #61074 (#61473)
75801240671 is described below
commit 758012406716e294f7a1bdb64ecfa22d25bef93c
Author: minghong <[email protected]>
AuthorDate: Thu Mar 19 14:07:48 2026 +0800
branch-4.1 [fix](nereids)Fix CTE consumer stats derivation when producer
stats are not precomputed #61074 (#61473)
### What problem does this PR solve?
pick #61074
Issue Number: close #xxx
Related PR: #xxx
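Problem Summary: Deriving stats for a CTE consumer failed with
"Stats for CTE: <id> not found" when the producer's stats had not been
computed beforehand. StatementContext now records the CTE producer itself
(cteId -> LogicalCTEProducer) instead of its precomputed Statistics, and
StatsDerive derives the producer's stats on demand when they are missing
(or when deep derivation is requested).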
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add the documentation PR link here, e.g.
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For the reviewer who merges this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add the pick label for each branch that
this PR should be merged into -->
---
.../org/apache/doris/nereids/StatementContext.java | 23 +++++++++++-----------
.../doris/nereids/rules/analysis/AnalyzeCTE.java | 4 +++-
.../rewrite/DecomposeRepeatWithPreAggregation.java | 1 +
.../doris/nereids/rules/rewrite/OrExpansion.java | 2 ++
.../nereids/rules/rewrite/RewriteCteChildren.java | 5 ++++-
.../rules/rewrite/SplitMultiDistinctStrategy.java | 4 +---
.../doris/nereids/rules/rewrite/StatsDerive.java | 15 +++++++++++---
7 files changed, 35 insertions(+), 19 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index b3db083dc2a..b86f0b1eebc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -55,6 +55,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.RelationId;
import org.apache.doris.nereids.trees.plans.TableId;
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
+import org.apache.doris.nereids.trees.plans.logical.LogicalCTEProducer;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.nereids.util.RelationUtil;
import org.apache.doris.qe.ConnectContext;
@@ -158,7 +159,7 @@ public class StatementContext implements Closeable {
private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers = new
HashMap<>();
private final Map<CTEId, Set<Slot>> cteIdToOutputIds = new HashMap<>();
- private final Map<CTEId, Statistics> cteIdToProducerStats = new
HashMap<>();
+ private final Map<CTEId, LogicalCTEProducer<? extends Plan>>
cteIdToProducer = new HashMap<>();
private final Map<RelationId, Set<Expression>> consumerIdToFilters = new
HashMap<>();
// Used to update consumer's stats
@@ -644,7 +645,7 @@ public class StatementContext implements Closeable {
return new CteEnvironmentSnapshot(
copyMapOfSets(cteIdToConsumers),
copyMapOfSets(cteIdToOutputIds),
- new HashMap<>(cteIdToProducerStats),
+ new HashMap<>(cteIdToProducer),
copyMapOfSets(consumerIdToFilters),
copyMapOfLists(cteIdToConsumerGroup),
new HashMap<>(rewrittenCteProducer),
@@ -659,8 +660,8 @@ public class StatementContext implements Closeable {
cteIdToOutputIds.clear();
cteIdToOutputIds.putAll(snapshot.cteIdToOutputIds);
- cteIdToProducerStats.clear();
- cteIdToProducerStats.putAll(snapshot.cteIdToProducerStats);
+ cteIdToProducer.clear();
+ cteIdToProducer.putAll(snapshot.cteIdToProducer);
consumerIdToFilters.clear();
consumerIdToFilters.putAll(snapshot.consumerIdToFilters);
@@ -695,7 +696,7 @@ public class StatementContext implements Closeable {
public static class CteEnvironmentSnapshot {
private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers;
private final Map<CTEId, Set<Slot>> cteIdToOutputIds;
- private final Map<CTEId, Statistics> cteIdToProducerStats;
+ private final Map<CTEId, LogicalCTEProducer<? extends Plan>>
cteIdToProducer;
private final Map<RelationId, Set<Expression>> consumerIdToFilters;
private final Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup;
private final Map<CTEId, LogicalPlan> rewrittenCteProducer;
@@ -707,14 +708,14 @@ public class StatementContext implements Closeable {
public CteEnvironmentSnapshot(
Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers,
Map<CTEId, Set<Slot>> cteIdToOutputIds,
- Map<CTEId, Statistics> cteIdToProducerStats,
+ Map<CTEId, LogicalCTEProducer<? extends Plan>> cteIdToProducer,
Map<RelationId, Set<Expression>> consumerIdToFilters,
Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup,
Map<CTEId, LogicalPlan> rewrittenCteProducer,
Map<CTEId, LogicalPlan> rewrittenCteConsumer) {
this.cteIdToConsumers = cteIdToConsumers;
this.cteIdToOutputIds = cteIdToOutputIds;
- this.cteIdToProducerStats = cteIdToProducerStats;
+ this.cteIdToProducer = cteIdToProducer;
this.consumerIdToFilters = consumerIdToFilters;
this.cteIdToConsumerGroup = cteIdToConsumerGroup;
this.rewrittenCteProducer = rewrittenCteProducer;
@@ -1117,12 +1118,12 @@ public class StatementContext implements Closeable {
return prepareStage;
}
- public Statistics getProducerStatsByCteId(CTEId id) {
- return cteIdToProducerStats.get(id);
+ public LogicalCTEProducer<? extends Plan> getCteProducerByCteId(CTEId id) {
+ return cteIdToProducer.get(id);
}
- public void setProducerStats(CTEId id, Statistics stats) {
- cteIdToProducerStats.put(id, stats);
+ public void setCteProducer(CTEId id, LogicalCTEProducer<? extends Plan>
producer) {
+ cteIdToProducer.put(id, producer);
}
public void setIsInsert(boolean isInsert) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
index 4287e8ca7de..c49b3f01c51 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/AnalyzeCTE.java
@@ -109,7 +109,9 @@ public class AnalyzeCTE extends OneAnalysisRuleFactory {
aliasQuery.withChildren(ImmutableList.of(analyzedCtePlan));
outerCteCtx = new CTEContext(cteId, logicalSubQueryAlias,
outerCteCtx);
outerCteCtx.setAnalyzedPlan(logicalSubQueryAlias);
- cteProducerPlans.add(new LogicalCTEProducer<>(cteId,
logicalSubQueryAlias));
+ LogicalCTEProducer<Plan> cteProducer = new
LogicalCTEProducer<>(cteId, logicalSubQueryAlias);
+ cascadesContext.getStatementContext().setCteProducer(cteId,
cteProducer);
+ cteProducerPlans.add(cteProducer);
}
return Pair.of(outerCteCtx, cteProducerPlans);
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
index db97df41fe2..be4066fe221 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DecomposeRepeatWithPreAggregation.java
@@ -492,6 +492,7 @@ public class DecomposeRepeatWithPreAggregation extends
DefaultPlanRewriter<Disti
}
LogicalCTEProducer<LogicalAggregate<Plan>> producer =
new LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(),
preAggClone);
+ ctx.statementContext.setCteProducer(producer.getCteId(), producer);
ctx.cteProducerList.add(producer);
producer.accept(new StatsDerive(false), new DeriveContext());
return producer;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
index 4cb07fd44fd..2048ea8f50e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/OrExpansion.java
@@ -141,10 +141,12 @@ public class OrExpansion extends
DefaultPlanRewriter<OrExpandsionContext> implem
.deepCopy((LogicalPlan) join.left(), new DeepCopierContext());
LogicalCTEProducer<? extends Plan> leftProducer = new
LogicalCTEProducer<>(
ctx.statementContext.getNextCTEId(), leftClone);
+ ctx.statementContext.setCteProducer(leftProducer.getCteId(),
leftProducer);
LogicalPlan rightClone = LogicalPlanDeepCopier.INSTANCE
.deepCopy((LogicalPlan) join.right(), new DeepCopierContext());
LogicalCTEProducer<? extends Plan> rightProducer = new
LogicalCTEProducer<>(
ctx.statementContext.getNextCTEId(), rightClone);
+ ctx.statementContext.setCteProducer(rightProducer.getCteId(),
rightProducer);
Map<Slot, Slot> leftCloneToLeft = new HashMap<>();
for (int i = 0; i < leftClone.getOutput().size(); i++) {
leftCloneToLeft.put(leftClone.getOutput().get(i),
(join.left()).getOutput().get(i));
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
index 31aeb0be1ba..0360c4aa0e7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteCteChildren.java
@@ -147,7 +147,10 @@ public class RewriteCteChildren extends
DefaultPlanRewriter<CascadesContext> imp
cascadesContext.addPlanProcesses(rewrittenCtx.getPlanProcesses());
cascadesContext.getStatementContext().getRewrittenCteProducer().put(cteProducer.getCteId(),
child);
}
- return cteProducer.withChildren(child);
+ LogicalCTEProducer<? extends Plan> rewrittenProducer =
(LogicalCTEProducer<? extends Plan>) cteProducer
+ .withChildren(child);
+
cascadesContext.getStatementContext().setCteProducer(rewrittenProducer.getCteId(),
rewrittenProducer);
+ return rewrittenProducer;
}
private LogicalPlan pushPlanUnderAnchor(LogicalPlan plan) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
index 8fc150c7055..c9585d269aa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctStrategy.java
@@ -18,7 +18,6 @@
package org.apache.doris.nereids.rules.rewrite;
import
org.apache.doris.nereids.rules.rewrite.DistinctAggStrategySelector.DistinctSelectorContext;
-import org.apache.doris.nereids.rules.rewrite.StatsDerive.DeriveContext;
import org.apache.doris.nereids.trees.copier.DeepCopierContext;
import org.apache.doris.nereids.trees.copier.LogicalPlanDeepCopier;
import org.apache.doris.nereids.trees.expressions.Alias;
@@ -60,9 +59,8 @@ public class SplitMultiDistinctStrategy {
.deepCopy(agg, new DeepCopierContext());
LogicalCTEProducer<Plan> producer = new
LogicalCTEProducer<>(ctx.statementContext.getNextCTEId(),
cloneAgg.child());
+ ctx.statementContext.setCteProducer(producer.getCteId(), producer);
ctx.cteProducerList.add(producer);
- StatsDerive derive = new StatsDerive(false);
- producer.accept(derive, new DeriveContext());
Map<Slot, Slot> originToProducerSlot = new HashMap<>();
for (int i = 0; i < agg.child().getOutput().size(); ++i) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
index e3e06c23e6b..7ad4e30bb22 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/StatsDerive.java
@@ -351,20 +351,29 @@ public class StatsDerive extends PlanVisitor<Statistics,
StatsDerive.DeriveConte
@Override
public Statistics visitLogicalCTEProducer(LogicalCTEProducer<? extends
Plan> cteProducer, DeriveContext context) {
+ // Fallback registration: ensure cteId -> producer mapping is always
available
+ // even if some upstream rewrite path misses explicit registration.
+
ConnectContext.get().getStatementContext().setCteProducer(cteProducer.getCteId(),
cteProducer);
Statistics prodStats = cteProducer.child().accept(this, context);
StatisticsBuilder builder = new StatisticsBuilder(prodStats);
builder.setWidthInJoinCluster(1);
Statistics stats = builder.build();
cteProducer.setStatistics(stats);
-
ConnectContext.get().getStatementContext().setProducerStats(cteProducer.getCteId(),
stats);
return stats;
}
@Override
public Statistics visitLogicalCTEConsumer(LogicalCTEConsumer cteConsumer,
DeriveContext context) {
CTEId cteId = cteConsumer.getCteId();
- Statistics prodStats =
ConnectContext.get().getStatementContext().getProducerStatsByCteId(cteId);
- Preconditions.checkArgument(prodStats != null, String.format("Stats
for CTE: %s not found", cteId));
+ LogicalCTEProducer<? extends Plan> cteProducer =
ConnectContext.get().getStatementContext()
+ .getCteProducerByCteId(cteId);
+ Preconditions.checkState(cteProducer != null,
+ String.format("CTE producer for CTE: %s not found", cteId));
+ Statistics prodStats = cteProducer.getStats();
+ if (prodStats == null || deepDerive) {
+ prodStats = cteProducer.accept(this, context);
+ }
+ Preconditions.checkState(prodStats != null, String.format("Stats for
CTE: %s not found", cteId));
Statistics consumerStats = new Statistics(prodStats.getRowCount(), 1,
new HashMap<>());
for (Slot slot : cteConsumer.getOutput()) {
Slot prodSlot = cteConsumer.getProducerSlot(slot);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]