This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 427091c640e branch-4.0: [fix](CostBasedRewriteJob)restore
StatementContext.rewrittenCteProducer in CostBasedRewriteJob #59517 (#60742)
427091c640e is described below
commit 427091c640ed112b78fd032d7c73f7d79bc03243
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat Feb 14 10:58:33 2026 +0800
branch-4.0: [fix](CostBasedRewriteJob)restore
StatementContext.rewrittenCteProducer in CostBasedRewriteJob #59517 (#60742)
Cherry-picked from #59517
Co-authored-by: minghong <[email protected]>
---
.../org/apache/doris/nereids/StatementContext.java | 160 +++++++++++++++++----
.../nereids/jobs/rewrite/CostBasedRewriteJob.java | 27 ++--
.../costbasedrewrite_producer.groovy | 45 ++++++
3 files changed, 196 insertions(+), 36 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 2b67e1b363c..92fcd8a1592 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -105,7 +105,8 @@ public class StatementContext implements Closeable {
* indicate where the table come from.
* QUERY: in query sql directly
* INSERT_TARGET: the insert target table
- * MTMV: mtmv itself and its related tables witch do not belong to this
sql, but maybe used in rewrite by mtmv.
+ * MTMV: mtmv itself and its related tables witch do not belong to this
sql, but
+ * maybe used in rewrite by mtmv.
*/
public enum TableFrom {
QUERY,
@@ -122,7 +123,8 @@ public class StatementContext implements Closeable {
private final Map<String, Supplier<Object>> contextCacheMap =
Maps.newLinkedHashMap();
private OriginStatement originStatement;
- // NOTICE: we set the plan parsed by DorisParser to parsedStatement and if
the plan is command, create a
+ // NOTICE: we set the plan parsed by DorisParser to parsedStatement and if
the
+ // plan is command, create a
// LogicalPlanAdapter with the logical plan in the command.
private StatementBase parsedStatement;
private ColumnAliasGenerator columnAliasGenerator;
@@ -134,10 +136,14 @@ public class StatementContext implements Closeable {
private boolean hasNondeterministic = false;
- // hasUnknownColStats true if any column stats in the tables used by this
sql is unknown
- // the algorithm to derive plan when column stats are unknown is
implemented in cascading framework, not in dphyper.
- // And hence, when column stats are unknown, even if the tables used by a
sql is more than
- // MAX_TABLE_COUNT_USE_CASCADES_JOIN_REORDER, join reorder should choose
cascading framework.
+ // hasUnknownColStats true if any column stats in the tables used by this
sql is
+ // unknown
+ // the algorithm to derive plan when column stats are unknown is
implemented in
+ // cascading framework, not in dphyper.
+ // And hence, when column stats are unknown, even if the tables used by a
sql is
+ // more than
+ // MAX_TABLE_COUNT_USE_CASCADES_JOIN_REORDER, join reorder should choose
+ // cascading framework.
// Thus hasUnknownColStats has higher priority than isDpHyp
private boolean hasUnknownColStats = false;
@@ -160,11 +166,13 @@ public class StatementContext implements Closeable {
private final Set<String> viewDdlSqlSet = Sets.newHashSet();
private final SqlCacheContext sqlCacheContext;
- // generate for next id for prepared statement's placeholders, which is
connection level
+ // generate for next id for prepared statement's placeholders, which is
+ // connection level
private final IdGenerator<PlaceholderId> placeHolderIdGenerator =
PlaceholderId.createGenerator();
// relation id to placeholders for prepared statement, ordered by
placeholder id
private final Map<PlaceholderId, Expression> idToPlaceholderRealExpr = new
TreeMap<>();
- // map placeholder id to comparison slot, which will used to replace
conjuncts directly
+ // map placeholder id to comparison slot, which will used to replace
conjuncts
+ // directly
private final Map<PlaceholderId, SlotReference> idToComparisonSlot = new
TreeMap<>();
// collect all hash join conditions to compute node connectivity in join
graph
@@ -173,7 +181,8 @@ public class StatementContext implements Closeable {
private final List<Hint> hints = new ArrayList<>();
private boolean hintForcePreAggOn = false;
- // the columns in Plan.getExpressions(), such as columns in join condition
or filter condition, group by expression
+ // the columns in Plan.getExpressions(), such as columns in join condition
or
+ // filter condition, group by expression
private final Set<SlotReference> keySlots = Sets.newHashSet();
private BitSet disableRules;
@@ -217,19 +226,24 @@ public class StatementContext implements Closeable {
private final List<Column> insertTargetSchema = new ArrayList<>();
// for create view support in nereids
- // key is the start and end position of the sql substring that needs to be
replaced,
+ // key is the start and end position of the sql substring that needs to be
+ // replaced,
// and value is the new string used for replacement.
- private final TreeMap<Pair<Integer, Integer>, String> indexInSqlToString
- = new TreeMap<>(new Pair.PairComparator<>());
- // Record table id mapping, the key is the hash code of union catalogId,
databaseId, tableId
+ private final TreeMap<Pair<Integer, Integer>, String> indexInSqlToString =
new TreeMap<>(
+ new Pair.PairComparator<>());
+ // Record table id mapping, the key is the hash code of union catalogId,
+ // databaseId, tableId
// the value is the auto-increment id in the cascades context
private final Map<List<String>, TableId> tableIdMapping = new
LinkedHashMap<>();
- // Record the materialization statistics by id which is used for cost
estimation.
- // Maybe return null, which means the id according statistics should calc
normally rather than getting
+ // Record the materialization statistics by id which is used for cost
+ // estimation.
+ // Maybe return null, which means the id according statistics should calc
+ // normally rather than getting
// form this map
private final Map<RelationId, Statistics> relationIdToStatisticsMap = new
LinkedHashMap<>();
- // Indicates the query is short-circuited in both plan and execution
phase, typically
+ // Indicates the query is short-circuited in both plan and execution phase,
+ // typically
// for high speed/concurrency point queries
private boolean isShortCircuitQuery;
@@ -251,8 +265,8 @@ public class StatementContext implements Closeable {
private long materializedViewRewriteDuration = 0L;
// Record used table and it's used partitions
- private final Multimap<List<String>, Pair<RelationId, Set<String>>>
tableUsedPartitionNameMap =
- HashMultimap.create();
+ private final Multimap<List<String>, Pair<RelationId, Set<String>>>
tableUsedPartitionNameMap = HashMultimap
+ .create();
private final Map<Integer, Integer> relationIdToCommonTableIdMap = new
HashMap<>();
// Record mtmv and valid partitions map because this is time-consuming
behavior
@@ -270,8 +284,11 @@ public class StatementContext implements Closeable {
// this record the rewritten plan by mv in RBO phase
private final List<Plan> rewrittenPlansByMv = new ArrayList<>();
private boolean forceRecordTmpPlan = false;
- // this record the rule in
PreMaterializedViewRewriter.NEED_PRE_REWRITE_RULE_TYPES if is applied
successfully
- // or not, if success and in PreRewriteStrategy.FOR_IN_ROB or
PreRewriteStrategy.TRY_IN_ROB, mv
+ // this record the rule in
+ // PreMaterializedViewRewriter.NEED_PRE_REWRITE_RULE_TYPES if is applied
+ // successfully
+ // or not, if success and in PreRewriteStrategy.FOR_IN_ROB or
+ // PreRewriteStrategy.TRY_IN_ROB, mv
// would be written in RBO phase
private final BitSet needPreMvRewriteRuleMasks = new
BitSet(RuleType.SENTINEL.ordinal());
// if needed to rewrite in RBO phase, this would be set true
@@ -286,7 +303,8 @@ public class StatementContext implements Closeable {
private Optional<Map<TableIf, Set<Expression>>> mvRefreshPredicates =
Optional.empty();
- // For Iceberg rewrite operations: store file scan tasks to be used by
IcebergScanNode
+ // For Iceberg rewrite operations: store file scan tasks to be used by
+ // IcebergScanNode
// TODO: better solution?
private List<org.apache.iceberg.FileScanTask> icebergRewriteFileScanTasks
= null;
// For Iceberg rewrite operations: control whether to use GATHER
distribution
@@ -315,7 +333,8 @@ public class StatementContext implements Closeable {
exprIdGenerator = ExprId.createGenerator(initialId);
if (connectContext != null && connectContext.getSessionVariable() !=
null) {
if
(CacheAnalyzer.canUseSqlCache(connectContext.getSessionVariable())) {
- // cannot set the queryId here because the queryId for the
current query is set in the subsequent steps.
+ // cannot set the queryId here because the queryId for the
current query is set
+ // in the subsequent steps.
this.sqlCacheContext = new SqlCacheContext(
connectContext.getCurrentUserIdentity());
if (originStatement != null) {
@@ -345,7 +364,7 @@ public class StatementContext implements Closeable {
* cache view info to avoid view's def and sql mode changed before lock it.
*
* @param qualifiedViewName full qualified name of the view
- * @param view view need to cache info
+ * @param view view need to cache info
*
* @return view info, first is view's def sql, second is view's sql mode
*/
@@ -563,8 +582,8 @@ public class StatementContext implements Closeable {
public ColumnAliasGenerator getColumnAliasGenerator() {
return columnAliasGenerator == null
- ? columnAliasGenerator = new ColumnAliasGenerator()
- : columnAliasGenerator;
+ ? columnAliasGenerator = new ColumnAliasGenerator()
+ : columnAliasGenerator;
}
public String generateColumnName() {
@@ -611,6 +630,91 @@ public class StatementContext implements Closeable {
return rewrittenCteConsumer;
}
+ /**
+ * Snapshot current CTE-related environment for temporary
rewrite/optimization.
+ */
+ public CteEnvironmentSnapshot cacheCteEnvironment() {
+ return new CteEnvironmentSnapshot(
+ copyMapOfSets(cteIdToConsumers),
+ copyMapOfSets(cteIdToOutputIds),
+ new HashMap<>(cteIdToProducerStats),
+ copyMapOfSets(consumerIdToFilters),
+ copyMapOfLists(cteIdToConsumerGroup),
+ new HashMap<>(rewrittenCteProducer),
+ new HashMap<>(rewrittenCteConsumer));
+ }
+
+ /** Restore CTE-related environment from snapshot. */
+ public void restoreCteEnvironment(CteEnvironmentSnapshot snapshot) {
+ cteIdToConsumers.clear();
+ cteIdToConsumers.putAll(snapshot.cteIdToConsumers);
+
+ cteIdToOutputIds.clear();
+ cteIdToOutputIds.putAll(snapshot.cteIdToOutputIds);
+
+ cteIdToProducerStats.clear();
+ cteIdToProducerStats.putAll(snapshot.cteIdToProducerStats);
+
+ consumerIdToFilters.clear();
+ consumerIdToFilters.putAll(snapshot.consumerIdToFilters);
+
+ cteIdToConsumerGroup.clear();
+ cteIdToConsumerGroup.putAll(snapshot.cteIdToConsumerGroup);
+
+ rewrittenCteProducer.clear();
+ rewrittenCteProducer.putAll(snapshot.rewrittenCteProducer);
+
+ rewrittenCteConsumer.clear();
+ rewrittenCteConsumer.putAll(snapshot.rewrittenCteConsumer);
+ }
+
+ private static <K, V> Map<K, Set<V>> copyMapOfSets(Map<K, Set<V>> source) {
+ Map<K, Set<V>> copied = new HashMap<>();
+ for (Map.Entry<K, Set<V>> entry : source.entrySet()) {
+ copied.put(entry.getKey(), new HashSet<>(entry.getValue()));
+ }
+ return copied;
+ }
+
+ private static <K, V> Map<K, List<V>> copyMapOfLists(Map<K, List<V>>
source) {
+ Map<K, List<V>> copied = new HashMap<>();
+ for (Map.Entry<K, List<V>> entry : source.entrySet()) {
+ copied.put(entry.getKey(), new ArrayList<>(entry.getValue()));
+ }
+ return copied;
+ }
+
+ /** Holder for cached CTE-related environment. */
+ public static class CteEnvironmentSnapshot {
+ private final Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers;
+ private final Map<CTEId, Set<Slot>> cteIdToOutputIds;
+ private final Map<CTEId, Statistics> cteIdToProducerStats;
+ private final Map<RelationId, Set<Expression>> consumerIdToFilters;
+ private final Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup;
+ private final Map<CTEId, LogicalPlan> rewrittenCteProducer;
+ private final Map<CTEId, LogicalPlan> rewrittenCteConsumer;
+
+ /**
+ * cte related structures in StatementContext
+ */
+ public CteEnvironmentSnapshot(
+ Map<CTEId, Set<LogicalCTEConsumer>> cteIdToConsumers,
+ Map<CTEId, Set<Slot>> cteIdToOutputIds,
+ Map<CTEId, Statistics> cteIdToProducerStats,
+ Map<RelationId, Set<Expression>> consumerIdToFilters,
+ Map<CTEId, List<Pair<Multimap<Slot, Slot>, Group>>>
cteIdToConsumerGroup,
+ Map<CTEId, LogicalPlan> rewrittenCteProducer,
+ Map<CTEId, LogicalPlan> rewrittenCteConsumer) {
+ this.cteIdToConsumers = cteIdToConsumers;
+ this.cteIdToOutputIds = cteIdToOutputIds;
+ this.cteIdToProducerStats = cteIdToProducerStats;
+ this.consumerIdToFilters = consumerIdToFilters;
+ this.cteIdToConsumerGroup = cteIdToConsumerGroup;
+ this.rewrittenCteProducer = rewrittenCteProducer;
+ this.rewrittenCteConsumer = rewrittenCteConsumer;
+ }
+ }
+
public void addViewDdlSql(String ddlSql) {
this.viewDdlSqlSet.add(ddlSql);
}
@@ -659,6 +763,7 @@ public class StatementContext implements Closeable {
/**
* get used mv hint by hint name
+ *
* @param useMvName hint name, can either be USE_MV or NO_USE_MV
* @return optional of useMvHint
*/
@@ -819,7 +924,7 @@ public class StatementContext implements Closeable {
* Obtain snapshot information of mvcc
*
* @param mvccTableInfo mvccTableInfo
- * @param snapshot snapshot
+ * @param snapshot snapshot
*/
public void setSnapshot(MvccTableInfo mvccTableInfo, MvccSnapshot
snapshot) {
snapshots.put(mvccTableInfo, snapshot);
@@ -1032,7 +1137,8 @@ public class StatementContext implements Closeable {
/**
* Set file scan tasks for Iceberg rewrite operations.
- * This allows IcebergScanNode to use specific file scan tasks instead of
scanning the full table.
+ * This allows IcebergScanNode to use specific file scan tasks instead of
+ * scanning the full table.
*/
public void
setIcebergRewriteFileScanTasks(List<org.apache.iceberg.FileScanTask> tasks) {
this.icebergRewriteFileScanTasks = tasks;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java
index fd5b7a3f04e..453b805a3b3 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/rewrite/CostBasedRewriteJob.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.jobs.rewrite;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.StatementContext;
import org.apache.doris.nereids.cost.Cost;
import org.apache.doris.nereids.hint.Hint;
import org.apache.doris.nereids.hint.UseCboRuleHint;
@@ -28,6 +29,7 @@ import org.apache.doris.nereids.jobs.executor.Rewriter;
import org.apache.doris.nereids.memo.GroupExpression;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor;
import org.apache.doris.nereids.trees.plans.logical.LogicalPlan;
import org.apache.doris.qe.ConnectContext;
@@ -58,7 +60,8 @@ public class CostBasedRewriteJob implements RewriteJob {
@Override
public void execute(JobContext jobContext) {
- // checkHint.first means whether it use hint and checkHint.second
means what kind of hint it used
+ // checkHint.first means whether it use hint and checkHint.second
means what
+ // kind of hint it used
Pair<Boolean, Hint> checkHint = checkRuleHint();
// this means it no_use_cbo_rule(xxx) hint
if (checkHint.first && checkHint.second == null) {
@@ -69,14 +72,18 @@ public class CostBasedRewriteJob implements RewriteJob {
CascadesContext applyCboRuleCtx =
CascadesContext.newCurrentTreeContext(currentCtx);
// execute cbo rule on one candidate
Rewriter.getCteChildrenRewriter(applyCboRuleCtx,
rewriteJobs).execute();
+ Plan applyCboPlan = applyCboRuleCtx.getRewritePlan();
if
(skipCboRuleCtx.getRewritePlan().deepEquals(applyCboRuleCtx.getRewritePlan())) {
// this means rewrite do not do anything
return;
}
+ StatementContext.CteEnvironmentSnapshot cteEnvSnapshot =
currentCtx.getStatementContext().cacheCteEnvironment();
// compare two candidates
Optional<Pair<Cost, GroupExpression>> skipCboRuleCost =
getCost(currentCtx, skipCboRuleCtx, jobContext);
+ currentCtx.getStatementContext().restoreCteEnvironment(cteEnvSnapshot);
Optional<Pair<Cost, GroupExpression>> appliedCboRuleCost =
getCost(currentCtx, applyCboRuleCtx, jobContext);
+ currentCtx.getStatementContext().restoreCteEnvironment(cteEnvSnapshot);
// If one of them optimize failed, just return
if (!skipCboRuleCost.isPresent() || !appliedCboRuleCost.isPresent()) {
LOG.warn("Cbo rewrite execute failed on sql: {}, jobs are {}, plan
is {}.",
@@ -92,19 +99,20 @@ public class CostBasedRewriteJob implements RewriteJob {
}
return;
}
- // If the candidate applied cbo rule is better, replace the original
plan with it.
+ // If the candidate applied cbo rule is better, replace the original
plan with
+ // it.
if (appliedCboRuleCost.get().first.getValue() <
skipCboRuleCost.get().first.getValue()) {
- currentCtx.addPlanProcesses(applyCboRuleCtx.getPlanProcesses());
- currentCtx.setRewritePlan(applyCboRuleCtx.getRewritePlan());
+ currentCtx.setRewritePlan(applyCboPlan);
}
}
/**
* check if we have use rule hint or no use rule hint
- * return an optional object which checkHint.first means whether it
use hint
- * and checkHint.second means what kind of hint it used
- * example, when we use *+ no_use_cbo_rule(xxx) * the optional would
be (true, false)
- * which means it use hint and the hint forbid this kind of rule
+ * return an optional object which checkHint.first means whether it use
hint
+ * and checkHint.second means what kind of hint it used
+ * example, when we use *+ no_use_cbo_rule(xxx) * the optional would be
(true,
+ * false)
+ * which means it use hint and the hint forbid this kind of rule
*/
private Pair<Boolean, Hint> checkRuleHint() {
Pair<Boolean, Hint> checkResult = Pair.of(false, null);
@@ -134,7 +142,8 @@ public class CostBasedRewriteJob implements RewriteJob {
}
/**
- * for these rules we need use_cbo_rule hint to enable it, otherwise it
would be close by default
+ * for these rules we need use_cbo_rule hint to enable it, otherwise it
would be
+ * close by default
*/
private static boolean checkBlackList(RuleType ruleType) {
List<RuleType> ruleWhiteList = new ArrayList<>(Arrays.asList(
diff --git
a/regression-test/suites/nereids_p0/cte/costbasedrewrite_producer/costbasedrewrite_producer.groovy
b/regression-test/suites/nereids_p0/cte/costbasedrewrite_producer/costbasedrewrite_producer.groovy
new file mode 100644
index 00000000000..128f35d7232
--- /dev/null
+++
b/regression-test/suites/nereids_p0/cte/costbasedrewrite_producer/costbasedrewrite_producer.groovy
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("costbasedrewrite_producer") {
+ sql """
+ drop table if exists t1;
+
+ create table t1(a1 int,b1 int)
+ properties("replication_num" = "1");
+
+ insert into t1 values(1,2);
+
+ drop table if exists t2;
+
+ create table t2(a2 int,b2 int)
+ properties("replication_num" = "1");
+
+ insert into t2 values(1,3);
+ """
+
+ sql"""
+ with cte1 as (
+ select t1.a1, t1.b1
+ from t1
+ where t1.a1 > 0 and not exists (select distinct t2.b2 from t2 where t1.a1
= t2.a2 or t1.b1 = t2.a2)
+ ),
+ cte2 as (
+ select * from cte1 union select * from cte1)
+ select * from cte2 join t1 on cte2.a1 = t1.a1;
+
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]