This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f2e4ff11e94 [fix](distinct)fix distinct info is lost when union's
children are distinct (#60085)
f2e4ff11e94 is described below
commit f2e4ff11e94143a4df956373916df4e5b934dfe3
Author: starocean999 <[email protected]>
AuthorDate: Fri Jan 23 15:42:51 2026 +0800
[fix](distinct)fix distinct info is lost when union's children are distinct
(#60085)
In bindSetOperation, we merge LogicalSetOperation's children if they are
ProjectMergeable, but the LogicalProject node may be distinct and may not yet
have been transformed to LogicalAggregate by ProjectWithDistinctToAggregate. So
we can't merge projects this early, and should leave the merge operation
to the rewrite phase.
Related PR: https://github.com/apache/doris/pull/43546
---
.../nereids/rules/analysis/BindExpression.java | 7 +++--
.../rules/expression/ExpressionRewrite.java | 30 +++++++++++++++++++---
.../project_distinct_to_agg.out | 4 +++
...ush_project_into_union_with_unique_function.out | 2 +-
.../project_distinct_to_agg.groovy | 17 ++++++++++++
5 files changed, 52 insertions(+), 8 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
index 56394cabbe5..d29f4b43962 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindExpression.java
@@ -92,7 +92,6 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias;
import org.apache.doris.nereids.trees.plans.logical.LogicalTVFRelation;
import org.apache.doris.nereids.trees.plans.logical.LogicalUsingJoin;
-import org.apache.doris.nereids.trees.plans.logical.ProjectProcessor;
import org.apache.doris.nereids.trees.plans.visitor.InferPlanOutputAlias;
import org.apache.doris.nereids.types.BooleanType;
import org.apache.doris.nereids.types.DataType;
@@ -418,9 +417,9 @@ public class BindExpression implements AnalysisRuleFactory {
if
(childrenProjections.get(i).stream().allMatch(SlotReference.class::isInstance))
{
newChild = child;
} else {
- List<NamedExpression> parentProject =
childrenProjections.get(i);
- newChild = ProjectProcessor.tryProcessProject(parentProject,
child)
- .orElseGet(() -> new LogicalProject<>(parentProject,
child));
+ // projects can only be merged if it's not distinct
+ // so we should merge projects after
ProjectWithDistinctToAggregate
+ newChild = new LogicalProject<>(childrenProjections.get(i),
child);
}
newChildren.add(newChild);
childrenOutputs.add((List<SlotReference>) (List)
newChild.getOutput());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
index d35ceaaba28..22c5d29043b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/ExpressionRewrite.java
@@ -51,6 +51,7 @@ import
org.apache.doris.nereids.trees.plans.logical.LogicalSetOperation;
import org.apache.doris.nereids.trees.plans.logical.LogicalSink;
import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
import org.apache.doris.nereids.trees.plans.logical.LogicalTopN;
+import org.apache.doris.nereids.trees.plans.logical.LogicalUnion;
import org.apache.doris.nereids.trees.plans.logical.LogicalWindow;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.nereids.util.JoinUtils;
@@ -384,10 +385,33 @@ public class ExpressionRewrite implements
RewriteRuleFactory {
changed |= result.changed;
newSlotsList.add(result.result);
}
- if (!changed) {
- return setOperation;
+ if (setOperation instanceof LogicalUnion) {
+ LogicalUnion logicalUnion = (LogicalUnion) setOperation;
+ List<List<NamedExpression>> constantExprsList =
logicalUnion.getConstantExprsList();
+ ImmutableList.Builder<List<NamedExpression>>
newConstantListBuilder = ImmutableList.builder();
+ for (List<NamedExpression> oneRowProject :
constantExprsList) {
+ Builder<NamedExpression> rewrittenExprs = ImmutableList
+ .builderWithExpectedSize(oneRowProject.size());
+ for (NamedExpression project : oneRowProject) {
+ NamedExpression newProject = (NamedExpression)
rewriter.rewrite(project, context);
+ if (!changed && !project.deepEquals(newProject)) {
+ changed = true;
+ }
+ rewrittenExprs.add(newProject);
+ }
+ newConstantListBuilder.add(rewrittenExprs.build());
+ }
+ if (!changed) {
+ return setOperation;
+ }
+ return
logicalUnion.withChildrenAndConstExprsList(setOperation.children(),
newSlotsList,
+ newConstantListBuilder.build());
+ } else {
+ if (!changed) {
+ return setOperation;
+ }
+ return
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
}
- return
setOperation.withChildrenAndTheirOutputs(setOperation.children(), newSlotsList);
})
.toRule(RuleType.REWRITE_SET_OPERATION_EXPRESSION);
}
diff --git
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
index 3b94bae1305..f6e0f7b6cca 100644
---
a/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
+++
b/regression-test/data/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.out
@@ -51,3 +51,7 @@ PhysicalResultSink
-- !agg_result --
7
+-- !select --
+1 1
+1 2
+
diff --git
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
index d1844e88c74..7af0a74a43a 100644
---
a/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
+++
b/regression-test/data/nereids_rules_p0/unique_function/push_project_into_union_with_unique_function.out
@@ -1,7 +1,7 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !push_down_1 --
PhysicalResultSink
---PhysicalUnion(constantExprsList=[[(100.0 + random()) AS `b`, (100.0 +
random()) AS `c`], [(200.0 + random()) AS `b`, (200.0 + random()) AS `c`]])
+--PhysicalUnion(constantExprsList=[[(random() + 100.0) AS `b`, (random() +
100.0) AS `c`], [(random() + 200.0) AS `b`, (random() + 200.0) AS `c`]])
-- !push_down_2 --
PhysicalResultSink
diff --git
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
index f6afb9539a9..c0d12ddcd2c 100644
---
a/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
+++
b/regression-test/suites/nereids_rules_p0/project_distinct_to_agg/project_distinct_to_agg.groovy
@@ -29,4 +29,21 @@ suite('project_distinct_to_agg') {
explainAndOrderResult 'agg', "select distinct sum(a) from ${tbl}"
sql "drop table if exists ${tbl} force"
+
+ qt_select '''
+ (
+ SELECT
+ DISTINCT t_alias.u_col, cast(1 as bigint)
+ FROM (select [1,1,1] k1) as t
+ lateral view explode(k1) t_alias as u_col
+ )
+ UNION ALL (
+ SELECT
+ DISTINCT 1, t_alias.u_col
+ FROM (select [2,2,2] k1) as t
+ lateral view explode(k1) t_alias as u_col
+ )
+ ORDER BY
+ 1, 2;
+ '''
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]