This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 68bd4a1a96 [opt](Nereids) check multiple distinct functions that
cannot be transformed into multi_distinct (#21626)
68bd4a1a96 is described below
commit 68bd4a1a96b988fab31ffc3a1aeeda831347e1bd
Author: 谢健 <[email protected]>
AuthorDate: Mon Jul 24 16:34:17 2023 +0800
[opt](Nereids) check multiple distinct functions that cannot be transformed
into multi_distinct (#21626)
This commit introduces a transformation for SQL queries that contain
multiple distinct aggregate functions. When an aggregate has more than one
distinct argument (that is, its DISTINCT functions are applied to different
columns), those functions are converted into multi_distinct functions for
more efficient handling.
Example:
```
SELECT COUNT(DISTINCT c1), SUM(DISTINCT c2) FROM tbl GROUP BY c3
-- Transformed to
SELECT MULTI_DISTINCT_COUNT(c1), MULTI_DISTINCT_SUM(c2) FROM tbl GROUP BY c3
```
The following functions can be transformed:
- COUNT
- SUM
- AVG
- GROUP_CONCAT
If such a query uses a distinct aggregate function that is not in the list
above (for example, MAX(DISTINCT ...)), an error is now reported during the
optimization phase.
To guarantee that no such function slips through, a final check
(CheckMultiDistinct) runs after the rewriting phase.
---
.../doris/nereids/jobs/executor/Rewriter.java | 2 +
.../nereids/rules/rewrite/CheckMultiDistinct.java | 62 ++++++++++++++++++++++
.../suites/nereids_syntax_p0/analyze_agg.groovy | 5 ++
3 files changed, 69 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 3fb3208e5e..8e2f0260bb 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -40,6 +40,7 @@ import org.apache.doris.nereids.rules.rewrite.CTEInline;
import
org.apache.doris.nereids.rules.rewrite.CheckAndStandardizeWindowFunctionAndFrame;
import org.apache.doris.nereids.rules.rewrite.CheckDataTypes;
import org.apache.doris.nereids.rules.rewrite.CheckMatchExpression;
+import org.apache.doris.nereids.rules.rewrite.CheckMultiDistinct;
import org.apache.doris.nereids.rules.rewrite.CollectFilterAboveConsumer;
import org.apache.doris.nereids.rules.rewrite.CollectProjectAboveConsumer;
import org.apache.doris.nereids.rules.rewrite.ColumnPruning;
@@ -291,6 +292,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
bottomUp(
new
ExpressionRewrite(CheckLegalityAfterRewrite.INSTANCE),
new CheckMatchExpression(),
+ new CheckMultiDistinct(),
new CheckAfterRewrite()
)
),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java
new file mode 100644
index 0000000000..4488a94b8d
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/CheckMultiDistinct.java
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Avg;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import org.apache.doris.nereids.trees.expressions.functions.agg.GroupConcat;
+import org.apache.doris.nereids.trees.expressions.functions.agg.Sum;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+
+import com.google.common.collect.ImmutableSet;
+
+/**
+ * If there are multiple distinct aggregate functions that cannot
+ * be transformed into multi_distinct, an error is reported.
+ * The following functions can be transformed into multi_distinct:
+ * - count -> MULTI_DISTINCT_COUNT
+ * - sum -> MULTI_DISTINCT_SUM
+ * - avg -> MULTI_DISTINCT_AVG
+ * - group_concat -> MULTI_DISTINCT_GROUP_CONCAT
+ */
+public class CheckMultiDistinct extends OneRewriteRuleFactory {
+ private final ImmutableSet<Class<? extends AggregateFunction>>
supportedFunctions =
+ ImmutableSet.of(Count.class, Sum.class, Avg.class,
GroupConcat.class);
+
+ @Override
+ public Rule build() {
+ return logicalAggregate().then(agg ->
checkDistinct(agg)).toRule(RuleType.CHECK_ANALYSIS);
+ }
+
+ private LogicalAggregate checkDistinct(LogicalAggregate<? extends Plan>
aggregate) {
+ if (aggregate.getDistinctArguments().size() > 1) {
+
+ for (AggregateFunction func : aggregate.getAggregateFunctions()) {
+ if (func.isDistinct() &&
!supportedFunctions.contains(func.getClass())) {
+ throw new AnalysisException(func.toString() + " can't
support multi distinct.");
+ }
+ }
+ }
+ return aggregate;
+ }
+}
diff --git a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
index e5184234f3..2f0b0d01c6 100644
--- a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
+++ b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
@@ -68,4 +68,9 @@ suite("analyze_agg") {
tt2.d,
tt2.c;
"""
+
+ test {
+ sql "select count(distinct t2.id), max(distinct t2.c) from t2"
+ exception "max(DISTINCT c#2) can't support multi distinct."
+ }
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]