This is an automated email from the ASF dual-hosted git repository.

924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2915bdb6230 [fix](nereids) allow constant output column not in GROUP 
BY under only_full_group_by (#64458)
2915bdb6230 is described below

commit 2915bdb62302da12e80a01a386a21ab074d09662
Author: 924060929 <[email protected]>
AuthorDate: Thu Jun 18 15:58:01 2026 +0800

    [fix](nereids) allow constant output column not in GROUP BY under 
only_full_group_by (#64458)
    
    Problem Summary:
    
    Under `only_full_group_by` (the default), Nereids rejected any
    non-aggregated select
    expression that was neither in `GROUP BY` nor wrapped in an aggregate
    function, even when
    that expression is **constant for every input row**. MySQL accepts such
    expressions via
    functional dependency. For example:
    
    ```sql
    SELECT a AS b, b AS c FROM (SELECT 1 AS a, 2 AS b) t1 GROUP BY b, c;
    ```
    
    The output alias `b` collides with the derived-table column `b`, so
    column-first name
    resolution groups by the column `b` and leaves the constant column `a`
    ungrouped. This
    previously failed with:
    
    ```
    PROJECT expression 'a' must appear in the GROUP BY clause or be used in an 
aggregate function
    ```
    
    `NormalizeAggregate` now only rejects **non-constant** missing slots.
    Constant (uniform)
    slots flow through the existing `any_value()` wrapping (any_value of a
    constant is that
    constant), so the result stays unambiguous and the query returns `(1,
    2)`, matching MySQL.
    The same shape over a real table, where the ungrouped column is
    non-constant, is still
    rejected.
    
    ### Release note
    
    Allow a constant (uniform) non-aggregated column that is not in `GROUP
    BY` under
    `only_full_group_by`, matching MySQL functional-dependency behavior.
---
 .../nereids/rules/analysis/NormalizeAggregate.java |  58 +++++++++---
 .../analysis/OnlyFullGroupByConstantTest.java      | 100 +++++++++++++++++++++
 .../test_group_by_constant_output.out              |  10 +++
 .../test_group_by_constant_output.groovy           |  46 ++++++++++
 4 files changed, 203 insertions(+), 11 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
index 0db8339239a..2306259dec2 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.rules.analysis;
 
 import org.apache.doris.nereids.CascadesContext;
 import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.properties.DataTrait;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
@@ -66,7 +67,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 /**
  * normalize aggregate's group keys and AggregateFunction's child to 
SlotReference
@@ -299,15 +299,47 @@ public class NormalizeAggregate implements 
RewriteRuleFactory, NormalizeToSlot {
                 }
             }
             if (!missingSlotsInAggregate.isEmpty()) {
-                if (SqlModeHelper.hasOnlyFullGroupBy()) {
-                    throw new AnalysisException(String.format("PROJECT 
expression %s must appear in the GROUP BY"
-                            + " clause or be used in an aggregate function",
-                            missingSlotsInAggregate.stream()
-                                    .map(slot -> "'" + slot.getName() + "'")
-                                    .collect(Collectors.joining(", "))));
-                } else {
-                    // for any slots missing in aggregate's output, we should 
add a any_value(slot) into
-                    // aggregate's output list and slot itself into bottom 
project's output list
+                // Under only_full_group_by, a non-grouped, non-aggregated 
output column that is constant for
+                // every input row (uniform and not null) is valid (MySQL 
functional dependency), e.g.
+                //   SELECT a AS b, b AS c FROM (SELECT 1 AS a, 2 AS b) t 
GROUP BY b, c
+                // where 'a' is a constant column of the derived table. We add 
such a column to the group-by
+                // keys rather than wrapping it in any_value(): grouping by a 
constant does not change the
+                // grouping, makes the column a valid output, and changes no 
exprId, so references in ancestors
+                // (e.g. the result sink) stay valid -- any_value() would need 
a new exprId that this rule
+                // cannot propagate upward. The redundant uniform key is later 
removed by
+                // EliminateGroupByKeyByUniform. Only done when the aggregate 
already has group-by keys; for a
+                // global aggregate adding a key would change empty-input 
semantics. isUniformAndNotNull (not
+                // isUniform) excludes the nullable side of an outer join, 
which holds the uniform value on
+                // matched rows but NULL on unmatched rows of the same group.
+                Set<Slot> constantMissingSlots = new HashSet<>();
+                if (SqlModeHelper.hasOnlyFullGroupBy() && 
!normalizedGroupExprs.isEmpty()) {
+                    DataTrait childTrait = 
aggregate.child().getLogicalProperties().getTrait();
+                    for (Slot slot : missingSlotsInAggregate) {
+                        if (childTrait.isUniformAndNotNull(slot)) {
+                            constantMissingSlots.add(slot);
+                        }
+                    }
+                }
+                if (!constantMissingSlots.isEmpty()) {
+                    bottomProjects = Sets.union(bottomProjects, 
constantMissingSlots);
+                    normalizedGroupExprs = ImmutableList.<Expression>builder()
+                            
.addAll(normalizedGroupExprs).addAll(constantMissingSlots).build();
+                    for (Slot slot : constantMissingSlots) {
+                        normalizedAggOutputBuilder.add(slot);
+                    }
+                    missingSlotsInAggregate.removeAll(constantMissingSlots);
+                }
+                if (!missingSlotsInAggregate.isEmpty()) {
+                    if (SqlModeHelper.hasOnlyFullGroupBy()) {
+                        List<String> invalidSlotNames = new ArrayList<>();
+                        for (Slot slot : missingSlotsInAggregate) {
+                            invalidSlotNames.add("'" + slot.getName() + "'");
+                        }
+                        throw new AnalysisException(String.format("PROJECT 
expression %s must appear in the GROUP BY"
+                                + " clause or be used in an aggregate 
function", String.join(", ", invalidSlotNames)));
+                    }
+                    // only_full_group_by disabled: for the remaining missing 
slots add an any_value(slot) into
+                    // the aggregate's output and the slot itself into the 
bottom project's output.
                     bottomProjects = Sets.union(bottomProjects, 
missingSlotsInAggregate);
                     Map<Expression, Expression> replaceMap = Maps.newHashMap();
                     Map<String, Alias> normalizedAggExistingAlias = 
Maps.newHashMap();
@@ -345,8 +377,12 @@ public class NormalizeAggregate implements 
RewriteRuleFactory, NormalizeToSlot {
         LogicalAggregate<?> newAggregate =
                 aggregate.withNormalized(normalizedGroupExprs, 
normalizedAggOutputBuilder.build(), bottomPlan);
         ExpressionRewriteContext rewriteContext = new 
ExpressionRewriteContext(ctx);
+        // Use the current aggregate output (newAggregate already contains the 
constant group-key slots added
+        // for missing slots above), not the stale normalizedAggOutput 
snapshot, so constant group-key
+        // elimination keeps those outputs and does not leave dangling 
references in upperProjects.
         LogicalProject<Plan> project = 
eliminateGroupByConstant(groupByExprContext, rewriteContext,
-                normalizedGroupExprs, normalizedAggOutput, bottomProjects, 
aggregate, upperProjects, newAggregate);
+                normalizedGroupExprs, newAggregate.getOutputExpressions(), 
bottomProjects, aggregate,
+                upperProjects, newAggregate);
 
         if (!having.isPresent()) {
             return project;
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
new file mode 100644
index 00000000000..8639ee5d6f3
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.util.PlanChecker;
+import org.apache.doris.utframe.TestWithFeService;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Under only_full_group_by, a non-grouped, non-aggregated output column that 
is constant for every input
+ * row (uniform and not null) is accepted (MySQL functional dependency): 
NormalizeAggregate adds it to the
+ * group-by keys. Non-constant columns, and uniform-but-nullable columns from 
the nullable side of an outer
+ * join, are still rejected. All cases are verified through rewrite() (not 
only analyze()).
+ */
+public class OnlyFullGroupByConstantTest extends TestWithFeService {
+    @Override
+    protected void runBeforeAll() throws Exception {
+        createDatabase("test");
+        connectContext.setDatabase("test");
+        // columns 'a','b' deliberately collide with the output aliases used 
below
+        createTable("CREATE TABLE test.t_ab (a INT, b INT) ENGINE=OLAP\n"
+                + "DUPLICATE KEY(a) DISTRIBUTED BY HASH(a) BUCKETS 1 
PROPERTIES ('replication_num' = '1');");
+        createTable("CREATE TABLE test.t_v (k INT, v INT) ENGINE=OLAP\n"
+                + "DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1 
PROPERTIES ('replication_num' = '1');");
+    }
+
+    private void rewriteOk(String sql) {
+        Assertions.assertDoesNotThrow(() -> 
PlanChecker.from(connectContext).analyze(sql).rewrite().getPlan(),
+                "should plan without error: " + sql);
+    }
+
+    private void rejected(String sql) {
+        AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+                () -> PlanChecker.from(connectContext).analyze(sql).rewrite(), 
"should be rejected: " + sql);
+        Assertions.assertTrue(ex.getMessage().contains("must appear in the 
GROUP BY"),
+                "unexpected message: " + ex.getMessage());
+    }
+
+    @Test
+    public void aliasCollidesWithConstantColumn() {
+        // alias 'b' collides with the derived-table column 'b', so 
column-first binding groups by column 'b'
+        // and leaves the constant column 'a' ungrouped. 'a' is constant -> 
allowed.
+        rewriteOk("SELECT a as b, b as c FROM (SELECT 1 as a, 2 as b) t1 GROUP 
BY b, c");
+    }
+
+    @Test
+    public void constantColumnNotInGroupBy() {
+        // 'b' is constant and not grouped (a bare output column) -> allowed
+        rewriteOk("SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY a");
+    }
+
+    @Test
+    public void constantGroupByLiteral() {
+        // group-by key is itself a constant and gets eliminated; constant 
outputs a, b stay valid
+        rewriteOk("SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY 'g'");
+    }
+
+    @Test
+    public void nonConstantColumnStillRejected() {
+        // same shape over a real table: 'a' is non-constant and ungrouped -> 
still rejected (MySQL parity)
+        rejected("SELECT a as b, b as c FROM test.t_ab GROUP BY b, c");
+    }
+
+    @Test
+    public void outerJoinNullableUniformRejected() {
+        // r.v is uniform (LIMIT 1) but propagated through the nullable side 
of a LEFT JOIN, so it is the
+        // uniform value on matched rows and NULL on unmatched rows of the 
same group -> must stay rejected.
+        rejected("SELECT r.v FROM (SELECT 1 AS g, 1 AS k UNION ALL SELECT 1 AS 
g, 2 AS k) l "
+                + "LEFT JOIN (SELECT k, v FROM test.t_v LIMIT 1) r ON l.k = 
r.k GROUP BY l.g");
+    }
+
+    @Test
+    public void mixedReportOnlyNonConstant() {
+        // group by b2; 'k' (constant 1) is allowed, 'a' (non-constant) must 
be reported and 'k' must not.
+        AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+                () -> PlanChecker.from(connectContext).analyze(
+                        "SELECT x.a, x.k, x.b2 FROM (SELECT a, 1 as k, b as b2 
FROM test.t_ab) x GROUP BY x.b2")
+                        .rewrite());
+        Assertions.assertTrue(ex.getMessage().contains("'a'"), "should report 
'a': " + ex.getMessage());
+        Assertions.assertFalse(ex.getMessage().contains("'k'"), "should NOT 
report constant 'k': " + ex.getMessage());
+    }
+}
diff --git 
a/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out 
b/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out
new file mode 100644
index 00000000000..37b8301f343
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !const_alias_collision --
+1      2
+
+-- !const_not_in_groupby --
+1      2
+
+-- !const_group_key --
+1      2
+
diff --git 
a/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy 
b/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy
new file mode 100644
index 00000000000..7156cd073b8
--- /dev/null
+++ 
b/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_constant_output") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    // The output alias 'b' collides with the derived-table column 'b', so 
under only_full_group_by the
+    // column 'b' is grouped and the constant column 'a' is left ungrouped. 
Because 'a' is constant for
+    // every input row it is accepted (MySQL functional-dependency behavior), 
and the query returns (1, 2).
+    qt_const_alias_collision "SELECT a as b, b as c FROM (SELECT 1 as a, 2 as 
b) t1 GROUP BY b, c"
+
+    // A constant column that is a bare output and not in GROUP BY is allowed.
+    qt_const_not_in_groupby "SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP 
BY a"
+
+    // A constant group-by key is eliminated, and the constant outputs stay 
valid.
+    qt_const_group_key "SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY 
'g'"
+
+    // The same shape over a real table leaves a non-constant column 
ungrouped, which is still rejected
+    // (matching MySQL, where only constant / functionally-dependent columns 
are allowed).
+    sql "DROP TABLE IF EXISTS test_gb_const_t"
+    sql """
+        CREATE TABLE test_gb_const_t (a INT, b INT) ENGINE=OLAP
+            DUPLICATE KEY(a)
+            DISTRIBUTED BY HASH(a) BUCKETS 1
+        PROPERTIES ('replication_num' = '1')
+    """
+    test {
+        sql "SELECT a as b, b as c FROM test_gb_const_t GROUP BY b, c"
+        exception "must appear in the GROUP BY"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to