This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch groupjoin
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/groupjoin by this push:
     new f8d9ad6bdcb do not merge this commit into master. it contains several 
un-safe commits for pk-fk related fix
f8d9ad6bdcb is described below

commit f8d9ad6bdcb28d84649d5b3a2ace9cc3f8ed4172
Author: englefly <[email protected]>
AuthorDate: Thu Jun 25 15:44:53 2026 +0800

    do not merge this commit into master.
    it contains several un-safe commits for pk-fk related fix
    
    fix: guard Count(*) child access with arity() check in 
PushDownAggThroughJoinOnPkFk
    
    COUNT(*) has no arguments; calling child(0) on it throws
    ArrayIndexOutOfBoundsException. Added arity() > 0 guard before
    accessing Count's child slot for FK rewrite.
    
    Regression test testCountStar() added to verify COUNT(*) with
    PK/FK join does not crash.
    
    add constraint for tpch tools
    
    feat: eliminate FD-redundant group-by keys via ANY_VALUE wrapping
    
    When a group-by key is functionally dependent on another key
    (e.g. s_suppkey -> s_name via PK) but required in output,
    remove it from GROUP BY and wrap with ANY_VALUE().
    
    Previously EliminateGroupByKey kept such keys in GROUP BY to
    preserve SQL semantics. Now they are replaced with ANY_VALUE
    wrappers in the output, allowing the group-by set to be
    minimized while keeping the column in SELECT.
    
    Public findCanBeRemovedExpressions() API preserved for backward
    compatibility. Internal logic split into FindResult with separate
    removeExpression and wrapWithAnyValue sets.
    
    Test: testEliminateByPkWithOutputNeeded verifies ANY_VALUE wrapping
    when SELECT contains an FD-redundant group-by key.
    
    fix: add PK constraint unique slots in computeUnique
    
    Added addUniqueFromPk() in LogicalOlapScan.computeUnique to read
    PrimaryKeyConstraint from ConstraintManager and register PK columns
    as unique slots. This cascades to FDs via the computeDataTrait
    mechanism, enabling EliminateGroupByKey to find FD-redundant
    group-by keys.
    
    remove @DependsRules({EliminateGroupBy.class, ColumnPruning.class})
---
 .../nereids/rules/rewrite/EliminateGroupByKey.java | 66 ++++++++++++++++++----
 .../rewrite/PushDownAggThroughJoinOnPkFk.java      |  1 +
 .../trees/plans/logical/LogicalOlapScan.java       | 32 +++++++++++
 .../rules/rewrite/EliminateGroupByKeyTest.java     | 18 ++++++
 .../rewrite/PushDownAggThroughJoinOnPkFkTest.java  | 14 +++++
 .../tpch-tools/constraints/drop-fk-constraints.sql | 11 ++++
 .../constraints/drop-pk-uk-constraints.sql         | 11 ++++
 tools/tpch-tools/constraints/tpch-fd.sql           | 25 ++++++++
 8 files changed, 166 insertions(+), 12 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java
index 4e1b3117ab5..c849e3ce164 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKey.java
@@ -17,14 +17,15 @@
 
 package org.apache.doris.nereids.rules.rewrite;
 
-import org.apache.doris.nereids.annotation.DependsRules;
 import org.apache.doris.nereids.properties.DataTrait;
 import org.apache.doris.nereids.properties.FuncDeps;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.functions.agg.AnyValue;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 
@@ -45,7 +46,6 @@ import java.util.Set;
  *  for a -> b, we can get:
  *          group by a, b, c  => group by a, c
  */
-@DependsRules({EliminateGroupBy.class, ColumnPruning.class})
 public class EliminateGroupByKey implements RewriteRuleFactory {
 
     @Override
@@ -79,28 +79,59 @@ public class EliminateGroupByKey implements 
RewriteRuleFactory {
     }
 
     LogicalAggregate<Plan> eliminateGroupByKey(LogicalAggregate<? extends 
Plan> agg, Set<Slot> requireOutput) {
-        Set<Expression> removeExpression = findCanBeRemovedExpressions(agg, 
requireOutput,
+        FindResult result = findCanBeRemovedExpressionsInternal(agg, 
requireOutput,
                 agg.child().getLogicalProperties().getTrait());
+        Set<Expression> removeExpression = result.removeExpression;
+        Set<Expression> wrapWithAnyValue = result.wrapWithAnyValue;
+
         List<Expression> newGroupExpression = new ArrayList<>();
         for (Expression expression : agg.getGroupByExpressions()) {
-            if (!removeExpression.contains(expression)) {
+            if (!removeExpression.contains(expression)
+                    && !wrapWithAnyValue.contains(expression)) {
                 newGroupExpression.add(expression);
             }
         }
         List<NamedExpression> newOutput = new ArrayList<>();
         for (NamedExpression expression : agg.getOutputExpressions()) {
-            if (!removeExpression.contains(expression)) {
-                newOutput.add(expression);
+            if (removeExpression.contains(expression)) {
+                continue;
+            }
+            if (wrapWithAnyValue.contains(expression)) {
+                // expression is FD-redundant but needed in output: wrap with 
any_value
+                expression = new Alias(expression.getExprId(),
+                        new AnyValue(expression.toSlot()), 
expression.getName());
             }
+            newOutput.add(expression);
         }
         return agg.withGroupByAndOutput(newGroupExpression, newOutput);
     }
 
     /**
-     * return removeExpression
+     * Return expressions that can be removed from both group-by and output.
+     * Kept for backward compatibility with external callers (e.g. 
PushDownAggThroughJoinOnPkFk).
      */
     public static Set<Expression> 
findCanBeRemovedExpressions(LogicalAggregate<? extends Plan> agg,
             Set<Slot> requireOutput, DataTrait dataTrait) {
+        FindResult result = findCanBeRemovedExpressionsInternal(agg, 
requireOutput, dataTrait);
+        Set<Expression> all = new HashSet<>();
+        all.addAll(result.removeExpression);
+        all.addAll(result.wrapWithAnyValue);
+        return all;
+    }
+
+    /** Result of findCanBeRemovedExpressionsInternal: two sets of 
expressions. */
+    private static class FindResult {
+        final Set<Expression> removeExpression;   // remove from group-by and 
output
+        final Set<Expression> wrapWithAnyValue;   // remove from group-by, 
wrap with ANY_VALUE in output
+
+        FindResult(Set<Expression> removeExpression, Set<Expression> 
wrapWithAnyValue) {
+            this.removeExpression = removeExpression;
+            this.wrapWithAnyValue = wrapWithAnyValue;
+        }
+    }
+
+    private static FindResult 
findCanBeRemovedExpressionsInternal(LogicalAggregate<? extends Plan> agg,
+            Set<Slot> requireOutput, DataTrait dataTrait) {
         Map<Expression, Set<Slot>> groupBySlots = new HashMap<>();
         Set<Slot> validSlots = new HashSet<>();
         for (Expression expression : agg.getGroupByExpressions()) {
@@ -110,17 +141,28 @@ public class EliminateGroupByKey implements 
RewriteRuleFactory {
 
         FuncDeps funcDeps = dataTrait.getAllValidFuncDeps(validSlots);
         if (funcDeps.isEmpty()) {
-            return new HashSet<>();
+            LOG.warn("EliminateGroupByKey no FDs found: validSlots={}, 
aggChild={}",
+                    validSlots, agg.child(0).getClass().getSimpleName());
+            return new FindResult(new HashSet<>(), new HashSet<>());
         }
+        LOG.warn("EliminateGroupByKey FDs found: validSlots={}, funcDeps 
size={}",
+                validSlots, funcDeps.size());
 
         Set<Set<Slot>> minGroupBySlots = funcDeps.eliminateDeps(new 
HashSet<>(groupBySlots.values()), requireOutput);
         Set<Expression> removeExpression = new HashSet<>();
+        Set<Expression> wrapWithAnyValue = new HashSet<>();
         for (Entry<Expression, Set<Slot>> entry : groupBySlots.entrySet()) {
-            if (!minGroupBySlots.contains(entry.getValue())
-                    && !requireOutput.containsAll(entry.getValue())) {
-                removeExpression.add(entry.getKey());
+            if (!minGroupBySlots.contains(entry.getValue())) {
+                // FD redundant: can remove from group-by
+                if (!requireOutput.containsAll(entry.getValue())) {
+                    // Not needed in output either: remove completely
+                    removeExpression.add(entry.getKey());
+                } else {
+                    // Still needed in output: remove from group-by, wrap with 
ANY_VALUE in output
+                    wrapWithAnyValue.add(entry.getKey());
+                }
             }
         }
-        return removeExpression;
+        return new FindResult(removeExpression, wrapWithAnyValue);
     }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java
index 28dcc005ce4..f1d6e0816d1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFk.java
@@ -215,6 +215,7 @@ public class PushDownAggThroughJoinOnPkFk implements 
RewriteRuleFactory {
             }
             if (expression instanceof Alias
                     && expression.child(0) instanceof Count
+                    && expression.child(0).arity() > 0
                     && expression.child(0).child(0) instanceof Slot) {
                 // count(slot) can be rewritten by circle deps
                 Slot slot = (Slot) expression.child(0).child(0);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
index 66e0dee8aa0..205319dd52a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
@@ -19,10 +19,14 @@ package org.apache.doris.nereids.trees.plans.logical;
 
 import org.apache.doris.analysis.TableScanParams;
 import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.MTMV;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.Table;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.catalog.constraint.PrimaryKeyConstraint;
+import org.apache.doris.catalog.info.TableNameInfo;
 import org.apache.doris.common.IdGenerator;
 import org.apache.doris.mtmv.MTMVCache;
 import org.apache.doris.nereids.memo.GroupExpression;
@@ -990,6 +994,34 @@ public class LogicalOlapScan extends 
LogicalCatalogRelation implements OlapScan,
             }
             builder.addUniqueSlot(uniqSlots.build());
         }
+        // Add unique slots from primary key constraints
+        addUniqueFromPk(builder);
+    }
+
+    private void addUniqueFromPk(DataTrait.Builder builder) {
+        TableIf table = getTable();
+        if (!(table instanceof Table)) {
+            return;
+        }
+        Table t = (Table) table;
+        ImmutableList<PrimaryKeyConstraint> pks = 
Env.getCurrentEnv().getConstraintManager()
+                .getPrimaryKeyConstraints(new 
TableNameInfo(t.getQualifiedDbName(), t.getName()));
+        if (pks.isEmpty()) {
+            return;
+        }
+        Set<Slot> outputSet = getOutputSet();
+        for (PrimaryKeyConstraint pk : pks) {
+            Set<String> pkNames = pk.getPrimaryKeyNames();
+            ImmutableSet.Builder<Slot> pkSlots = 
ImmutableSet.builderWithExpectedSize(pkNames.size());
+            for (Slot slot : outputSet) {
+                for (String pkName : pkNames) {
+                    if (slot.getName().equals(pkName)) {
+                        pkSlots.add(slot);
+                    }
+                }
+            }
+            builder.addUniqueSlot(pkSlots.build());
+        }
     }
 
     @Override
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyTest.java
index 7362c81e5af..dbca6cd4759 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/EliminateGroupByKeyTest.java
@@ -181,6 +181,24 @@ class EliminateGroupByKeyTest extends TestWithFeService 
implements MemoPatternMa
                                 && 
agg.getGroupByExpressions().get(0).toSql().equals("name")));
     }
 
+    @Test
+    void testEliminateByPkWithOutputNeeded() throws Exception {
+        // Regression: when a group-by key (name) is FD-redundant (id -> name)
+        // but still appears in SELECT, it should be removed from group-by
+        // and wrapped with ANY_VALUE in the output.
+        addConstraint("alter table t1 add constraint pk2 primary key (id)");
+        PlanChecker.from(connectContext)
+                .analyze("select id, name, count(*) from t1 group by id, name")
+                .rewrite()
+                .printlnTree()
+                .matches(logicalAggregate().when(agg ->
+                        agg.getGroupByExpressions().size() == 1
+                                && 
agg.getGroupByExpressions().get(0).toSql().equals("id")
+                                && 
agg.getOutputExpressions().stream().anyMatch(
+                                        e -> e.child(0) instanceof 
org.apache.doris.nereids.trees.expressions.functions.agg.AnyValue)));
+        dropConstraint("alter table t1 drop constraint pk2");
+    }
+
     @Test
     void testRepeatEliminateByEqual() {
         PlanChecker.from(connectContext)
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFkTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFkTest.java
index f2dbf786561..69340329208 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFkTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownAggThroughJoinOnPkFkTest.java
@@ -156,6 +156,20 @@ class PushDownAggThroughJoinOnPkFkTest extends 
TestWithFeService implements Memo
                 .printlnTree();
     }
 
+    @Test
+    void testCountStar() {
+        // Regression: COUNT(*) has no children, and calling child(0) on it
+        // would throw ArrayIndexOutOfBoundsException if not guarded by 
arity() > 0
+        String sql = "select count(*), pri.name from pri inner join 
foreign_not_null\n"
+                + "on pri.id1 = foreign_not_null.id2\n"
+                + "group by pri.id1, pri.name, foreign_not_null.id2";
+        PlanChecker.from(connectContext)
+                .analyze(sql)
+                .rewrite()
+                .matches(logicalJoin(logicalAggregate(), any()))
+                .printlnTree();
+    }
+
     @Test
     void testMissSlot() {
         String sql = "select count(pri.name) from pri inner join 
foreign_not_null on pri.name = foreign_not_null.name";
diff --git a/tools/tpch-tools/constraints/drop-fk-constraints.sql 
b/tools/tpch-tools/constraints/drop-fk-constraints.sql
new file mode 100644
index 00000000000..b5dd9d8da20
--- /dev/null
+++ b/tools/tpch-tools/constraints/drop-fk-constraints.sql
@@ -0,0 +1,11 @@
+-- Drop all TPC-H Foreign Key Constraints
+alter table nation drop constraint n_r_fk;
+alter table supplier drop constraint s_n_fk;
+alter table customer drop constraint c_n_fk;
+alter table partsupp drop constraint ps_p_fk;
+alter table partsupp drop constraint ps_s_fk;
+alter table orders drop constraint o_c_fk;
+alter table lineitem drop constraint l_o_fk;
+alter table lineitem drop constraint l_p_fk;
+alter table lineitem drop constraint l_s_fk;
+alter table lineitem drop constraint l_ps_fk;
diff --git a/tools/tpch-tools/constraints/drop-pk-uk-constraints.sql 
b/tools/tpch-tools/constraints/drop-pk-uk-constraints.sql
new file mode 100644
index 00000000000..163196d5e2f
--- /dev/null
+++ b/tools/tpch-tools/constraints/drop-pk-uk-constraints.sql
@@ -0,0 +1,11 @@
+-- Drop all TPC-H Primary Key and Unique Key Constraints
+alter table region drop r_pk;
+alter table nation drop n_pk;
+alter table supplier drop s_pk;
+alter table customer drop c_pk;
+alter table part drop p_pk;
+alter table partsupp drop ps_pk;
+alter table orders drop o_pk;
+alter table lineitem drop l_pk;
+alter table region drop r_uk;
+alter table nation drop n_uk;
diff --git a/tools/tpch-tools/constraints/tpch-fd.sql 
b/tools/tpch-tools/constraints/tpch-fd.sql
new file mode 100644
index 00000000000..4ac0fdc3362
--- /dev/null
+++ b/tools/tpch-tools/constraints/tpch-fd.sql
@@ -0,0 +1,25 @@
+-- TPC-H Primary Key Constraints
+alter table region add constraint r_pk primary key (r_regionkey);
+alter table nation add constraint n_pk primary key (n_nationkey);
+alter table supplier add constraint s_pk primary key (s_suppkey);
+alter table customer add constraint c_pk primary key (c_custkey);
+alter table part add constraint p_pk primary key (p_partkey);
+alter table partsupp add constraint ps_pk primary key (ps_partkey, ps_suppkey);
+alter table orders add constraint o_pk primary key (o_orderkey);
+alter table lineitem add constraint l_pk primary key (l_orderkey, 
l_linenumber);
+
+-- TPC-H Foreign Key Constraints
+alter table nation add constraint n_r_fk foreign key (n_regionkey) references 
region(r_regionkey);
+alter table supplier add constraint s_n_fk foreign key (s_nationkey) 
references nation(n_nationkey);
+alter table customer add constraint c_n_fk foreign key (c_nationkey) 
references nation(n_nationkey);
+alter table partsupp add constraint ps_p_fk foreign key (ps_partkey) 
references part(p_partkey);
+alter table partsupp add constraint ps_s_fk foreign key (ps_suppkey) 
references supplier(s_suppkey);
+alter table orders add constraint o_c_fk foreign key (o_custkey) references 
customer(c_custkey);
+alter table lineitem add constraint l_o_fk foreign key (l_orderkey) references 
orders(o_orderkey);
+alter table lineitem add constraint l_p_fk foreign key (l_partkey) references 
part(p_partkey);
+alter table lineitem add constraint l_s_fk foreign key (l_suppkey) references 
supplier(s_suppkey);
+alter table lineitem add constraint l_ps_fk foreign key (l_partkey, l_suppkey) 
references partsupp(ps_partkey, ps_suppkey);
+
+-- TPC-H Unique Key Constraints
+alter table region add constraint r_uk unique (r_name);
+alter table nation add constraint n_uk unique (n_name);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to