This is an automated email from the ASF dual-hosted git repository.
zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 0071f17c87 [CALCITE-7083] RelMdDistinctRowCount aggregates implementation problems
0071f17c87 is described below
commit 0071f17c870858ff66129368cf7b70fb1d072c88
Author: Silun Dong <[email protected]>
AuthorDate: Sat Jul 5 00:39:29 2025 +0800
[CALCITE-7083] RelMdDistinctRowCount aggregates implementation problems
---
.../rel/metadata/RelMdDistinctRowCount.java | 8 +++++-
.../org/apache/calcite/test/RelMetadataTest.java | 31 ++++++++++++++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
index b5609acf40..7058dcbb4d 100644
--- a/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
+++ b/core/src/main/java/org/apache/calcite/rel/metadata/RelMdDistinctRowCount.java
@@ -172,12 +172,18 @@ protected RelMdDistinctRowCount() {}
return 1D;
}
}
+ // the result of the aggregation function is difficult to infer, so if the groupKey contains
+ // the aggregated column, return the full row count of Aggregate. This is the most conservative
+ // estimate, and the actual ndv will not be greater than it.
+ if (groupKey.anyMatch(key -> key >= rel.getGroupCount())) {
+ return mq.getRowCount(rel);
+ }
// determine which predicates can be applied on the child of the
// aggregate
final List<RexNode> notPushable = new ArrayList<>();
final List<RexNode> pushable = new ArrayList<>();
RelOptUtil.splitFilters(
- rel.getGroupSet(),
+ ImmutableBitSet.range(rel.getGroupCount()),
predicate,
pushable,
notPushable);
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index 6fe7b02553..5a5cfcf4f5 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -4463,6 +4463,37 @@ static <K, V> List<String> toSortedStringList(Map<K, V> map) {
assertThat(ndv, is(100.0));
}
+ /** Test case of
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-7083">[CALCITE-7083]
+ * RelMdDistinctRowCount aggregates implementation problems</a>. */
+ @Test void testAggregateDistinctRowcount() {
+ // test case of groupKey contains aggregated column
+ sql("select name, sum(sal) from (values ('b', 10), ('b', 20), ('b', 30)) as t(name, sal) "
+ + "group by name")
+ .assertThatDistinctRowCount(bitSetOf(1), is(1d));
+ sql("select name, sum(sal) from (values ('a', 10), ('b', 10), ('c', 10)) as t(name, sal) "
+ + "group by name")
+ .assertThatDistinctRowCount(bitSetOf(1), is(3d));
+ // test case of predicate is not null
+ // LogicalFilter(condition=[=($0, 2)])
+ // LogicalAggregate(group=[{1}], sumsal=[SUM($0)])
+ // LogicalValues(tuples=[[{ 10, 1 }, { 20, 2 }, { 30, 3 }]])
+ // since the selectivity of the predicate is guessed, this NDV does not represent the actual
+ // value. This case is only to verify that the predicate will be correctly pushed down
+ // to the Value
+ fixture()
+ .withRelFn(b -> {
+ b.values(new String[]{"sal", "id"}, 10, 1, 20, 2, 30, 3);
+ RelBuilder.GroupKey groupKey = b.groupKey(1);
+ b.aggregate(groupKey, b.sum(false, "sumsal", b.field(0)));
+ RexNode predicate =
+ b.equals(b.field(0), b.literal(2));
+ RelNode filter = b.filter(predicate).build();
+ return filter;
+ })
+ .assertThatDistinctRowCount(bitSetOf(0), is(1d));
+ }
+
private void checkInputForCollationAndLimit(RelOptCluster cluster,
RelOptTable empTable,
RelOptTable deptTable) {
final RexBuilder rexBuilder = cluster.getRexBuilder();