This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ad1ad7fea91 [performance](agg) support count push agg in no null
column (#58103)
ad1ad7fea91 is described below
commit ad1ad7fea91d7bcdb38b1be6633825e82ab29dd7
Author: HappenLee <[email protected]>
AuthorDate: Tue Nov 18 19:23:50 2025 +0800
[performance](agg) support count push agg in no null column (#58103)
### What problem does this PR solve?
Before this change:
```
select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
+-----------+-----------------+-----------------+
| count(1) | max(l_orderkey) | min(l_quantity) |
+-----------+-----------------+-----------------+
| 600037902 | 600000000 | 1.00 |
+-----------+-----------------+-----------------+
1 row in set (1.89 sec)
```
After this change:
```
select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
+-----------+-----------------+-----------------+
| count(1) | max(l_orderkey) | min(l_quantity) |
+-----------+-----------------+-----------------+
| 600037902 | 600000000 | 1.00 |
+-----------+-----------------+-----------------+
1 row in set (0.83 sec)
```
---
.../rules/implementation/AggregateStrategies.java | 53 ++++++++++++++++------
.../explain/test_pushdown_explain.groovy | 50 ++++++++++++++++++++
2 files changed, 90 insertions(+), 13 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 8cf96ef3072..1634b9208d7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -61,6 +61,7 @@ import org.apache.doris.qe.ConnectContext;
import com.google.common.collect.ImmutableList;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -558,28 +559,54 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
}
Set<AggregateFunction> aggregateFunctions =
aggregate.getAggregateFunctions();
- Set<Class<? extends AggregateFunction>> functionClasses =
aggregateFunctions
- .stream()
- .map(AggregateFunction::getClass)
- .collect(Collectors.toSet());
-
+ // Use for loop to replace Stream API
+ Set<Class<? extends AggregateFunction>> functionClasses = new
HashSet<>();
Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg =
PushDownAggOp.supportedFunctions();
- if (!supportedAgg.keySet().containsAll(functionClasses)) {
- return canNotPush;
+
+ boolean containsCount = false;
+ Set<SlotReference> checkNullSlots = new HashSet<>();
+
+ // Single loop through aggregateFunctions to handle multiple logic
+ for (AggregateFunction function : aggregateFunctions) {
+ Class<? extends AggregateFunction> functionClass =
function.getClass();
+ functionClasses.add(functionClass);
+ // Check if any function has arity > 1
+ if (function.arity() > 1) {
+ return canNotPush;
+ }
+
+ // Check if contains Count function
+ if (functionClass.equals(Count.class)) {
+ containsCount = true;
+ if (!function.getArguments().isEmpty()) {
+ Expression arg0 = function.getArguments().get(0);
+ if (arg0 instanceof SlotReference) {
+ checkNullSlots.add((SlotReference) arg0);
+ } else if (arg0 instanceof Cast) {
+ Expression child0 = arg0.child(0);
+ if (child0 instanceof SlotReference) {
+ checkNullSlots.add((SlotReference) child0);
+ }
+ }
+ }
+ }
+
+ // Check if function is supported by supportedAgg
+ if (!supportedAgg.containsKey(functionClass)) {
+ return canNotPush;
+ }
}
+
if (logicalScan instanceof LogicalOlapScan) {
LogicalOlapScan logicalOlapScan = (LogicalOlapScan) logicalScan;
KeysType keysType = logicalOlapScan.getTable().getKeysType();
- if (functionClasses.contains(Count.class) && keysType !=
KeysType.DUP_KEYS) {
+ if (containsCount && keysType != KeysType.DUP_KEYS) {
return canNotPush;
}
- if (functionClasses.contains(Count.class) &&
logicalOlapScan.isDirectMvScan()) {
+ if (containsCount && logicalOlapScan.isDirectMvScan()) {
return canNotPush;
}
}
- if (aggregateFunctions.stream().anyMatch(fun -> fun.arity() > 1)) {
- return canNotPush;
- }
// TODO: refactor this to process slot reference or expression together
boolean onlyContainsSlotOrNumericCastSlot = aggregateFunctions.stream()
@@ -665,7 +692,7 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
// NULL value behavior in `count` function is zero, so
// we should not use row_count to speed up query. the col
// must be not null
- if (column.isAllowNull()) {
+ if (column.isAllowNull() && checkNullSlots.contains(slot)) {
return canNotPush;
}
}
diff --git
a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
index b53dd40a964..25053f24023 100644
--- a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
+++ b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
@@ -74,6 +74,56 @@ suite("test_pushdown_explain") {
contains "pushAggOp=NONE"
}
+ // Test cases for NULL column handling in count pushdown optimization
+ sql "DROP TABLE IF EXISTS test_null_columns"
+ sql """ CREATE TABLE `test_null_columns` (
+ `id` INT NOT NULL COMMENT 'ID',
+ `nullable_col` VARCHAR(11) NULL COMMENT 'Nullable column',
+ `non_nullable_col` VARCHAR(11) NOT NULL COMMENT 'Non-nullable column'
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 48
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "min_load_replica_num" = "-1",
+ "is_being_synced" = "false",
+ "colocate_with" = "groupa1",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ ); """
+ sql """ insert into test_null_columns values(1, NULL, "value1"); """
+ sql """ insert into test_null_columns values(2, NULL, "value2"); """
+ sql """ insert into test_null_columns values(3, "not_null", "value3"); """
+
+ // Test count(1) and count(*) with NULL columns - should push Count
optimization
+ explain {
+ sql("select count(1) from test_null_columns;")
+ contains "pushAggOp=COUNT"
+ }
+ explain {
+ sql("select count(*) from test_null_columns;")
+ contains "pushAggOp=COUNT"
+ }
+
+ explain {
+ sql("select count(non_nullable_col), min(non_nullable_col),
max(non_nullable_col) from test_null_columns;")
+ contains "pushAggOp=MIX"
+ }
+ explain {
+ sql("select count(), min(non_nullable_col), max(non_nullable_col) from
test_null_columns;")
+ contains "pushAggOp=MIX"
+ }
+ explain {
+ sql("select count(*), min(non_nullable_col), max(non_nullable_col)
from test_null_columns;")
+ contains "pushAggOp=MIX"
+ }
+ explain {
+ sql("select count(nullable_col), min(nullable_col), max(nullable_col)
from test_null_columns;")
+ contains "pushAggOp=NONE"
+ }
+
sql "DROP TABLE IF EXISTS table_unique0"
sql """
CREATE TABLE `table_unique0` (
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]