This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 4105d6cc574 branch-4.0: [performance](agg) support count push agg in no null column #58103 (#58133)
4105d6cc574 is described below

commit 4105d6cc57454b8e4b11402905e438622f9a8553
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 19 14:22:30 2025 +0800

    branch-4.0: [performance](agg) support count push agg in no null column #58103 (#58133)
    
    Cherry-picked from #58103
    
    Co-authored-by: HappenLee <[email protected]>
---
 .../rules/implementation/AggregateStrategies.java  | 53 ++++++++++++++++------
 .../explain/test_pushdown_explain.groovy           | 50 ++++++++++++++++++++
 2 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 8e52c8aa28c..a5304c6e11b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -61,6 +61,7 @@ import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.collect.ImmutableList;
 
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -558,28 +559,54 @@ public class AggregateStrategies implements ImplementationRuleFactory {
         }
 
         Set<AggregateFunction> aggregateFunctions = 
aggregate.getAggregateFunctions();
-        Set<Class<? extends AggregateFunction>> functionClasses = 
aggregateFunctions
-                .stream()
-                .map(AggregateFunction::getClass)
-                .collect(Collectors.toSet());
-
+        // Use for loop to replace Stream API
+        Set<Class<? extends AggregateFunction>> functionClasses = new 
HashSet<>();
         Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg = 
PushDownAggOp.supportedFunctions();
-        if (!supportedAgg.keySet().containsAll(functionClasses)) {
-            return canNotPush;
+
+        boolean containsCount = false;
+        Set<SlotReference> checkNullSlots = new HashSet<>();
+
+        // Single loop through aggregateFunctions to handle multiple logic
+        for (AggregateFunction function : aggregateFunctions) {
+            Class<? extends AggregateFunction> functionClass = 
function.getClass();
+            functionClasses.add(functionClass);
+            // Check if any function has arity > 1
+            if (function.arity() > 1) {
+                return canNotPush;
+            }
+
+            // Check if contains Count function
+            if (functionClass.equals(Count.class)) {
+                containsCount = true;
+                if (!function.getArguments().isEmpty()) {
+                    Expression arg0 = function.getArguments().get(0);
+                    if (arg0 instanceof SlotReference) {
+                        checkNullSlots.add((SlotReference) arg0);
+                    } else if (arg0 instanceof Cast) {
+                        Expression child0 = arg0.child(0);
+                        if (child0 instanceof SlotReference) {
+                            checkNullSlots.add((SlotReference) child0);
+                        }
+                    }
+                }
+            }
+
+            // Check if function is supported by supportedAgg
+            if (!supportedAgg.containsKey(functionClass)) {
+                return canNotPush;
+            }
         }
+
         if (logicalScan instanceof LogicalOlapScan) {
             LogicalOlapScan logicalOlapScan = (LogicalOlapScan) logicalScan;
             KeysType keysType = logicalOlapScan.getTable().getKeysType();
-            if (functionClasses.contains(Count.class) && keysType != 
KeysType.DUP_KEYS) {
+            if (containsCount && keysType != KeysType.DUP_KEYS) {
                 return canNotPush;
             }
-            if (functionClasses.contains(Count.class) && 
logicalOlapScan.isDirectMvScan()) {
+            if (containsCount && logicalOlapScan.isDirectMvScan()) {
                 return canNotPush;
             }
         }
-        if (aggregateFunctions.stream().anyMatch(fun -> fun.arity() > 1)) {
-            return canNotPush;
-        }
 
         // TODO: refactor this to process slot reference or expression together
         boolean onlyContainsSlotOrNumericCastSlot = aggregateFunctions.stream()
@@ -665,7 +692,7 @@ public class AggregateStrategies implements ImplementationRuleFactory {
                 // NULL value behavior in `count` function is zero, so
                 // we should not use row_count to speed up query. the col
                 // must be not null
-                if (column.isAllowNull()) {
+                if (column.isAllowNull() && checkNullSlots.contains(slot)) {
                     return canNotPush;
                 }
             }
diff --git a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
index b53dd40a964..25053f24023 100644
--- a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
+++ b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
@@ -74,6 +74,56 @@ suite("test_pushdown_explain") {
         contains "pushAggOp=NONE"
     }
 
+    // Test cases for NULL column handling in count pushdown optimization
+    sql "DROP TABLE IF EXISTS test_null_columns"
+    sql """ CREATE TABLE `test_null_columns` (
+        `id` INT NOT NULL COMMENT 'ID',
+        `nullable_col` VARCHAR(11) NULL COMMENT 'Nullable column',
+        `non_nullable_col` VARCHAR(11) NOT NULL COMMENT 'Non-nullable column'
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    DISTRIBUTED BY HASH(`id`) BUCKETS 48
+    PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "min_load_replica_num" = "-1",
+        "is_being_synced" = "false",
+        "colocate_with" = "groupa1",
+        "storage_format" = "V2",
+        "light_schema_change" = "true",
+        "disable_auto_compaction" = "false",
+        "enable_single_replica_compaction" = "false"
+    ); """
+    sql """ insert into test_null_columns values(1, NULL, "value1"); """
+    sql """ insert into test_null_columns values(2, NULL, "value2"); """
+    sql """ insert into test_null_columns values(3, "not_null", "value3"); """
+    
+    // Test count(1) and count(*) with NULL columns - should push Count 
optimization
+    explain {
+        sql("select count(1) from test_null_columns;")
+        contains "pushAggOp=COUNT"
+    }
+    explain {
+        sql("select count(*) from test_null_columns;")
+        contains "pushAggOp=COUNT"
+    }
+
+    explain {
+        sql("select count(non_nullable_col), min(non_nullable_col), 
max(non_nullable_col) from test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(), min(non_nullable_col), max(non_nullable_col) from 
test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(*), min(non_nullable_col), max(non_nullable_col) 
from test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(nullable_col), min(nullable_col), max(nullable_col) 
from test_null_columns;")
+        contains "pushAggOp=NONE"
+    }
+
     sql "DROP TABLE IF EXISTS table_unique0"
     sql """ 
         CREATE TABLE `table_unique0` (


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to