This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ad1ad7fea91 [performance](agg) support count push agg in no null column (#58103)
ad1ad7fea91 is described below

commit ad1ad7fea91d7bcdb38b1be6633825e82ab29dd7
Author: HappenLee <[email protected]>
AuthorDate: Tue Nov 18 19:23:50 2025 +0800

    [performance](agg) support count push agg in no null column (#58103)
    
    ### What problem does this PR solve?
    
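    Previously, aggregate pushdown was rejected when any column
    referenced by the aggregate functions was nullable, even if that
    column was only used by min/max. Now the not-null requirement is
    applied only to columns that appear as the argument of count(), so
    queries like the one below can be pushed down.
    
    Before: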
    ```
    select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
    +-----------+-----------------+-----------------+
    | count(1)  | max(l_orderkey) | min(l_quantity) |
    +-----------+-----------------+-----------------+
    | 600037902 |       600000000 |            1.00 |
    +-----------+-----------------+-----------------+
    1 row in set (1.89 sec)
    
    ```
    
    After:
    ```
    select count(1), max(l_orderkey), min(l_quantity) from lineitem_bak;
    +-----------+-----------------+-----------------+
    | count(1)  | max(l_orderkey) | min(l_quantity) |
    +-----------+-----------------+-----------------+
    | 600037902 |       600000000 |            1.00 |
    +-----------+-----------------+-----------------+
    1 row in set (0.83 sec)
    ```
---
 .../rules/implementation/AggregateStrategies.java  | 53 ++++++++++++++++------
 .../explain/test_pushdown_explain.groovy           | 50 ++++++++++++++++++++
 2 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 8cf96ef3072..1634b9208d7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -61,6 +61,7 @@ import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.collect.ImmutableList;
 
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -558,28 +559,54 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
         }
 
         Set<AggregateFunction> aggregateFunctions = 
aggregate.getAggregateFunctions();
-        Set<Class<? extends AggregateFunction>> functionClasses = 
aggregateFunctions
-                .stream()
-                .map(AggregateFunction::getClass)
-                .collect(Collectors.toSet());
-
+        // Use for loop to replace Stream API
+        Set<Class<? extends AggregateFunction>> functionClasses = new 
HashSet<>();
         Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg = 
PushDownAggOp.supportedFunctions();
-        if (!supportedAgg.keySet().containsAll(functionClasses)) {
-            return canNotPush;
+
+        boolean containsCount = false;
+        Set<SlotReference> checkNullSlots = new HashSet<>();
+
+        // Single loop through aggregateFunctions to handle multiple logic
+        for (AggregateFunction function : aggregateFunctions) {
+            Class<? extends AggregateFunction> functionClass = 
function.getClass();
+            functionClasses.add(functionClass);
+            // Check if any function has arity > 1
+            if (function.arity() > 1) {
+                return canNotPush;
+            }
+
+            // Check if contains Count function
+            if (functionClass.equals(Count.class)) {
+                containsCount = true;
+                if (!function.getArguments().isEmpty()) {
+                    Expression arg0 = function.getArguments().get(0);
+                    if (arg0 instanceof SlotReference) {
+                        checkNullSlots.add((SlotReference) arg0);
+                    } else if (arg0 instanceof Cast) {
+                        Expression child0 = arg0.child(0);
+                        if (child0 instanceof SlotReference) {
+                            checkNullSlots.add((SlotReference) child0);
+                        }
+                    }
+                }
+            }
+
+            // Check if function is supported by supportedAgg
+            if (!supportedAgg.containsKey(functionClass)) {
+                return canNotPush;
+            }
         }
+
         if (logicalScan instanceof LogicalOlapScan) {
             LogicalOlapScan logicalOlapScan = (LogicalOlapScan) logicalScan;
             KeysType keysType = logicalOlapScan.getTable().getKeysType();
-            if (functionClasses.contains(Count.class) && keysType != 
KeysType.DUP_KEYS) {
+            if (containsCount && keysType != KeysType.DUP_KEYS) {
                 return canNotPush;
             }
-            if (functionClasses.contains(Count.class) && 
logicalOlapScan.isDirectMvScan()) {
+            if (containsCount && logicalOlapScan.isDirectMvScan()) {
                 return canNotPush;
             }
         }
-        if (aggregateFunctions.stream().anyMatch(fun -> fun.arity() > 1)) {
-            return canNotPush;
-        }
 
         // TODO: refactor this to process slot reference or expression together
         boolean onlyContainsSlotOrNumericCastSlot = aggregateFunctions.stream()
@@ -665,7 +692,7 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
                 // NULL value behavior in `count` function is zero, so
                 // we should not use row_count to speed up query. the col
                 // must be not null
-                if (column.isAllowNull()) {
+                if (column.isAllowNull() && checkNullSlots.contains(slot)) {
                     return canNotPush;
                 }
             }
diff --git a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
index b53dd40a964..25053f24023 100644
--- a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
+++ b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
@@ -74,6 +74,56 @@ suite("test_pushdown_explain") {
         contains "pushAggOp=NONE"
     }
 
+    // Test cases for NULL column handling in count pushdown optimization
+    sql "DROP TABLE IF EXISTS test_null_columns"
+    sql """ CREATE TABLE `test_null_columns` (
+        `id` INT NOT NULL COMMENT 'ID',
+        `nullable_col` VARCHAR(11) NULL COMMENT 'Nullable column',
+        `non_nullable_col` VARCHAR(11) NOT NULL COMMENT 'Non-nullable column'
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`id`)
+    DISTRIBUTED BY HASH(`id`) BUCKETS 48
+    PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "min_load_replica_num" = "-1",
+        "is_being_synced" = "false",
+        "colocate_with" = "groupa1",
+        "storage_format" = "V2",
+        "light_schema_change" = "true",
+        "disable_auto_compaction" = "false",
+        "enable_single_replica_compaction" = "false"
+    ); """
+    sql """ insert into test_null_columns values(1, NULL, "value1"); """
+    sql """ insert into test_null_columns values(2, NULL, "value2"); """
+    sql """ insert into test_null_columns values(3, "not_null", "value3"); """
+    
+    // Test count(1) and count(*) with NULL columns - should push Count 
optimization
+    explain {
+        sql("select count(1) from test_null_columns;")
+        contains "pushAggOp=COUNT"
+    }
+    explain {
+        sql("select count(*) from test_null_columns;")
+        contains "pushAggOp=COUNT"
+    }
+
+    explain {
+        sql("select count(non_nullable_col), min(non_nullable_col), 
max(non_nullable_col) from test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(), min(non_nullable_col), max(non_nullable_col) from 
test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(*), min(non_nullable_col), max(non_nullable_col) 
from test_null_columns;")
+        contains "pushAggOp=MIX"
+    }
+    explain {
+        sql("select count(nullable_col), min(nullable_col), max(nullable_col) 
from test_null_columns;")
+        contains "pushAggOp=NONE"
+    }
+
     sql "DROP TABLE IF EXISTS table_unique0"
     sql """ 
         CREATE TABLE `table_unique0` (


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to