This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 03a9b939e88 [feat](storage) add read_null_map support for COUNT_NULL
push down aggregate (#60996)
03a9b939e88 is described below
commit 03a9b939e88a88df5cd37bff672430e8833c9be5
Author: HappenLee <[email protected]>
AuthorDate: Mon Mar 16 10:49:38 2026 +0800
[feat](storage) add read_null_map support for COUNT_NULL push down
aggregate (#60996)
Add support for pushing COUNT(nullable_col) down to the storage layer by
reading the column's nullmap pages directly, instead of refusing the
push-down whenever the counted column is nullable.
Changes:
- BE (storage/segment/column_reader.*): add virtual
  ColumnIterator::read_null_map(size_t* n, NullMap&) with implementations
  for FileColumnIterator (decodes null runs page by page) and for
  Map/Struct/Array iterators (reads from their _null_iterator, falling
  back to an all-zero map for schema-changed segments with no null data);
  VariantColumnReader forwards to its inner iterator.
- BE (storage/iterator/vgeneric_iterators.*): VStatisticsIterator handles
  the new TPushAggOp::COUNT_NULL — column iterators are fully init()'ed
  and seeked to ordinal 0 once, then next_batch() fills real nullmaps
  sequentially (no per-batch re-seek, which would double-count rows in
  segments larger than one batch).
- FE Nereids (AggregateStrategies.java): choose PushDownAggOp.COUNT_NULL
  when COUNT targets a nullable column; refuse push-down when mixed with
  COUNT(*), MIN/MAX, or external file scans (parquet/orc have no nullmap
  pages to read).
- FE (PhysicalPlanTranslator.java, PhysicalStorageLayerAggregate.java):
  translate the new COUNT_NULL enum value to thrift.
- gensrc/thrift (PlanNodes.thrift): add TPushAggOp.COUNT_NULL = 5.
- Regression tests: new count_null_push_down suite; test_pushdown_explain
  now expects pushAggOp=COUNT_NULL for count(nullable_col).
---
be/src/storage/iterator/vgeneric_iterators.cpp | 51 ++++++++++
be/src/storage/iterator/vgeneric_iterators.h | 1 +
be/src/storage/segment/column_reader.cpp | 110 +++++++++++++++++++++
be/src/storage/segment/column_reader.h | 17 +++-
.../segment/variant/variant_column_reader.h | 4 +
.../glue/translator/PhysicalPlanTranslator.java | 3 +
.../rules/implementation/AggregateStrategies.java | 76 ++++++++++----
.../physical/PhysicalStorageLayerAggregate.java | 2 +-
gensrc/thrift/PlanNodes.thrift | 3 +-
.../query_p0/aggregate/count_null_push_down.out | 10 ++
.../explain/test_pushdown_explain.groovy | 2 +-
.../query_p0/aggregate/count_null_push_down.groovy | 44 +++++++++
12 files changed, 297 insertions(+), 26 deletions(-)
diff --git a/be/src/storage/iterator/vgeneric_iterators.cpp
b/be/src/storage/iterator/vgeneric_iterators.cpp
index 267df0bb715..f80b57d1afb 100644
--- a/be/src/storage/iterator/vgeneric_iterators.cpp
+++ b/be/src/storage/iterator/vgeneric_iterators.cpp
@@ -25,6 +25,7 @@
#include "core/block/block.h"
#include "core/block/column_with_type_and_name.h"
#include "core/column/column.h"
+#include "core/column/column_nullable.h"
#include "core/data_type/data_type.h"
#include "storage/cache/schema_cache.h"
#include "storage/field.h"
@@ -45,6 +46,19 @@ using namespace ErrorCode;
Status VStatisticsIterator::init(const StorageReadOptions& opts) {
if (!_init) {
_push_down_agg_type_opt = opts.push_down_agg_type_opt;
+ _tablet_schema = opts.tablet_schema;
+
+ // COUNT_NULL needs to actually read nullmap pages, so the column
iterators must be
+ // fully initialized with a valid ColumnIteratorOptions (file_reader,
stats, io_ctx).
+ // Other agg types (COUNT, MINMAX, MIX) only use zone-map metadata and
never open
+ // pages, so they do not need init.
+ const bool need_iter_init = (_push_down_agg_type_opt ==
TPushAggOp::COUNT_NULL);
+ ColumnIteratorOptions iter_opts {
+ .use_page_cache = opts.use_page_cache,
+ .file_reader = _segment->file_reader().get(),
+ .stats = opts.stats,
+ .io_ctx = opts.io_ctx,
+ };
for (size_t i = 0; i < _schema.num_column_ids(); i++) {
auto cid = _schema.column_id(i);
@@ -52,6 +66,16 @@ Status VStatisticsIterator::init(const StorageReadOptions&
opts) {
if (_column_iterators_map.count(unique_id) < 1) {
RETURN_IF_ERROR(_segment->new_column_iterator(
opts.tablet_schema->column(cid),
&_column_iterators_map[unique_id], &opts));
+ if (need_iter_init) {
+
RETURN_IF_ERROR(_column_iterators_map[unique_id]->init(iter_opts));
+ // Seek to ordinal 0 once during init so that the page
iterator
+ // is properly positioned for sequential read_null_map()
calls
+ // in next_batch(). We must NOT seek again in next_batch()
—
+ // doing so would reset the iterator to ordinal 0 on every
batch
+ // and cause rows to be re-read/double-counted for segments
+ // larger than MAX_ROW_SIZE_IN_COUNT (65535) rows.
+
RETURN_IF_ERROR(_column_iterators_map[unique_id]->seek_to_ordinal(0));
+ }
}
_column_iterators.push_back(_column_iterators_map[unique_id].get());
}
@@ -76,6 +100,33 @@ Status VStatisticsIterator::next_batch(Block* block) {
for (auto& column : columns) {
column->insert_many_defaults(size);
}
+ } else if (_push_down_agg_type_opt == TPushAggOp::COUNT_NULL) {
+ for (int i = 0; i < (int)columns.size(); ++i) {
+ auto& column = columns[i];
+ auto cid = _schema.column_id(i);
+ auto& tablet_column = _tablet_schema->column(cid);
+
+ if (tablet_column.is_nullable()) {
+ auto& nullable_col = assert_cast<ColumnNullable&>(*column);
+ auto& nested_col = nullable_col.get_nested_column();
+
+ // Read the real nullmap for this column from the current
position.
+ // Do NOT seek back to ordinal 0 here: the column iterator
already
+ // starts at ordinal 0 after init(), and each call to
read_null_map
+ // advances it sequentially. Seeking to 0 on every
next_batch() call
+ // would cause large segments (> MAX_ROW_SIZE_IN_COUNT
rows) to have
+ // their first portion re-read and counted multiple times,
producing
+ // a result higher than the true non-null count.
+ size_t read_rows = size;
+ auto& null_map_data = nullable_col.get_null_map_data();
+
RETURN_IF_ERROR(_column_iterators[i]->read_null_map(&read_rows, null_map_data));
+
+ // nested column needs one default value per row
+ nested_col.insert_many_defaults(size);
+ } else {
+ column->insert_many_defaults(size);
+ }
+ }
} else {
for (int i = 0; i < columns.size(); ++i) {
RETURN_IF_ERROR(_column_iterators[i]->next_batch_of_zone_map(&size,
columns[i]));
diff --git a/be/src/storage/iterator/vgeneric_iterators.h
b/be/src/storage/iterator/vgeneric_iterators.h
index 449d5cb1606..326cb750f1a 100644
--- a/be/src/storage/iterator/vgeneric_iterators.h
+++ b/be/src/storage/iterator/vgeneric_iterators.h
@@ -63,6 +63,7 @@ public:
private:
std::shared_ptr<Segment> _segment;
const Schema& _schema;
+ std::shared_ptr<TabletSchema> _tablet_schema;
size_t _target_rows = 0;
size_t _output_rows = 0;
bool _init = false;
diff --git a/be/src/storage/segment/column_reader.cpp
b/be/src/storage/segment/column_reader.cpp
index d037f4fa84e..5badb4a7008 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -1252,6 +1252,27 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
return Status::OK();
}
+Status MapFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_map_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
StructFileColumnIterator::StructFileColumnIterator(
@@ -1469,6 +1490,27 @@ Status StructFileColumnIterator::set_access_paths(
return Status::OK();
}
+Status StructFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_struct_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
Status OffsetFileColumnIterator::init(const ColumnIteratorOptions& opts) {
RETURN_IF_ERROR(_offset_iterator->init(opts));
@@ -1727,6 +1769,27 @@ Status ArrayFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_a
return Status::OK();
}
+Status ArrayFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_array_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
FileColumnIterator::FileColumnIterator(std::shared_ptr<ColumnReader> reader) :
_reader(reader) {}
@@ -1901,6 +1964,53 @@ Status FileColumnIterator::next_batch(size_t* n,
MutableColumnPtr& dst, bool* ha
return Status::OK();
}
+Status FileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+
+ null_map.resize(*n);
+ size_t remaining = *n;
+ size_t offset = 0;
+
+ while (remaining > 0) {
+ if (!_page.has_remaining()) {
+ bool eos = false;
+ RETURN_IF_ERROR(_load_next_page(&eos));
+ if (eos) {
+ break;
+ }
+ }
+
+ if (!_page.has_null) {
+ size_t nrows_in_page = std::min(remaining, _page.remaining());
+ memset(null_map.data() + offset, 0, nrows_in_page);
+ offset += nrows_in_page;
+ _current_ordinal += nrows_in_page;
+ _page.offset_in_page += nrows_in_page;
+ remaining -= nrows_in_page;
+ continue;
+ }
+
+ size_t nrows_in_page = std::min(remaining, _page.remaining());
+ size_t this_run = 0;
+ while (this_run < nrows_in_page) {
+ bool is_null = false;
+ size_t run_len = _page.null_decoder.GetNextRun(&is_null,
nrows_in_page - this_run);
+ memset(null_map.data() + offset + this_run, is_null ? 1 : 0,
run_len);
+ this_run += run_len;
+ }
+
+ offset += nrows_in_page;
+ _page.offset_in_page += nrows_in_page;
+ _current_ordinal += nrows_in_page;
+ remaining -= nrows_in_page;
+ }
+
+ *n = offset;
+ return Status::OK();
+}
+
Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t
count,
MutableColumnPtr& dst) {
if (_reading_flag == ReadingFlag::SKIP_READING) {
diff --git a/be/src/storage/segment/column_reader.h
b/be/src/storage/segment/column_reader.h
index 57a4bc4d74a..4571c54c106 100644
--- a/be/src/storage/segment/column_reader.h
+++ b/be/src/storage/segment/column_reader.h
@@ -30,8 +30,9 @@
#include "common/config.h"
#include "common/logging.h"
-#include "common/status.h" // for Status
-#include "core/column/column_array.h" // ColumnArray
+#include "common/status.h" // for Status
+#include "core/column/column_array.h" // ColumnArray
+#include "core/column/column_nullable.h" // NullMap
#include "core/data_type/data_type.h"
#include "io/cache/cached_remote_file_reader.h"
#include "io/fs/file_reader_writer_fwd.h"
@@ -360,6 +361,10 @@ public:
virtual bool is_all_dict_encoding() const { return false; }
+ virtual Status read_null_map(size_t* n, NullMap& null_map) {
+ return Status::NotSupported("read_null_map not implemented");
+ }
+
virtual Status set_access_paths(const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths&
predicate_access_paths) {
if (!predicate_access_paths.empty()) {
@@ -458,6 +463,8 @@ public:
bool is_all_dict_encoding() const override { return _is_all_dict_encoding;
}
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
Status init_prefetcher(const SegmentPrefetchParams& params) override;
void collect_prefetchers(
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
@@ -583,6 +590,8 @@ public:
void remove_pruned_sub_iterators() override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _map_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -624,6 +633,8 @@ public:
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
PrefetcherInitMethod init_method) override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _struct_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -663,6 +674,8 @@ public:
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
PrefetcherInitMethod init_method) override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _array_reader = nullptr;
std::unique_ptr<OffsetFileColumnIterator> _offset_iterator;
diff --git a/be/src/storage/segment/variant/variant_column_reader.h
b/be/src/storage/segment/variant/variant_column_reader.h
index a9982a29170..23ec79b786d 100644
--- a/be/src/storage/segment/variant/variant_column_reader.h
+++ b/be/src/storage/segment/variant/variant_column_reader.h
@@ -452,6 +452,10 @@ public:
ordinal_t get_current_ordinal() const override { return
_inner_iter->get_current_ordinal(); }
+ Status read_null_map(size_t* n, NullMap& null_map) override {
+ return _inner_iter->read_null_map(n, null_map);
+ }
+
Status init_prefetcher(const SegmentPrefetchParams& params) override;
void collect_prefetchers(
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 82936a42e5e..dea018d180c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1350,6 +1350,9 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
case MIX:
pushAggOp = TPushAggOp.MIX;
break;
+ case COUNT_NULL:
+ pushAggOp = TPushAggOp.COUNT_NULL;
+ break;
default:
throw new AnalysisException("Unsupported storage layer
aggregate: "
+ storageLayerAggregate.getAggOp());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index c7e7612a246..a9c4ac4acaa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -565,7 +565,8 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg =
PushDownAggOp.supportedFunctions();
boolean containsCount = false;
- Set<SlotReference> checkNullSlots = new HashSet<>();
+ boolean containsCountStar = false;
+ Set<SlotReference> countNullableSlots = new HashSet<>();
Set<Expression> expressionAfterProject = new HashSet<>();
// Single loop through aggregateFunctions to handle multiple logic
@@ -580,15 +581,19 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
// Check if contains Count function
if (functionClass.equals(Count.class)) {
containsCount = true;
+ Count countFunc = (Count) function;
+ if (countFunc.isCountStar()) {
+ containsCountStar = true;
+ }
if (!function.getArguments().isEmpty()) {
Expression arg0 = function.getArguments().get(0);
if (arg0 instanceof SlotReference) {
- checkNullSlots.add((SlotReference) arg0);
+ countNullableSlots.add((SlotReference) arg0);
expressionAfterProject.add(arg0);
} else if (arg0 instanceof Cast) {
Expression child0 = arg0.child(0);
if (child0 instanceof SlotReference) {
- checkNullSlots.add((SlotReference) child0);
+ countNullableSlots.add((SlotReference) child0);
expressionAfterProject.add(arg0);
}
}
@@ -652,7 +657,7 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
if (argument instanceof SlotReference) {
// Argument is valid, continue
if (needCheckSlotNull) {
- checkNullSlots.add((SlotReference) argument);
+ countNullableSlots.add((SlotReference) argument);
}
} else if (argument instanceof Cast) {
boolean castMatch = argument.child(0) instanceof
SlotReference
@@ -662,7 +667,7 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
return canNotPush;
} else {
if (needCheckSlotNull) {
- checkNullSlots.add((SlotReference)
argument.child(0));
+ countNullableSlots.add((SlotReference)
argument.child(0));
}
}
} else {
@@ -672,20 +677,52 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
argumentsOfAggregateFunction = processedExpressions;
}
- Set<PushDownAggOp> pushDownAggOps = functionClasses.stream()
- .map(supportedAgg::get)
- .collect(Collectors.toSet());
-
- PushDownAggOp mergeOp = pushDownAggOps.size() == 1
- ? pushDownAggOps.iterator().next()
- : PushDownAggOp.MIX;
-
Set<SlotReference> aggUsedSlots =
ExpressionUtils.collect(argumentsOfAggregateFunction,
SlotReference.class::isInstance);
List<SlotReference> usedSlotInTable = (List<SlotReference>)
Project.findProject(aggUsedSlots,
logicalScan.getOutput());
+ boolean hasCountNullable = false;
+ for (SlotReference slot : usedSlotInTable) {
+ if (!slot.getOriginalColumn().isPresent()) {
+ continue;
+ }
+ Column column = slot.getOriginalColumn().get();
+ if (countNullableSlots.contains(slot) && column.isAllowNull()) {
+ hasCountNullable = true;
+ break;
+ }
+ }
+
+ if (containsCountStar && hasCountNullable) {
+ return canNotPush;
+ }
+
+ boolean hasMinMax = functionClasses.stream()
+ .anyMatch(c -> c.equals(Min.class) || c.equals(Max.class));
+
+ if (hasCountNullable && hasMinMax) {
+ return canNotPush;
+ }
+
+ Set<PushDownAggOp> pushDownAggOps = new HashSet<>();
+ for (Class<? extends AggregateFunction> functionClass :
functionClasses) {
+ if (functionClass.equals(Count.class)) {
+ if (hasCountNullable) {
+ pushDownAggOps.add(PushDownAggOp.COUNT_NULL);
+ } else {
+ pushDownAggOps.add(PushDownAggOp.COUNT);
+ }
+ } else {
+ pushDownAggOps.add(supportedAgg.get(functionClass));
+ }
+ }
+
+ PushDownAggOp mergeOp = pushDownAggOps.size() == 1
+ ? pushDownAggOps.iterator().next()
+ : PushDownAggOp.MIX;
+
for (SlotReference slot : usedSlotInTable) {
Column column = slot.getOriginalColumn().get();
if (column.isAggregated()) {
@@ -703,14 +740,6 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
return canNotPush;
}
}
- if (mergeOp == PushDownAggOp.COUNT || mergeOp ==
PushDownAggOp.MIX) {
- // NULL value behavior in `count` function is zero, so
- // we should not use row_count to speed up query. the col
- // must be not null
- if (column.isAllowNull() && checkNullSlots.contains(slot)) {
- return canNotPush;
- }
- }
}
if (logicalScan instanceof LogicalOlapScan) {
@@ -731,6 +760,11 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
}
} else if (logicalScan instanceof LogicalFileScan) {
+ // COUNT_NULL requires reading nullmaps from segment files, which
is not supported
+ // by external file scans (parquet/orc etc.), so we refuse to push
it down here.
+ if (mergeOp == PushDownAggOp.COUNT_NULL) {
+ return canNotPush;
+ }
Rule rule = (logicalScan instanceof LogicalHudiScan) ? new
LogicalHudiScanToPhysicalHudiScan().build()
: new LogicalFileScanToPhysicalFileScan().build();
PhysicalFileScan physicalScan = (PhysicalFileScan)
rule.transform(logicalScan, cascadesContext)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
index 2385c0601e6..ef0f0de1523 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
@@ -109,7 +109,7 @@ public class PhysicalStorageLayerAggregate extends
PhysicalCatalogRelation {
/** PushAggOp */
public enum PushDownAggOp {
- COUNT, MIN_MAX, MIX, COUNT_ON_MATCH;
+ COUNT, MIN_MAX, MIX, COUNT_ON_MATCH, COUNT_NULL;
/** supportedFunctions */
public static Map<Class<? extends AggregateFunction>, PushDownAggOp>
supportedFunctions() {
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 302f26e5945..5267989ebb7 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -859,7 +859,8 @@ enum TPushAggOp {
MINMAX = 1,
COUNT = 2,
MIX = 3,
- COUNT_ON_INDEX = 4
+ COUNT_ON_INDEX = 4,
+ COUNT_NULL = 5
}
struct TScoreRangeInfo {
diff --git a/regression-test/data/query_p0/aggregate/count_null_push_down.out
b/regression-test/data/query_p0/aggregate/count_null_push_down.out
new file mode 100644
index 00000000000..e858bc64ef5
--- /dev/null
+++ b/regression-test/data/query_p0/aggregate/count_null_push_down.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !count_null --
+3
+
+-- !count_star --
+5
+
+-- !count_non_null --
+5
+
diff --git
a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
index 220ab4038c5..8dfecc34b59 100644
--- a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
+++ b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
@@ -114,7 +114,7 @@ suite("test_pushdown_explain") {
}
explain {
sql("select count(a) from (select nullable_col as a from
test_null_columns) t1;")
- contains "pushAggOp=NONE"
+ contains "pushAggOp=COUNT_NULL"
}
explain {
sql("select count(a), min(a) from (select non_nullable_col as a from
test_null_columns) t1;")
diff --git
a/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
b/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
new file mode 100644
index 00000000000..4a51e951237
--- /dev/null
+++ b/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("count_null_push_down") {
+ def tableName = "count_null_push_down_test"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ id INT NOT NULL,
+ value INT NULL
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES (1, 1), (2, 2), (3, null), (4, 4),
(5, null) """
+
+ // Test COUNT(column) on nullable column - should use COUNT_NULL push down
+ qt_count_null """ SELECT COUNT(value) FROM ${tableName} """
+
+ // Test COUNT(*) - should use COUNT push down
+ qt_count_star """ SELECT COUNT(*) FROM ${tableName} """
+
+ // Test COUNT on non-nullable column - should use COUNT push down
+ qt_count_non_null """ SELECT COUNT(id) FROM ${tableName} """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]