This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 03a9b939e88 [feat](storage) add read_null_map support for COUNT_NULL
push down aggregate (#60996)
03a9b939e88 is described below
commit 03a9b939e88a88df5cd37bff672430e8833c9be5
Author: HappenLee <[email protected]>
AuthorDate: Mon Mar 16 10:49:38 2026 +0800
[feat](storage) add read_null_map support for COUNT_NULL push down
aggregate (#60996)
Add support for pushing COUNT(nullable_col) down to the storage layer by
reading the column's nullmap pages directly, instead of refusing the
push-down whenever the counted column is nullable.
Changes:
- BE (storage/segment/column_reader.*): add virtual
  ColumnIterator::read_null_map(size_t* n, NullMap&) with implementations
  for FileColumnIterator (decodes null runs page by page) and for
  Map/Struct/Array iterators (reads from their _null_iterator, falling
  back to an all-zero map for schema-changed segments with no null data);
  VariantColumnReader forwards to its inner iterator.
- BE (storage/iterator/vgeneric_iterators.*): VStatisticsIterator handles
  the new TPushAggOp::COUNT_NULL — column iterators are fully init()'ed
  and seeked to ordinal 0 once, then next_batch() fills real nullmaps
  sequentially (no per-batch re-seek, which would double-count rows in
  segments larger than one batch).
- FE Nereids (AggregateStrategies.java): choose PushDownAggOp.COUNT_NULL
  when COUNT targets a nullable column; refuse push-down when mixed with
  COUNT(*), MIN/MAX, or external file scans (parquet/orc have no nullmap
  pages to read).
- FE (PhysicalPlanTranslator.java, PhysicalStorageLayerAggregate.java):
  translate the new COUNT_NULL enum value to thrift.
- gensrc/thrift (PlanNodes.thrift): add TPushAggOp.COUNT_NULL = 5.
- Regression tests: new count_null_push_down suite; test_pushdown_explain
  now expects pushAggOp=COUNT_NULL for count(nullable_col).
---
be/src/storage/iterator/vgeneric_iterators.cpp | 51 ++++++++++
be/src/storage/iterator/vgeneric_iterators.h | 1 +
be/src/storage/segment/column_reader.cpp | 110 +++++++++++++++++++++
be/src/storage/segment/column_reader.h | 17 +++-
.../segment/variant/variant_column_reader.h | 4 +
.../glue/translator/PhysicalPlanTranslator.java | 3 +
.../rules/implementation/AggregateStrategies.java | 76 ++++++++++----
.../physical/PhysicalStorageLayerAggregate.java | 2 +-
gensrc/thrift/PlanNodes.thrift | 3 +-
.../query_p0/aggregate/count_null_push_down.out | 10 ++
.../explain/test_pushdown_explain.groovy | 2 +-
.../query_p0/aggregate/count_null_push_down.groovy | 44 +++++++++
12 files changed, 297 insertions(+), 26 deletions(-)
diff --git a/be/src/storage/iterator/vgeneric_iterators.cpp
b/be/src/storage/iterator/vgeneric_iterators.cpp
index 267df0bb715..f80b57d1afb 100644
--- a/be/src/storage/iterator/vgeneric_iterators.cpp
+++ b/be/src/storage/iterator/vgeneric_iterators.cpp
@@ -25,6 +25,7 @@
#include "core/block/block.h"
#include "core/block/column_with_type_and_name.h"
#include "core/column/column.h"
+#include "core/column/column_nullable.h"
#include "core/data_type/data_type.h"
#include "storage/cache/schema_cache.h"
#include "storage/field.h"
@@ -45,6 +46,19 @@ using namespace ErrorCode;
Status VStatisticsIterator::init(const StorageReadOptions& opts) {
if (!_init) {
_push_down_agg_type_opt = opts.push_down_agg_type_opt;
+ _tablet_schema = opts.tablet_schema;
+
+ // COUNT_NULL needs to actually read nullmap pages, so the column
iterators must be
+ // fully initialized with a valid ColumnIteratorOptions (file_reader,
stats, io_ctx).
+ // Other agg types (COUNT, MINMAX, MIX) only use zone-map metadata and
never open
+ // pages, so they do not need init.
+ const bool need_iter_init = (_push_down_agg_type_opt ==
TPushAggOp::COUNT_NULL);
+ ColumnIteratorOptions iter_opts {
+ .use_page_cache = opts.use_page_cache,
+ .file_reader = _segment->file_reader().get(),
+ .stats = opts.stats,
+ .io_ctx = opts.io_ctx,
+ };
for (size_t i = 0; i < _schema.num_column_ids(); i++) {
auto cid = _schema.column_id(i);
@@ -52,6 +66,16 @@ Status VStatisticsIterator::init(const StorageReadOptions&
opts) {
if (_column_iterators_map.count(unique_id) < 1) {
RETURN_IF_ERROR(_segment->new_column_iterator(
opts.tablet_schema->column(cid),
&_column_iterators_map[unique_id], &opts));
+ if (need_iter_init) {
+
RETURN_IF_ERROR(_column_iterators_map[unique_id]->init(iter_opts));
+ // Seek to ordinal 0 once during init so that the page
iterator
+ // is properly positioned for sequential read_null_map()
calls
+ // in next_batch(). We must NOT seek again in next_batch()
—
+ // doing so would reset the iterator to ordinal 0 on every
batch
+ // and cause rows to be re-read/double-counted for segments
+ // larger than MAX_ROW_SIZE_IN_COUNT (65535) rows.
+
RETURN_IF_ERROR(_column_iterators_map[unique_id]->seek_to_ordinal(0));
+ }
}
_column_iterators.push_back(_column_iterators_map[unique_id].get());
}
@@ -76,6 +100,33 @@ Status VStatisticsIterator::next_batch(Block* block) {
for (auto& column : columns) {
column->insert_many_defaults(size);
}
+ } else if (_push_down_agg_type_opt == TPushAggOp::COUNT_NULL) {
+ for (int i = 0; i < (int)columns.size(); ++i) {
+ auto& column = columns[i];
+ auto cid = _schema.column_id(i);
+ auto& tablet_column = _tablet_schema->column(cid);
+
+ if (tablet_column.is_nullable()) {
+ auto& nullable_col = assert_cast<ColumnNullable&>(*column);
+ auto& nested_col = nullable_col.get_nested_column();
+
+ // Read the real nullmap for this column from the current
position.
+ // Do NOT seek back to ordinal 0 here: the column iterator
already
+ // starts at ordinal 0 after init(), and each call to
read_null_map
+ // advances it sequentially. Seeking to 0 on every
next_batch() call
+ // would cause large segments (> MAX_ROW_SIZE_IN_COUNT
rows) to have
+ // their first portion re-read and counted multiple times,
producing
+ // a result higher than the true non-null count.
+ size_t read_rows = size;
+ auto& null_map_data = nullable_col.get_null_map_data();
+
RETURN_IF_ERROR(_column_iterators[i]->read_null_map(&read_rows, null_map_data));
+
+ // nested column needs one default value per row
+ nested_col.insert_many_defaults(size);
+ } else {
+ column->insert_many_defaults(size);
+ }
+ }
} else {
for (int i = 0; i < columns.size(); ++i) {
RETURN_IF_ERROR(_column_iterators[i]->next_batch_of_zone_map(&size,
columns[i]));
diff --git a/be/src/storage/iterator/vgeneric_iterators.h
b/be/src/storage/iterator/vgeneric_iterators.h
index 449d5cb1606..326cb750f1a 100644
--- a/be/src/storage/iterator/vgeneric_iterators.h
+++ b/be/src/storage/iterator/vgeneric_iterators.h
@@ -63,6 +63,7 @@ public:
private:
std::shared_ptr<Segment> _segment;
const Schema& _schema;
+ std::shared_ptr<TabletSchema> _tablet_schema;
size_t _target_rows = 0;
size_t _output_rows = 0;
bool _init = false;
diff --git a/be/src/storage/segment/column_reader.cpp
b/be/src/storage/segment/column_reader.cpp
index d037f4fa84e..5badb4a7008 100644
--- a/be/src/storage/segment/column_reader.cpp
+++ b/be/src/storage/segment/column_reader.cpp
@@ -1252,6 +1252,27 @@ Status MapFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_acc
return Status::OK();
}
+Status MapFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_map_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
StructFileColumnIterator::StructFileColumnIterator(
@@ -1469,6 +1490,27 @@ Status StructFileColumnIterator::set_access_paths(
return Status::OK();
}
+Status StructFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_struct_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
Status OffsetFileColumnIterator::init(const ColumnIteratorOptions& opts) {
RETURN_IF_ERROR(_offset_iterator->init(opts));
@@ -1727,6 +1769,27 @@ Status ArrayFileColumnIterator::set_access_paths(const
TColumnAccessPaths& all_a
return Status::OK();
}
+Status ArrayFileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_array_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+ if (!_null_iterator) {
+ // Schema-change scenario: column became nullable but old segment has
no null data.
+ null_map.resize(*n);
+ memset(null_map.data(), 0, *n);
+ return Status::OK();
+ }
+ auto null_col = ColumnUInt8::create();
+ auto null_col_ptr = null_col->get_ptr();
+ size_t read_rows = *n;
+ bool has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&read_rows, null_col_ptr,
&has_null));
+ *n = read_rows;
+ null_map.resize(read_rows);
+ memcpy(null_map.data(), null_col->get_data().data(), read_rows);
+ return Status::OK();
+}
+
////////////////////////////////////////////////////////////////////////////////
FileColumnIterator::FileColumnIterator(std::shared_ptr<ColumnReader> reader) :
_reader(reader) {}
@@ -1901,6 +1964,53 @@ Status FileColumnIterator::next_batch(size_t* n,
MutableColumnPtr& dst, bool* ha
return Status::OK();
}
+Status FileColumnIterator::read_null_map(size_t* n, NullMap& null_map) {
+ if (!_reader->is_nullable()) {
+ return Status::InternalError("read_null_map is not supported for
non-nullable column");
+ }
+
+ null_map.resize(*n);
+ size_t remaining = *n;
+ size_t offset = 0;
+
+ while (remaining > 0) {
+ if (!_page.has_remaining()) {
+ bool eos = false;
+ RETURN_IF_ERROR(_load_next_page(&eos));
+ if (eos) {
+ break;
+ }
+ }
+
+ if (!_page.has_null) {
+ size_t nrows_in_page = std::min(remaining, _page.remaining());
+ memset(null_map.data() + offset, 0, nrows_in_page);
+ offset += nrows_in_page;
+ _current_ordinal += nrows_in_page;
+ _page.offset_in_page += nrows_in_page;
+ remaining -= nrows_in_page;
+ continue;
+ }
+
+ size_t nrows_in_page = std::min(remaining, _page.remaining());
+ size_t this_run = 0;
+ while (this_run < nrows_in_page) {
+ bool is_null = false;
+ size_t run_len = _page.null_decoder.GetNextRun(&is_null,
nrows_in_page - this_run);
+ memset(null_map.data() + offset + this_run, is_null ? 1 : 0,
run_len);
+ this_run += run_len;
+ }
+
+ offset += nrows_in_page;
+ _page.offset_in_page += nrows_in_page;
+ _current_ordinal += nrows_in_page;
+ remaining -= nrows_in_page;
+ }
+
+ *n = offset;
+ return Status::OK();
+}
+
Status FileColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t
count,
MutableColumnPtr& dst) {
if (_reading_flag == ReadingFlag::SKIP_READING) {
diff --git a/be/src/storage/segment/column_reader.h
b/be/src/storage/segment/column_reader.h
index 57a4bc4d74a..4571c54c106 100644
--- a/be/src/storage/segment/column_reader.h
+++ b/be/src/storage/segment/column_reader.h
@@ -30,8 +30,9 @@
#include "common/config.h"
#include "common/logging.h"
-#include "common/status.h" // for Status
-#include "core/column/column_array.h" // ColumnArray
+#include "common/status.h" // for Status
+#include "core/column/column_array.h" // ColumnArray
+#include "core/column/column_nullable.h" // NullMap
#include "core/data_type/data_type.h"
#include "io/cache/cached_remote_file_reader.h"
#include "io/fs/file_reader_writer_fwd.h"
@@ -360,6 +361,10 @@ public:
virtual bool is_all_dict_encoding() const { return false; }
+ virtual Status read_null_map(size_t* n, NullMap& null_map) {
+ return Status::NotSupported("read_null_map not implemented");
+ }
+
virtual Status set_access_paths(const TColumnAccessPaths& all_access_paths,
const TColumnAccessPaths&
predicate_access_paths) {
if (!predicate_access_paths.empty()) {
@@ -458,6 +463,8 @@ public:
bool is_all_dict_encoding() const override { return _is_all_dict_encoding;
}
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
Status init_prefetcher(const SegmentPrefetchParams& params) override;
void collect_prefetchers(
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
@@ -583,6 +590,8 @@ public:
void remove_pruned_sub_iterators() override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _map_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -624,6 +633,8 @@ public:
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
PrefetcherInitMethod init_method) override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _struct_reader = nullptr;
ColumnIteratorUPtr _null_iterator;
@@ -663,6 +674,8 @@ public:
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
PrefetcherInitMethod init_method) override;
+ Status read_null_map(size_t* n, NullMap& null_map) override;
+
private:
std::shared_ptr<ColumnReader> _array_reader = nullptr;
std::unique_ptr<OffsetFileColumnIterator> _offset_iterator;
diff --git a/be/src/storage/segment/variant/variant_column_reader.h
b/be/src/storage/segment/variant/variant_column_reader.h
index a9982a29170..23ec79b786d 100644
--- a/be/src/storage/segment/variant/variant_column_reader.h
+++ b/be/src/storage/segment/variant/variant_column_reader.h
@@ -452,6 +452,10 @@ public:
ordinal_t get_current_ordinal() const override { return
_inner_iter->get_current_ordinal(); }
+ Status read_null_map(size_t* n, NullMap& null_map) override {
+ return _inner_iter->read_null_map(n, null_map);
+ }
+
Status init_prefetcher(const SegmentPrefetchParams& params) override;
void collect_prefetchers(
std::map<PrefetcherInitMethod, std::vector<SegmentPrefetcher*>>&
prefetchers,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
index 82936a42e5e..dea018d180c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java
@@ -1350,6 +1350,9 @@ public class PhysicalPlanTranslator extends
DefaultPlanVisitor<PlanFragment, Pla
case MIX:
pushAggOp = TPushAggOp.MIX;
break;
+ case COUNT_NULL:
+ pushAggOp = TPushAggOp.COUNT_NULL;
+ break;
default:
throw new AnalysisException("Unsupported storage layer
aggregate: "
+ storageLayerAggregate.getAggOp());
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index c7e7612a246..a9c4ac4acaa 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -565,7 +565,8 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
Map<Class<? extends AggregateFunction>, PushDownAggOp> supportedAgg =
PushDownAggOp.supportedFunctions();
boolean containsCount = false;
- Set<SlotReference> checkNullSlots = new HashSet<>();
+ boolean containsCountStar = false;
+ Set<SlotReference> countNullableSlots = new HashSet<>();
Set<Expression> expressionAfterProject = new HashSet<>();
// Single loop through aggregateFunctions to handle multiple logic
@@ -580,15 +581,19 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
// Check if contains Count function
if (functionClass.equals(Count.class)) {
containsCount = true;
+ Count countFunc = (Count) function;
+ if (countFunc.isCountStar()) {
+ containsCountStar = true;
+ }
if (!function.getArguments().isEmpty()) {
Expression arg0 = function.getArguments().get(0);
if (arg0 instanceof SlotReference) {
- checkNullSlots.add((SlotReference) arg0);
+ countNullableSlots.add((SlotReference) arg0);
expressionAfterProject.add(arg0);
} else if (arg0 instanceof Cast) {
Expression child0 = arg0.child(0);
if (child0 instanceof SlotReference) {
- checkNullSlots.add((SlotReference) child0);
+ countNullableSlots.add((SlotReference) child0);
expressionAfterProject.add(arg0);
}
}
@@ -652,7 +657,7 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
if (argument instanceof SlotReference) {
// Argument is valid, continue
if (needCheckSlotNull) {
- checkNullSlots.add((SlotReference) argument);
+ countNullableSlots.add((SlotReference) argument);
}
} else if (argument instanceof Cast) {
boolean castMatch = argument.child(0) instanceof
SlotReference
@@ -662,7 +667,7 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
return canNotPush;
} else {
if (needCheckSlotNull) {
- checkNullSlots.add((SlotReference)
argument.child(0));
+ countNullableSlots.add((SlotReference)
argument.child(0));
}
}
} else {
@@ -672,20 +677,52 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
argumentsOfAggregateFunction = processedExpressions;
}
- Set<PushDownAggOp> pushDownAggOps = functionClasses.stream()
- .map(supportedAgg::get)
- .collect(Collectors.toSet());
-
- PushDownAggOp mergeOp = pushDownAggOps.size() == 1
- ? pushDownAggOps.iterator().next()
- : PushDownAggOp.MIX;
-
Set<SlotReference> aggUsedSlots =
ExpressionUtils.collect(argumentsOfAggregateFunction,
SlotReference.class::isInstance);
List<SlotReference> usedSlotInTable = (List<SlotReference>)
Project.findProject(aggUsedSlots,
logicalScan.getOutput());
+ boolean hasCountNullable = false;
+ for (SlotReference slot : usedSlotInTable) {
+ if (!slot.getOriginalColumn().isPresent()) {
+ continue;
+ }
+ Column column = slot.getOriginalColumn().get();
+ if (countNullableSlots.contains(slot) && column.isAllowNull()) {
+ hasCountNullable = true;
+ break;
+ }
+ }
+
+ if (containsCountStar && hasCountNullable) {
+ return canNotPush;
+ }
+
+ boolean hasMinMax = functionClasses.stream()
+ .anyMatch(c -> c.equals(Min.class) || c.equals(Max.class));
+
+ if (hasCountNullable && hasMinMax) {
+ return canNotPush;
+ }
+
+ Set<PushDownAggOp> pushDownAggOps = new HashSet<>();
+ for (Class<? extends AggregateFunction> functionClass :
functionClasses) {
+ if (functionClass.equals(Count.class)) {
+ if (hasCountNullable) {
+ pushDownAggOps.add(PushDownAggOp.COUNT_NULL);
+ } else {
+ pushDownAggOps.add(PushDownAggOp.COUNT);
+ }
+ } else {
+ pushDownAggOps.add(supportedAgg.get(functionClass));
+ }
+ }
+
+ PushDownAggOp mergeOp = pushDownAggOps.size() == 1
+ ? pushDownAggOps.iterator().next()
+ : PushDownAggOp.MIX;
+
for (SlotReference slot : usedSlotInTable) {
Column column = slot.getOriginalColumn().get();
if (column.isAggregated()) {
@@ -703,14 +740,6 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
return canNotPush;
}
}
- if (mergeOp == PushDownAggOp.COUNT || mergeOp ==
PushDownAggOp.MIX) {
- // NULL value behavior in `count` function is zero, so
- // we should not use row_count to speed up query. the col
- // must be not null
- if (column.isAllowNull() && checkNullSlots.contains(slot)) {
- return canNotPush;
- }
- }
}
if (logicalScan instanceof LogicalOlapScan) {
@@ -731,6 +760,11 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
}
} else if (logicalScan instanceof LogicalFileScan) {
+ // COUNT_NULL requires reading nullmaps from segment files, which
is not supported
+ // by external file scans (parquet/orc etc.), so we refuse to push
it down here.
+ if (mergeOp == PushDownAggOp.COUNT_NULL) {
+ return canNotPush;
+ }
Rule rule = (logicalScan instanceof LogicalHudiScan) ? new
LogicalHudiScanToPhysicalHudiScan().build()
: new LogicalFileScanToPhysicalFileScan().build();
PhysicalFileScan physicalScan = (PhysicalFileScan)
rule.transform(logicalScan, cascadesContext)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
index 2385c0601e6..ef0f0de1523 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalStorageLayerAggregate.java
@@ -109,7 +109,7 @@ public class PhysicalStorageLayerAggregate extends
PhysicalCatalogRelation {
/** PushAggOp */
public enum PushDownAggOp {
- COUNT, MIN_MAX, MIX, COUNT_ON_MATCH;
+ COUNT, MIN_MAX, MIX, COUNT_ON_MATCH, COUNT_NULL;
/** supportedFunctions */
public static Map<Class<? extends AggregateFunction>, PushDownAggOp>
supportedFunctions() {
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 302f26e5945..5267989ebb7 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -859,7 +859,8 @@ enum TPushAggOp {
MINMAX = 1,
COUNT = 2,
MIX = 3,
- COUNT_ON_INDEX = 4
+ COUNT_ON_INDEX = 4,
+ COUNT_NULL = 5
}
struct TScoreRangeInfo {
diff --git a/regression-test/data/query_p0/aggregate/count_null_push_down.out
b/regression-test/data/query_p0/aggregate/count_null_push_down.out
new file mode 100644
index 00000000000..e858bc64ef5
--- /dev/null
+++ b/regression-test/data/query_p0/aggregate/count_null_push_down.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !count_null --
+3
+
+-- !count_star --
+5
+
+-- !count_non_null --
+5
+
diff --git
a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
index 220ab4038c5..8dfecc34b59 100644
--- a/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
+++ b/regression-test/suites/nereids_p0/explain/test_pushdown_explain.groovy
@@ -114,7 +114,7 @@ suite("test_pushdown_explain") {
}
explain {
sql("select count(a) from (select nullable_col as a from
test_null_columns) t1;")
- contains "pushAggOp=NONE"
+ contains "pushAggOp=COUNT_NULL"
}
explain {
sql("select count(a), min(a) from (select non_nullable_col as a from
test_null_columns) t1;")
diff --git
a/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
b/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
new file mode 100644
index 00000000000..4a51e951237
--- /dev/null
+++ b/regression-test/suites/query_p0/aggregate/count_null_push_down.groovy
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("count_null_push_down") {
+ def tableName = "count_null_push_down_test"
+
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ id INT NOT NULL,
+ value INT NULL
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ sql """ INSERT INTO ${tableName} VALUES (1, 1), (2, 2), (3, null), (4, 4),
(5, null) """
+
+ // Test COUNT(column) on nullable column - should use COUNT_NULL push down
+ qt_count_null """ SELECT COUNT(value) FROM ${tableName} """
+
+ // Test COUNT(*) - should use COUNT push down
+ qt_count_star """ SELECT COUNT(*) FROM ${tableName} """
+
+ // Test COUNT on non-nullable column - should use COUNT push down
+ qt_count_non_null """ SELECT COUNT(id) FROM ${tableName} """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]