Copilot commented on code in PR #60228:
URL: https://github.com/apache/doris/pull/60228#discussion_r2730816015
##########
gensrc/thrift/PaloInternalService.thrift:
##########
@@ -424,8 +424,8 @@ struct TQueryOptions {
183: optional bool enable_use_hybrid_sort = false;
184: optional i32 cte_max_recursion_depth;
-
185: optional bool enable_parquet_file_page_cache = true;
+ 186: optional bool enable_aggregate_function_null_v2 = false;
Review Comment:
There is a mismatch between the default values for
enable_aggregate_function_null_v2. The Java SessionVariable defaults to true
(line 1469 in SessionVariable.java), but the Thrift definition defaults to
false (line 428 in PaloInternalService.thrift). This inconsistency could lead
to different behavior depending on whether the value is explicitly set or uses
the default. The defaults should be aligned.
```suggestion
186: optional bool enable_aggregate_function_null_v2 = true;
```
##########
be/src/vec/aggregate_functions/aggregate_function_null_v2.h:
##########
@@ -0,0 +1,598 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+//
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNull.h
+// and modified by Doris
+
+#pragma once
+
+#include <glog/logging.h>
+
+#include <memory>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_distinct.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/string_buffer.hpp"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+template <typename NestFunction, bool result_is_nullable, typename Derived>
+class AggregateFunctionNullBaseInlineV2 : public
IAggregateFunctionHelper<Derived> {
+protected:
+ std::unique_ptr<NestFunction> nested_function;
+ size_t prefix_size;
+ bool is_window_function = false;
+
+ AggregateDataPtr nested_place(AggregateDataPtr __restrict place) const
noexcept {
+ return place + prefix_size;
+ }
+
+ ConstAggregateDataPtr nested_place(ConstAggregateDataPtr __restrict place)
const noexcept {
+ return place + prefix_size;
+ }
+
+ static void init(AggregateDataPtr __restrict place, bool
is_window_function) noexcept {
+ init_flag(place);
+ init_null_count(place, is_window_function);
+ }
+
+ static void init_flag(AggregateDataPtr __restrict place) noexcept {
+ if constexpr (result_is_nullable) {
+ place[0] = false;
+ }
+ }
+
+ static void set_flag(AggregateDataPtr __restrict place) noexcept {
+ if constexpr (result_is_nullable) {
+ place[0] = true;
+ }
+ }
+
+ static bool get_flag(ConstAggregateDataPtr __restrict place) noexcept {
+ return result_is_nullable ? place[0] : true;
+ }
+
+ static void init_null_count(AggregateDataPtr __restrict place,
+ bool is_window_function) noexcept {
+ if (is_window_function && result_is_nullable) {
+ unaligned_store<int32_t>(place + 1, 0);
+ }
+ }
+
+ static void update_null_count(AggregateDataPtr __restrict place, bool
incremental,
+ bool is_window_function) noexcept {
+ if (is_window_function && result_is_nullable) {
+ auto null_count = unaligned_load<int32_t>(place + 1);
+ incremental ? null_count++ : null_count--;
+ unaligned_store<int32_t>(place + 1, null_count);
+ }
+ }
+
+ static int32_t get_null_count(ConstAggregateDataPtr __restrict place,
+ bool is_window_function) noexcept {
+ int32_t num = 0;
+ if (is_window_function && result_is_nullable) {
+ num = unaligned_load<int32_t>(place + 1);
+ }
+ return num;
+ }
+
+public:
+ AggregateFunctionNullBaseInlineV2(IAggregateFunction* nested_function_,
+ const DataTypes& arguments, bool
is_window_function_)
+ : IAggregateFunctionHelper<Derived>(arguments),
+ nested_function {assert_cast<NestFunction*>(nested_function_)},
+ is_window_function(is_window_function_) {
+ DCHECK(nested_function_ != nullptr);
+ if constexpr (result_is_nullable) {
+ if (this->is_window_function) {
+ //
flag|---null_count----|-------padding-------|--nested_data----|
+ size_t nested_align = nested_function->align_of_data();
+ prefix_size = 1 + sizeof(int32_t);
+ if (prefix_size % nested_align != 0) {
+ prefix_size += (nested_align - (prefix_size %
nested_align));
+ }
+ } else {
+ prefix_size = nested_function->align_of_data();
+ }
+ } else {
+ prefix_size = 0;
+ }
+ }
+
+ MutableColumnPtr create_serialize_column() const override {
+ if constexpr (result_is_nullable) {
+ return
ColumnNullable::create(nested_function->create_serialize_column(),
+ ColumnUInt8::create());
+ }
+ return nested_function->create_serialize_column();
+ }
+
+ DataTypePtr get_serialized_type() const override {
+ if constexpr (result_is_nullable) {
+ return make_nullable(nested_function->get_serialized_type());
+ }
+ return nested_function->get_serialized_type();
+ }
+
+ void set_query_context(QueryContext* ctx) override {
+ return nested_function->set_query_context(ctx);
+ }
+
+ bool is_blockable() const override { return
nested_function->is_blockable(); }
+
+ void set_version(const int version_) override {
+ IAggregateFunctionHelper<Derived>::set_version(version_);
+ nested_function->set_version(version_);
+ }
+
+ String get_name() const override { return "NullableV2(" +
nested_function->get_name() + ")"; }
+
+ DataTypePtr get_return_type() const override {
+ return result_is_nullable ?
make_nullable(nested_function->get_return_type())
+ : nested_function->get_return_type();
+ }
+
+ void create(AggregateDataPtr __restrict place) const override {
+ init(place, this->is_window_function);
+ nested_function->create(nested_place(place));
+ }
+
+ void destroy(AggregateDataPtr __restrict place) const noexcept override {
+ nested_function->destroy(nested_place(place));
+ }
+ void reset(AggregateDataPtr place) const override {
+ init(place, this->is_window_function);
+ nested_function->reset(nested_place(place));
+ }
+
+ bool is_trivial() const override { return false; }
+
+ size_t size_of_data() const override { return prefix_size +
nested_function->size_of_data(); }
+
+ size_t align_of_data() const override {
+ if (this->is_window_function && result_is_nullable) {
+ return std::max(nested_function->align_of_data(),
alignof(int32_t));
+ } else {
+ return nested_function->align_of_data();
+ }
+ }
+
+ void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+ Arena& arena) const override {
+ if (get_flag(rhs)) {
+ set_flag(place);
+ nested_function->merge(nested_place(place), nested_place(rhs),
arena);
+ }
+ }
+
+ void serialize(ConstAggregateDataPtr __restrict place, BufferWritable&
buf) const override {
+ bool flag = get_flag(place);
+ if constexpr (result_is_nullable) {
+ buf.write_binary(flag);
+ }
+ if (flag) {
+ nested_function->serialize(nested_place(place), buf);
+ }
+ }
+
+ void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+ Arena& arena) const override {
+ bool flag = true;
+ if constexpr (result_is_nullable) {
+ buf.read_binary(flag);
+ }
+ if (flag) {
+ set_flag(place);
+ nested_function->deserialize(nested_place(place), buf, arena);
+ }
+ }
+
+ void serialize_to_column(const std::vector<AggregateDataPtr>& places,
size_t offset,
+ MutableColumnPtr& dst, const size_t num_rows)
const override {
+ if constexpr (result_is_nullable) {
+ auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+ auto& nested_col = nullable_col.get_nested_column();
+ auto& null_map = nullable_col.get_null_map_data();
+ MutableColumnPtr nested_col_ptr = nested_col.assume_mutable();
+
+ null_map.resize(num_rows);
+ uint8_t* __restrict null_map_data = null_map.data();
+ for (size_t i = 0; i < num_rows; ++i) {
+ null_map_data[i] = !get_flag(places[i] + offset);
+ }
+ nested_function->serialize_to_column(places, offset + prefix_size,
nested_col_ptr,
+ num_rows);
+ } else {
+ nested_function->serialize_to_column(places, offset, dst,
num_rows);
+ }
+ }
+
+ void streaming_agg_serialize_to_column(const IColumn** columns,
MutableColumnPtr& dst,
+ const size_t num_rows, Arena&
arena) const override {
+ const auto* src_nullable_col = assert_cast<const
ColumnNullable*>(columns[0]);
+ const auto* __restrict src_null_map_data =
src_nullable_col->get_null_map_data().data();
+
+ size_t nested_size = nested_function->size_of_data();
+ std::vector<AggregateDataPtr> nested_places(num_rows);
+ std::vector<char> places_data(num_rows * nested_size);
+ for (size_t i = 0; i < num_rows; ++i) {
+ nested_places[i] = places_data.data() + i * nested_size;
+ }
+
+ if (!nested_function->is_trivial()) {
+ for (int i = 0; i < num_rows; ++i) {
+ try {
+ nested_function->create(nested_places[i]);
+ } catch (...) {
+ for (int j = 0; j < i; ++j) {
+ nested_function->destroy(nested_places[j]);
+ }
+ throw;
+ }
+ }
+ }
+ Defer destroy_places = {[&]() {
+ if (!nested_function->is_trivial()) {
+ for (int i = 0; i < num_rows; ++i) {
+ nested_function->destroy(nested_places[i]);
+ }
+ }
+ }};
+ const IColumn* src_nested_column =
+ src_nullable_col->get_nested_column().assume_mutable().get();
+ if (src_nullable_col->has_null()) {
+ for (size_t i = 0; i < num_rows; ++i) {
+ if (!src_null_map_data[i]) {
+ nested_function->add(nested_places[i], &src_nested_column,
i, arena);
+ }
+ }
+ } else {
+ nested_function->add_batch(num_rows, nested_places.data(), 0,
&src_nested_column, arena,
+ false);
+ }
+
+ if constexpr (result_is_nullable) {
+ auto& dst_nullable_col = assert_cast<ColumnNullable&>(*dst);
+ MutableColumnPtr nested_col_ptr =
dst_nullable_col.get_nested_column().assume_mutable();
+ dst_nullable_col.get_null_map_column().insert_range_from(
+ src_nullable_col->get_null_map_column(), 0, num_rows);
+ nested_function->serialize_to_column(nested_places, 0,
nested_col_ptr, num_rows);
+ } else {
+ nested_function->serialize_to_column(nested_places, 0, dst,
num_rows);
+ }
+ }
+
+ void serialize_without_key_to_column(ConstAggregateDataPtr __restrict
place,
+ IColumn& to) const override {
+ if constexpr (result_is_nullable) {
+ auto& nullable_col = assert_cast<ColumnNullable&>(to);
+ auto& nested_col = nullable_col.get_nested_column();
+ auto& null_map = nullable_col.get_null_map_data();
+
+ bool flag = get_flag(place);
+ if (flag) {
+
nested_function->serialize_without_key_to_column(nested_place(place),
nested_col);
+ null_map.push_back(0);
+ } else {
+ nested_col.insert_default();
+ null_map.push_back(1);
+ }
+ } else {
+
nested_function->serialize_without_key_to_column(nested_place(place), to);
+ }
+ }
+
+ void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t
offset,
+ AggregateDataPtr rhs, const IColumn*
column, Arena& arena,
+ const size_t num_rows) const override {
+ if constexpr (result_is_nullable) {
+ const auto& nullable_col = assert_cast<const
ColumnNullable&>(*column);
+ const auto& nested_col = nullable_col.get_nested_column();
+ const auto* __restrict null_map_data =
nullable_col.get_null_map_data().data();
+
+ for (size_t i = 0; i < num_rows; ++i) {
+ *(places[i] + offset) |= (!null_map_data[i]);
+ }
+ nested_function->deserialize_and_merge_vec(places, offset +
prefix_size, rhs,
+ &nested_col, arena,
num_rows);
+ } else {
+ this->nested_function->deserialize_and_merge_vec(places, offset,
rhs, column, arena,
+ num_rows);
+ }
+ }
+
+ void deserialize_and_merge_vec_selected(const AggregateDataPtr* places,
size_t offset,
+ AggregateDataPtr rhs, const
IColumn* column,
+ Arena& arena, const size_t
num_rows) const override {
+ if constexpr (result_is_nullable) {
+ const auto& nullable_col = assert_cast<const
ColumnNullable&>(*column);
+ const auto& nested_col = nullable_col.get_nested_column();
+ const auto* __restrict null_map_data =
nullable_col.get_null_map_data().data();
+
+ for (size_t i = 0; i < num_rows; ++i) {
+ *(places[i] + offset) |= (!null_map_data[i]);
+ }
+ nested_function->deserialize_and_merge_vec_selected(places, offset
+ prefix_size, rhs,
+ &nested_col,
arena, num_rows);
+ } else {
+ this->nested_function->deserialize_and_merge_vec_selected(places,
offset, rhs, column,
+ arena,
num_rows);
+ }
+ }
+
+ void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict
place,
+ const IColumn& column, size_t
begin, size_t end,
+ Arena& arena) const override {
+ DCHECK(end <= column.size() && begin <= end)
Review Comment:
The DCHECK condition allows end to equal column.size(), which would cause an
out-of-bounds access in the loop at line 356 that uses `i <= end`. Since column
indices are 0-based, the maximum valid index is column.size() - 1. The
condition should be `DCHECK(end < column.size() && begin <= end)` instead.
```suggestion
DCHECK(end < column.size() && begin <= end)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]