Copilot commented on code in PR #60228:
URL: https://github.com/apache/doris/pull/60228#discussion_r2730816015


##########
gensrc/thrift/PaloInternalService.thrift:
##########
@@ -424,8 +424,8 @@ struct TQueryOptions {
   183: optional bool enable_use_hybrid_sort = false;
   184: optional i32 cte_max_recursion_depth;
 
-
   185: optional bool enable_parquet_file_page_cache = true;
+  186: optional bool enable_aggregate_function_null_v2 = false;

Review Comment:
   There is a mismatch between the default values for 
enable_aggregate_function_null_v2. The Java SessionVariable defaults to true 
(line 1469 in SessionVariable.java), but the Thrift definition defaults to 
false (line 428 in PaloInternalService.thrift). This inconsistency could lead 
to different behavior depending on whether the value is explicitly set or uses 
the default. The defaults should be aligned.
   ```suggestion
     186: optional bool enable_aggregate_function_null_v2 = true;
   ```



##########
be/src/vec/aggregate_functions/aggregate_function_null_v2.h:
##########
@@ -0,0 +1,598 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNull.h
+// and modified by Doris
+
+#pragma once
+
+#include <glog/logging.h>
+
+#include <memory>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_distinct.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/string_buffer.hpp"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+template <typename NestFunction, bool result_is_nullable, typename Derived>
+class AggregateFunctionNullBaseInlineV2 : public 
IAggregateFunctionHelper<Derived> {
+protected:
+    std::unique_ptr<NestFunction> nested_function;
+    size_t prefix_size;
+    bool is_window_function = false;
+
+    AggregateDataPtr nested_place(AggregateDataPtr __restrict place) const 
noexcept {
+        return place + prefix_size;
+    }
+
+    ConstAggregateDataPtr nested_place(ConstAggregateDataPtr __restrict place) 
const noexcept {
+        return place + prefix_size;
+    }
+
+    static void init(AggregateDataPtr __restrict place, bool 
is_window_function) noexcept {
+        init_flag(place);
+        init_null_count(place, is_window_function);
+    }
+
+    static void init_flag(AggregateDataPtr __restrict place) noexcept {
+        if constexpr (result_is_nullable) {
+            place[0] = false;
+        }
+    }
+
+    static void set_flag(AggregateDataPtr __restrict place) noexcept {
+        if constexpr (result_is_nullable) {
+            place[0] = true;
+        }
+    }
+
+    static bool get_flag(ConstAggregateDataPtr __restrict place) noexcept {
+        return result_is_nullable ? place[0] : true;
+    }
+
+    static void init_null_count(AggregateDataPtr __restrict place,
+                                bool is_window_function) noexcept {
+        if (is_window_function && result_is_nullable) {
+            unaligned_store<int32_t>(place + 1, 0);
+        }
+    }
+
+    static void update_null_count(AggregateDataPtr __restrict place, bool 
incremental,
+                                  bool is_window_function) noexcept {
+        if (is_window_function && result_is_nullable) {
+            auto null_count = unaligned_load<int32_t>(place + 1);
+            incremental ? null_count++ : null_count--;
+            unaligned_store<int32_t>(place + 1, null_count);
+        }
+    }
+
+    static int32_t get_null_count(ConstAggregateDataPtr __restrict place,
+                                  bool is_window_function) noexcept {
+        int32_t num = 0;
+        if (is_window_function && result_is_nullable) {
+            num = unaligned_load<int32_t>(place + 1);
+        }
+        return num;
+    }
+
+public:
+    AggregateFunctionNullBaseInlineV2(IAggregateFunction* nested_function_,
+                                      const DataTypes& arguments, bool 
is_window_function_)
+            : IAggregateFunctionHelper<Derived>(arguments),
+              nested_function {assert_cast<NestFunction*>(nested_function_)},
+              is_window_function(is_window_function_) {
+        DCHECK(nested_function_ != nullptr);
+        if constexpr (result_is_nullable) {
+            if (this->is_window_function) {
+                // 
flag|---null_count----|-------padding-------|--nested_data----|
+                size_t nested_align = nested_function->align_of_data();
+                prefix_size = 1 + sizeof(int32_t);
+                if (prefix_size % nested_align != 0) {
+                    prefix_size += (nested_align - (prefix_size % 
nested_align));
+                }
+            } else {
+                prefix_size = nested_function->align_of_data();
+            }
+        } else {
+            prefix_size = 0;
+        }
+    }
+
+    MutableColumnPtr create_serialize_column() const override {
+        if constexpr (result_is_nullable) {
+            return 
ColumnNullable::create(nested_function->create_serialize_column(),
+                                          ColumnUInt8::create());
+        }
+        return nested_function->create_serialize_column();
+    }
+
+    DataTypePtr get_serialized_type() const override {
+        if constexpr (result_is_nullable) {
+            return make_nullable(nested_function->get_serialized_type());
+        }
+        return nested_function->get_serialized_type();
+    }
+
+    void set_query_context(QueryContext* ctx) override {
+        return nested_function->set_query_context(ctx);
+    }
+
+    bool is_blockable() const override { return 
nested_function->is_blockable(); }
+
+    void set_version(const int version_) override {
+        IAggregateFunctionHelper<Derived>::set_version(version_);
+        nested_function->set_version(version_);
+    }
+
+    String get_name() const override { return "NullableV2(" + 
nested_function->get_name() + ")"; }
+
+    DataTypePtr get_return_type() const override {
+        return result_is_nullable ? 
make_nullable(nested_function->get_return_type())
+                                  : nested_function->get_return_type();
+    }
+
+    void create(AggregateDataPtr __restrict place) const override {
+        init(place, this->is_window_function);
+        nested_function->create(nested_place(place));
+    }
+
+    void destroy(AggregateDataPtr __restrict place) const noexcept override {
+        nested_function->destroy(nested_place(place));
+    }
+    void reset(AggregateDataPtr place) const override {
+        init(place, this->is_window_function);
+        nested_function->reset(nested_place(place));
+    }
+
+    bool is_trivial() const override { return false; }
+
+    size_t size_of_data() const override { return prefix_size + 
nested_function->size_of_data(); }
+
+    size_t align_of_data() const override {
+        if (this->is_window_function && result_is_nullable) {
+            return std::max(nested_function->align_of_data(), 
alignof(int32_t));
+        } else {
+            return nested_function->align_of_data();
+        }
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena& arena) const override {
+        if (get_flag(rhs)) {
+            set_flag(place);
+            nested_function->merge(nested_place(place), nested_place(rhs), 
arena);
+        }
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& 
buf) const override {
+        bool flag = get_flag(place);
+        if constexpr (result_is_nullable) {
+            buf.write_binary(flag);
+        }
+        if (flag) {
+            nested_function->serialize(nested_place(place), buf);
+        }
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena& arena) const override {
+        bool flag = true;
+        if constexpr (result_is_nullable) {
+            buf.read_binary(flag);
+        }
+        if (flag) {
+            set_flag(place);
+            nested_function->deserialize(nested_place(place), buf, arena);
+        }
+    }
+
+    void serialize_to_column(const std::vector<AggregateDataPtr>& places, 
size_t offset,
+                             MutableColumnPtr& dst, const size_t num_rows) 
const override {
+        if constexpr (result_is_nullable) {
+            auto& nullable_col = assert_cast<ColumnNullable&>(*dst);
+            auto& nested_col = nullable_col.get_nested_column();
+            auto& null_map = nullable_col.get_null_map_data();
+            MutableColumnPtr nested_col_ptr = nested_col.assume_mutable();
+
+            null_map.resize(num_rows);
+            uint8_t* __restrict null_map_data = null_map.data();
+            for (size_t i = 0; i < num_rows; ++i) {
+                null_map_data[i] = !get_flag(places[i] + offset);
+            }
+            nested_function->serialize_to_column(places, offset + prefix_size, 
nested_col_ptr,
+                                                 num_rows);
+        } else {
+            nested_function->serialize_to_column(places, offset, dst, 
num_rows);
+        }
+    }
+
+    void streaming_agg_serialize_to_column(const IColumn** columns, 
MutableColumnPtr& dst,
+                                           const size_t num_rows, Arena& 
arena) const override {
+        const auto* src_nullable_col = assert_cast<const 
ColumnNullable*>(columns[0]);
+        const auto* __restrict src_null_map_data = 
src_nullable_col->get_null_map_data().data();
+
+        size_t nested_size = nested_function->size_of_data();
+        std::vector<AggregateDataPtr> nested_places(num_rows);
+        std::vector<char> places_data(num_rows * nested_size);
+        for (size_t i = 0; i < num_rows; ++i) {
+            nested_places[i] = places_data.data() + i * nested_size;
+        }
+
+        if (!nested_function->is_trivial()) {
+            for (int i = 0; i < num_rows; ++i) {
+                try {
+                    nested_function->create(nested_places[i]);
+                } catch (...) {
+                    for (int j = 0; j < i; ++j) {
+                        nested_function->destroy(nested_places[j]);
+                    }
+                    throw;
+                }
+            }
+        }
+        Defer destroy_places = {[&]() {
+            if (!nested_function->is_trivial()) {
+                for (int i = 0; i < num_rows; ++i) {
+                    nested_function->destroy(nested_places[i]);
+                }
+            }
+        }};
+        const IColumn* src_nested_column =
+                src_nullable_col->get_nested_column().assume_mutable().get();
+        if (src_nullable_col->has_null()) {
+            for (size_t i = 0; i < num_rows; ++i) {
+                if (!src_null_map_data[i]) {
+                    nested_function->add(nested_places[i], &src_nested_column, 
i, arena);
+                }
+            }
+        } else {
+            nested_function->add_batch(num_rows, nested_places.data(), 0, 
&src_nested_column, arena,
+                                       false);
+        }
+
+        if constexpr (result_is_nullable) {
+            auto& dst_nullable_col = assert_cast<ColumnNullable&>(*dst);
+            MutableColumnPtr nested_col_ptr = 
dst_nullable_col.get_nested_column().assume_mutable();
+            dst_nullable_col.get_null_map_column().insert_range_from(
+                    src_nullable_col->get_null_map_column(), 0, num_rows);
+            nested_function->serialize_to_column(nested_places, 0, 
nested_col_ptr, num_rows);
+        } else {
+            nested_function->serialize_to_column(nested_places, 0, dst, 
num_rows);
+        }
+    }
+
+    void serialize_without_key_to_column(ConstAggregateDataPtr __restrict 
place,
+                                         IColumn& to) const override {
+        if constexpr (result_is_nullable) {
+            auto& nullable_col = assert_cast<ColumnNullable&>(to);
+            auto& nested_col = nullable_col.get_nested_column();
+            auto& null_map = nullable_col.get_null_map_data();
+
+            bool flag = get_flag(place);
+            if (flag) {
+                
nested_function->serialize_without_key_to_column(nested_place(place), 
nested_col);
+                null_map.push_back(0);
+            } else {
+                nested_col.insert_default();
+                null_map.push_back(1);
+            }
+        } else {
+            
nested_function->serialize_without_key_to_column(nested_place(place), to);
+        }
+    }
+
+    void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t 
offset,
+                                   AggregateDataPtr rhs, const IColumn* 
column, Arena& arena,
+                                   const size_t num_rows) const override {
+        if constexpr (result_is_nullable) {
+            const auto& nullable_col = assert_cast<const 
ColumnNullable&>(*column);
+            const auto& nested_col = nullable_col.get_nested_column();
+            const auto* __restrict null_map_data = 
nullable_col.get_null_map_data().data();
+
+            for (size_t i = 0; i < num_rows; ++i) {
+                *(places[i] + offset) |= (!null_map_data[i]);
+            }
+            nested_function->deserialize_and_merge_vec(places, offset + 
prefix_size, rhs,
+                                                       &nested_col, arena, 
num_rows);
+        } else {
+            this->nested_function->deserialize_and_merge_vec(places, offset, 
rhs, column, arena,
+                                                             num_rows);
+        }
+    }
+
+    void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, 
size_t offset,
+                                            AggregateDataPtr rhs, const 
IColumn* column,
+                                            Arena& arena, const size_t 
num_rows) const override {
+        if constexpr (result_is_nullable) {
+            const auto& nullable_col = assert_cast<const 
ColumnNullable&>(*column);
+            const auto& nested_col = nullable_col.get_nested_column();
+            const auto* __restrict null_map_data = 
nullable_col.get_null_map_data().data();
+
+            for (size_t i = 0; i < num_rows; ++i) {
+                *(places[i] + offset) |= (!null_map_data[i]);
+            }
+            nested_function->deserialize_and_merge_vec_selected(places, offset 
+ prefix_size, rhs,
+                                                                &nested_col, 
arena, num_rows);
+        } else {
+            this->nested_function->deserialize_and_merge_vec_selected(places, 
offset, rhs, column,
+                                                                      arena, 
num_rows);
+        }
+    }
+
+    void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict 
place,
+                                                 const IColumn& column, size_t 
begin, size_t end,
+                                                 Arena& arena) const override {
+        DCHECK(end <= column.size() && begin <= end)

Review Comment:
   The DCHECK condition allows end to equal column.size(), which would cause an 
out-of-bounds access in the loop at line 356 that uses `i <= end`. Since column 
indices are 0-based, the maximum valid index is column.size() - 1. The 
condition should be `DCHECK(end < column.size() && begin <= end)` instead.
   ```suggestion
           DCHECK(end < column.size() && begin <= end)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to