Copilot commented on code in PR #61641:
URL: https://github.com/apache/doris/pull/61641#discussion_r2978694520
##########
be/test/exprs/aggregate/agg_min_max_test.cpp:
##########
@@ -196,4 +199,199 @@ TEST_P(AggMinMaxTest, any_json_test) {
INSTANTIATE_TEST_SUITE_P(Params, AggMinMaxTest,
::testing::ValuesIn(std::vector<std::string> {"min",
"max"}));
+// Test that nullable min/max streaming_agg_serialize_to_column produces
correct results.
+// This is a regression test for a bug where min/max with is_trivial()=true
caused the
+// streaming aggregation path to skip create(), leaving states
zero-initialized instead of
+// sentinel-initialized (MAX_VALUE for min, MIN_VALUE for max). This led to
incorrect results
+// (e.g., 0 for numeric types, empty strings for datetime types).
+TEST_P(AggMinMaxTest, nullable_streaming_agg_int32_test) {
+ Arena arena;
+ std::string min_max_type = GetParam();
+
+ // Create nullable Int32 column with values [100, 200, 300, 400]
+ auto nested_col = ColumnInt32::create();
+ auto null_map = ColumnUInt8::create();
+ std::vector<int32_t> values = {100, 200, 300, 400};
Review Comment:
These nullable streaming regression cases use only positive values. That
reliably fails for the previous `min` bug (zero-init makes the min become 0),
but it may not fail for the previous `max` bug (zero-init still yields the
correct max when all inputs are > 0). To ensure the test would have caught the
historical `max` failure too, include a `max` case with all-negative (or
otherwise < 0 / < MIN sentinel) inputs, e.g. choose the input vector based on
`min_max_type` or add a dedicated `max`-only test.
```suggestion
// Create nullable Int32 column with values chosen to expose zero-init
vs sentinel-init bugs.
auto nested_col = ColumnInt32::create();
auto null_map = ColumnUInt8::create();
// For "min", use positive values so a zero-initialized state
incorrectly yields 0.
// For "max", use negative values so a zero-initialized state
incorrectly yields 0.
std::vector<int32_t> values;
if (min_max_type == "min") {
values = {100, 200, 300, 400};
} else {
values = {-100, -200, -300, -400};
}
```
##########
be/test/exprs/aggregate/agg_min_max_test.cpp:
##########
@@ -196,4 +199,199 @@ TEST_P(AggMinMaxTest, any_json_test) {
INSTANTIATE_TEST_SUITE_P(Params, AggMinMaxTest,
::testing::ValuesIn(std::vector<std::string> {"min",
"max"}));
+// Test that nullable min/max streaming_agg_serialize_to_column produces
correct results.
+// This is a regression test for a bug where min/max with is_trivial()=true
caused the
+// streaming aggregation path to skip create(), leaving states
zero-initialized instead of
+// sentinel-initialized (MAX_VALUE for min, MIN_VALUE for max). This led to
incorrect results
+// (e.g., 0 for numeric types, empty strings for datetime types).
+TEST_P(AggMinMaxTest, nullable_streaming_agg_int32_test) {
+ Arena arena;
+ std::string min_max_type = GetParam();
+
+ // Create nullable Int32 column with values [100, 200, 300, 400]
+ auto nested_col = ColumnInt32::create();
+ auto null_map = ColumnUInt8::create();
+ std::vector<int32_t> values = {100, 200, 300, 400};
+ for (auto v : values) {
+ nested_col->insert_value(v);
+ null_map->insert_value(0); // not null
+ }
+ auto nullable_col = ColumnNullable::create(std::move(nested_col),
std::move(null_map));
+
+ // Create nullable aggregate function
+ AggregateFunctionSimpleFactory factory;
+ register_aggregate_function_minmax(factory);
+ DataTypes data_types = {make_nullable(std::make_shared<DataTypeInt32>())};
+ auto agg_function = factory.get(min_max_type, data_types, nullptr, true,
-1);
+ ASSERT_NE(agg_function, nullptr);
+
+ // Call streaming_agg_serialize_to_column — this is the bug path where the
V2 nullable
+ // wrapper allocates per-row states and may skip create() if is_trivial()
returns true.
+ auto dst = agg_function->create_serialize_column();
+ const IColumn* columns[1] = {nullable_col.get()};
+ agg_function->streaming_agg_serialize_to_column(columns, dst,
values.size(), arena);
+
+ // Deserialize each row and verify the value matches the original.
+ // In streaming mode, each row is independently aggregated (single value
per state),
+ // so the result should be the original value itself.
+ for (size_t i = 0; i < values.size(); ++i) {
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+
+ agg_function->deserialize_and_merge_from_column_range(place, *dst, i,
i, arena);
+
+ auto result_col = ColumnNullable::create(ColumnInt32::create(),
ColumnUInt8::create());
+ agg_function->insert_result_into(place, *result_col);
+
+ ASSERT_FALSE(result_col->is_null_at(0)) << "Row " << i << " should not
be null";
+ const auto& result_nested =
+ assert_cast<const
ColumnInt32&>(result_col->get_nested_column());
+ EXPECT_EQ(values[i], result_nested.get_element(0))
+ << "Row " << i << " mismatch for " << min_max_type;
+ agg_function->destroy(place);
+ }
+}
+
+// Test nullable min/max streaming with some null values
+TEST_P(AggMinMaxTest, nullable_streaming_agg_with_nulls_test) {
+ Arena arena;
+ std::string min_max_type = GetParam();
+
+ // Create nullable Int64 column: [10, NULL, 30, NULL, 50]
+ auto nested_col = ColumnInt64::create();
+ auto null_map = ColumnUInt8::create();
+ std::vector<int64_t> values = {10, 0, 30, 0, 50};
+ std::vector<uint8_t> nulls = {0, 1, 0, 1, 0};
+ for (size_t i = 0; i < values.size(); i++) {
+ nested_col->insert_value(values[i]);
+ null_map->insert_value(nulls[i]);
+ }
+ auto nullable_col = ColumnNullable::create(std::move(nested_col),
std::move(null_map));
+
+ AggregateFunctionSimpleFactory factory;
+ register_aggregate_function_minmax(factory);
+ DataTypes data_types = {make_nullable(std::make_shared<DataTypeInt64>())};
+ auto agg_function = factory.get(min_max_type, data_types, nullptr, true,
-1);
+ ASSERT_NE(agg_function, nullptr);
+
+ auto dst = agg_function->create_serialize_column();
+ const IColumn* columns[1] = {nullable_col.get()};
+ agg_function->streaming_agg_serialize_to_column(columns, dst,
values.size(), arena);
+
+ for (size_t i = 0; i < values.size(); ++i) {
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+
+ agg_function->deserialize_and_merge_from_column_range(place, *dst, i,
i, arena);
+
+ auto result_col = ColumnNullable::create(ColumnInt64::create(),
ColumnUInt8::create());
+ agg_function->insert_result_into(place, *result_col);
+
+ if (nulls[i]) {
+ EXPECT_TRUE(result_col->is_null_at(0)) << "Row " << i << " should
be null";
+ } else {
+ ASSERT_FALSE(result_col->is_null_at(0)) << "Row " << i << " should
not be null";
+ const auto& result_nested =
+ assert_cast<const
ColumnInt64&>(result_col->get_nested_column());
+ EXPECT_EQ(values[i], result_nested.get_element(0))
+ << "Row " << i << " mismatch for " << min_max_type;
+ }
+ agg_function->destroy(place);
+ }
+}
+
+// Test nullable min/max streaming with DateTimeV2 type — this was the
original symptom
+// where zero-initialized DateTimeV2 (0000-00-00 00:00:00) produced empty
strings.
+TEST_P(AggMinMaxTest, nullable_streaming_agg_datetimev2_test) {
+ Arena arena;
+ std::string min_max_type = GetParam();
+
+ // Create nullable DateTimeV2 column with some timestamps.
+ // DateTimeV2 is stored as UInt64 internally via ColumnDateTimeV2.
+ auto nested_col = ColumnDateTimeV2::create();
+ auto null_map = ColumnUInt8::create();
+ // Use valid DateTimeV2 encoded values (year<<46 | month<<42 | day<<37 |
hour<<32 | min<<26 |
+ // sec<<20 | usec). These are 2000-01-01, 2024-06-15 10:30:45, 2024-12-25
23:59:59.
+ std::vector<uint64_t> values = {140742023840792576ULL,
142454833089085440ULL,
+ 142482653553098752ULL};
+ for (auto v : values) {
+ nested_col->insert_value(binary_cast<uint64_t,
DateV2Value<DateTimeV2ValueType>>(v));
+ null_map->insert_value(0);
+ }
+ auto nullable_col = ColumnNullable::create(std::move(nested_col),
std::move(null_map));
+
+ AggregateFunctionSimpleFactory factory;
+ register_aggregate_function_minmax(factory);
+ DataTypes data_types =
{make_nullable(std::make_shared<DataTypeDateTimeV2>())};
+ auto agg_function = factory.get(min_max_type, data_types, nullptr, true,
-1);
+ ASSERT_NE(agg_function, nullptr);
+
+ auto dst = agg_function->create_serialize_column();
+ const IColumn* columns[1] = {nullable_col.get()};
+ agg_function->streaming_agg_serialize_to_column(columns, dst,
values.size(), arena);
+
+ for (size_t i = 0; i < values.size(); ++i) {
+ std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
+ AggregateDataPtr place = memory.get();
+ agg_function->create(place);
+
+ agg_function->deserialize_and_merge_from_column_range(place, *dst, i,
i, arena);
+
+ auto result_col = ColumnNullable::create(ColumnDateTimeV2::create(),
ColumnUInt8::create());
+ agg_function->insert_result_into(place, *result_col);
+
+ ASSERT_FALSE(result_col->is_null_at(0)) << "Row " << i << " should not
be null";
+ const auto& result_nested =
+ assert_cast<const
ColumnDateTimeV2&>(result_col->get_nested_column());
+ EXPECT_EQ(values[i], result_nested.get_element(0).to_date_int_val())
+ << "Row " << i << " mismatch for " << min_max_type;
+ agg_function->destroy(place);
+ }
+}
+
+// Test that any_value still works correctly with the trivial path
(is_trivial() should
+// still return true for any_value with fixed-length types since it uses
has_value guard).
+TEST(AggMinMaxTest, any_value_nullable_streaming_agg_test) {
Review Comment:
The test suite name `AggMinMaxTest` is already used by parameterized tests
(`TEST_P` with fixture `AggMinMaxTest`). Defining a non-parameterized
`TEST(AggMinMaxTest, ...)` in the same suite uses a different fixture type
(`::testing::Test`) and will trigger gtest's "all tests in a test suite must
use the same fixture" fatal at runtime. Please change this to
`TEST_F(AggMinMaxTest, ...)` (and avoid calling `GetParam()`), or rename the
suite to a different name (e.g. `AggAnyValueTest`).
```suggestion
TEST_F(AggMinMaxTest, any_value_nullable_streaming_agg_test) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]