andishgar commented on code in PR #46031: URL: https://github.com/apache/arrow/pull/46031#discussion_r2057966031
########## cpp/src/arrow/record_batch_test.cc: ########## @@ -1423,34 +1467,148 @@ TEST_F(TestRecordBatch, MakeStatisticsArrayMaxApproximate) { AssertArraysEqual(*expected_statistics_array, *statistics_array, true); } -TEST_F(TestRecordBatch, MakeStatisticsArrayString) { - auto schema = - ::arrow::schema({field("no-statistics", boolean()), field("string", utf8())}); - auto no_statistics_array = ArrayFromJSON(boolean(), "[true, false, true]"); - auto string_array_data = ArrayFromJSON(utf8(), "[\"a\", null, \"c\"]")->data()->Copy(); - string_array_data->statistics = std::make_shared<ArrayStatistics>(); - string_array_data->statistics->is_max_exact = true; - string_array_data->statistics->max = "c"; - auto string_array = MakeArray(std::move(string_array_data)); - auto batch = RecordBatch::Make(schema, string_array->length(), - {no_statistics_array, string_array}); +template <typename DataType> +class TestRecordBatchMakeStatisticsArrayBinary : public ::testing::Test { + public: + void TestMaxApproximation() { + auto max = ArrayStatistics::ValueType{"c"}; + auto schema = + ::arrow::schema({field("no-statistics", boolean()), field("string", type())}); + auto no_statistics_array = ArrayFromJSON(boolean(), "[true, false, true]"); + auto string_array = GenerateString(type()); + string_array->data()->statistics = std::make_shared<ArrayStatistics>(); + string_array->data()->statistics->max = max; + + auto batch = RecordBatch::Make(schema, string_array->length(), + {no_statistics_array, string_array}); + + ASSERT_OK_AND_ASSIGN(auto statistics_array, batch->MakeStatisticsArray()); + + ASSERT_OK_AND_ASSIGN( + auto expected_statistics_array, + MakeStatisticsArray("[null, 1]", + {{ + ARROW_STATISTICS_KEY_ROW_COUNT_EXACT, + }, + { + ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE, + }}, + {{ + ArrayStatistics::ValueType{int64_t{3}}, + }, + { + max, + }}, + {null(), type()})); + AssertArraysEqual(*expected_statistics_array, *statistics_array, true); + } + + std::shared_ptr<::arrow::DataType> type() { + if constexpr (std::is_same_v<DataType, FixedSizeBinaryType>) { + return fixed_size_binary(1); + } else { + return TypeTraits<DataType>::type_singleton(); + } + } +}; +TYPED_TEST_SUITE(TestRecordBatchMakeStatisticsArrayBinary, + AllBinaryOrBinrayViewLikeArrowTypes); + +TYPED_TEST(TestRecordBatchMakeStatisticsArrayBinary, MaxApproximation) { + this->TestMaxApproximation(); +} + +// Validates that the union array creates two distinct child arrays for two +// FixedSizeBinaryArrays with unequal byte widths. +TEST_F(TestRecordBatch, MakeStatisticsArrayDifferentSizeFixedSizeBinary) { + auto fixed_size_type1 = fixed_size_binary(1); + auto fixed_size_type2 = fixed_size_binary(2); + + auto fixed_size_array1 = GenerateString(fixed_size_type1); + fixed_size_array1->data()->statistics = std::make_shared<ArrayStatistics>(); + fixed_size_array1->data()->statistics->max = + std::string(fixed_size_type1->byte_width(), 'c'); + + auto fixed_size_array2 = GenerateString(fixed_size_type2); + fixed_size_array2->data()->statistics = std::make_shared<ArrayStatistics>(); + fixed_size_array2->data()->statistics->max = + std::string(fixed_size_type2->byte_width(), 'c'); + + auto schema = ::arrow::schema( + {field("fixed_size1", fixed_size_type1), field("fixed_size2", fixed_size_type2)}); + auto batch = RecordBatch::Make(schema, fixed_size_array1->length(), + {fixed_size_array1, fixed_size_array2}); ASSERT_OK_AND_ASSIGN(auto statistics_array, batch->MakeStatisticsArray()); + ASSERT_OK_AND_ASSIGN( + auto expected_statistics_array, + MakeStatisticsArray("[null, 0, 1]", + {{ + ARROW_STATISTICS_KEY_ROW_COUNT_EXACT, + }, + { + ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE, + }, + { + ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE, + }}, + {{ + ArrayStatistics::ValueType{int64_t{3}}, + }, + { + ArrayStatistics::ValueType{ + std::string(fixed_size_type1->byte_width(), 'c')}, + }, + { + ArrayStatistics::ValueType{ + std::string(fixed_size_type2->byte_width(), 'c')}, + }}, + {null(), fixed_size_type1, fixed_size_type2})); + + AssertArraysEqual(*expected_statistics_array, *statistics_array, true); +} + +// Validates that the union array creates a single child array for two +// FixedSizeBinaryArrays with equal byte widths. +TEST_F(TestRecordBatch, MakeStatisticsArraySameSizeFixedSizeBinary) { + auto fixed_size_type = fixed_size_binary(2); + auto max = ArrayStatistics::ValueType{std::string(fixed_size_type->byte_width(), 'c')}; Review Comment: Actually, I found this [link](https://abseil.io/tips/88). It seems it is not a big deal and in case a conversion happens, the compiler will probably send us a warning( which is translated to an error in Arrow :-) ) _Conclusion The tradeoffs for uniform initialization syntax are not generally worth it: our compilers already warn against the Most Vexing Parse (you can use brace initialization or add parens to resolve the issue), and the safety from narrowing conversions isn’t worth the readability hit for brace-initialization (we’ll need a different solution for narrowing conversions, eventually). The Style Arbiters don’t think this issue is critical enough to make a formal rule on, especially because there are cases (notably in generic code) where brace initialization may be justified._ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org