kou commented on code in PR #46031:
URL: https://github.com/apache/arrow/pull/46031#discussion_r2053431775


##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -1423,37 +1453,140 @@ TEST_F(TestRecordBatch, 
MakeStatisticsArrayMaxApproximate) {
   AssertArraysEqual(*expected_statistics_array, *statistics_array, true);
 }
 
-TEST_F(TestRecordBatch, MakeStatisticsArrayString) {
-  auto schema =
-      ::arrow::schema({field("no-statistics", boolean()), field("string", 
utf8())});
-  auto no_statistics_array = ArrayFromJSON(boolean(), "[true, false, true]");
-  auto string_array_data = ArrayFromJSON(utf8(), "[\"a\", null, 
\"c\"]")->data()->Copy();
-  string_array_data->statistics = std::make_shared<ArrayStatistics>();
-  string_array_data->statistics->is_max_exact = true;
-  string_array_data->statistics->max = "c";
-  auto string_array = MakeArray(std::move(string_array_data));
-  auto batch = RecordBatch::Make(schema, string_array->length(),
-                                 {no_statistics_array, string_array});
+template <typename DataType>
+class TestRecordBatchMakeStatisticsArrayStringBase {
+ public:
+  std::shared_ptr<::arrow::DataType> type(int byte_width = 1) {
+    if constexpr (std::is_same_v<DataType, FixedSizeBinaryType>) {
+      return fixed_size_binary(byte_width);
+    } else {
+      return TypeTraits<DataType>::type_singleton();
+    }
+  }
+  std::shared_ptr<Array> GenerateString(
+      const std::shared_ptr<::arrow::DataType>& dataType) {
+    if (dataType->id() == Type::FIXED_SIZE_BINARY) {
+      FixedSizeBinaryType* type = 
static_cast<FixedSizeBinaryType*>(dataType.get());
+      int byte_width = type->byte_width();
+      auto a = std::string(byte_width, 'a');
+      auto b = std::string(byte_width, 'b');
+      auto c = std::string(byte_width, 'c');
+      std::stringstream ss;
+      ss << R"([")" << a << R"(",")" << b << R"(",")" << c << R"("])";
+      return ArrayFromJSON(dataType, ss.str());
+    }
+    return ArrayFromJSON(dataType, R"(["a","b","c"])");
+  }
+};
+template <typename DataType>
+class TestRecordBatchMakeStatisticsArrayEachStringType
+    : public TestRecordBatchMakeStatisticsArrayStringBase<DataType>,
+      public ::testing::Test {
+ public:
+  void TestEachType() {
+    auto schema = ::arrow::schema(
+        {field("no-statistics", boolean()), field("string", this->type())});
+    auto no_statistics_array = ArrayFromJSON(boolean(), "[true, false, true]");
+    auto string_array_data = 
this->GenerateString(this->type())->data()->Copy();
+    string_array_data->statistics = std::make_shared<ArrayStatistics>();
+    string_array_data->statistics->is_max_exact = true;
+    string_array_data->statistics->max = "c";
+    auto string_array = MakeArray(std::move(string_array_data));
+    auto batch = RecordBatch::Make(schema, string_array->length(),
+                                   {no_statistics_array, string_array});
+
+    ASSERT_OK_AND_ASSIGN(auto statistics_array, batch->MakeStatisticsArray());
+
+    ASSERT_OK_AND_ASSIGN(auto expected_statistics_array,
+                         MakeStatisticsArray("[null, 1]",
+                                             {{
+                                                  
ARROW_STATISTICS_KEY_ROW_COUNT_EXACT,
+                                              },
+                                              {
+                                                  
ARROW_STATISTICS_KEY_MAX_VALUE_EXACT,
+                                              }},
+                                             {{
+                                                  
ArrayStatistics::ValueType{int64_t{3}},
+                                              },
+                                              {
+                                                  
ArrayStatistics::ValueType{"c"},
+                                              }},
+                                             {null(), this->type()}));
+    AssertArraysEqual(*expected_statistics_array, *statistics_array, true);
+  }
+};
 
-  ASSERT_OK_AND_ASSIGN(auto statistics_array, batch->MakeStatisticsArray());
+class TestRecordBatchMakeStatisticsArrayFixedSizeBinaryCombination
+    : public TestRecordBatchMakeStatisticsArrayStringBase<FixedSizeBinaryType>,
+      public ::testing::TestWithParam<std::tuple<int, int>> {
+ public:
+  void TestCombination() {
+    auto [byte_width_1, byte_width_2] = GetParam();
+    auto f0 = GenerateString(type(byte_width_1));
+    f0->data()->statistics = std::make_shared<ArrayStatistics>();
+    f0->data()->statistics->max = std::string(byte_width_1, 'c');
+
+    auto f1 = ArrayFromJSON(int64(), R"([1, 2, 3])");
+
+    auto f2 = GenerateString(type(byte_width_2));
+    f2->data()->statistics = std::make_shared<ArrayStatistics>();
+    f2->data()->statistics->max = std::string(byte_width_2, 'c');
+
+    auto schema = ::arrow::schema({field("f0", type(byte_width_1)), 
field("f1", int64()),
+                                   field("f2", 
fixed_size_binary(byte_width_2))});
+    auto batch = RecordBatch::Make(schema, f0->length(), {f0, f1, f2});
+    ASSERT_OK_AND_ASSIGN(auto statistics_array, batch->MakeStatisticsArray());
+    ASSERT_OK_AND_ASSIGN(
+        auto expected_statistics_array,
+        MakeStatisticsArray(
+            "[null, 0, 2]",
+            {{
+                 ARROW_STATISTICS_KEY_ROW_COUNT_EXACT,
+             },
+             {
+                 ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE,
+             },
+             {
+                 ARROW_STATISTICS_KEY_MAX_VALUE_APPROXIMATE,
+             }},
+            {{
+                 ArrayStatistics::ValueType{int64_t{3}},
+             },
+             {
+                 ArrayStatistics::ValueType{std::string(byte_width_1, 'c')},
+             },
+             {
+                 ArrayStatistics::ValueType{std::string(byte_width_2, 'c')},
+             }},
+            {null(), type(byte_width_1), fixed_size_binary(byte_width_2)}));
+
+    AssertArraysEqual(*expected_statistics_array, *statistics_array, true);
+  }
+};
 
-  ASSERT_OK_AND_ASSIGN(auto expected_statistics_array,
-                       MakeStatisticsArray("[null, 1]",
-                                           {{
-                                                
ARROW_STATISTICS_KEY_ROW_COUNT_EXACT,
-                                            },
-                                            {
-                                                
ARROW_STATISTICS_KEY_MAX_VALUE_EXACT,
-                                            }},
-                                           {{
-                                                
ArrayStatistics::ValueType{int64_t{3}},
-                                            },
-                                            {
-                                                
ArrayStatistics::ValueType{"c"},
-                                            }}));
-  AssertArraysEqual(*expected_statistics_array, *statistics_array, true);
+using BaseBinaryOrBinaryViewOrFixedSizeBinaryLikeArrowTypes =
+    ::testing::Types<BinaryType, LargeBinaryType, BinaryViewType, 
FixedSizeBinaryType,
+                     StringType, LargeStringType, StringViewType>;

Review Comment:
   I know it but we want to use all binary related types, right? If we add more 
binary related types and we follow the `OrXXX` naming rule for this, other 
developer will add more `OrXXX` type alias and this test will still use 
`BaseBinaryOrBinaryViewOrFixedSizeBinaryLikeArrowTypes` not all binary related 
types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to