mapleFU commented on code in PR #35989:
URL: https://github.com/apache/arrow/pull/35989#discussion_r1245508465


##########
cpp/src/parquet/statistics_test.cc:
##########
@@ -377,6 +380,79 @@ class TestStatistics : public PrimitiveTypedTest<TestType> 
{
     ASSERT_EQ(total->max(), std::max(statistics1->max(), statistics2->max()));
   }
 
+  void TestMergeEmpty() {
+    EncodedStatistics encoded_statistics1;
+    auto statistics1 = Statistics::Make(this->schema_.Column(0), 
&encoded_statistics1,
+                                        /*num_values=*/1000);
+    auto s1 = 
std::dynamic_pointer_cast<TypedStatistics<TestType>>(statistics1);
+
+    EXPECT_FALSE(statistics1->HasMinMax());
+    EXPECT_FALSE(statistics1->HasDistinctCount());
+    EXPECT_FALSE(statistics1->HasNullCount());
+
+    EncodedStatistics encoded_statistics2;
+    encoded_statistics2.has_distinct_count = true;
+    encoded_statistics2.distinct_count = 500;
+    auto statistics2 = Statistics::Make(this->schema_.Column(0), 
&encoded_statistics2,
+                                        /*num_values=*/1000);
+
+    EXPECT_FALSE(statistics2->HasMinMax());
+    EXPECT_TRUE(statistics2->HasDistinctCount());
+    EXPECT_FALSE(s1->HasNullCount());
+    auto s2 = 
std::dynamic_pointer_cast<TypedStatistics<TestType>>(statistics2);
+
+    auto total = MakeStatistics<TestType>(this->schema_.Column(0));
+    total->Merge(*s1);
+    total->Merge(*s2);
+
+    EXPECT_FALSE(total->HasDistinctCount());
+    EXPECT_FALSE(total->HasMinMax());
+    EXPECT_EQ(2000, total->num_values());
+    EXPECT_FALSE(total->HasNullCount());
+  }
+
+  void TestNotHasNullValue() {
+    EncodedStatistics encoded_statistics1;
+    encoded_statistics1.has_null_count = false;
+    auto statistics1 = Statistics::Make(this->schema_.Column(0), 
&encoded_statistics1,
+                                        /*num_values=*/1000);
+    auto s1 = 
std::dynamic_pointer_cast<TypedStatistics<TestType>>(statistics1);
+    EXPECT_FALSE(s1->HasNullCount());
+    auto encoded = s1->Encode();
+    EXPECT_FALSE(encoded.all_null_value);
+  }
+
+  void TestMergeMinMax() {
+    this->GenerateData(1000);
+
+    auto chunk_statistics = MakeStatistics<TestType>(this->schema_.Column(0));
+
+    {
+      auto page_statistics1 = 
MakeStatistics<TestType>(this->schema_.Column(0));
+      std::vector<uint8_t> valid_bits(
+          bit_util::BytesForBits(static_cast<uint32_t>(this->values_.size())) 
+ 1, 0);
+      page_statistics1->Update(this->values_ptr_, /*num_values=*/0,
+                               /*null_count*/ this->values_.size());
+      auto encoded_stats1 = page_statistics1->Encode();
+      EXPECT_FALSE(encoded_stats1.has_min);
+      EXPECT_FALSE(encoded_stats1.has_max);
+
+      chunk_statistics->Merge(*page_statistics1);
+      encoded_stats1 = chunk_statistics->Encode();
+      EXPECT_FALSE(encoded_stats1.has_min);
+      EXPECT_FALSE(encoded_stats1.has_max);
+    }
+    {
+      auto page_statistics2 = 
MakeStatistics<TestType>(this->schema_.Column(0));
+      page_statistics2->Update(this->values_ptr_, this->values_.size(), 0);
+
+      chunk_statistics->Merge(*page_statistics2);
+      auto encoded_stats2 = chunk_statistics->Encode();
+      EXPECT_TRUE(encoded_stats2.has_min);
+      EXPECT_TRUE(encoded_stats2.has_max);

Review Comment:
   ```c++
     void SetMinMaxPair(std::pair<T, T> min_max) {
       // CleanStatistic can return a nullopt in case of erroneous values, e.g. 
NaN
       auto maybe_min_max = CleanStatistic(min_max);
       if (!maybe_min_max) return;
       auto min = maybe_min_max.value().first;
       auto max = maybe_min_max.value().second;
       if (!has_min_max_) {
         has_min_max_ = true;
         Copy(min, &min_, min_buffer_.get());
         Copy(max, &max_, max_buffer_.get());
       } else {
         Copy(comparator_->Compare(min_, min) ? min_ : min, &min_, 
min_buffer_.get());
         Copy(comparator_->Compare(max_, max) ? max : max_, &max_, 
max_buffer_.get());
       }
     }
   ```
   
   If `other` has min-max, our code will always set `has_min_max_ = true` here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to