Re: [PR] GH-46395: [C++][Statistics] Correct the Equal method for min and max in arrow::ArrayStatistics [arrow]

via GitHub Tue, 03 Jun 2025 06:29:50 -0700


andishgar commented on code in PR #46422:
URL: https://github.com/apache/arrow/pull/46422#discussion_r2123763295



##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
   ASSERT_NE(statistics1, statistics2);
   statistics2.is_max_exact = true;
   ASSERT_EQ(statistics1, statistics2);
+
+  // Test different ArrayStatistics::ValueType
+  statistics1.max = static_cast<uint64_t>(29);
+  statistics1.max = static_cast<int64_t>(29);
+  ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+  ArrayStatistics statistics1_;
+  ArrayStatistics statistics2_;
+  EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+  ASSERT_EQ(statistics1_, statistics2_);
+  statistics2_.min = 29.0;
+  ASSERT_NE(statistics1_, statistics2_);
+  statistics1_.min = 29.0;
+  ASSERT_EQ(statistics1_, statistics2_);
+  statistics2_.min = 30.0;
+  ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+  statistics1_.min = +0.0;
+  statistics2_.min = -0.0;
+  ASSERT_TRUE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(true)));
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+  auto infinity = std::numeric_limits<double>::infinity();
+  statistics1_.min = infinity;
+  statistics2_.min = infinity;
+  ASSERT_TRUE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(true)));
+  ASSERT_TRUE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(false)));
+  statistics1_.min = -infinity;
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(true)));
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(false)));
+  statistics1_.min = 0.0;
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(true)));
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, 
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
+  statistics1_.min = static_cast<double>(NAN);
+  statistics2_.min = static_cast<double>(NAN);
+  ASSERT_TRUE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
+
+  statistics2_.min = 2.0;
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+  ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));

Review Comment:
   In the previous logic I implemented, I discovered a bug through this test. 
Although the current algorithm works correctly, I have a suggestion to improve 
its behavior.
   
   Specifically, please consider changing the following 
[lines](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L88-L90).
   
   With the current logic, if  one of the values is NaN and the other is not, 
the following 
[lines](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L91-L92)
 are executed,
   Here is my suggested change for [lines 
88–90](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L88-L90):
   ```c++
       if (Flags::nans_equal && (std::isnan(x) || std::isnan(y))) {
         return std::isnan(x) && std::isnan(y);
       }
   ``` 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-46395: [C++][Statistics] Correct the Equal method for min and max in arrow::ArrayStatistics [arrow]

Reply via email to