andishgar commented on code in PR #46422:
URL: https://github.com/apache/arrow/pull/46422#discussion_r2123763295
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
+ statistics1_.min = static_cast<double>(NAN);
+ statistics2_.min = static_cast<double>(NAN);
+ ASSERT_TRUE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
+
+ statistics2_.min = 2.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
Review Comment:
In the previous logic I implemented, I discovered a bug through this test.
Although the current algorithm works correctly, I have a suggestion to improve
its behavior.
Specifically, please consider changing the following
[lines](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L88-L90).
With the current logic, if one of the values is NaN and the other is not,
the following
[lines](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L91-L92)
are executed,
Here is my suggested change for [lines
88–90](https://github.com/apache/arrow/blob/94e3b3e98fe75a7498f2ee6edaeb006146d5cbe3/cpp/src/arrow/compare.cc#L88-L90):
```c++
if (Flags::nans_equal && (std::isnan(x) || std::isnan(y))) {
return std::isnan(x) && std::isnan(y);
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]