kou commented on code in PR #46422:
URL: https://github.com/apache/arrow/pull/46422#discussion_r2123592310
##########
cpp/src/arrow/compare.cc:
##########
@@ -1522,5 +1524,53 @@ bool TypeEquals(const DataType& left, const DataType&
right, bool check_metadata
return visitor.result();
}
}
+namespace {
+
+using ValueType = ArrayStatistics::ValueType;
+
+bool DoubleEquals(const double& left, const double& right, const EqualOptions&
options) {
+ bool result;
+ auto visitor = [&](auto&& compare_func) { result = compare_func(left,
right); };
+ VisitFloatingEquality<double>(options, options.use_atol(),
std::move(visitor));
+ return result;
+}
+bool ValueTypeEquals(const std::optional<ValueType>& left,
+ const std::optional<ValueType>& right, const
EqualOptions& options) {
+ if (!left.has_value() || !right.has_value()) {
+ return left.has_value() == right.has_value();
+ } else if (left->index() != right->index()) {
+ return false;
+ } else {
+ auto EqualsVisitor = [&](const auto& v1, const auto& v2) {
+ using type_1 = std::decay_t<decltype(v1)>;
+ using type_2 = std::decay_t<decltype(v2)>;
+ if constexpr (std::conjunction_v<std::is_same<type_1, double>,
+ std::is_same<type_2, double>>) {
+ return DoubleEquals(v1, v2, options);
+ } else if constexpr (std::is_same_v<type_1, type_2>) {
+ return v1 == v2;
+ }
+ // It is unreachable
+ DCHECK(false);
+ return false;
+ };
+ return std::visit(EqualsVisitor, left.value(), right.value());
+ }
+}
+bool EqualsImpl(const ArrayStatistics& left, const ArrayStatistics& right,
Review Comment:
```suggestion
bool ArrayStatisticsEqualsImpl(const ArrayStatistics& left, const
ArrayStatistics& right,
```
##########
cpp/src/arrow/compare.h:
##########
@@ -87,7 +97,7 @@ class EqualOptions {
double atol_ = kDefaultAbsoluteTolerance;
bool nans_equal_ = false;
bool signed_zeros_equal_ = true;
-
+ bool use_atol_ = true;
Review Comment:
```suggestion
bool use_atol_ = true;
```
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
+ statistics1_.min = static_cast<double>(NAN);
+ statistics2_.min = static_cast<double>(NAN);
Review Comment:
Can we use `std::numeric_limits<double>::quet_NaN()`?
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
Review Comment:
Why is this needed in this test?
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
+ statistics1_.min = static_cast<double>(NAN);
+ statistics2_.min = static_cast<double>(NAN);
+ ASSERT_TRUE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
+
+ statistics2_.min = 2.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
Review Comment:
Do we need this?
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
+ statistics1_.min = static_cast<double>(NAN);
+ statistics2_.min = static_cast<double>(NAN);
+ ASSERT_TRUE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
+
+ statistics2_.min = 2.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_, options_.nans_equal(false)));
+}
+TEST_F(TestEqualityDoubleValue, ApproximateEquals) {
+ statistics1_.max = 0.5001f;
+ statistics2_.max = 0.5;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.atol(1e-3).use_atol(false)));
+
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.atol(1e-3).use_atol(true)));
Review Comment:
```suggestion
ASSERT_TRUE(statistics1_.Equals(statistics2_, options_.atol(1e-3)));
```
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -15,9 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+#include <cmath>
+
#include <gtest/gtest.h>
#include "arrow/array/statistics.h"
+#include "arrow/compare.h"
Review Comment:
Do we need this?
##########
cpp/src/arrow/compare.cc:
##########
@@ -1522,5 +1524,53 @@ bool TypeEquals(const DataType& left, const DataType&
right, bool check_metadata
return visitor.result();
}
}
+namespace {
+
+using ValueType = ArrayStatistics::ValueType;
+
+bool DoubleEquals(const double& left, const double& right, const EqualOptions&
options) {
+ bool result;
+ auto visitor = [&](auto&& compare_func) { result = compare_func(left,
right); };
+ VisitFloatingEquality<double>(options, options.use_atol(),
std::move(visitor));
+ return result;
+}
+bool ValueTypeEquals(const std::optional<ValueType>& left,
Review Comment:
```suggestion
bool ArrayStatisticsValueTypeEquals(const std::optional<ValueType>& left,
```
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
Review Comment:
Why do we need `singned_zeros_equal(true/false)`?
##########
cpp/src/arrow/array/statistics_test.cc:
##########
@@ -96,6 +99,69 @@ TEST(ArrayStatisticsTest, TestEquality) {
ASSERT_NE(statistics1, statistics2);
statistics2.is_max_exact = true;
ASSERT_EQ(statistics1, statistics2);
+
+ // Test different ArrayStatistics::ValueType
+ statistics1.max = static_cast<uint64_t>(29);
+ statistics1.max = static_cast<int64_t>(29);
+ ASSERT_NE(statistics1, statistics2);
+}
+class TestEqualityDoubleValue : public ::testing::Test {
+ protected:
+ ArrayStatistics statistics1_;
+ ArrayStatistics statistics2_;
+ EqualOptions options_ = EqualOptions::Defaults();
+};
+
+TEST_F(TestEqualityDoubleValue, ExactValue) {
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 29.0;
+ ASSERT_NE(statistics1_, statistics2_);
+ statistics1_.min = 29.0;
+ ASSERT_EQ(statistics1_, statistics2_);
+ statistics2_.min = 30.0;
+ ASSERT_NE(statistics1_, statistics2_);
+}
+
+TEST_F(TestEqualityDoubleValue, SignedZero) {
+ statistics1_.min = +0.0;
+ statistics2_.min = -0.0;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Infinity) {
+ auto infinity = std::numeric_limits<double>::infinity();
+ statistics1_.min = infinity;
+ statistics2_.min = infinity;
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_TRUE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = -infinity;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+ statistics1_.min = 0.0;
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(true)));
+ ASSERT_FALSE(statistics1_.Equals(statistics2_,
options_.signed_zeros_equal(false)));
+}
+
+TEST_F(TestEqualityDoubleValue, Nan) {
Review Comment:
```suggestion
TEST_F(TestEqualityDoubleValue, NaN) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]