This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0e52d308c5 GH-38624: [C++] Fix: add TestingEqualOptions for gtest 
functions. (#38642)
0e52d308c5 is described below

commit 0e52d308c567b400d6599f2f2b238a3520b817bb
Author: Francis <[email protected]>
AuthorDate: Thu Nov 16 05:09:17 2023 +0800

    GH-38624: [C++] Fix: add TestingEqualOptions for gtest functions. (#38642)
    
    
    
    ### Rationale for this change
    
    some other interfaces lack equal options, suck as nan equal, so we add 
TestingEqualOptions for each function and add some testing.
    
    ### What changes are included in this PR?
    
    gtest_util related.
    
    ### Are these changes tested?
    
    gtest_util.cc
    
    ### Are there any user-facing changes?
    yes.
    * Closes: #38624
    
    Authored-by: light-city <[email protected]>
    Signed-off-by: Benjamin Kietzman <[email protected]>
---
 cpp/src/arrow/chunked_array.cc           |  13 +--
 cpp/src/arrow/chunked_array.h            |   6 +-
 cpp/src/arrow/testing/CMakeLists.txt     |   1 +
 cpp/src/arrow/testing/gtest_util.cc      |  55 +++++++------
 cpp/src/arrow/testing/gtest_util.h       |  48 ++++++-----
 cpp/src/arrow/testing/gtest_util_test.cc | 137 +++++++++++++++++++++++++++++++
 6 files changed, 207 insertions(+), 53 deletions(-)

diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index 12937406e7..c36b736d5d 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -86,7 +86,7 @@ Result<std::shared_ptr<ChunkedArray>> ChunkedArray::MakeEmpty(
   return std::make_shared<ChunkedArray>(std::move(new_chunks));
 }
 
-bool ChunkedArray::Equals(const ChunkedArray& other) const {
+bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) 
const {
   if (length_ != other.length()) {
     return false;
   }
@@ -102,9 +102,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other) const {
   // the underlying data independently of the chunk size.
   return internal::ApplyBinaryChunked(
              *this, other,
-             [](const Array& left_piece, const Array& right_piece,
-                int64_t ARROW_ARG_UNUSED(position)) {
-               if (!left_piece.Equals(right_piece)) {
+             [&](const Array& left_piece, const Array& right_piece,
+                 int64_t ARROW_ARG_UNUSED(position)) {
+               if (!left_piece.Equals(right_piece, opts)) {
                  return Status::Invalid("Unequal piece");
                }
                return Status::OK();
@@ -129,14 +129,15 @@ bool mayHaveNaN(const arrow::DataType& type) {
 
 }  //  namespace
 
-bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
+bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other,
+                          const EqualOptions& opts) const {
   if (!other) {
     return false;
   }
   if (this == other.get() && !mayHaveNaN(*type_)) {
     return true;
   }
-  return Equals(*other.get());
+  return Equals(*other.get(), opts);
 }
 
 bool ChunkedArray::ApproxEquals(const ChunkedArray& other,
diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h
index 6ec7d11ac8..5d300861d8 100644
--- a/cpp/src/arrow/chunked_array.h
+++ b/cpp/src/arrow/chunked_array.h
@@ -152,9 +152,11 @@ class ARROW_EXPORT ChunkedArray {
   ///
   /// Two chunked arrays can be equal only if they have equal datatypes.
   /// However, they may be equal even if they have different chunkings.
-  bool Equals(const ChunkedArray& other) const;
+  bool Equals(const ChunkedArray& other,
+              const EqualOptions& opts = EqualOptions::Defaults()) const;
   /// \brief Determine if two chunked arrays are equal.
-  bool Equals(const std::shared_ptr<ChunkedArray>& other) const;
+  bool Equals(const std::shared_ptr<ChunkedArray>& other,
+              const EqualOptions& opts = EqualOptions::Defaults()) const;
   /// \brief Determine if two chunked arrays approximately equal
   bool ApproxEquals(const ChunkedArray& other,
                     const EqualOptions& = EqualOptions::Defaults()) const;
diff --git a/cpp/src/arrow/testing/CMakeLists.txt 
b/cpp/src/arrow/testing/CMakeLists.txt
index d533240596..59825f0bf2 100644
--- a/cpp/src/arrow/testing/CMakeLists.txt
+++ b/cpp/src/arrow/testing/CMakeLists.txt
@@ -19,4 +19,5 @@ arrow_install_all_headers("arrow/testing")
 
 if(ARROW_BUILD_TESTS)
   add_arrow_test(random_test)
+  add_arrow_test(gtest_util_test)
 endif()
diff --git a/cpp/src/arrow/testing/gtest_util.cc 
b/cpp/src/arrow/testing/gtest_util.cc
index a6dc1d59c6..5ef1820d5b 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -145,42 +145,46 @@ void AssertScalarsApproxEqual(const Scalar& expected, 
const Scalar& actual, bool
 }
 
 void AssertBatchesEqual(const RecordBatch& expected, const RecordBatch& actual,
-                        bool check_metadata) {
+                        bool check_metadata, const EqualOptions& options) {
   AssertTsSame(expected, actual,
                [&](const RecordBatch& expected, const RecordBatch& actual) {
-                 return expected.Equals(actual, check_metadata);
+                 return expected.Equals(actual, check_metadata, options);
                });
 }
 
-void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& 
actual) {
+void AssertBatchesApproxEqual(const RecordBatch& expected, const RecordBatch& 
actual,
+                              const EqualOptions& options) {
   AssertTsSame(expected, actual,
                [&](const RecordBatch& expected, const RecordBatch& actual) {
-                 return expected.ApproxEquals(actual);
+                 return expected.ApproxEquals(actual, options);
                });
 }
 
-void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& 
actual) {
+void AssertChunkedEqual(const ChunkedArray& expected, const ChunkedArray& 
actual,
+                        const EqualOptions& options) {
   ASSERT_EQ(expected.num_chunks(), actual.num_chunks()) << "# chunks unequal";
-  if (!actual.Equals(expected)) {
+  if (!actual.Equals(expected, options)) {
     std::stringstream diff;
     for (int i = 0; i < actual.num_chunks(); ++i) {
       auto c1 = actual.chunk(i);
       auto c2 = expected.chunk(i);
       diff << "# chunk " << i << std::endl;
-      ARROW_IGNORE_EXPR(c1->Equals(c2, EqualOptions().diff_sink(&diff)));
+      ARROW_IGNORE_EXPR(c1->Equals(c2, options.diff_sink(&diff)));
     }
     FAIL() << diff.str();
   }
 }
 
-void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& 
expected) {
-  AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual);
+void AssertChunkedEqual(const ChunkedArray& actual, const ArrayVector& 
expected,
+                        const EqualOptions& options) {
+  AssertChunkedEqual(ChunkedArray(expected, actual.type()), actual, options);
 }
 
-void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& 
actual) {
+void AssertChunkedEquivalent(const ChunkedArray& expected, const ChunkedArray& 
actual,
+                             const EqualOptions& options) {
   // XXX: AssertChunkedEqual in gtest_util.h does not permit the chunk layouts
   // to be different
-  if (!actual.Equals(expected)) {
+  if (!actual.Equals(expected, options)) {
     std::stringstream pp_expected;
     std::stringstream pp_actual;
     ::arrow::PrettyPrintOptions options(/*indent=*/2);
@@ -321,21 +325,23 @@ ASSERT_EQUAL_IMPL(Field, Field, "fields")
 ASSERT_EQUAL_IMPL(Schema, Schema, "schemas")
 #undef ASSERT_EQUAL_IMPL
 
-void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool 
verbose) {
+void AssertDatumsEqual(const Datum& expected, const Datum& actual, bool 
verbose,
+                       const EqualOptions& options) {
   ASSERT_EQ(expected.kind(), actual.kind())
       << "expected:" << expected.ToString() << " got:" << actual.ToString();
 
   switch (expected.kind()) {
     case Datum::SCALAR:
-      AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose);
+      AssertScalarsEqual(*expected.scalar(), *actual.scalar(), verbose, 
options);
       break;
     case Datum::ARRAY: {
       auto expected_array = expected.make_array();
       auto actual_array = actual.make_array();
-      AssertArraysEqual(*expected_array, *actual_array, verbose);
+      AssertArraysEqual(*expected_array, *actual_array, verbose, options);
     } break;
     case Datum::CHUNKED_ARRAY:
-      AssertChunkedEquivalent(*expected.chunked_array(), 
*actual.chunked_array());
+      AssertChunkedEquivalent(*expected.chunked_array(), 
*actual.chunked_array(),
+                              options);
       break;
     default:
       // TODO: Implement better print
@@ -479,7 +485,7 @@ Result<std::optional<std::string>> PrintArrayDiff(const 
ChunkedArray& expected,
 }
 
 void AssertTablesEqual(const Table& expected, const Table& actual, bool 
same_chunk_layout,
-                       bool combine_chunks) {
+                       bool combine_chunks, const EqualOptions& options) {
   ASSERT_EQ(expected.num_columns(), actual.num_columns());
 
   if (combine_chunks) {
@@ -487,13 +493,13 @@ void AssertTablesEqual(const Table& expected, const 
Table& actual, bool same_chu
     ASSERT_OK_AND_ASSIGN(auto new_expected, expected.CombineChunks(pool));
     ASSERT_OK_AND_ASSIGN(auto new_actual, actual.CombineChunks(pool));
 
-    AssertTablesEqual(*new_expected, *new_actual, false, false);
+    AssertTablesEqual(*new_expected, *new_actual, false, false, options);
     return;
   }
 
   if (same_chunk_layout) {
     for (int i = 0; i < actual.num_columns(); ++i) {
-      AssertChunkedEqual(*expected.column(i), *actual.column(i));
+      AssertChunkedEqual(*expected.column(i), *actual.column(i), options);
     }
   } else {
     std::stringstream ss;
@@ -533,17 +539,18 @@ void CompareBatchWith(const RecordBatch& left, const 
RecordBatch& right,
 }
 
 void CompareBatch(const RecordBatch& left, const RecordBatch& right,
-                  bool compare_metadata) {
+                  bool compare_metadata, const EqualOptions& options) {
   return CompareBatchWith(
       left, right, compare_metadata,
-      [](const Array& left, const Array& right) { return left.Equals(right); 
});
+      [&](const Array& left, const Array& right) { return left.Equals(right, 
options); });
 }
 
 void ApproxCompareBatch(const RecordBatch& left, const RecordBatch& right,
-                        bool compare_metadata) {
-  return CompareBatchWith(
-      left, right, compare_metadata,
-      [](const Array& left, const Array& right) { return 
left.ApproxEquals(right); });
+                        bool compare_metadata, const EqualOptions& options) {
+  return CompareBatchWith(left, right, compare_metadata,
+                          [&](const Array& left, const Array& right) {
+                            return left.ApproxEquals(right, options);
+                          });
 }
 
 std::shared_ptr<Array> TweakValidityBit(const std::shared_ptr<Array>& array,
diff --git a/cpp/src/arrow/testing/gtest_util.h 
b/cpp/src/arrow/testing/gtest_util.h
index 641aae5a5e..916067d85b 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -221,18 +221,22 @@ ARROW_TESTING_EXPORT void AssertScalarsEqual(
 ARROW_TESTING_EXPORT void AssertScalarsApproxEqual(
     const Scalar& expected, const Scalar& actual, bool verbose = false,
     const EqualOptions& options = TestingEqualOptions());
-ARROW_TESTING_EXPORT void AssertBatchesEqual(const RecordBatch& expected,
-                                             const RecordBatch& actual,
-                                             bool check_metadata = false);
-ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(const RecordBatch& expected,
-                                                   const RecordBatch& actual);
-ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& expected,
-                                             const ChunkedArray& actual);
-ARROW_TESTING_EXPORT void AssertChunkedEqual(const ChunkedArray& actual,
-                                             const ArrayVector& expected);
+ARROW_TESTING_EXPORT void AssertBatchesEqual(
+    const RecordBatch& expected, const RecordBatch& actual, bool 
check_metadata = false,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertBatchesApproxEqual(
+    const RecordBatch& expected, const RecordBatch& actual,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertChunkedEqual(
+    const ChunkedArray& expected, const ChunkedArray& actual,
+    const EqualOptions& options = TestingEqualOptions());
+ARROW_TESTING_EXPORT void AssertChunkedEqual(
+    const ChunkedArray& actual, const ArrayVector& expected,
+    const EqualOptions& options = TestingEqualOptions());
 // Like ChunkedEqual, but permits different chunk layout
-ARROW_TESTING_EXPORT void AssertChunkedEquivalent(const ChunkedArray& expected,
-                                                  const ChunkedArray& actual);
+ARROW_TESTING_EXPORT void AssertChunkedEquivalent(
+    const ChunkedArray& expected, const ChunkedArray& actual,
+    const EqualOptions& options = TestingEqualOptions());
 ARROW_TESTING_EXPORT void AssertChunkedApproxEquivalent(
     const ChunkedArray& expected, const ChunkedArray& actual,
     const EqualOptions& options = TestingEqualOptions());
@@ -277,12 +281,13 @@ ARROW_TESTING_EXPORT void AssertSchemaNotEqual(const 
std::shared_ptr<Schema>& lh
 ARROW_TESTING_EXPORT Result<std::optional<std::string>> PrintArrayDiff(
     const ChunkedArray& expected, const ChunkedArray& actual);
 
-ARROW_TESTING_EXPORT void AssertTablesEqual(const Table& expected, const 
Table& actual,
-                                            bool same_chunk_layout = true,
-                                            bool flatten = false);
+ARROW_TESTING_EXPORT void AssertTablesEqual(
+    const Table& expected, const Table& actual, bool same_chunk_layout = true,
+    bool flatten = false, const EqualOptions& options = TestingEqualOptions());
 
-ARROW_TESTING_EXPORT void AssertDatumsEqual(const Datum& expected, const 
Datum& actual,
-                                            bool verbose = false);
+ARROW_TESTING_EXPORT void AssertDatumsEqual(
+    const Datum& expected, const Datum& actual, bool verbose = false,
+    const EqualOptions& options = TestingEqualOptions());
 ARROW_TESTING_EXPORT void AssertDatumsApproxEqual(
     const Datum& expected, const Datum& actual, bool verbose = false,
     const EqualOptions& options = TestingEqualOptions());
@@ -296,12 +301,13 @@ void AssertNumericDataEqual(const C_TYPE* raw_data,
   }
 }
 
-ARROW_TESTING_EXPORT void CompareBatch(const RecordBatch& left, const 
RecordBatch& right,
-                                       bool compare_metadata = true);
+ARROW_TESTING_EXPORT void CompareBatch(
+    const RecordBatch& left, const RecordBatch& right, bool compare_metadata = 
true,
+    const EqualOptions& options = TestingEqualOptions());
 
-ARROW_TESTING_EXPORT void ApproxCompareBatch(const RecordBatch& left,
-                                             const RecordBatch& right,
-                                             bool compare_metadata = true);
+ARROW_TESTING_EXPORT void ApproxCompareBatch(
+    const RecordBatch& left, const RecordBatch& right, bool compare_metadata = 
true,
+    const EqualOptions& options = TestingEqualOptions());
 
 // Check if the padding of the buffers of the array is zero.
 // Also cause valgrind warnings if the padding bytes are uninitialized.
diff --git a/cpp/src/arrow/testing/gtest_util_test.cc 
b/cpp/src/arrow/testing/gtest_util_test.cc
new file mode 100644
index 0000000000..14c17a972a
--- /dev/null
+++ b/cpp/src/arrow/testing/gtest_util_test.cc
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_decimal.h"
+#include "arrow/datum.h"
+#include "arrow/record_batch.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/random.h"
+#include "arrow/type.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow {
+
+// Test basic cases for contains NaN.
+class TestAssertContainsNaN : public ::testing::Test {};
+
+TEST_F(TestAssertContainsNaN, BatchesEqual) {
+  auto schema = ::arrow::schema({
+      {field("a", float32())},
+      {field("b", float64())},
+  });
+
+  auto expected = RecordBatchFromJSON(schema,
+                                      R"([{"a": 3,    "b": 5},
+                                       {"a": 1,    "b": 3},
+                                       {"a": 3,    "b": 4},
+                                       {"a": NaN,  "b": 6},
+                                       {"a": 2,    "b": 5},
+                                       {"a": 1,    "b": NaN},
+                                       {"a": 1,    "b": 3}
+                                       ])");
+  auto actual = RecordBatchFromJSON(schema,
+                                    R"([{"a": 3,    "b": 5},
+                                       {"a": 1,    "b": 3},
+                                       {"a": 3,    "b": 4},
+                                       {"a": NaN,  "b": 6},
+                                       {"a": 2,    "b": 5},
+                                       {"a": 1,    "b": NaN},
+                                       {"a": 1,    "b": 3}
+                                       ])");
+  ASSERT_BATCHES_EQUAL(*expected, *actual);
+  AssertBatchesApproxEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, TableEqual) {
+  auto schema = ::arrow::schema({
+      {field("a", float32())},
+      {field("b", float64())},
+  });
+
+  auto expected = TableFromJSON(schema, {R"([{"a": null, "b": 5},
+                                     {"a": NaN,    "b": 3},
+                                     {"a": 3,    "b": null}
+                                    ])",
+                                         R"([{"a": null, "b": null},
+                                     {"a": 2,    "b": NaN},
+                                     {"a": 1,    "b": 5},
+                                     {"a": 3,    "b": 5}
+                                    ])"});
+  auto actual = TableFromJSON(schema, {R"([{"a": null, "b": 5},
+                                     {"a": NaN,    "b": 3},
+                                     {"a": 3,    "b": null}
+                                    ])",
+                                       R"([{"a": null, "b": null},
+                                     {"a": 2,    "b": NaN},
+                                     {"a": 1,    "b": 5},
+                                     {"a": 3,    "b": 5}
+                                    ])"});
+  ASSERT_TABLES_EQUAL(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, ArrayEqual) {
+  auto expected = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
+  auto actual = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
+  AssertArraysEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, ChunkedEqual) {
+  auto expected = ChunkedArrayFromJSON(float64(), {
+                                                      "[null, 1]",
+                                                      "[3, NaN, 2]",
+                                                      "[NaN]",
+                                                  });
+
+  auto actual = ChunkedArrayFromJSON(float64(), {
+                                                    "[null, 1]",
+                                                    "[3, NaN, 2]",
+                                                    "[NaN]",
+                                                });
+  AssertChunkedEqual(*expected, *actual);
+}
+
+TEST_F(TestAssertContainsNaN, DatumEqual) {
+  // scalar
+  auto expected_scalar = ScalarFromJSON(float64(), "NaN");
+  auto actual_scalar = ScalarFromJSON(float64(), "NaN");
+  AssertDatumsEqual(expected_scalar, actual_scalar);
+
+  // array
+  auto expected_array = ArrayFromJSON(float64(), "[3, NaN, 2, 1, 5]");
+  auto actual_array = ArrayFromJSON(float64(), "[3, NaN, 2, 1, 5]");
+  AssertDatumsEqual(expected_array, actual_array);
+
+  // chunked array
+  auto expected_chunked = ChunkedArrayFromJSON(float64(), {
+                                                              "[null, 1]",
+                                                              "[3, NaN, 2]",
+                                                              "[NaN]",
+                                                          });
+
+  auto actual_chunked = ChunkedArrayFromJSON(float64(), {
+                                                            "[null, 1]",
+                                                            "[3, NaN, 2]",
+                                                            "[NaN]",
+                                                        });
+  AssertDatumsEqual(expected_chunked, actual_chunked);
+}
+
+}  // namespace arrow

Reply via email to