[I] Add array comparison utility to nanoarrow_testing [arrow-nanoarrow]

via GitHub Mon, 05 Aug 2024 11:49:02 -0700


paleolimbot opened a new issue, #577:
URL: https://github.com/apache/arrow-nanoarrow/issues/577


   Several different kinds of array comparison have popped up in various 
places, depending on the exact needs of the test:
   
   @zeroshade's utility in cudf (I believe this is something like Arrow C++'s 
"Equal" in that non-equal buffer values in null slots are still considered 
equal):
   
   
https://github.com/rapidsai/cudf/blob/af57286536fc21b47b80e45be222773b751600c9/cpp/tests/interop/to_arrow_host_test.cpp#L45-L210
   
   @bkietz's `Matcher` to support IPC batch roundtrip testing:
   
   ```cpp
   /// gtest matcher that checks whether an ArrowArrayView is equivalent to
   /// `expected`: same length, same null count, same number of children,
   /// byte-identical fixed buffers and (recursively) equivalent children.
   ///
   /// Both views are required to have offset == 0 (NANOARROW_DCHECK).
   struct ArrowArrayViewEqualTo {
     const struct ArrowArrayView* expected;
   
     using is_gtest_matcher = void;
   
     /// gtest entry point: compares `actual` against the stored `expected`.
     /// @param os  receives a description of the first mismatch; may be null.
     /// @return true when the two views are equivalent.
     bool MatchAndExplain(const struct ArrowArrayView* actual,
                          std::ostream* os) const {
       return MatchAndExplain({}, actual, expected, os);
     }
   
     /// Recursive worker behind the gtest entry point.
     /// @param field_path  child indices leading from the root to the views
     ///                    being compared; used to prefix every message.
     /// @param os          explanation stream; may be null.
     /// @return false as soon as the first mismatch is found.
     static bool MatchAndExplain(std::vector<int> field_path,
                                 const struct ArrowArrayView* actual,
                                 const struct ArrowArrayView* expected,
                                 std::ostream* os) {
       // The explanation stream is null when the caller does not want an
       // explanation; route output to a buffer-less stream that drops it.
       std::ostream discard(nullptr);
       std::ostream& out = (os != nullptr) ? *os : discard;
   
       // Writes ".i.j.k:" for the current field path, then hands back the
       // stream so the caller can append the actual message.
       auto prefixed = [&]() -> std::ostream& {
         if (!field_path.empty()) {
           for (int index : field_path) {
             out << "." << index;
           }
           out << ":";
         }
         return out;
       };
   
       NANOARROW_DCHECK(actual->offset == 0);
       NANOARROW_DCHECK(expected->offset == 0);
   
       if (actual->length != expected->length) {
         prefixed() << "expected length=" << expected->length << "\n";
         prefixed() << "  actual length=" << actual->length << "\n";
         return false;
       }
   
       // A null_count of -1 means "not computed yet": compute it on demand.
       auto null_count = [](const struct ArrowArrayView* a) {
         return a->null_count != -1 ? a->null_count
                                    : ArrowArrayViewComputeNullCount(a);
       };
       if (null_count(actual) != null_count(expected)) {
         prefixed() << "expected null_count=" << null_count(expected) << "\n";
         prefixed() << "  actual null_count=" << null_count(actual) << "\n";
         return false;
       }
   
       // Check the index bound BEFORE touching buffer_type[i] so the last
       // iteration never reads past the end of the fixed-size array.
       for (int64_t i = 0;
            i < NANOARROW_MAX_FIXED_BUFFERS &&
            actual->layout.buffer_type[i] != NANOARROW_BUFFER_TYPE_NONE;
            ++i) {
         const auto& a_buf = actual->buffer_views[i];
         const auto& e_buf = expected->buffer_views[i];
         if (a_buf.size_bytes != e_buf.size_bytes) {
           prefixed() << "expected buffer[" << i
                      << "].size=" << e_buf.size_bytes << "\n";
           prefixed() << "  actual buffer[" << i
                      << "].size=" << a_buf.size_bytes << "\n";
           return false;
         }
         // Only compare bytes when there are any to compare.
         if (a_buf.size_bytes != 0 &&
             memcmp(a_buf.data.data, e_buf.data.data, a_buf.size_bytes) != 0) {
           prefixed() << "expected buffer[" << i << "]'s data to match\n";
           return false;
         }
       }
   
       // Children are indexed in lock-step below, so the counts must agree.
       if (actual->n_children != expected->n_children) {
         prefixed() << "expected n_children=" << expected->n_children << "\n";
         prefixed() << "  actual n_children=" << actual->n_children << "\n";
         return false;
       }
   
       field_path.push_back(0);
       for (int64_t i = 0; i < actual->n_children; ++i) {
         field_path.back() = static_cast<int>(i);
         if (!MatchAndExplain(field_path, actual->children[i],
                              expected->children[i], os)) {
           return false;
         }
       }
       return true;
     }
   
     void DescribeTo(std::ostream* os) const {
       *os << "is equivalent to the array view";
     }
     void DescribeNegationTo(std::ostream* os) const {
       *os << "is not equivalent to the array view";
     }
   };
   ```
   
   A utility comparer with probably terrible failure modes I hacked together 
(also to support IPC batch roundtrip testing):
   
   ```cpp
   void AssertArrayViewIdentical(actual, expected) {
     NANOARROW_DCHECK(actual->dictionary != nullptr);
     NANOARROW_DCHECK(expected->dictionary != nullptr);
   
     ASSERT_EQ(actual->storage_type, expected->storage_type);
     ASSERT_EQ(actual->offset, expected->offset);
     ASSERT_EQ(actual->length, expected->length);
     for (int i = 0; i < 3; i++) {
       auto a_buf = actual->buffer_views[i];
       auto e_buf = expected->buffer_views[i];
       ASSERT_EQ(a_buf.size_bytes,  e_buf->size_bytes);
       if (a_buf.size_bytes != 0) {
         ASSERT_EQ(memcmp(a_buf.data.data, e_buf.data.data, a_buf.size_bytes), 
0);
       }
     }
   
     ASSERT_EQ(actual->n_children, expected->n_children);
     for (int i = 0; i < actual->n_children; i++) {
       AssertArrayViewIdentical(actual->children[i], expected->children[i]);
     }
   }
   ```
   
   The implementation used by the integration tests (it is based on JSON, so 
it is probably not suitable for arbitrary input):
   
   
https://github.com/apache/arrow-nanoarrow/blob/f74d57cd742c58a0563d51ac30ab96f1dc9576b5/src/nanoarrow/integration/c_data_integration.cc#L176-L198
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[I] Add array comparison utility to nanoarrow_testing [arrow-nanoarrow]

Reply via email to