This is an automated email from the ASF dual-hosted git repository. paleolimbot pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push: new 3a78aa45 fix: Relax comparison strictness such that integration tests pass (#399) 3a78aa45 is described below commit 3a78aa459a1125fe22e2f1fe3c8f20a592229413 Author: Dewey Dunnington <de...@dunnington.ca> AuthorDate: Mon Apr 15 15:52:24 2024 -0300 fix: Relax comparison strictness such that integration tests pass (#399) These are the changes required for https://github.com/apache/arrow/pull/39302 to result in passing integration tests for nanoarrow. The changes are mostly related to comparison: - We needed an option to allow metadata to be compared on a key/value basis without considering order (for Java, which seems to reorder metadata on read) - We needed the ability to treat NULL metadata and zero-size metadata as equivalent (for Go, which always exports zero-length metadata) - We needed an option to ignore flags for top-level batches (for C#, which exports nullable structs) - We needed to ensure that the last few bits of the validity buffer were zeroed (for C#, although this is now fixed in C# on Arrow main) - We needed to ensure that no buffers were NULL (for C#, which leaks the top-level array if it encounters one, at least in the integration tests; this should really be fixed in C#). 
--- src/nanoarrow/array.c | 26 ++- src/nanoarrow/array_test.cc | 35 +++ src/nanoarrow/integration/c_data_integration.cc | 10 + src/nanoarrow/nanoarrow_testing.hpp | 174 ++++++++++++--- src/nanoarrow/nanoarrow_testing_test.cc | 274 +++++++++++++++++++++++- 5 files changed, 472 insertions(+), 47 deletions(-) diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c index 2373e58e..4fb7b7b1 100644 --- a/src/nanoarrow/array.c +++ b/src/nanoarrow/array.c @@ -407,19 +407,16 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct ArrowArray* array) { struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*)array->private_data; - // The only buffer finalizing this currently does is make sure the data - // buffer for (Large)String|Binary is never NULL - switch (private_data->storage_type) { - case NANOARROW_TYPE_BINARY: - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - case NANOARROW_TYPE_LARGE_STRING: - if (ArrowArrayBuffer(array, 2)->data == NULL) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(ArrowArrayBuffer(array, 2), 0)); - } - break; - default: - break; + for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY || + private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) { + continue; + } + + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + if (buffer->data == NULL) { + NANOARROW_RETURN_NOT_OK((ArrowBufferReserve(buffer, 1))); + } } for (int64_t i = 0; i < array->n_children; i++) { @@ -455,7 +452,8 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, struct ArrowError* error) { // Even if the data buffer is size zero, the pointer value needed to be non-null // in some implementations (at least one version of Arrow C++ at the time this - // was added). Only do this fix if we can assume CPU data access. + // was added and C# as later discovered). Only do this fix if we can assume + // CPU data access. 
if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) { NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array), error); } diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc index 81360b0a..53116c05 100644 --- a/src/nanoarrow/array_test.cc +++ b/src/nanoarrow/array_test.cc @@ -324,6 +324,41 @@ TEST(ArrayTest, ArrayTestValidateMinimalBufferAccess) { ArrowArrayRelease(&array); } +class UnparameterizedTypeTestFixture : public ::testing::TestWithParam<enum ArrowType> { + protected: + enum ArrowType data_type; +}; + +TEST_P(UnparameterizedTypeTestFixture, ArrayTestBuildEmptyArray) { + struct ArrowArray array; + ASSERT_EQ(ArrowArrayInitFromType(&array, GetParam()), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK); + + EXPECT_EQ(array.offset, 0); + EXPECT_EQ(array.length, 0); + EXPECT_EQ(array.null_count, 0); + + // For all of these, the validity buffer is the first buffer and should be NULL; + // however, other buffers should not be NULL. + for (int64_t i = 1; i < array.n_buffers; i++) { + if (i == 0) { + EXPECT_EQ(array.buffers[i], nullptr); + } else { + EXPECT_NE(array.buffers[i], nullptr); + } + } + + ArrowArrayRelease(&array); +} + +// We don't need to exhaustively check here...just a few different categories +// of inputs to ensure our buffer finalizing worked. 
+INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest, UnparameterizedTypeTestFixture, + ::testing::Values(NANOARROW_TYPE_NA, NANOARROW_TYPE_INT32, + NANOARROW_TYPE_BINARY, NANOARROW_TYPE_STRUCT)); + TEST(ArrayTest, ArrayTestAppendToNullArray) { struct ArrowArray array; ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_NA), NANOARROW_OK); diff --git a/src/nanoarrow/integration/c_data_integration.cc b/src/nanoarrow/integration/c_data_integration.cc index 3660af7c..fab901ec 100644 --- a/src/nanoarrow/integration/c_data_integration.cc +++ b/src/nanoarrow/integration/c_data_integration.cc @@ -56,6 +56,12 @@ static ArrowBufferAllocator IntegrationTestAllocator() { return allocator; } +static void SetComparisonOptions(nanoarrow::testing::TestingJSONComparison* comparison) { + comparison->set_compare_batch_flags(false); + comparison->set_compare_float_precision(3); + comparison->set_compare_metadata_order(false); +} + static ArrowErrorCode ReadFileString(std::ostream& out, const std::string& file_path) { std::ifstream infile(file_path, std::ios::in | std::ios::binary); char buf[8096]; @@ -143,6 +149,8 @@ static ArrowErrorCode ImportSchemaAndCompareToJson(const char* json_path, error)); nanoarrow::testing::TestingJSONComparison comparison; + SetComparisonOptions(&comparison); + NANOARROW_RETURN_NOT_OK( comparison.CompareSchema(actual.get(), data.schema.get(), error)); if (comparison.num_differences() > 0) { @@ -173,6 +181,8 @@ static ArrowErrorCode ImportBatchAndCompareToJson(const char* json_path, int num NANOARROW_RETURN_NOT_OK(MaterializeJsonFilePath(json_path, &data, num_batch, error)); nanoarrow::testing::TestingJSONComparison comparison; + SetComparisonOptions(&comparison); + NANOARROW_RETURN_NOT_OK(comparison.SetSchema(data.schema.get(), error)); NANOARROW_RETURN_NOT_OK( comparison.CompareBatch(actual.get(), data.arrays[0].get(), error)); diff --git a/src/nanoarrow/nanoarrow_testing.hpp b/src/nanoarrow/nanoarrow_testing.hpp index a7602b70..f7d2da4f 100644 --- 
a/src/nanoarrow/nanoarrow_testing.hpp +++ b/src/nanoarrow/nanoarrow_testing.hpp @@ -138,7 +138,7 @@ class DictionaryContext { /// \brief Writer for the Arrow integration testing JSON format class TestingJSONWriter { public: - TestingJSONWriter() : float_precision_(-1) {} + TestingJSONWriter() : float_precision_(-1), include_metadata_(true) {} /// \brief Set the floating point precision of the writer /// @@ -146,7 +146,12 @@ class TestingJSONWriter { /// to encode the value in the output. When writing files specifically for /// integration tests, floating point values should be rounded to 3 decimal places to /// avoid serialization issues. - void set_float_precision(int precision) { float_precision_ = precision; } + void set_float_precision(int value) { float_precision_ = value; } + + /// \brief Set whether metadata should be included in the output of a schema or field + /// + /// Use false to skip writing schema/field metadata in the output. + void set_include_metadata(bool value) { include_metadata_ = value; } void ResetDictionaries() { dictionaries_.clear(); } @@ -227,7 +232,7 @@ class TestingJSONWriter { } // Write metadata - if (schema->metadata != nullptr) { + if (ShouldWriteMetadata(schema->metadata)) { out << R"(, "metadata": )"; NANOARROW_RETURN_NOT_OK(WriteMetadata(out, schema->metadata)); } @@ -293,7 +298,7 @@ class TestingJSONWriter { } // Write metadata - if (field->metadata != nullptr) { + if (ShouldWriteMetadata(field->metadata)) { out << R"(, "metadata": )"; NANOARROW_RETURN_NOT_OK(WriteMetadata(out, field->metadata)); } @@ -494,8 +499,13 @@ class TestingJSONWriter { private: int float_precision_; + bool include_metadata_; internal::DictionaryContext dictionaries_; + bool ShouldWriteMetadata(const char* metadata) { + return metadata != nullptr && include_metadata_; + } + ArrowErrorCode WriteDictionaryBatch(std::ostream& out, int32_t dictionary_id) { const internal::Dictionary& dict = dictionaries_.Get(dictionary_id); out << R"({"id": )" << 
dictionary_id << R"(, "data": {"count": )" @@ -2023,8 +2033,7 @@ class TestingJSONReader { buffer_view->data.as_uint8 = buffer->data; buffer_view->size_bytes = buffer->size_bytes; - // If this is a validity buffer with a big enough size, set the array_view's - // null_count + // If this is a validity buffer, set the null_count if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY && _ArrowBytesForBits(array_view->length) <= buffer_view->size_bytes) { array_view->null_count = @@ -2033,6 +2042,11 @@ class TestingJSONReader { } } + // The null type doesn't have any buffers but we can set the null_count + if (array_view->storage_type == NANOARROW_TYPE_NA) { + array_view->null_count = array_view->length; + } + // If there is a dictionary associated with schema, parse its value into dictionary if (schema->dictionary != nullptr) { NANOARROW_RETURN_NOT_OK(Check( @@ -2051,9 +2065,10 @@ class TestingJSONReader { ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL, error), error, error_prefix + "failed to validate: ")); - // Flush length and buffer pointers to the Array - NANOARROW_RETURN_NOT_OK_WITH_ERROR( - ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE, nullptr), error); + // Flush length and buffer pointers to the Array. This also ensures that buffers + // are not NULL (matters for some versions of some implementations). + NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinishBuildingDefault(array, nullptr), + error); array->length = array_view->length; array->null_count = array_view->null_count; @@ -2178,6 +2193,11 @@ class TestingJSONReader { NANOARROW_RETURN_NOT_OK( Check(value.is_array(), error, "bitmap buffer must be array")); + // Reserving with the exact length ensures that the last bits are always zeroed. + // This was an assumption made by the C# implementation at the time this was + // implemented. 
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBitmapReserve(bitmap, value.size()), error); + for (const auto& item : value) { // Some example files write bitmaps as [true, false, true] but the documentation // says [1, 0, 1]. Accept both for simplicity. @@ -2505,6 +2525,35 @@ class TestingJSONComparison { }; public: + TestingJSONComparison() : compare_batch_flags_(true), compare_metadata_order_(true) { + // We do our own metadata comparison + writer_actual_.set_include_metadata(false); + writer_expected_.set_include_metadata(false); + } + + /// \brief Compare top-level RecordBatch flags (e.g., nullability) + /// + /// Some Arrow implementations export batches as nullable, and some export them as + /// non-nullable. Use false to consider these two types of batches as equivalent. + void set_compare_batch_flags(bool value) { compare_batch_flags_ = value; } + + /// \brief Compare metadata order + /// + /// Some Arrow implementations store metadata using structures (e.g., hash map) that + /// reorder metadata items. Use false to consider metadata whose keys/values have + /// been reordered as equivalent. + void set_compare_metadata_order(bool value) { compare_metadata_order_ = value; } + + /// \brief Set float precision + /// + /// The Arrow Integration Testing JSON document states that values should be compared + /// to 3 decimal places to avoid floating point serialization issues. Use -1 to specify + /// that all decimal places should be used (the default). 
+ void set_compare_float_precision(int value) { + writer_actual_.set_float_precision(value); + writer_expected_.set_float_precision(value); + } + /// \brief Returns the number of differences found by the previous call int64_t num_differences() const { return differences_.size(); } @@ -2619,7 +2668,7 @@ class TestingJSONComparison { // (Purposefully ignore the name field at the top level) // Compare flags - if (actual->flags != expected->flags) { + if (compare_batch_flags_ && actual->flags != expected->flags) { differences_.push_back({path, std::string(".flags: ") + std::to_string(actual->flags), std::string(".flags: ") + std::to_string(expected->flags)}); @@ -2639,20 +2688,8 @@ class TestingJSONComparison { } // Compare metadata - std::stringstream ss; - NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_actual_.WriteMetadata(ss, actual->metadata), - error); - std::string actual_metadata = ss.str(); - - ss.str(""); - NANOARROW_RETURN_NOT_OK_WITH_ERROR( - writer_expected_.WriteMetadata(ss, expected->metadata), error); - std::string expected_metadata = ss.str(); - - if (actual_metadata != expected_metadata) { - differences_.push_back({path, std::string(".metadata: ") + actual_metadata, - std::string(".metadata: ") + expected_metadata}); - } + NANOARROW_RETURN_NOT_OK(CompareMetadata(actual->metadata, expected->metadata, error, + path + std::string(".metadata"))); return NANOARROW_OK; } @@ -2722,6 +2759,10 @@ class TestingJSONComparison { nanoarrow::UniqueArrayView actual_; nanoarrow::UniqueArrayView expected_; + // Comparison options + bool compare_batch_flags_; + bool compare_metadata_order_; + ArrowErrorCode CompareField(ArrowSchema* actual, ArrowSchema* expected, ArrowError* error, const std::string& path = "") { // Preprocess both fields such that map types have canonical names @@ -2753,6 +2794,91 @@ class TestingJSONComparison { differences_.push_back({path, actual_json, expected_json}); } + NANOARROW_RETURN_NOT_OK(CompareMetadata(actual->metadata, expected->metadata, 
error, + path + std::string(".metadata"))); + return NANOARROW_OK; + } + + ArrowErrorCode CompareMetadata(const char* actual, const char* expected, + ArrowError* error, const std::string& path = "") { + std::stringstream ss; + + NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_actual_.WriteMetadata(ss, actual), error); + std::string actual_json = ss.str(); + + ss.str(""); + NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_expected_.WriteMetadata(ss, expected), + error); + std::string expected_json = ss.str(); + + bool metadata_equal = actual_json == expected_json; + + // If there is a difference in the rendered JSON but we aren't being strict about + // order, check again using the KeyValue comparison. + if (!metadata_equal && !compare_metadata_order_) { + NANOARROW_RETURN_NOT_OK( + MetadataEqualKeyValue(actual, expected, &metadata_equal, error)); + } + + // If we still have an inequality, add a difference. + if (!metadata_equal) { + differences_.push_back({path, actual_json, expected_json}); + } + + return NANOARROW_OK; + } + + ArrowErrorCode MetadataEqualKeyValue(const char* actual, const char* expected, + bool* out, ArrowError* error) { + std::unordered_map<std::string, std::string> actual_map, expected_map; + NANOARROW_RETURN_NOT_OK(MetadataToMap(actual, &actual_map, error)); + NANOARROW_RETURN_NOT_OK(MetadataToMap(expected, &expected_map, error)); + + if (actual_map.size() != expected_map.size()) { + *out = false; + return NANOARROW_OK; + } + + for (const auto& item : expected_map) { + const auto& actual_item = actual_map.find(item.first); + if (actual_item == actual_map.end()) { + *out = false; + return NANOARROW_OK; + } + + if (actual_item->second != item.second) { + *out = false; + return NANOARROW_OK; + } + } + + *out = true; + return NANOARROW_OK; + } + + ArrowErrorCode MetadataToMap(const char* metadata, + std::unordered_map<std::string, std::string>* out, + ArrowError* error) { + ArrowMetadataReader reader; + 
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowMetadataReaderInit(&reader, metadata), error); + + ArrowStringView key, value; + size_t metadata_num_keys = 0; + while (reader.remaining_keys > 0) { + NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowMetadataReaderRead(&reader, &key, &value), + error); + out->insert({std::string(key.data, key.size_bytes), + std::string(value.data, value.size_bytes)}); + metadata_num_keys++; + } + + if (metadata_num_keys != out->size()) { + ArrowErrorSet(error, + "Comparison of metadata containing duplicate keys without " + "considering order is not implemented"); + return ENOTSUP; + } + return NANOARROW_OK; } diff --git a/src/nanoarrow/nanoarrow_testing_test.cc b/src/nanoarrow/nanoarrow_testing_test.cc index f7b48444..884210a6 100644 --- a/src/nanoarrow/nanoarrow_testing_test.cc +++ b/src/nanoarrow/nanoarrow_testing_test.cc @@ -462,6 +462,17 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldMetadata) { /*append_expr*/ nullptr, &WriteFieldJSON, R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], )" R"("metadata": [{"key": "k1", "value": "v1"}, {"key": "k2", "value": "v2"}]})"); + + // Ensure we can turn off metadata + TestWriteJSON( + [](ArrowSchema* schema) { + NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_NA)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetMetadata(schema, "\0\0\0\0")); + return NANOARROW_OK; + }, + [](ArrowArray* array) { return NANOARROW_OK; }, &WriteFieldJSON, + R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": []})", + [](TestingJSONWriter& writer) { writer.set_include_metadata(false); }); } TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldNested) { @@ -1328,21 +1339,33 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldDictionaryRoundtrip) { R"("isOrdered": true}, "children": []})"); } -void AssertSchemasCompareEqual(ArrowSchema* actual, ArrowSchema* expected) { +void AssertSchemasCompareEqual( + ArrowSchema* actual, ArrowSchema* expected, + void 
(*setup_comparison)(TestingJSONComparison&) = nullptr) { TestingJSONComparison comparison; std::stringstream msg; + if (setup_comparison != nullptr) { + setup_comparison(comparison); + } + ASSERT_EQ(comparison.CompareSchema(actual, expected), NANOARROW_OK); EXPECT_EQ(comparison.num_differences(), 0); comparison.WriteDifferences(msg); EXPECT_EQ(msg.str(), ""); } -void AssertSchemasCompareUnequal(ArrowSchema* actual, ArrowSchema* expected, - int num_differences, const std::string& differences) { +void AssertSchemasCompareUnequal( + ArrowSchema* actual, ArrowSchema* expected, int num_differences, + const std::string& differences, + void (*setup_comparison)(TestingJSONComparison&) = nullptr) { TestingJSONComparison comparison; std::stringstream msg; + if (setup_comparison != nullptr) { + setup_comparison(comparison); + } + ASSERT_EQ(comparison.CompareSchema(actual, expected), NANOARROW_OK); EXPECT_EQ(comparison.num_differences(), num_differences); comparison.WriteDifferences(msg); @@ -1365,25 +1388,34 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestSchemaComparison) { actual->flags = 0; AssertSchemasCompareUnequal(actual.get(), expected.get(), /*num_differences*/ 1, "Path: \n- .flags: 0\n+ .flags: 2\n\n"); + // With different top-level flags but turning off that comparison + AssertSchemasCompareEqual(actual.get(), expected.get(), + [](TestingJSONComparison& comparison) { + comparison.set_compare_batch_flags((false)); + }); actual->flags = expected->flags; // With different top-level metadata nanoarrow::UniqueBuffer buf; ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); - ASSERT_EQ( - ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key"), ArrowCharView("value")), - NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"), + ArrowCharView("value1")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); 
ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), reinterpret_cast<char*>(buf->data)), NANOARROW_OK); AssertSchemasCompareUnequal(actual.get(), expected.get(), /*num_differences*/ 1, /*differences*/ - "Path: " + "Path: .metadata" R"( -- .metadata: [{"key": "key", "value": "value"}] -+ .metadata: null +- [{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}] ++ null )"); + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), nullptr), NANOARROW_OK); // With different children @@ -1443,6 +1475,191 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestSchemaComparisonMap) { AssertSchemasCompareEqual(actual2.get(), expected.get()); } +TEST(NanoarrowTestingTest, NanoarrowTestingTestMetadataComparison) { + nanoarrow::UniqueSchema actual; + nanoarrow::UniqueSchema expected; + nanoarrow::UniqueBuffer buf; + + // Start with two identical schemas and ensure there are no differences + ArrowSchemaInit(actual.get()); + ASSERT_EQ(ArrowSchemaSetTypeStruct(actual.get(), 1), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(actual->children[0], NANOARROW_TYPE_NA), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaDeepCopy(actual.get(), expected.get()), NANOARROW_OK); + AssertSchemasCompareEqual(actual.get(), expected.get()); + + // With different top-level metadata that are not equivalent because of order + buf.reset(); + ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"), + ArrowCharView("value1")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), reinterpret_cast<char*>(buf->data)), + NANOARROW_OK); + + buf.reset(); + ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), 
ArrowCharView("key1"), + ArrowCharView("value1")), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(expected.get(), reinterpret_cast<char*>(buf->data)), + NANOARROW_OK); + + // ...using the comparison that considers ordering + AssertSchemasCompareUnequal(actual.get(), expected.get(), /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)"); + + // ...using the comparison that does *not* consider ordering + AssertSchemasCompareEqual(actual.get(), expected.get(), + [](TestingJSONComparison& comparison) { + comparison.set_compare_metadata_order(false); + }); + + // With different top-level metadata that are not equivalent because of number of items + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), nullptr), NANOARROW_OK); + + // ...using the comparison that considers ordering + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- null ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)", + [](TestingJSONComparison& comparison) { + comparison.set_compare_metadata_order(false); + }); + + // ...using the comparison that does *not* consider ordering + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- null ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)", + [](TestingJSONComparison& comparison) { + comparison.set_compare_metadata_order(false); + }); + + // With different top-level metadata that are not equivalent because of item content + buf.reset(); + ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), 
ArrowCharView("key1"), + ArrowCharView("gazornenplat")), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), reinterpret_cast<char*>(buf->data)), + NANOARROW_OK); + + // ...using the schema comparison that considers order + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "gazornenplat"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)"); + + // ...and using the schema comparison that does *not* consider order + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "gazornenplat"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)", + [](TestingJSONComparison& comparison) { + comparison.set_compare_metadata_order(false); + }); + + // With different top-level metadata that are not equivalent because of item keys + buf.reset(); + ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key3"), + ArrowCharView("value1")), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), reinterpret_cast<char*>(buf->data)), + NANOARROW_OK); + + // ...using the schema comparison that considers order + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key2", "value": "value2"}, {"key": "key3", "value": "value1"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)"); + + // ...and using the schema comparison that does *not* consider order + AssertSchemasCompareUnequal(actual.get(), expected.get(), + 
/*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key2", "value": "value2"}, {"key": "key3", "value": "value1"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)", + [](TestingJSONComparison& comparison) { + comparison.set_compare_metadata_order(false); + }); + + // Metadata that are not equal and contain duplicate keys + buf.reset(); + ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2")), + NANOARROW_OK); + ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"), + ArrowCharView("value2 again")), + NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), reinterpret_cast<char*>(buf->data)), + NANOARROW_OK); + + // ...using the schema comparison that considers order + AssertSchemasCompareUnequal(actual.get(), expected.get(), + /*num_differences*/ 1, + /*differences*/ + "Path: .metadata" + R"( +- [{"key": "key2", "value": "value2"}, {"key": "key2", "value": "value2 again"}] ++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}] + +)"); + + // Comparison is not implemented for the comparison that does not consider order + TestingJSONComparison comparison; + comparison.set_compare_metadata_order(false); + ASSERT_EQ(comparison.CompareSchema(actual.get(), expected.get()), ENOTSUP); +} + TEST(NanoarrowTestingTest, NanoarrowTestingTestArrayComparison) { nanoarrow::UniqueSchema schema; nanoarrow::UniqueArray actual; @@ -1496,6 +1713,45 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestArrayComparison) { )"); } +TEST(NanoarrowTestingTest, NanoarrowTestingTestFloatingPointArrayComparison) { + nanoarrow::UniqueSchema schema; + nanoarrow::UniqueArray actual; + nanoarrow::UniqueArray expected; + TestingJSONComparison comparison; + std::stringstream msg; + + ArrowSchemaInit(schema.get()); + ASSERT_EQ(ArrowSchemaSetTypeStruct(schema.get(), 1), NANOARROW_OK); + 
ASSERT_EQ(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_DOUBLE), NANOARROW_OK); + ASSERT_EQ(comparison.SetSchema(schema.get()), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayInitFromSchema(actual.get(), schema.get(), nullptr), NANOARROW_OK); + ASSERT_EQ(ArrowArrayAppendDouble(actual->children[0], 1.23456789), NANOARROW_OK); + ASSERT_EQ(ArrowArrayFinishBuildingDefault(actual.get(), nullptr), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayInitFromSchema(expected.get(), schema.get(), nullptr), + NANOARROW_OK); + ASSERT_EQ(ArrowArrayAppendDouble(expected->children[0], 1.23456), NANOARROW_OK); + ASSERT_EQ(ArrowArrayFinishBuildingDefault(expected.get(), nullptr), NANOARROW_OK); + + // Default precision: all decimal places + ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()), NANOARROW_OK); + EXPECT_EQ(comparison.num_differences(), 1); + comparison.ClearDifferences(); + + // With just enough decimal places to trigger a difference + comparison.set_compare_float_precision(5); + ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()), NANOARROW_OK); + EXPECT_EQ(comparison.num_differences(), 1); + comparison.ClearDifferences(); + + // With just few enough decimal places to be considered equivalent + comparison.set_compare_float_precision(4); + ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()), NANOARROW_OK); + EXPECT_EQ(comparison.num_differences(), 0); + comparison.ClearDifferences(); +} + TEST(NanoarrowTestingTest, NanoarrowTestingTestArrayWithDictionaryComparison) { nanoarrow::UniqueSchema schema; nanoarrow::UniqueArray actual;