This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3a78aa45 fix: Relax comparison strictness such that integration tests
pass (#399)
3a78aa45 is described below
commit 3a78aa459a1125fe22e2f1fe3c8f20a592229413
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Apr 15 15:52:24 2024 -0300
fix: Relax comparison strictness such that integration tests pass (#399)
These are the changes required for
https://github.com/apache/arrow/pull/39302 to pass the nanoarrow
integration tests. They are mostly related to comparison:
- We needed an option to allow metadata to be compared on a key/value
basis without considering order (for Java, which seems to reorder
metadata on read)
- We needed the ability to treat NULL metadata and zero-size metadata as
equivalent (for Go, which always exports zero-length metadata)
- We needed an option to ignore flags for top-level batches (for C#,
which exports nullable structs)
- We needed to ensure that the last few bits of the validity buffer were
zeroed (for C#, although this is now fixed in C# on Arrow main)
- We needed to ensure that no buffers were NULL (for C#, which leaks the
top-level array if it encounters a NULL buffer, at least in the
integration tests; this should really be fixed in C#).
---
src/nanoarrow/array.c | 26 ++-
src/nanoarrow/array_test.cc | 35 +++
src/nanoarrow/integration/c_data_integration.cc | 10 +
src/nanoarrow/nanoarrow_testing.hpp | 174 ++++++++++++---
src/nanoarrow/nanoarrow_testing_test.cc | 274 +++++++++++++++++++++++-
5 files changed, 472 insertions(+), 47 deletions(-)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 2373e58e..4fb7b7b1 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -407,19 +407,16 @@ static ArrowErrorCode ArrowArrayFinalizeBuffers(struct
ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- // The only buffer finalizing this currently does is make sure the data
- // buffer for (Large)String|Binary is never NULL
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_BINARY:
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_LARGE_BINARY:
- case NANOARROW_TYPE_LARGE_STRING:
- if (ArrowArrayBuffer(array, 2)->data == NULL) {
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(ArrowArrayBuffer(array,
2), 0));
- }
- break;
- default:
- break;
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY
||
+ private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
+ continue;
+ }
+
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ if (buffer->data == NULL) {
+ NANOARROW_RETURN_NOT_OK((ArrowBufferReserve(buffer, 1)));
+ }
}
for (int64_t i = 0; i < array->n_children; i++) {
@@ -455,7 +452,8 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray*
array,
struct ArrowError* error) {
// Even if the data buffer is size zero, the pointer value needed to be
non-null
// in some implementations (at least one version of Arrow C++ at the time
this
- // was added). Only do this fix if we can assume CPU data access.
+ // was added and C# as later discovered). Only do this fix if we can assume
+ // CPU data access.
if (validation_level >= NANOARROW_VALIDATION_LEVEL_DEFAULT) {
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinalizeBuffers(array),
error);
}
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 81360b0a..53116c05 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -324,6 +324,41 @@ TEST(ArrayTest, ArrayTestValidateMinimalBufferAccess) {
ArrowArrayRelease(&array);
}
+class UnparameterizedTypeTestFixture : public ::testing::TestWithParam<enum
ArrowType> {
+ protected:
+ enum ArrowType data_type;
+};
+
+TEST_P(UnparameterizedTypeTestFixture, ArrayTestBuildEmptyArray) {
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, GetParam()), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.offset, 0);
+ EXPECT_EQ(array.length, 0);
+ EXPECT_EQ(array.null_count, 0);
+
+ // For all of these, the validity buffer is the first buffer and should be
NULL;
+ // however, other buffers should not be NULL.
+ for (int64_t i = 1; i < array.n_buffers; i++) {
+ if (i == 0) {
+ EXPECT_EQ(array.buffers[i], nullptr);
+ } else {
+ EXPECT_NE(array.buffers[i], nullptr);
+ }
+ }
+
+ ArrowArrayRelease(&array);
+}
+
+// We don't need to exhaustively check here...just a few different categories
+// of inputs to ensure our buffer finalizing worked.
+INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest, UnparameterizedTypeTestFixture,
+ ::testing::Values(NANOARROW_TYPE_NA,
NANOARROW_TYPE_INT32,
+ NANOARROW_TYPE_BINARY,
NANOARROW_TYPE_STRUCT));
+
TEST(ArrayTest, ArrayTestAppendToNullArray) {
struct ArrowArray array;
ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_NA), NANOARROW_OK);
diff --git a/src/nanoarrow/integration/c_data_integration.cc
b/src/nanoarrow/integration/c_data_integration.cc
index 3660af7c..fab901ec 100644
--- a/src/nanoarrow/integration/c_data_integration.cc
+++ b/src/nanoarrow/integration/c_data_integration.cc
@@ -56,6 +56,12 @@ static ArrowBufferAllocator IntegrationTestAllocator() {
return allocator;
}
+static void SetComparisonOptions(nanoarrow::testing::TestingJSONComparison*
comparison) {
+ comparison->set_compare_batch_flags(false);
+ comparison->set_compare_float_precision(3);
+ comparison->set_compare_metadata_order(false);
+}
+
static ArrowErrorCode ReadFileString(std::ostream& out, const std::string&
file_path) {
std::ifstream infile(file_path, std::ios::in | std::ios::binary);
char buf[8096];
@@ -143,6 +149,8 @@ static ArrowErrorCode ImportSchemaAndCompareToJson(const
char* json_path,
error));
nanoarrow::testing::TestingJSONComparison comparison;
+ SetComparisonOptions(&comparison);
+
NANOARROW_RETURN_NOT_OK(
comparison.CompareSchema(actual.get(), data.schema.get(), error));
if (comparison.num_differences() > 0) {
@@ -173,6 +181,8 @@ static ArrowErrorCode ImportBatchAndCompareToJson(const
char* json_path, int num
NANOARROW_RETURN_NOT_OK(MaterializeJsonFilePath(json_path, &data, num_batch,
error));
nanoarrow::testing::TestingJSONComparison comparison;
+ SetComparisonOptions(&comparison);
+
NANOARROW_RETURN_NOT_OK(comparison.SetSchema(data.schema.get(), error));
NANOARROW_RETURN_NOT_OK(
comparison.CompareBatch(actual.get(), data.arrays[0].get(), error));
diff --git a/src/nanoarrow/nanoarrow_testing.hpp
b/src/nanoarrow/nanoarrow_testing.hpp
index a7602b70..f7d2da4f 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -138,7 +138,7 @@ class DictionaryContext {
/// \brief Writer for the Arrow integration testing JSON format
class TestingJSONWriter {
public:
- TestingJSONWriter() : float_precision_(-1) {}
+ TestingJSONWriter() : float_precision_(-1), include_metadata_(true) {}
/// \brief Set the floating point precision of the writer
///
@@ -146,7 +146,12 @@ class TestingJSONWriter {
/// to encode the value in the output. When writing files specifically for
/// integration tests, floating point values should be rounded to 3 decimal
places to
/// avoid serialization issues.
- void set_float_precision(int precision) { float_precision_ = precision; }
+ void set_float_precision(int value) { float_precision_ = value; }
+
+ /// \brief Set whether metadata should be included in the output of a schema
or field
+ ///
+ /// Use false to skip writing schema/field metadata in the output.
+ void set_include_metadata(bool value) { include_metadata_ = value; }
void ResetDictionaries() { dictionaries_.clear(); }
@@ -227,7 +232,7 @@ class TestingJSONWriter {
}
// Write metadata
- if (schema->metadata != nullptr) {
+ if (ShouldWriteMetadata(schema->metadata)) {
out << R"(, "metadata": )";
NANOARROW_RETURN_NOT_OK(WriteMetadata(out, schema->metadata));
}
@@ -293,7 +298,7 @@ class TestingJSONWriter {
}
// Write metadata
- if (field->metadata != nullptr) {
+ if (ShouldWriteMetadata(field->metadata)) {
out << R"(, "metadata": )";
NANOARROW_RETURN_NOT_OK(WriteMetadata(out, field->metadata));
}
@@ -494,8 +499,13 @@ class TestingJSONWriter {
private:
int float_precision_;
+ bool include_metadata_;
internal::DictionaryContext dictionaries_;
+ bool ShouldWriteMetadata(const char* metadata) {
+ return metadata != nullptr && include_metadata_;
+ }
+
ArrowErrorCode WriteDictionaryBatch(std::ostream& out, int32_t
dictionary_id) {
const internal::Dictionary& dict = dictionaries_.Get(dictionary_id);
out << R"({"id": )" << dictionary_id << R"(, "data": {"count": )"
@@ -2023,8 +2033,7 @@ class TestingJSONReader {
buffer_view->data.as_uint8 = buffer->data;
buffer_view->size_bytes = buffer->size_bytes;
- // If this is a validity buffer with a big enough size, set the
array_view's
- // null_count
+ // If this is a validity buffer, set the null_count
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY
&&
_ArrowBytesForBits(array_view->length) <= buffer_view->size_bytes) {
array_view->null_count =
@@ -2033,6 +2042,11 @@ class TestingJSONReader {
}
}
+ // The null type doesn't have any buffers but we can set the null_count
+ if (array_view->storage_type == NANOARROW_TYPE_NA) {
+ array_view->null_count = array_view->length;
+ }
+
// If there is a dictionary associated with schema, parse its value into
dictionary
if (schema->dictionary != nullptr) {
NANOARROW_RETURN_NOT_OK(Check(
@@ -2051,9 +2065,10 @@ class TestingJSONReader {
ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL,
error), error,
error_prefix + "failed to validate: "));
- // Flush length and buffer pointers to the Array
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE,
nullptr), error);
+ // Flush length and buffer pointers to the Array. This also ensures that
buffers
+ // are not NULL (matters for some versions of some implementations).
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayFinishBuildingDefault(array,
nullptr),
+ error);
array->length = array_view->length;
array->null_count = array_view->null_count;
@@ -2178,6 +2193,11 @@ class TestingJSONReader {
NANOARROW_RETURN_NOT_OK(
Check(value.is_array(), error, "bitmap buffer must be array"));
+ // Reserving with the exact length ensures that the last bits are always
zeroed.
+ // This was an assumption made by the C# implementation at the time this
was
+ // implemented.
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBitmapReserve(bitmap,
value.size()), error);
+
for (const auto& item : value) {
// Some example files write bitmaps as [true, false, true] but the
documentation
// says [1, 0, 1]. Accept both for simplicity.
@@ -2505,6 +2525,35 @@ class TestingJSONComparison {
};
public:
+ TestingJSONComparison() : compare_batch_flags_(true),
compare_metadata_order_(true) {
+ // We do our own metadata comparison
+ writer_actual_.set_include_metadata(false);
+ writer_expected_.set_include_metadata(false);
+ }
+
+ /// \brief Compare top-level RecordBatch flags (e.g., nullability)
+ ///
+ /// Some Arrow implementations export batches as nullable, and some export
them as
+ /// non-nullable. Use false to consider these two types of batches as
equivalent.
+ void set_compare_batch_flags(bool value) { compare_batch_flags_ = value; }
+
+ /// \brief Compare metadata order
+ ///
+ /// Some Arrow implementations store metadata using structures (e.g., hash
map) that
+ /// reorder metadata items. Use false to consider metadata whose keys/values
have
+ /// been reordered as equivalent.
+ void set_compare_metadata_order(bool value) { compare_metadata_order_ =
value; }
+
+ /// \brief Set float precision
+ ///
+ /// The Arrow Integration Testing JSON document states that values should be
compared
+ /// to 3 decimal places to avoid floating point serialization issues. Use -1
to specify
+ /// that all decimal places should be used (the default).
+ void set_compare_float_precision(int value) {
+ writer_actual_.set_float_precision(value);
+ writer_expected_.set_float_precision(value);
+ }
+
/// \brief Returns the number of differences found by the previous call
int64_t num_differences() const { return differences_.size(); }
@@ -2619,7 +2668,7 @@ class TestingJSONComparison {
// (Purposefully ignore the name field at the top level)
// Compare flags
- if (actual->flags != expected->flags) {
+ if (compare_batch_flags_ && actual->flags != expected->flags) {
differences_.push_back({path,
std::string(".flags: ") +
std::to_string(actual->flags),
std::string(".flags: ") +
std::to_string(expected->flags)});
@@ -2639,20 +2688,8 @@ class TestingJSONComparison {
}
// Compare metadata
- std::stringstream ss;
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_actual_.WriteMetadata(ss,
actual->metadata),
- error);
- std::string actual_metadata = ss.str();
-
- ss.str("");
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- writer_expected_.WriteMetadata(ss, expected->metadata), error);
- std::string expected_metadata = ss.str();
-
- if (actual_metadata != expected_metadata) {
- differences_.push_back({path, std::string(".metadata: ") +
actual_metadata,
- std::string(".metadata: ") + expected_metadata});
- }
+ NANOARROW_RETURN_NOT_OK(CompareMetadata(actual->metadata,
expected->metadata, error,
+ path + std::string(".metadata")));
return NANOARROW_OK;
}
@@ -2722,6 +2759,10 @@ class TestingJSONComparison {
nanoarrow::UniqueArrayView actual_;
nanoarrow::UniqueArrayView expected_;
+ // Comparison options
+ bool compare_batch_flags_;
+ bool compare_metadata_order_;
+
ArrowErrorCode CompareField(ArrowSchema* actual, ArrowSchema* expected,
ArrowError* error, const std::string& path = "")
{
// Preprocess both fields such that map types have canonical names
@@ -2753,6 +2794,91 @@ class TestingJSONComparison {
differences_.push_back({path, actual_json, expected_json});
}
+ NANOARROW_RETURN_NOT_OK(CompareMetadata(actual->metadata,
expected->metadata, error,
+ path + std::string(".metadata")));
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode CompareMetadata(const char* actual, const char* expected,
+ ArrowError* error, const std::string& path =
"") {
+ std::stringstream ss;
+
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_actual_.WriteMetadata(ss,
actual), error);
+ std::string actual_json = ss.str();
+
+ ss.str("");
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(writer_expected_.WriteMetadata(ss,
expected),
+ error);
+ std::string expected_json = ss.str();
+
+ bool metadata_equal = actual_json == expected_json;
+
+ // If there is a difference in the rendered JSON but we aren't being
strict about
+ // order, check again using the KeyValue comparison.
+ if (!metadata_equal && !compare_metadata_order_) {
+ NANOARROW_RETURN_NOT_OK(
+ MetadataEqualKeyValue(actual, expected, &metadata_equal, error));
+ }
+
+ // If we still have an inequality, add a difference.
+ if (!metadata_equal) {
+ differences_.push_back({path, actual_json, expected_json});
+ }
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode MetadataEqualKeyValue(const char* actual, const char*
expected,
+ bool* out, ArrowError* error) {
+ std::unordered_map<std::string, std::string> actual_map, expected_map;
+ NANOARROW_RETURN_NOT_OK(MetadataToMap(actual, &actual_map, error));
+ NANOARROW_RETURN_NOT_OK(MetadataToMap(expected, &expected_map, error));
+
+ if (actual_map.size() != expected_map.size()) {
+ *out = false;
+ return NANOARROW_OK;
+ }
+
+ for (const auto& item : expected_map) {
+ const auto& actual_item = actual_map.find(item.first);
+ if (actual_item == actual_map.end()) {
+ *out = false;
+ return NANOARROW_OK;
+ }
+
+ if (actual_item->second != item.second) {
+ *out = false;
+ return NANOARROW_OK;
+ }
+ }
+
+ *out = true;
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode MetadataToMap(const char* metadata,
+ std::unordered_map<std::string, std::string>*
out,
+ ArrowError* error) {
+ ArrowMetadataReader reader;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowMetadataReaderInit(&reader,
metadata), error);
+
+ ArrowStringView key, value;
+ size_t metadata_num_keys = 0;
+ while (reader.remaining_keys > 0) {
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowMetadataReaderRead(&reader,
&key, &value),
+ error);
+ out->insert({std::string(key.data, key.size_bytes),
+ std::string(value.data, value.size_bytes)});
+ metadata_num_keys++;
+ }
+
+ if (metadata_num_keys != out->size()) {
+ ArrowErrorSet(error,
+ "Comparison of metadata containing duplicate keys without "
+ "considering order is not implemented");
+ return ENOTSUP;
+ }
+
return NANOARROW_OK;
}
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc
b/src/nanoarrow/nanoarrow_testing_test.cc
index f7b48444..884210a6 100644
--- a/src/nanoarrow/nanoarrow_testing_test.cc
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -462,6 +462,17 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestFieldMetadata) {
/*append_expr*/ nullptr, &WriteFieldJSON,
R"({"name": null, "nullable": true, "type": {"name": "null"},
"children": [], )"
R"("metadata": [{"key": "k1", "value": "v1"}, {"key": "k2", "value":
"v2"}]})");
+
+ // Ensure we can turn off metadata
+ TestWriteJSON(
+ [](ArrowSchema* schema) {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema,
NANOARROW_TYPE_NA));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetMetadata(schema, "\0\0\0\0"));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteFieldJSON,
+ R"({"name": null, "nullable": true, "type": {"name": "null"},
"children": []})",
+ [](TestingJSONWriter& writer) { writer.set_include_metadata(false); });
}
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldNested) {
@@ -1328,21 +1339,33 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestFieldDictionaryRoundtrip) {
R"("isOrdered": true}, "children": []})");
}
-void AssertSchemasCompareEqual(ArrowSchema* actual, ArrowSchema* expected) {
+void AssertSchemasCompareEqual(
+ ArrowSchema* actual, ArrowSchema* expected,
+ void (*setup_comparison)(TestingJSONComparison&) = nullptr) {
TestingJSONComparison comparison;
std::stringstream msg;
+ if (setup_comparison != nullptr) {
+ setup_comparison(comparison);
+ }
+
ASSERT_EQ(comparison.CompareSchema(actual, expected), NANOARROW_OK);
EXPECT_EQ(comparison.num_differences(), 0);
comparison.WriteDifferences(msg);
EXPECT_EQ(msg.str(), "");
}
-void AssertSchemasCompareUnequal(ArrowSchema* actual, ArrowSchema* expected,
- int num_differences, const std::string&
differences) {
+void AssertSchemasCompareUnequal(
+ ArrowSchema* actual, ArrowSchema* expected, int num_differences,
+ const std::string& differences,
+ void (*setup_comparison)(TestingJSONComparison&) = nullptr) {
TestingJSONComparison comparison;
std::stringstream msg;
+ if (setup_comparison != nullptr) {
+ setup_comparison(comparison);
+ }
+
ASSERT_EQ(comparison.CompareSchema(actual, expected), NANOARROW_OK);
EXPECT_EQ(comparison.num_differences(), num_differences);
comparison.WriteDifferences(msg);
@@ -1365,25 +1388,34 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestSchemaComparison) {
actual->flags = 0;
AssertSchemasCompareUnequal(actual.get(), expected.get(),
/*num_differences*/ 1,
"Path: \n- .flags: 0\n+ .flags: 2\n\n");
+ // With different top-level flags but turning off that comparison
+ AssertSchemasCompareEqual(actual.get(), expected.get(),
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_batch_flags((false));
+ });
actual->flags = expected->flags;
// With different top-level metadata
nanoarrow::UniqueBuffer buf;
ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
- ASSERT_EQ(
- ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key"),
ArrowCharView("value")),
- NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"),
+ ArrowCharView("value1")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(),
reinterpret_cast<char*>(buf->data)),
NANOARROW_OK);
AssertSchemasCompareUnequal(actual.get(), expected.get(),
/*num_differences*/ 1,
/*differences*/
- "Path: "
+ "Path: .metadata"
R"(
-- .metadata: [{"key": "key", "value": "value"}]
-+ .metadata: null
+- [{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}]
++ null
)");
+
ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), nullptr), NANOARROW_OK);
// With different children
@@ -1443,6 +1475,191 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestSchemaComparisonMap) {
AssertSchemasCompareEqual(actual2.get(), expected.get());
}
+TEST(NanoarrowTestingTest, NanoarrowTestingTestMetadataComparison) {
+ nanoarrow::UniqueSchema actual;
+ nanoarrow::UniqueSchema expected;
+ nanoarrow::UniqueBuffer buf;
+
+ // Start with two identical schemas and ensure there are no differences
+ ArrowSchemaInit(actual.get());
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(actual.get(), 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(actual->children[0], NANOARROW_TYPE_NA),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaDeepCopy(actual.get(), expected.get()), NANOARROW_OK);
+ AssertSchemasCompareEqual(actual.get(), expected.get());
+
+ // With different top-level metadata that are not equivalent because of order
+ buf.reset();
+ ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"),
+ ArrowCharView("value1")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(),
reinterpret_cast<char*>(buf->data)),
+ NANOARROW_OK);
+
+ buf.reset();
+ ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"),
+ ArrowCharView("value1")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetMetadata(expected.get(),
reinterpret_cast<char*>(buf->data)),
+ NANOARROW_OK);
+
+ // ...using the comparison that considers ordering
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
/*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key1", "value": "value1"}, {"key": "key2", "value": "value2"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)");
+
+ // ...using the comparison that does *not* consider ordering
+ AssertSchemasCompareEqual(actual.get(), expected.get(),
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_metadata_order(false);
+ });
+
+ // With different top-level metadata that are not equivalent because of
number of items
+ ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(), nullptr), NANOARROW_OK);
+
+ // ...using the comparison that considers ordering
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- null
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)",
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_metadata_order(false);
+ });
+
+ // ...using the comparison that does *not* consider ordering
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- null
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)",
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_metadata_order(false);
+ });
+
+ // With different top-level metadata that are not equivalent because of item
content
+ buf.reset();
+ ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key1"),
+ ArrowCharView("gazornenplat")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(),
reinterpret_cast<char*>(buf->data)),
+ NANOARROW_OK);
+
+ // ...using the schema comparison that considers order
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key2", "value": "value2"}, {"key": "key1", "value":
"gazornenplat"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)");
+
+ // ...and using the schema comparison that does *not* consider order
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key2", "value": "value2"}, {"key": "key1", "value":
"gazornenplat"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)",
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_metadata_order(false);
+ });
+
+ // With different top-level metadata that are not equivalent because of item
keys
+ buf.reset();
+ ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key3"),
+ ArrowCharView("value1")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(),
reinterpret_cast<char*>(buf->data)),
+ NANOARROW_OK);
+
+ // ...using the schema comparison that considers order
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key2", "value": "value2"}, {"key": "key3", "value": "value1"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)");
+
+ // ...and using the schema comparison that does *not* consider order
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key2", "value": "value2"}, {"key": "key3", "value": "value1"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)",
+ [](TestingJSONComparison& comparison) {
+ comparison.set_compare_metadata_order(false);
+ });
+
+ // Metadata that are not equal and contain duplicate keys
+ buf.reset();
+ ASSERT_EQ(ArrowMetadataBuilderInit(buf.get(), nullptr), NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowMetadataBuilderAppend(buf.get(), ArrowCharView("key2"),
+ ArrowCharView("value2 again")),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetMetadata(actual.get(),
reinterpret_cast<char*>(buf->data)),
+ NANOARROW_OK);
+
+ // ...using the schema comparison that considers order
+ AssertSchemasCompareUnequal(actual.get(), expected.get(),
+ /*num_differences*/ 1,
+ /*differences*/
+ "Path: .metadata"
+ R"(
+- [{"key": "key2", "value": "value2"}, {"key": "key2", "value": "value2
again"}]
++ [{"key": "key2", "value": "value2"}, {"key": "key1", "value": "value1"}]
+
+)");
+
+ // Comparison is not implemented for the comparison that does not consider
order
+ TestingJSONComparison comparison;
+ comparison.set_compare_metadata_order(false);
+ ASSERT_EQ(comparison.CompareSchema(actual.get(), expected.get()), ENOTSUP);
+}
+
TEST(NanoarrowTestingTest, NanoarrowTestingTestArrayComparison) {
nanoarrow::UniqueSchema schema;
nanoarrow::UniqueArray actual;
@@ -1496,6 +1713,45 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestArrayComparison) {
)");
}
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFloatingPointArrayComparison) {
+ nanoarrow::UniqueSchema schema;
+ nanoarrow::UniqueArray actual;
+ nanoarrow::UniqueArray expected;
+ TestingJSONComparison comparison;
+ std::stringstream msg;
+
+ ArrowSchemaInit(schema.get());
+ ASSERT_EQ(ArrowSchemaSetTypeStruct(schema.get(), 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_DOUBLE),
NANOARROW_OK);
+ ASSERT_EQ(comparison.SetSchema(schema.get()), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayInitFromSchema(actual.get(), schema.get(), nullptr),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendDouble(actual->children[0], 1.23456789),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(actual.get(), nullptr),
NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayInitFromSchema(expected.get(), schema.get(), nullptr),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendDouble(expected->children[0], 1.23456),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(expected.get(), nullptr),
NANOARROW_OK);
+
+ // Default precision: all decimal places
+ ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()),
NANOARROW_OK);
+ EXPECT_EQ(comparison.num_differences(), 1);
+ comparison.ClearDifferences();
+
+ // With just enough decimal places to trigger a difference
+ comparison.set_compare_float_precision(5);
+ ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()),
NANOARROW_OK);
+ EXPECT_EQ(comparison.num_differences(), 1);
+ comparison.ClearDifferences();
+
+ // With just few enough decimal places to be considered equivalent
+ comparison.set_compare_float_precision(4);
+ ASSERT_EQ(comparison.CompareBatch(actual.get(), expected.get()),
NANOARROW_OK);
+ EXPECT_EQ(comparison.num_differences(), 0);
+ comparison.ClearDifferences();
+}
+
TEST(NanoarrowTestingTest, NanoarrowTestingTestArrayWithDictionaryComparison) {
nanoarrow::UniqueSchema schema;
nanoarrow::UniqueArray actual;