This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 12be163 feat: Add C Data integration test shared library (#337)
12be163 is described below
commit 12be163269f3c1b27a09c58c58ba388acbd36b86
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Dec 19 10:14:12 2023 -0400
feat: Add C Data integration test shared library (#337)
This PR adds the shared library target required by the archery
integration tester, based on
https://github.com/apache/arrow/blob/main/cpp/src/arrow/integration/c_data_integration_internal.cc
.
I haven't tested this via archery because I have no idea how to do so
(the implementation names and file locations seem hard-coded?); however,
it does add a googletest file with some minimal examples to at least
make sure everything is wired up.
---------
Co-authored-by: Antoine Pitrou <[email protected]>
---
CMakeLists.txt | 36 ++--
.../src/nanoarrow/nanoarrow_ipc_files_test.cc | 4 +-
src/nanoarrow/integration/c_data_integration.cc | 224 +++++++++++++++++++++
src/nanoarrow/integration/c_data_integration.h | 90 +++++++++
.../integration/c_data_integration_test.cc | 160 +++++++++++++++
src/nanoarrow/nanoarrow_testing.hpp | 47 ++++-
src/nanoarrow/nanoarrow_testing_test.cc | 8 +-
7 files changed, 551 insertions(+), 18 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a48769f..1e44b6f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,6 +32,8 @@ set(NANOARROW_VERSION_MINOR "${nanoarrow_VERSION_MINOR}")
set(NANOARROW_VERSION_PATCH "${nanoarrow_VERSION_PATCH}")
option(NANOARROW_BUILD_TESTS "Build tests" OFF)
+option(NANOARROW_BUILD_INTEGRATION_TESTS
+ "Build cross-implementation Arrow integration tests" OFF)
option(NANOARROW_BUNDLE "Create bundled nanoarrow.h and nanoarrow.c" OFF)
option(NANOARROW_BUNDLE_AS_CPP "Bundle nanoarrow source file as nanoarrow.cc"
OFF)
option(NANOARROW_NAMESPACE "A prefix for exported symbols" OFF)
@@ -151,10 +153,25 @@ else()
DESTINATION include/nanoarrow)
endif()
-if(NANOARROW_BUILD_TESTS)
- # For testing we use GTest + Arrow C++
+# Always build integration test if building tests
+if(NANOARROW_BUILD_TESTS OR NANOARROW_BUILD_INTEGRATION_TESTS)
include(FetchContent)
+ fetchcontent_declare(nlohmann_json
+ URL
https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.zip
+ URL_HASH
SHA256=95651d7d1fcf2e5c3163c3d37df6d6b3e9e5027299e6bd050d157322ceda9ac9
+ )
+ fetchcontent_makeavailable(nlohmann_json)
+
+ add_library(nanoarrow_c_data_integration SHARED
+ src/nanoarrow/integration/c_data_integration.cc)
+ target_include_directories(nanoarrow_c_data_integration
+ PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
+ $<INSTALL_INTERFACE:include>)
+ target_link_libraries(nanoarrow_c_data_integration PRIVATE nanoarrow
nlohmann_json)
+endif()
+
+if(NANOARROW_BUILD_TESTS)
set(MEMORYCHECK_COMMAND_OPTIONS
"--leak-check=full
--suppressions=${CMAKE_CURRENT_LIST_DIR}/valgrind.supp --error-exitcode=1"
)
@@ -203,16 +220,6 @@ if(NANOARROW_BUILD_TESTS)
fetchcontent_makeavailable(googletest)
- # JSON library for integration testing
- # Also used by some versions of Arrow, so check if this is already available
- if(NOT TARGET nlohmann_json::nlohmann_json)
- fetchcontent_declare(nlohmann_json
- URL
https://github.com/nlohmann/json/archive/refs/tags/v3.11.2.zip
- URL_HASH
SHA256=95651d7d1fcf2e5c3163c3d37df6d6b3e9e5027299e6bd050d157322ceda9ac9
- )
- fetchcontent_makeavailable(nlohmann_json)
- endif()
-
add_executable(utils_test src/nanoarrow/utils_test.cc)
add_executable(buffer_test src/nanoarrow/buffer_test.cc)
add_executable(array_test src/nanoarrow/array_test.cc)
@@ -220,6 +227,8 @@ if(NANOARROW_BUILD_TESTS)
add_executable(array_stream_test src/nanoarrow/array_stream_test.cc)
add_executable(nanoarrow_hpp_test src/nanoarrow/nanoarrow_hpp_test.cc)
add_executable(nanoarrow_testing_test
src/nanoarrow/nanoarrow_testing_test.cc)
+ add_executable(c_data_integration_test
+ src/nanoarrow/integration/c_data_integration_test.cc)
if(NANOARROW_CODE_COVERAGE)
target_compile_options(coverage_config INTERFACE -O0 -g --coverage)
@@ -250,6 +259,8 @@ if(NANOARROW_BUILD_TESTS)
gtest_main
nlohmann_json::nlohmann_json
coverage_config)
+ target_link_libraries(c_data_integration_test nanoarrow
nanoarrow_c_data_integration
+ gtest_main)
include(GoogleTest)
# Some users have reported a timeout with the default value of 5
@@ -262,4 +273,5 @@ if(NANOARROW_BUILD_TESTS)
gtest_discover_tests(array_stream_test DISCOVERY_TIMEOUT 10)
gtest_discover_tests(nanoarrow_hpp_test DISCOVERY_TIMEOUT 10)
gtest_discover_tests(nanoarrow_testing_test DISCOVERY_TIMEOUT 10)
+ gtest_discover_tests(c_data_integration_test DISCOVERY_TIMEOUT 10)
endif()
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
index 7bbd5e7..b7d922c 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_files_test.cc
@@ -122,7 +122,9 @@ class TestFile {
// Use testing util to populate the array stream
nanoarrow::testing::TestingJSONReader reader;
- NANOARROW_RETURN_NOT_OK(reader.ReadDataFile(json_string, out, error));
+ NANOARROW_RETURN_NOT_OK(reader.ReadDataFile(
+ json_string, out,
nanoarrow::testing::TestingJSONReader::kNumBatchReadAll,
+ error));
return NANOARROW_OK;
}
diff --git a/src/nanoarrow/integration/c_data_integration.cc
b/src/nanoarrow/integration/c_data_integration.cc
new file mode 100644
index 0000000..6c391ec
--- /dev/null
+++ b/src/nanoarrow/integration/c_data_integration.cc
@@ -0,0 +1,224 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <nanoarrow/nanoarrow_testing.hpp>
+#include "c_data_integration.h"
+
+static int64_t kBytesAllocated = 0;
+
+// Reallocation hook for the integration-test allocator. The request is forwarded to
+// nanoarrow's default allocator and kBytesAllocated is adjusted to match.
+static uint8_t* IntegrationTestReallocate(ArrowBufferAllocator* allocator, uint8_t* ptr,
+                                          int64_t old_size, int64_t new_size) {
+  ArrowBufferAllocator fallback = ArrowBufferAllocatorDefault();
+  uint8_t* resized = fallback.reallocate(&fallback, ptr, old_size, new_size);
+
+  // The old block is always discounted; the new size only counts when the
+  // underlying reallocation handed back a buffer.
+  kBytesAllocated -= old_size;
+  if (resized == nullptr) {
+    return resized;
+  }
+
+  kBytesAllocated += new_size;
+  return resized;
+}
+
+// Free hook for the integration-test allocator: releases ptr through nanoarrow's
+// default allocator and removes size bytes from the running kBytesAllocated total.
+static void IntegrationTestFree(struct ArrowBufferAllocator* allocator, uint8_t* ptr,
+                                int64_t size) {
+  ArrowBufferAllocator fallback = ArrowBufferAllocatorDefault();
+  fallback.free(&fallback, ptr, size);
+  kBytesAllocated -= size;
+}
+
+// Builds the byte-counting allocator: both callbacks delegate to the default
+// allocator and need no private state.
+static ArrowBufferAllocator IntegrationTestAllocator() {
+  ArrowBufferAllocator tracking{};
+  tracking.private_data = nullptr;
+  tracking.free = &IntegrationTestFree;
+  tracking.reallocate = &IntegrationTestReallocate;
+  return tracking;
+}
+
+// Copies the entire content of the file at file_path into out (binary mode).
+//
+// Returns NANOARROW_OK on success, ENOENT if the file could not be opened and EIO if
+// the stream reported an unrecoverable error while reading. Previously both failure
+// modes were reported as success with empty (or truncated) content.
+static ArrowErrorCode ReadFileString(std::ostream& out, const std::string& file_path) {
+  std::ifstream infile(file_path, std::ios::in | std::ios::binary);
+  if (!infile.is_open()) {
+    return ENOENT;
+  }
+
+  char buf[8096];
+  // read() fails on the final, partial chunk; gcount() still reports how many bytes
+  // of that chunk are valid, so keep going until nothing more was extracted.
+  while (infile.read(buf, sizeof(buf)) || infile.gcount() > 0) {
+    out.write(buf, infile.gcount());
+  }
+
+  // badbit (as opposed to the expected eofbit/failbit at end of file) means real I/O
+  // trouble
+  if (infile.bad()) {
+    return EIO;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Loads the JSON data file at json_path and parses it into the array stream out.
+// num_batch is forwarded unchanged to TestingJSONReader::ReadDataFile(); all buffers
+// are allocated through the byte-counting integration-test allocator.
+static ArrowErrorCode ArrayStreamFromJsonFilePath(const std::string& json_path,
+                                                  ArrowArrayStream* out, int num_batch,
+                                                  ArrowError* error) {
+  std::stringstream contents;
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(ReadFileString(contents, json_path), error);
+
+  const std::string json_text = contents.str();
+  nanoarrow::testing::TestingJSONReader json_reader(IntegrationTestAllocator());
+  return json_reader.ReadDataFile(json_text, out, num_batch, error);
+}
+
+struct MaterializedArrayStream {  // In-memory copy of a stream: its schema plus the batches pulled from it
+ nanoarrow::UniqueSchema schema;  // Filled from the stream's get_schema() by MaterializeJsonFilePath()
+ std::vector<nanoarrow::UniqueArray> arrays;  // One entry per batch returned by get_next(), in stream order
+};
+
+// Parses the JSON data file at json_path and drains the resulting stream into out:
+// the schema goes to out->schema and every batch is appended to out->arrays.
+//
+// num_batch is forwarded to the reader and selects which batches the stream contains.
+// On failure the stream's last error message (if any) is copied into error and the
+// failing error code is returned.
+static ArrowErrorCode MaterializeJsonFilePath(const std::string& json_path,
+                                              MaterializedArrayStream* out, int num_batch,
+                                              ArrowError* error) {
+  nanoarrow::UniqueArrayStream stream;
+  NANOARROW_RETURN_NOT_OK(
+      ArrayStreamFromJsonFilePath(json_path, stream.get(), num_batch, error));
+
+  int result = stream->get_schema(stream.get(), out->schema.get());
+  if (result != NANOARROW_OK) {
+    const char* err = stream->get_last_error(stream.get());
+    if (err != nullptr) {
+      ArrowErrorSet(error, "%s", err);
+    }
+
+    // A failed get_schema() used to fall through into the batch loop and could end
+    // up reported as success; propagate it like a failed get_next().
+    return result;
+  }
+
+  while (true) {
+    nanoarrow::UniqueArray tmp;
+    result = stream->get_next(stream.get(), tmp.get());
+    if (result != NANOARROW_OK) {
+      const char* err = stream->get_last_error(stream.get());
+      if (err != nullptr) {
+        ArrowErrorSet(error, "%s", err);
+      }
+
+      return result;
+    }
+
+    // A released (empty) array marks the end of the stream
+    if (tmp->release == nullptr) {
+      break;
+    }
+
+    out->arrays.emplace_back(tmp.get());
+  }
+
+  return NANOARROW_OK;
+}
+
+// Reads only the schema of the JSON data file at json_path and moves it into out.
+static ArrowErrorCode ExportSchemaFromJson(const char* json_path, ArrowSchema* out,
+                                           ArrowError* error) {
+  using nanoarrow::testing::TestingJSONReader;
+
+  MaterializedArrayStream materialized;
+  const ArrowErrorCode status = MaterializeJsonFilePath(
+      json_path, &materialized, TestingJSONReader::kNumBatchOnlySchema, error);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  ArrowSchemaMove(materialized.schema.get(), out);
+  return NANOARROW_OK;
+}
+
+// Takes ownership of schema and compares it against the schema stored in the JSON
+// data file at json_path. Returns EINVAL, with the differences written into error,
+// when the two do not match.
+static ArrowErrorCode ImportSchemaAndCompareToJson(const char* json_path,
+                                                   ArrowSchema* schema,
+                                                   ArrowError* error) {
+  using nanoarrow::testing::TestingJSONComparison;
+  using nanoarrow::testing::TestingJSONReader;
+
+  // Adopt the incoming schema first so that it is released on every exit path
+  nanoarrow::UniqueSchema imported(schema);
+
+  MaterializedArrayStream expected;
+  NANOARROW_RETURN_NOT_OK(MaterializeJsonFilePath(
+      json_path, &expected, TestingJSONReader::kNumBatchOnlySchema, error));
+
+  TestingJSONComparison comparison;
+  NANOARROW_RETURN_NOT_OK(
+      comparison.CompareSchema(imported.get(), expected.schema.get(), error));
+
+  if (comparison.num_differences() > 0) {
+    std::stringstream report;
+    comparison.WriteDifferences(report);
+    const std::string report_text = report.str();
+    ArrowErrorSet(error, "Found %d differences:\n%s",
+                  static_cast<int>(comparison.num_differences()), report_text.c_str());
+    return EINVAL;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Exports batch number num_batch of the JSON data file at json_path into out.
+//
+// MaterializeJsonFilePath() is asked for a single batch, so the materialized stream
+// holds that batch at position 0 no matter what num_batch is. Indexing the vector
+// with num_batch (as was done before) reads out of bounds for every batch but the
+// first and for the negative "read all"/"schema only" sentinels.
+static ArrowErrorCode ExportBatchFromJson(const char* json_path, int num_batch,
+                                          ArrowArray* out, ArrowError* error) {
+  if (num_batch < 0) {
+    ArrowErrorSet(error, "Expected num_batch >= 0 but got %d", num_batch);
+    return EINVAL;
+  }
+
+  MaterializedArrayStream data;
+  NANOARROW_RETURN_NOT_OK(MaterializeJsonFilePath(json_path, &data, num_batch, error));
+
+  // Defensive: never touch the vector unless exactly the requested batch is there
+  if (data.arrays.size() != 1) {
+    ArrowErrorSet(error, "Expected exactly one materialized batch but got %d",
+                  static_cast<int>(data.arrays.size()));
+    return EINVAL;
+  }
+
+  ArrowArrayMove(data.arrays.front().get(), out);
+  return NANOARROW_OK;
+}
+
+// Takes ownership of batch and compares it against batch number num_batch of the
+// JSON data file at json_path. Returns EINVAL, with the differences written into
+// error, when the two do not match.
+//
+// Only the requested batch is materialized from the file, so the expected array is
+// always at position 0; indexing with num_batch (as was done before) reads out of
+// bounds for every batch but the first.
+static ArrowErrorCode ImportBatchAndCompareToJson(const char* json_path, int num_batch,
+                                                  ArrowArray* batch, ArrowError* error) {
+  // Adopt the incoming batch first so that it is released on every exit path
+  nanoarrow::UniqueArray actual(batch);
+
+  if (num_batch < 0) {
+    ArrowErrorSet(error, "Expected num_batch >= 0 but got %d", num_batch);
+    return EINVAL;
+  }
+
+  MaterializedArrayStream data;
+  NANOARROW_RETURN_NOT_OK(MaterializeJsonFilePath(json_path, &data, num_batch, error));
+
+  // Defensive: never touch the vector unless exactly the requested batch is there
+  if (data.arrays.size() != 1) {
+    ArrowErrorSet(error, "Expected exactly one materialized batch but got %d",
+                  static_cast<int>(data.arrays.size()));
+    return EINVAL;
+  }
+
+  nanoarrow::testing::TestingJSONComparison comparison;
+  NANOARROW_RETURN_NOT_OK(comparison.SetSchema(data.schema.get(), error));
+  NANOARROW_RETURN_NOT_OK(
+      comparison.CompareBatch(actual.get(), data.arrays.front().get(), error));
+  if (comparison.num_differences() > 0) {
+    std::stringstream ss;
+    comparison.WriteDifferences(ss);
+    ArrowErrorSet(error, "Found %d differences:\n%s",
+                  static_cast<int>(comparison.num_differences()), ss.str().c_str());
+    return EINVAL;
+  }
+
+  return NANOARROW_OK;
+}
+
+static ArrowError global_error;
+
+static const char* ConvertError(ArrowErrorCode errno_code) {
+ if (errno_code == NANOARROW_OK) {
+ return nullptr;
+ } else {
+ return global_error.message;
+ }
+}
+
+// Reports the running byte total maintained by the integration-test allocator.
+int64_t nanoarrow_BytesAllocated() {
+  const int64_t outstanding_bytes = kBytesAllocated;
+  return outstanding_bytes;
+}
+
+const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char*
json_path,
+ ArrowSchema* out) {
+ ArrowErrorInit(&global_error);
+ return ConvertError(ExportSchemaFromJson(json_path, out, &global_error));
+}
+
+const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(const
char* json_path,
+
ArrowSchema* schema) {
+ ArrowErrorInit(&global_error);
+ return ConvertError(ImportSchemaAndCompareToJson(json_path, schema,
&global_error));
+}
+
+const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char*
json_path,
+ int num_batch,
+ ArrowArray* out) {
+ ArrowErrorInit(&global_error);
+ return ConvertError(ExportBatchFromJson(json_path, num_batch, out,
&global_error));
+}
+
+const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(const char*
json_path,
+ int
num_batch,
+ ArrowArray*
batch) {
+ ArrowErrorInit(&global_error);
+ return ConvertError(
+ ImportBatchAndCompareToJson(json_path, num_batch, batch, &global_error));
+}
diff --git a/src/nanoarrow/integration/c_data_integration.h
b/src/nanoarrow/integration/c_data_integration.h
new file mode 100644
index 0000000..cf76c1b
--- /dev/null
+++ b/src/nanoarrow/integration/c_data_integration.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
+#define NANOARROW_INTEGRATION_C_DATA_INTEGRATION_H_INCLUDED
+
+// int64_t is used by the struct definitions and by nanoarrow_BytesAllocated(), so
+// the header must provide it itself instead of relying on the including file.
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Extra guard for versions of Arrow without the canonical guard
+#ifndef ARROW_FLAG_DICTIONARY_ORDERED
+
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+struct ArrowSchema {
+ // Array type description (type format string, optional field name and metadata)
+ const char* format;
+ const char* name;
+ const char* metadata;
+ int64_t flags;  // presumably a combination of the ARROW_FLAG_* values defined above
+ int64_t n_children;
+ struct ArrowSchema** children;
+ struct ArrowSchema* dictionary;
+
+ // Release callback; the integration tests treat a null release as "already released"
+ void (*release)(struct ArrowSchema*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+struct ArrowArray {
+ // Array data description (element counts, then the buffers/children holding the data)
+ int64_t length;
+ int64_t null_count;
+ int64_t offset;
+ int64_t n_buffers;
+ int64_t n_children;
+ const void** buffers;
+ struct ArrowArray** children;
+ struct ArrowArray* dictionary;
+
+ // Release callback; a null release is how the integration code detects an empty/released array
+ void (*release)(struct ArrowArray*);
+ // Opaque producer-specific data
+ void* private_data;
+};
+
+#endif // ARROW_C_DATA_INTERFACE
+#endif // ARROW_FLAG_DICTIONARY_ORDERED
+
+// Every nanoarrow_CDataIntegration_*() function returns NULL on success. On failure
+// it returns a message describing the problem; the message lives in storage shared by
+// all of these functions and is reset at the start of the next call.
+
+// Exports the schema of the JSON data file at json_path into out.
+const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(const char* json_path,
+                                                            struct ArrowSchema* out);
+
+// Compares schema with the schema of the JSON data file at json_path. schema is
+// consumed (released) whether or not the comparison succeeds.
+const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
+    const char* json_path, struct ArrowSchema* schema);
+
+// Exports batch number num_batch of the JSON data file at json_path into out.
+const char* nanoarrow_CDataIntegration_ExportBatchFromJson(const char* json_path,
+                                                           int num_batch,
+                                                           struct ArrowArray* out);
+
+// Compares batch with batch number num_batch of the JSON data file at json_path.
+// batch is consumed (released) whether or not the comparison succeeds.
+const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
+    const char* json_path, int num_batch, struct ArrowArray* batch);
+
+// Returns the byte total currently tracked by the allocator used for arrays created
+// by this library.
+int64_t nanoarrow_BytesAllocated(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/nanoarrow/integration/c_data_integration_test.cc
b/src/nanoarrow/integration/c_data_integration_test.cc
new file mode 100644
index 0000000..533dd70
--- /dev/null
+++ b/src/nanoarrow/integration/c_data_integration_test.cc
@@ -0,0 +1,160 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdio>
+#include <fstream>
+
+#include <gtest/gtest.h>
+
+#include <nanoarrow/nanoarrow.hpp>
+
+#include "c_data_integration.h"
+
+// Not a true tempfile (writes to working directory), but is possibly more
+// portable than mkstemp()
+// Scratch file with a fixed name; the file is removed when the object goes out of
+// scope and a diagnostic is printed if the removal fails.
+class TempFile {
+ public:
+  ~TempFile() {
+    const int status = std::remove(name_);
+    if (status == 0) {
+      return;
+    }
+
+    std::cerr << "Failed to remove '" << name_ << "'\n";
+  }
+
+  const char* name() { return name_; }
+
+ private:
+  const char* name_ = "c_data_integration.tmp.json";
+};
+
+// Writes content to the file at path (binary mode), replacing any previous content.
+//
+// Returns NANOARROW_OK on success and EIO if the file could not be opened or the
+// write/flush failed. Previously the result was always NANOARROW_OK, so the tests'
+// ASSERT_EQ(WriteFileString(...), NANOARROW_OK) could never detect a failure.
+ArrowErrorCode WriteFileString(const std::string& content, const std::string& path) {
+  std::ofstream outfile(path, std::ios::out | std::ios::binary);
+  if (!outfile.is_open()) {
+    return EIO;
+  }
+
+  outfile.write(content.data(), static_cast<std::streamsize>(content.size()));
+  // close() flushes; a failure of either the write or the flush sets failbit/badbit
+  outfile.close();
+  return outfile.fail() ? EIO : NANOARROW_OK;
+}
+
+TEST(NanoarrowIntegrationTest, NanoarrowIntegrationTestSchema) {  // Schema export/import through a temporary JSON file
+ TempFile temp;
+ nanoarrow::UniqueSchema schema;
+
+ // Exporting from a file that is not JSON must fail with a message starting with "Exception"
+ ASSERT_EQ(WriteFileString("this is not valid JSON", temp.name()),
NANOARROW_OK);
+ const char* err =
+ nanoarrow_CDataIntegration_ExportSchemaFromJson(temp.name(),
schema.get());
+ ASSERT_NE(err, nullptr);
+ ASSERT_EQ(std::string(err).substr(0, 9), "Exception");
+
+ // Valid roundtrip: export an empty schema, then import it against the same file
+ ASSERT_EQ(WriteFileString(R"({"schema": {"fields": []}, "batches": []})",
temp.name()),
+ NANOARROW_OK);
+
+ err = nanoarrow_CDataIntegration_ExportSchemaFromJson(temp.name(),
schema.get());
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_NE(schema->release, nullptr);  // export produced a live schema
+
+ err =
+ nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(temp.name(),
schema.get());
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_EQ(schema->release, nullptr);  // import consumed the schema
+
+ // Roundtrip with differences: export from one file content, compare against another
+ ASSERT_EQ(WriteFileString(R"({"schema": {"fields": []}, "batches": []})",
temp.name()),
+ NANOARROW_OK);
+
+ err = nanoarrow_CDataIntegration_ExportSchemaFromJson(temp.name(),
schema.get());
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_NE(schema->release, nullptr);
+
+ // Rewrite the JSON with an extra field so the exported (empty) schema no longer matches
+ ASSERT_EQ(
+ WriteFileString(
+ R"({"schema": {"fields": [{"name": "col1", "nullable": true, "type":
{"name": "null"}, "children": []}]}, "batches": []})",
+ temp.name()),
+ NANOARROW_OK);
+ err =
+ nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(temp.name(),
schema.get());
+ ASSERT_NE(err, nullptr);
+ ASSERT_EQ(std::string(err).substr(0, 19), "Found 1 differences") << err;
+ ASSERT_EQ(schema->release, nullptr);  // the schema is consumed even when the comparison fails
+}
+
+TEST(NanoarrowIntegrationTest, NanoarrowIntegrationTestBatch) {  // Batch export/import plus allocation accounting
+ TempFile temp;
+ nanoarrow::UniqueArray array;
+ int64_t bytes_allocated_start = nanoarrow_BytesAllocated();  // baseline for the checks below
+
+ // Exporting from a file that is not JSON must fail with a message starting with "Exception"
+ ASSERT_EQ(WriteFileString("this is not valid JSON", temp.name()),
NANOARROW_OK);
+ const char* err =
+ nanoarrow_CDataIntegration_ExportBatchFromJson(temp.name(), 0,
array.get());
+ ASSERT_NE(err, nullptr);
+ ASSERT_EQ(std::string(err).substr(0, 9), "Exception") << err;
+
+ // The file below holds a single batch, so batch id 1 must be rejected
+ ASSERT_EQ(
+ WriteFileString(
+ R"({"schema": {)"
+ R"("fields": [{"name": "col1", "nullable": true, "type": {"name":
"utf8"}, "children": []}]}, )"
+ R"("batches": [{"count": 1, "columns": [{"name": "col1", "count": 1,
"VALIDITY": [1], "OFFSET": [0, 3], "DATA": ["abc"]}]}]})",
+ temp.name()),
+ NANOARROW_OK);
+ err = nanoarrow_CDataIntegration_ExportBatchFromJson(temp.name(), 1,
array.get());
+ ASSERT_EQ(array->release, nullptr);  // nothing was exported
+ ASSERT_NE(err, nullptr);
+ ASSERT_STREQ(err, "Expected num_batch between 0 and 0 but got 1") << err;
+
+ err = nanoarrow_CDataIntegration_ExportBatchFromJson(temp.name(), 0,
array.get());
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_NE(array->release, nullptr);
+ err =
+ nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(temp.name(), 1,
array.get());
+ ASSERT_EQ(array->release, nullptr);  // the batch is consumed even though the import failed
+ ASSERT_STREQ(err, "Expected num_batch between 0 and 0 but got 1") << err;
+
+ // Valid roundtrip: the export allocates, the import releases everything again
+ err = nanoarrow_CDataIntegration_ExportBatchFromJson(temp.name(), 0,
array.get());
+ ASSERT_NE(array->release, nullptr);
+ ASSERT_EQ(err, nullptr);
+ ASSERT_GT(nanoarrow_BytesAllocated(), bytes_allocated_start);
+
+ err =
+ nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(temp.name(), 0,
array.get());
+ ASSERT_EQ(array->release, nullptr);
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_EQ(nanoarrow_BytesAllocated(), bytes_allocated_start);
+
+ // Roundtrip with differences: export the one-row batch, then compare against an empty batch
+ err = nanoarrow_CDataIntegration_ExportBatchFromJson(temp.name(), 0,
array.get());
+ ASSERT_NE(array->release, nullptr);
+ ASSERT_EQ(err, nullptr) << err;
+ ASSERT_GT(nanoarrow_BytesAllocated(), bytes_allocated_start);
+
+ ASSERT_EQ(
+ WriteFileString(
+ R"({"schema": {)"
+ R"("fields": [{"name": "col1", "nullable": true, "type": {"name":
"utf8"}, "children": []}]}, )"
+ R"("batches": [{"count": 0, "columns": [{"name": "col1", "count": 0,
"VALIDITY": [], "OFFSET": [0], "DATA": []}]}]})",
+ temp.name()),
+ NANOARROW_OK);
+ err =
+ nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(temp.name(), 0,
array.get());
+ ASSERT_NE(err, nullptr);
+ ASSERT_EQ(std::string(err).substr(0, 19), "Found 2 differences") << err;
+ ASSERT_EQ(array->release, nullptr);
+
+ ASSERT_EQ(nanoarrow_BytesAllocated(), bytes_allocated_start);  // no bytes left outstanding after the failed comparison
+}
diff --git a/src/nanoarrow/nanoarrow_testing.hpp
b/src/nanoarrow/nanoarrow_testing.hpp
index c2502a1..5cd40e4 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -733,11 +733,18 @@ class TestingJSONReader {
using json = nlohmann::json;
public:
+ TestingJSONReader(ArrowBufferAllocator allocator) : allocator_(allocator) {}
+ TestingJSONReader() : TestingJSONReader(ArrowBufferAllocatorDefault()) {}
+
+ static const int kNumBatchOnlySchema = -2;
+ static const int kNumBatchReadAll = -1;
+
/// \brief Read JSON representing a data file object
///
/// Read a JSON object in the form `{"schema": {...}, "batches": [...],
...}`,
/// propagating `out` on success.
ArrowErrorCode ReadDataFile(const std::string& data_file_json,
ArrowArrayStream* out,
+ int num_batch = kNumBatchReadAll,
ArrowError* error = nullptr) {
try {
auto obj = json::parse(data_file_json);
@@ -760,18 +767,34 @@ class TestingJSONReader {
NANOARROW_RETURN_NOT_OK(
ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), error));
+ // Get a vector of batch ids to parse
+ std::vector<size_t> batch_ids;
+ if (num_batch == kNumBatchOnlySchema) {
+ batch_ids.resize(0);
+ } else if (num_batch == kNumBatchReadAll) {
+ batch_ids.resize(batches.size());
+ std::iota(batch_ids.begin(), batch_ids.end(), 0);
+ } else if (num_batch >= 0 && num_batch < batches.size()) {
+ batch_ids.push_back(num_batch);
+ } else {
+ ArrowErrorSet(error, "Expected num_batch between 0 and %d but got %d",
+ static_cast<int>(batches.size() - 1), num_batch);
+ return EINVAL;
+ }
+
// Initialize ArrayStream with required capacity
nanoarrow::UniqueArrayStream stream;
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- ArrowBasicArrayStreamInit(stream.get(), schema.get(),
batches.size()), error);
+ ArrowBasicArrayStreamInit(stream.get(), schema.get(),
batch_ids.size()), error);
// Populate ArrayStream batches
- for (size_t i = 0; i < batches.size(); i++) {
+ for (size_t i = 0; i < batch_ids.size(); i++) {
nanoarrow::UniqueArray array;
NANOARROW_RETURN_NOT_OK(
ArrowArrayInitFromArrayView(array.get(), array_view.get(), error));
+ SetArrayAllocatorRecursive(array.get());
NANOARROW_RETURN_NOT_OK(
- SetArrayBatch(batches[i], array_view.get(), array.get(), error));
+ SetArrayBatch(batches[batch_ids[i]], array_view.get(),
array.get(), error));
ArrowBasicArrayStreamSetArray(stream.get(), i, array.get());
}
@@ -839,6 +862,7 @@ class TestingJSONReader {
// ArrowArray to hold memory
nanoarrow::UniqueArray array;
NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array.get(), schema,
error));
+ SetArrayAllocatorRecursive(array.get());
NANOARROW_RETURN_NOT_OK(SetArrayBatch(obj, array_view.get(),
array.get(), error));
ArrowArrayMove(array.get(), out);
@@ -867,6 +891,7 @@ class TestingJSONReader {
// ArrowArray to hold memory
nanoarrow::UniqueArray array;
NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array.get(), schema,
error));
+ SetArrayAllocatorRecursive(array.get());
// Parse the JSON into the array
NANOARROW_RETURN_NOT_OK(SetArrayColumn(obj, array_view.get(),
array.get(), error));
@@ -881,6 +906,8 @@ class TestingJSONReader {
}
private:
+ ArrowBufferAllocator allocator_;
+
ArrowErrorCode SetSchema(ArrowSchema* schema, const json& value, ArrowError*
error) {
NANOARROW_RETURN_NOT_OK(
Check(value.is_object(), error, "Expected Schema to be a JSON
object"));
@@ -1713,6 +1740,20 @@ class TestingJSONReader {
return NANOARROW_OK;
}
+  // Installs allocator_ on every buffer of `array`, then does the same for its
+  // dictionary (if any) and for each of its children.
+  void SetArrayAllocatorRecursive(ArrowArray* array) {
+    int buffer_index = 0;
+    while (buffer_index < array->n_buffers) {
+      ArrowArrayBuffer(array, buffer_index)->allocator = allocator_;
+      ++buffer_index;
+    }
+
+    if (array->dictionary != nullptr) {
+      SetArrayAllocatorRecursive(array->dictionary);
+    }
+
+    for (int64_t child_index = 0; child_index < array->n_children; ++child_index) {
+      SetArrayAllocatorRecursive(array->children[child_index]);
+    }
+  }
+
ArrowErrorCode PrefixError(ArrowErrorCode value, ArrowError* error,
const std::string& prefix) {
if (value != NANOARROW_OK && error != nullptr) {
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc
b/src/nanoarrow/nanoarrow_testing_test.cc
index d47159f..217bf9d 100644
--- a/src/nanoarrow/nanoarrow_testing_test.cc
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -824,7 +824,9 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestRoundtripDataFile) {
R"(]})";
TestingJSONReader reader;
- ASSERT_EQ(reader.ReadDataFile(data_file_json, stream.get(), &error),
NANOARROW_OK)
+ ASSERT_EQ(reader.ReadDataFile(data_file_json, stream.get(),
+ TestingJSONReader::kNumBatchReadAll, &error),
+ NANOARROW_OK)
<< error.message;
TestingJSONWriter writer;
@@ -837,7 +839,9 @@ TEST(NanoarrowTestingTest,
NanoarrowTestingTestRoundtripDataFile) {
// Check with zero batches
std::string data_file_json_empty = R"({"schema": {"fields": []}, "batches":
[]})";
- ASSERT_EQ(reader.ReadDataFile(data_file_json_empty, stream.get(), &error),
NANOARROW_OK)
+ ASSERT_EQ(reader.ReadDataFile(data_file_json_empty, stream.get(),
+ TestingJSONReader::kNumBatchReadAll, &error),
+ NANOARROW_OK)
<< error.message;
ASSERT_EQ(writer.WriteDataFile(data_file_json_roundtrip, stream.get()),
NANOARROW_OK);
EXPECT_EQ(data_file_json_roundtrip.str(), data_file_json_empty);