This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new cb2aa71 feat(extensions/nanoarrow_testing): Add nanoarrow_testing
extension with testing JSON writer (#317)
cb2aa71 is described below
commit cb2aa71b385147d85eff91ed87a418cefaa3c8bd
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Nov 21 16:51:54 2023 -0400
feat(extensions/nanoarrow_testing): Add nanoarrow_testing extension with
testing JSON writer (#317)
This PR adds the first few bits of infrastructure needed to implement
integration testing:
- nanoarrow_testing.hpp testing utility header
- CI to build/run the tests
- Batch + Column JSON writer for easy types to get things going
The design of the testing helper library is intentionally header-only to
facilitate dropping in to projects where needed (although I'm happy to
change that if there are opinions otherwise).
Some obvious follow-ups not included yet:
- Implement ArrowSchema -> JSON
- Support decimal and interval types
- Schema/Array equality checking
- JSON -> ArrowSchema
- JSON -> ArrowArray
---
CMakeLists.txt | 13 +-
docs/source/reference/index.rst | 1 +
docs/source/reference/{index.rst => testing.rst} | 17 +-
src/nanoarrow/nanoarrow_testing.hpp | 389 +++++++++++++++++++++++
src/nanoarrow/nanoarrow_testing_test.cc | 290 +++++++++++++++++
5 files changed, 699 insertions(+), 11 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1147d1e..cffa590 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,11 +71,17 @@ if(NANOARROW_BUNDLE)
"${SRC_FILE_CONTENTS}")
file(WRITE ${NANOARROW_H_TEMP} "${SRC_FILE_CONTENTS}")
- # Copy nanoarrow.hpp too
+ # Copy nanoarrow.hpp
set(NANOARROW_HPP_TEMP
${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow.hpp)
file(READ src/nanoarrow/nanoarrow.hpp SRC_FILE_CONTENTS)
file(WRITE ${NANOARROW_HPP_TEMP} "${SRC_FILE_CONTENTS}")
+ # Copy nanoarrow_testing.hpp
+ set(NANOARROW_TESTING_HPP_TEMP
+ ${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow_testing.hpp)
+ file(READ src/nanoarrow/nanoarrow_testing.hpp SRC_FILE_CONTENTS)
+ file(WRITE ${NANOARROW_TESTING_HPP_TEMP} "${SRC_FILE_CONTENTS}")
+
# Combine all source files into amalgamation/nanoarrow.c in the build
directory
if(NANOARROW_BUNDLE_AS_CPP)
set(NANOARROW_C_TEMP
${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow.cc)
@@ -100,7 +106,7 @@ if(NANOARROW_BUNDLE)
# Install the amalgamated header and source
install(FILES ${NANOARROW_H_TEMP} ${NANOARROW_C_TEMP} ${NANOARROW_HPP_TEMP}
- DESTINATION ".")
+ ${NANOARROW_TESTING_HPP_TEMP} DESTINATION ".")
else()
add_library(nanoarrow src/nanoarrow/array.c src/nanoarrow/schema.c
src/nanoarrow/array_stream.c src/nanoarrow/utils.c)
@@ -203,6 +209,7 @@ if(NANOARROW_BUILD_TESTS)
add_executable(schema_test src/nanoarrow/schema_test.cc)
add_executable(array_stream_test src/nanoarrow/array_stream_test.cc)
add_executable(nanoarrow_hpp_test src/nanoarrow/nanoarrow_hpp_test.cc)
+ add_executable(nanoarrow_testing_test
src/nanoarrow/nanoarrow_testing_test.cc)
if(NANOARROW_CODE_COVERAGE)
target_compile_options(coverage_config INTERFACE -O0 -g --coverage)
@@ -228,6 +235,7 @@ if(NANOARROW_BUILD_TESTS)
coverage_config)
target_link_libraries(array_stream_test nanoarrow gtest_main coverage_config)
target_link_libraries(nanoarrow_hpp_test nanoarrow gtest_main
coverage_config)
+ target_link_libraries(nanoarrow_testing_test nanoarrow gtest_main
coverage_config)
include(GoogleTest)
# Some users have reported a timeout with the default value of 5
@@ -239,4 +247,5 @@ if(NANOARROW_BUILD_TESTS)
gtest_discover_tests(schema_test DISCOVERY_TIMEOUT 10)
gtest_discover_tests(array_stream_test DISCOVERY_TIMEOUT 10)
gtest_discover_tests(nanoarrow_hpp_test DISCOVERY_TIMEOUT 10)
+ gtest_discover_tests(nanoarrow_testing_test DISCOVERY_TIMEOUT 10)
endif()
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
index 56c4b50..613aca7 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@@ -24,5 +24,6 @@ API Reference
R API Reference <r>
C API Reference <c>
C++ API Reference <cpp>
+ Testing API Reference <testing>
IPC Extension Reference <ipc>
Device Extension Reference <device>
diff --git a/docs/source/reference/index.rst b/docs/source/reference/testing.rst
similarity index 79%
copy from docs/source/reference/index.rst
copy to docs/source/reference/testing.rst
index 56c4b50..7e36a32 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/testing.rst
@@ -15,14 +15,13 @@
.. specific language governing permissions and limitations
.. under the License.
-API Reference
-=============
+Testing API Reference
+=====================
-.. toctree::
- :maxdepth: 2
+.. doxygengroup:: nanoarrow_testing
- R API Reference <r>
- C API Reference <c>
- C++ API Reference <cpp>
- IPC Extension Reference <ipc>
- Device Extension Reference <device>
+Integration testing JSON
+------------------------
+
+.. doxygengroup:: nanoarrow_testing-json
+ :members:
diff --git a/src/nanoarrow/nanoarrow_testing.hpp
b/src/nanoarrow/nanoarrow_testing.hpp
new file mode 100644
index 0000000..aa6200a
--- /dev/null
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -0,0 +1,389 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <string>
+
+#include "nanoarrow.hpp"
+
+#ifndef NANOARROW_TESTING_HPP_INCLUDED
+#define NANOARROW_TESTING_HPP_INCLUDED
+
+/// \defgroup nanoarrow_testing Nanoarrow Testing Helpers
+///
+/// Utilities for testing nanoarrow structures and functions.
+
+namespace nanoarrow {
+
+namespace testing {
+
+/// \defgroup nanoarrow_testing-json Integration test helpers
+///
+/// See testing format documentation for details of the JSON representation.
This
+/// representation is not canonical but can be used to implement integration
tests with
+/// other implementations.
+///
+/// @{
+
+/// \brief Writer for the Arrow integration testing JSON format
+///
+/// The writer holds no state of its own: each call streams JSON text straight
+/// to the std::ostream it is given. If a call fails part-way through, the text
+/// written before the failure remains in the stream.
+class TestingJSONWriter {
+ public:
+  /// \brief Write a "batch" to out
+  ///
+  /// Creates output like `{"count": 123, "columns": [...]}`.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param schema Schema describing value; its format must be "+s" (struct)
+  /// \param value Array view whose children are written as the columns
+  /// \return NANOARROW_OK on success, EINVAL if schema is not a struct
+  ///   (including a missing format string), or the error from a child column
+  ArrowErrorCode WriteBatch(std::ostream& out, const ArrowSchema* schema,
+                            ArrowArrayView* value) {
+    // Make sure we have a struct. The null check comes first because building
+    // a std::string from a null pointer is undefined behavior.
+    if (schema->format == nullptr || std::string(schema->format) != "+s") {
+      return EINVAL;
+    }
+
+    out << "{";
+
+    // Write length
+    out << R"("count": )" << value->length;
+
+    // Write children
+    out << R"(, "columns": )";
+    NANOARROW_RETURN_NOT_OK(WriteChildren(out, schema, value));
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write a column to out
+  ///
+  /// Creates output like `{"name": "col", "count": 123, "VALIDITY": [...], ...}`.
+  /// Which of the VALIDITY, TYPE_ID, OFFSET, DATA and children elements appear
+  /// is decided by value->storage_type.
+  ///
+  /// \param out Stream receiving the JSON text
+  /// \param field Schema of the column; a null name is written as JSON null
+  /// \param value Array view holding the column's buffers and children
+  /// \return NANOARROW_OK on success, ENOTSUP if the column (or a nested
+  ///   column) holds data this writer cannot represent yet
+  ArrowErrorCode WriteColumn(std::ostream& out, const ArrowSchema* field,
+                             ArrowArrayView* value) {
+    out << "{";
+
+    // Write schema->name (may be null).
+    // NOTE: the name is streamed verbatim, i.e. without JSON escaping.
+    if (field->name == nullptr) {
+      out << R"("name": null)";
+    } else {
+      out << R"("name": ")" << field->name << R"(")";
+    }
+
+    // Write length
+    out << R"(, "count": )" << value->length;
+
+    // Write the VALIDITY element if required (everything except null and
+    // union storage)
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_NA:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        break;
+      default:
+        out << R"(, "VALIDITY": )";
+        WriteBitmap(out, value->buffer_views[0].data.as_uint8, value->length);
+        break;
+    }
+
+    // Write the TYPE_ID element if required (unions only; read from buffer 0)
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_SPARSE_UNION:
+      case NANOARROW_TYPE_DENSE_UNION:
+        out << R"(, "TYPE_ID": )";
+        NANOARROW_RETURN_NOT_OK(
+            WriteOffsetOrTypeID<int8_t>(out, value->buffer_views[0]));
+        break;
+      default:
+        break;
+    }
+
+    // Write the OFFSET element if required (read from buffer 1): 32-bit
+    // offsets are plain numbers, 64-bit offsets are quoted
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_BINARY:
+      case NANOARROW_TYPE_STRING:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_LIST:
+        out << R"(, "OFFSET": )";
+        NANOARROW_RETURN_NOT_OK(
+            WriteOffsetOrTypeID<int32_t>(out, value->buffer_views[1]));
+        break;
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_LARGE_STRING:
+        out << R"(, "OFFSET": )";
+        NANOARROW_RETURN_NOT_OK(
+            WriteOffsetOrTypeID<int64_t>(out, value->buffer_views[1]));
+        break;
+      default:
+        break;
+    }
+
+    // Write the DATA element if required (skipped for null and nested storage)
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_NA:
+      case NANOARROW_TYPE_STRUCT:
+      case NANOARROW_TYPE_LIST:
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        break;
+      default:
+        out << R"(, "DATA": )";
+        NANOARROW_RETURN_NOT_OK(WriteData(out, value));
+        break;
+    }
+
+    // Write the children element if required (nested storage only)
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_STRUCT:
+      case NANOARROW_TYPE_LIST:
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        out << R"(, "children": )";
+        NANOARROW_RETURN_NOT_OK(WriteChildren(out, field, value));
+        break;
+      default:
+        break;
+    }
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+ private:
+  /// \brief Write length validity bits as a JSON array of 0/1
+  ///
+  /// A null bits pointer is written as all ones.
+  void WriteBitmap(std::ostream& out, const uint8_t* bits, int64_t length) {
+    if (length == 0) {
+      out << "[]";
+      return;
+    }
+
+    out << "[";
+
+    if (bits == nullptr) {
+      out << "1";
+      for (int64_t i = 1; i < length; i++) {
+        out << ", 1";
+      }
+    } else {
+      out << static_cast<int32_t>(ArrowBitGet(bits, 0));
+      for (int64_t i = 1; i < length; i++) {
+        out << ", " << static_cast<int32_t>(ArrowBitGet(bits, i));
+      }
+    }
+
+    out << "]";
+  }
+
+  /// \brief Write every element of an offset or type id buffer as a JSON array
+  ///
+  /// The element count is content.size_bytes / sizeof(T). 64-bit elements are
+  /// quoted; narrower elements are written as plain JSON numbers.
+  template <typename T>
+  ArrowErrorCode WriteOffsetOrTypeID(std::ostream& out, ArrowBufferView content) {
+    if (content.size_bytes == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    const T* values = reinterpret_cast<const T*>(content.data.data);
+    int64_t n_values = content.size_bytes / sizeof(T);
+
+    out << "[";
+
+    if (sizeof(T) == sizeof(int64_t)) {
+      // Ensure int64s are quoted (i.e, "123456")
+      out << R"(")" << values[0] << R"(")";
+      for (int64_t i = 1; i < n_values; i++) {
+        out << R"(, ")" << values[i] << R"(")";
+      }
+    } else {
+      // No need to quote smaller ints (i.e., 123456). Every element, including
+      // the first, is widened before streaming: an int8_t type id streamed
+      // directly would be written as a raw character instead of a number.
+      out << static_cast<int64_t>(values[0]);
+      for (int64_t i = 1; i < n_values; i++) {
+        out << ", " << static_cast<int64_t>(values[i]);
+      }
+    }
+
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write the DATA element for a primitive, string or binary array
+  ///
+  /// Every slot is written, whether or not it is valid. A zero-length array is
+  /// written as `[]` before the storage type is examined. For an unsupported
+  /// storage type ENOTSUP is returned after the opening `[` has been written.
+  ArrowErrorCode WriteData(std::ostream& out, ArrowArrayView* value) {
+    if (value->length == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    out << "[";
+
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_BOOL:
+      case NANOARROW_TYPE_INT8:
+      case NANOARROW_TYPE_UINT8:
+      case NANOARROW_TYPE_INT16:
+      case NANOARROW_TYPE_UINT16:
+      case NANOARROW_TYPE_INT32:
+      case NANOARROW_TYPE_UINT32:
+        // Regular JSON integers (i.e., 123456)
+        out << ArrowArrayViewGetIntUnsafe(value, 0);
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", " << ArrowArrayViewGetIntUnsafe(value, i);
+        }
+        break;
+      case NANOARROW_TYPE_INT64:
+        // Quoted integers to avoid overflow (i.e., "123456")
+        out << R"(")" << ArrowArrayViewGetIntUnsafe(value, 0) << R"(")";
+        for (int64_t i = 1; i < value->length; i++) {
+          out << R"(, ")" << ArrowArrayViewGetIntUnsafe(value, i) << R"(")";
+        }
+        break;
+      case NANOARROW_TYPE_UINT64:
+        // Quoted integers to avoid overflow (i.e., "123456")
+        out << R"(")" << ArrowArrayViewGetUIntUnsafe(value, 0) << R"(")";
+        for (int64_t i = 1; i < value->length; i++) {
+          out << R"(, ")" << ArrowArrayViewGetUIntUnsafe(value, i) << R"(")";
+        }
+        break;
+
+      case NANOARROW_TYPE_FLOAT:
+      case NANOARROW_TYPE_DOUBLE: {
+        // JSON number to 3 decimal places; the stream's previous formatting
+        // is restored when local_stream_opt goes out of scope
+        LocalizedStream local_stream_opt(out);
+        local_stream_opt.SetFixed(3);
+
+        out << ArrowArrayViewGetDoubleUnsafe(value, 0);
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", " << ArrowArrayViewGetDoubleUnsafe(value, i);
+        }
+        break;
+      }
+
+      case NANOARROW_TYPE_STRING:
+      case NANOARROW_TYPE_LARGE_STRING:
+        NANOARROW_RETURN_NOT_OK(
+            WriteString(out, ArrowArrayViewGetStringUnsafe(value, 0)));
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", ";
+          NANOARROW_RETURN_NOT_OK(
+              WriteString(out, ArrowArrayViewGetStringUnsafe(value, i)));
+        }
+        break;
+
+      case NANOARROW_TYPE_BINARY:
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
+        NANOARROW_RETURN_NOT_OK(
+            WriteBytes(out, ArrowArrayViewGetBytesUnsafe(value, 0)));
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", ";
+          NANOARROW_RETURN_NOT_OK(
+              WriteBytes(out, ArrowArrayViewGetBytesUnsafe(value, i)));
+        }
+        break;
+      }
+
+      default:
+        // Not supported
+        return ENOTSUP;
+    }
+
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write value as a quoted, escaped JSON string
+  ///
+  /// `"` and `\` are backslash-escaped and control characters (below 0x20)
+  /// are written as \uXXXX. Bytes at or above 0x80 are rejected with ENOTSUP;
+  /// anything written before the offending byte stays in the stream.
+  ArrowErrorCode WriteString(std::ostream& out, ArrowStringView value) {
+    out << R"(")";
+
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      // Classify the byte as unsigned so the checks below do not depend on
+      // whether plain char is signed on the target platform.
+      const unsigned char c = static_cast<unsigned char>(value.data[i]);
+      if (c == '"') {
+        out << R"(\")";
+      } else if (c == '\\') {
+        out << R"(\\)";
+      } else if (c >= 0x80) {
+        // Not supporting multibyte unicode yet
+        return ENOTSUP;
+      } else if (c < 0x20) {
+        // Data in the arrow-testing repo has a lot of content that requires
+        // escaping in this way (\uXXXX). JSON requires the escape for every
+        // control character, i.e. everything below 0x20.
+        char utf16_esc[7];
+        utf16_esc[6] = '\0';
+        snprintf(utf16_esc, sizeof(utf16_esc), R"(\u%04x)",
+                 static_cast<unsigned int>(c));
+        out << utf16_esc;
+      } else {
+        out << static_cast<char>(c);
+      }
+    }
+
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write value as a quoted string of two-digit uppercase hex bytes
+  ArrowErrorCode WriteBytes(std::ostream& out, ArrowBufferView value) {
+    out << R"(")";
+    char hex[3];
+    hex[2] = '\0';
+
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      snprintf(hex, sizeof(hex), "%02X", static_cast<int>(value.data.as_uint8[i]));
+      out << hex;
+    }
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write the children of field/value as a JSON array of columns
+  ///
+  /// The number of children is taken from field->n_children; value->children
+  /// is indexed in step with field->children.
+  ArrowErrorCode WriteChildren(std::ostream& out, const ArrowSchema* field,
+                               ArrowArrayView* value) {
+    if (field->n_children == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    out << "[";
+    NANOARROW_RETURN_NOT_OK(WriteColumn(out, field->children[0], value->children[0]));
+    for (int64_t i = 1; i < field->n_children; i++) {
+      out << ", ";
+      NANOARROW_RETURN_NOT_OK(WriteColumn(out, field->children[i], value->children[i]));
+    }
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Scoped switch of a stream to the classic locale and fixed notation
+  ///
+  /// The constructor saves the stream's locale, precision and format flags;
+  /// the destructor restores all three.
+  class LocalizedStream {
+   public:
+    explicit LocalizedStream(std::ostream& out) : out_(out) {
+      previous_locale_ = out.imbue(std::locale::classic());
+      previous_precision_ = out.precision();
+      fmt_flags_ = out.flags();
+      // Replace the whole floatfield rather than OR-ing in the fixed bit: a
+      // stream left in scientific mode would otherwise have both bits set,
+      // which does not select fixed notation.
+      out.setf(std::ios::fixed, std::ios::floatfield);
+    }
+
+    /// \brief Set the number of digits written after the decimal point
+    void SetFixed(int precision) { out_.precision(precision); }
+
+    ~LocalizedStream() {
+      out_.flags(fmt_flags_);
+      out_.precision(previous_precision_);
+      out_.imbue(previous_locale_);
+    }
+
+   private:
+    std::ostream& out_;
+    std::locale previous_locale_;
+    std::ios::fmtflags fmt_flags_;
+    std::streamsize previous_precision_;
+  };
+};
+
+/// @}
+
+} // namespace testing
+} // namespace nanoarrow
+
+#endif
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc
b/src/nanoarrow/nanoarrow_testing_test.cc
new file mode 100644
index 0000000..bde776c
--- /dev/null
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -0,0 +1,290 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <functional>
+#include <sstream>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "nanoarrow/nanoarrow_testing.hpp"
+
+using nanoarrow::testing::TestingJSONWriter;
+
+// Adapter with a plain-function signature so that TestingJSONWriter::WriteBatch
+// can be handed to TestColumn() as a function pointer.
+ArrowErrorCode WriteBatchJSON(std::ostream& out, const ArrowSchema* schema,
+                              ArrowArrayView* array_view) {
+  return TestingJSONWriter().WriteBatch(out, schema, array_view);
+}
+
+// Adapter with a plain-function signature so that TestingJSONWriter::WriteColumn
+// can be handed to TestColumn() as a function pointer.
+ArrowErrorCode WriteColumnJSON(std::ostream& out, const ArrowSchema* schema,
+                               ArrowArrayView* array_view) {
+  return TestingJSONWriter().WriteColumn(out, schema, array_view);
+}
+
+// Build an array and check the JSON a writer function produces for it.
+//
+// type_expr initializes the schema, append_expr appends values to an array
+// created from that schema, test_expr writes the finished array to a stream,
+// and expected_json is the exact text that stream must then contain. Any
+// failing step before the final comparison aborts the helper via ASSERT_EQ.
+void TestColumn(std::function<ArrowErrorCode(ArrowSchema*)> type_expr,
+                std::function<ArrowErrorCode(ArrowArray*)> append_expr,
+                ArrowErrorCode (*test_expr)(std::ostream&, const ArrowSchema*,
+                                            ArrowArrayView*),
+                const std::string& expected_json) {
+  // Step 1: describe the type under test
+  nanoarrow::UniqueSchema schema;
+  ASSERT_EQ(type_expr(schema.get()), NANOARROW_OK);
+
+  // Step 2: build the array with the caller-supplied content
+  nanoarrow::UniqueArray array;
+  ASSERT_EQ(ArrowArrayInitFromSchema(array.get(), schema.get(), nullptr),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(array.get()), NANOARROW_OK);
+  ASSERT_EQ(append_expr(array.get()), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayFinishBuildingDefault(array.get(), nullptr), NANOARROW_OK);
+
+  // Step 3: wrap the finished array in a view, which is what the writer takes
+  nanoarrow::UniqueArrayView array_view;
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), nullptr),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr),
+            NANOARROW_OK);
+
+  // Step 4: write and compare against the expected text
+  std::stringstream json_out;
+  ASSERT_EQ(test_expr(json_out, schema.get(), array_view.get()), NANOARROW_OK);
+  EXPECT_EQ(json_out.str(), expected_json);
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnNull) {  // NA columns: only "name" and "count" are written
+ TestColumn(  // unnamed column: "name" is JSON null
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_NA);
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": null, "count": 0})");
+
+ TestColumn(  // named column: "name" is a quoted string
+ [](ArrowSchema* schema) {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema,
NANOARROW_TYPE_NA));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, "colname"));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": "colname", "count": 0})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnInt) {  // INT32 columns: VALIDITY plus unquoted DATA
+ TestColumn(  // zero-length column: both arrays are empty
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": null, "count": 0, "VALIDITY": [], "DATA": []})");
+
+ // Without a null value: VALIDITY is expected to be all ones
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [0, 1,
0]})");
+
+ // With two null values: the null slots still appear in DATA (as 0 here)
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array, 2));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 3, "VALIDITY": [0, 0, 1], "DATA": [0, 0,
1]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnInt64) {  // INT64 values are expected as quoted strings in DATA
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT64);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["0", "1",
"0"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnUInt64) {  // UINT64 values are expected as quoted strings in DATA
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_UINT64);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["0", "1",
"0"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnFloat) {  // FLOAT values are expected with exactly three decimal places
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_FLOAT);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendDouble(array, 0.1234));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendDouble(array, 1.2345));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 2, "VALIDITY": [1, 1], "DATA": [0.123,
1.235]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnString) {  // STRING columns: unquoted OFFSET plus escaped DATA
+ TestColumn(  // plain ASCII values need no escaping
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("abc")));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("def")));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+ R"("OFFSET": [0, 3, 6], "DATA": ["abc", "def"]})");
+
+ // Check a string that requires escaping of characters \ and "
+ // (each is expected to come out prefixed with a backslash)
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView(R"("\)")));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 1, "VALIDITY": [1], )"
+ R"("OFFSET": [0, 2], "DATA": ["\"\\"]})");
+
+ // Check a string that requires unicode escape: the control character
+ // U+0001 is expected to be written as the six characters \u0001
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("\u0001")));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 1, "VALIDITY": [1], )"
+ R"("OFFSET": [0, 1], "DATA": ["\u0001"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnLargeString) {  // LARGE_STRING: OFFSET entries are expected as quoted strings
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_LARGE_STRING);
+ },
+ [](ArrowArray* array) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("abc")));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("def")));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+ R"("OFFSET": ["0", "3", "6"], "DATA": ["abc", "def"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnBinary) {  // BINARY values are expected as uppercase hex strings
+ TestColumn(
+ [](ArrowSchema* schema) {
+ return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_BINARY);
+ },
+ [](ArrowArray* array) {
+ uint8_t value[] = {0x00, 0x01, 0xff};  // expected as "0001FF"
+ ArrowBufferView value_view;
+ value_view.data.as_uint8 = value;
+ value_view.size_bytes = sizeof(value);
+
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array,
ArrowCharView("abc")));
+ NANOARROW_RETURN_NOT_OK(ArrowArrayAppendBytes(array, value_view));
+ return NANOARROW_OK;
+ },
+ &WriteColumnJSON,
+ R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+ R"("OFFSET": [0, 3, 6], "DATA": ["616263", "0001FF"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnStruct) {  // STRUCT columns: VALIDITY plus "children", no DATA
+ // Empty struct: "children" is expected to be an empty array
+ TestColumn(
+ [](ArrowSchema* schema) {
+ ArrowSchemaInit(schema);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 0));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": null, "count": 0, "VALIDITY": [], "children": []})");
+
+ // Non-empty struct: two named NA children, each written as a nested column
+ TestColumn(
+ [](ArrowSchema* schema) {
+ ArrowSchemaInit(schema);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 2));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_NA));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0],
"col1"));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_NA));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1],
"col2"));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": null, "count": 0, "VALIDITY": [], "children": [)"
+ R"({"name": "col1", "count": 0}, {"name": "col2", "count": 0}]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnDenseUnion) {  // DENSE_UNION: TYPE_ID, OFFSET and "children", no VALIDITY
+ // Empty union: all three arrays are expected to be empty
+ TestColumn(
+ [](ArrowSchema* schema) {
+ ArrowSchemaInit(schema);
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetTypeUnion(schema, NANOARROW_TYPE_DENSE_UNION, 0));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+ R"({"name": null, "count": 0, "TYPE_ID": [], "OFFSET": [], "children":
[]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestBatch) {  // exercises WriteBatchJSON rather than WriteColumnJSON
+ // Empty batch: a struct with no children is expected as count 0 and no columns
+ TestColumn(
+ [](ArrowSchema* schema) {
+ ArrowSchemaInit(schema);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 0));
+ return NANOARROW_OK;
+ },
+ [](ArrowArray* array) { return NANOARROW_OK; }, &WriteBatchJSON,
+ R"({"count": 0, "columns": []})");
+}