This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new cb2aa71  feat(extensions/nanoarrow_testing): Add nanoarrow_testing 
extension with testing JSON writer (#317)
cb2aa71 is described below

commit cb2aa71b385147d85eff91ed87a418cefaa3c8bd
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Nov 21 16:51:54 2023 -0400

    feat(extensions/nanoarrow_testing): Add nanoarrow_testing extension with 
testing JSON writer (#317)
    
    This PR adds the first few bits of infrastructure needed to implement
    integration testing:
    
    - nanoarrow_testing.hpp testing utility header
    - CI to build/run the tests
    - Batch + Column JSON writer for easy types to get things going
    
    The design of the testing helper library is intentionally header-only to
    facilitate dropping in to projects where needed (although I'm happy to
    change that if there are opinions otherwise).
    
    Some obvious follow-ups not included yet:
    
    - Implement ArrowSchema -> JSON
    - Support decimal and interval types
    - Schema/Array equality checking
    - JSON -> ArrowSchema
    - JSON -> ArrowArray
---
 CMakeLists.txt                                   |  13 +-
 docs/source/reference/index.rst                  |   1 +
 docs/source/reference/{index.rst => testing.rst} |  17 +-
 src/nanoarrow/nanoarrow_testing.hpp              | 389 +++++++++++++++++++++++
 src/nanoarrow/nanoarrow_testing_test.cc          | 290 +++++++++++++++++
 5 files changed, 699 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1147d1e..cffa590 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,11 +71,17 @@ if(NANOARROW_BUNDLE)
                        "${SRC_FILE_CONTENTS}")
   file(WRITE ${NANOARROW_H_TEMP} "${SRC_FILE_CONTENTS}")
 
-  # Copy nanoarrow.hpp too
+  # Copy nanoarrow.hpp
   set(NANOARROW_HPP_TEMP 
${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow.hpp)
   file(READ src/nanoarrow/nanoarrow.hpp SRC_FILE_CONTENTS)
   file(WRITE ${NANOARROW_HPP_TEMP} "${SRC_FILE_CONTENTS}")
 
+  # Copy nanoarrow_testing.hpp
+  set(NANOARROW_TESTING_HPP_TEMP
+      ${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow_testing.hpp)
+  file(READ src/nanoarrow/nanoarrow_testing.hpp SRC_FILE_CONTENTS)
+  file(WRITE ${NANOARROW_TESTING_HPP_TEMP} "${SRC_FILE_CONTENTS}")
+
   # Combine all source files into amalgamation/nanoarrow.c in the build 
directory
   if(NANOARROW_BUNDLE_AS_CPP)
     set(NANOARROW_C_TEMP 
${CMAKE_BINARY_DIR}/amalgamation/nanoarrow/nanoarrow.cc)
@@ -100,7 +106,7 @@ if(NANOARROW_BUNDLE)
 
   # Install the amalgamated header and source
   install(FILES ${NANOARROW_H_TEMP} ${NANOARROW_C_TEMP} ${NANOARROW_HPP_TEMP}
-          DESTINATION ".")
+                ${NANOARROW_TESTING_HPP_TEMP} DESTINATION ".")
 else()
   add_library(nanoarrow src/nanoarrow/array.c src/nanoarrow/schema.c
                         src/nanoarrow/array_stream.c src/nanoarrow/utils.c)
@@ -203,6 +209,7 @@ if(NANOARROW_BUILD_TESTS)
   add_executable(schema_test src/nanoarrow/schema_test.cc)
   add_executable(array_stream_test src/nanoarrow/array_stream_test.cc)
   add_executable(nanoarrow_hpp_test src/nanoarrow/nanoarrow_hpp_test.cc)
+  add_executable(nanoarrow_testing_test 
src/nanoarrow/nanoarrow_testing_test.cc)
 
   if(NANOARROW_CODE_COVERAGE)
     target_compile_options(coverage_config INTERFACE -O0 -g --coverage)
@@ -228,6 +235,7 @@ if(NANOARROW_BUILD_TESTS)
                         coverage_config)
   target_link_libraries(array_stream_test nanoarrow gtest_main coverage_config)
   target_link_libraries(nanoarrow_hpp_test nanoarrow gtest_main 
coverage_config)
+  target_link_libraries(nanoarrow_testing_test nanoarrow gtest_main 
coverage_config)
 
   include(GoogleTest)
   # Some users have reported a timeout with the default value of 5
@@ -239,4 +247,5 @@ if(NANOARROW_BUILD_TESTS)
   gtest_discover_tests(schema_test DISCOVERY_TIMEOUT 10)
   gtest_discover_tests(array_stream_test DISCOVERY_TIMEOUT 10)
   gtest_discover_tests(nanoarrow_hpp_test DISCOVERY_TIMEOUT 10)
+  gtest_discover_tests(nanoarrow_testing_test DISCOVERY_TIMEOUT 10)
 endif()
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
index 56c4b50..613aca7 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@@ -24,5 +24,6 @@ API Reference
    R API Reference <r>
    C API Reference <c>
    C++ API Reference <cpp>
+   Testing API Reference <testing>
    IPC Extension Reference <ipc>
    Device Extension Reference <device>
diff --git a/docs/source/reference/index.rst b/docs/source/reference/testing.rst
similarity index 79%
copy from docs/source/reference/index.rst
copy to docs/source/reference/testing.rst
index 56c4b50..7e36a32 100644
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/testing.rst
@@ -15,14 +15,13 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-API Reference
-=============
+Testing API Reference
+=====================
 
-.. toctree::
-   :maxdepth: 2
+.. doxygengroup:: nanoarrow_testing
 
-   R API Reference <r>
-   C API Reference <c>
-   C++ API Reference <cpp>
-   IPC Extension Reference <ipc>
-   Device Extension Reference <device>
+Integration testing JSON
+------------------------
+
+.. doxygengroup:: nanoarrow_testing-json
+    :members:
diff --git a/src/nanoarrow/nanoarrow_testing.hpp 
b/src/nanoarrow/nanoarrow_testing.hpp
new file mode 100644
index 0000000..aa6200a
--- /dev/null
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -0,0 +1,389 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <iostream>
+#include <string>
+
+#include "nanoarrow.hpp"
+
+#ifndef NANOARROW_TESTING_HPP_INCLUDED
+#define NANOARROW_TESTING_HPP_INCLUDED
+
+/// \defgroup nanoarrow_testing Nanoarrow Testing Helpers
+///
+/// Utilities for testing nanoarrow structures and functions.
+
+namespace nanoarrow {
+
+namespace testing {
+
+/// \defgroup nanoarrow_testing-json Integration test helpers
+///
+/// See testing format documentation for details of the JSON representation. 
This
+/// representation is not canonical but can be used to implement integration 
tests with
+/// other implementations.
+///
+/// @{
+
+/// \brief Writer for the Arrow integration testing JSON format
+class TestingJSONWriter {
+ public:
+  /// \brief Write a "batch" to out
+  ///
+  /// Creates output like `{"count": 123, "columns": [...]}`.
+  ArrowErrorCode WriteBatch(std::ostream& out, const ArrowSchema* schema,
+                            ArrowArrayView* value) {
+    // Make sure we have a struct
+    if (std::string(schema->format) != "+s") {
+      return EINVAL;
+    }
+
+    out << "{";
+
+    // Write length
+    out << R"("count": )" << value->length;
+
+    // Write children
+    out << R"(, "columns": )";
+    NANOARROW_RETURN_NOT_OK(WriteChildren(out, schema, value));
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write a column to out
+  ///
+  /// Creates output like `{"name": "col", "count": 123, "VALIDITY": [...], 
...}`.
+  ArrowErrorCode WriteColumn(std::ostream& out, const ArrowSchema* field,
+                             ArrowArrayView* value) {
+    out << "{";
+
+    // Write schema->name (may be null)
+    if (field->name == nullptr) {
+      out << R"("name": null)";
+    } else {
+      out << R"("name": ")" << field->name << R"(")";
+    }
+
+    // Write length
+    out << R"(, "count": )" << value->length;
+
+    // Write the VALIDITY element if required
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_NA:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        break;
+      default:
+        out << R"(, "VALIDITY": )";
+        WriteBitmap(out, value->buffer_views[0].data.as_uint8, value->length);
+        break;
+    }
+
+    // Write the TYPE_ID element if required
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_SPARSE_UNION:
+      case NANOARROW_TYPE_DENSE_UNION:
+        out << R"(, "TYPE_ID": )";
+        NANOARROW_RETURN_NOT_OK(WriteOffsetOrTypeID<int8_t>(out, 
value->buffer_views[0]));
+        break;
+      default:
+        break;
+    }
+
+    // Write the OFFSET element if required
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_BINARY:
+      case NANOARROW_TYPE_STRING:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_LIST:
+        out << R"(, "OFFSET": )";
+        NANOARROW_RETURN_NOT_OK(
+            WriteOffsetOrTypeID<int32_t>(out, value->buffer_views[1]));
+        break;
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_LARGE_STRING:
+        out << R"(, "OFFSET": )";
+        NANOARROW_RETURN_NOT_OK(
+            WriteOffsetOrTypeID<int64_t>(out, value->buffer_views[1]));
+        break;
+      default:
+        break;
+    }
+
+    // Write the DATA element if required
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_NA:
+      case NANOARROW_TYPE_STRUCT:
+      case NANOARROW_TYPE_LIST:
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        break;
+      default:
+        out << R"(, "DATA": )";
+        NANOARROW_RETURN_NOT_OK(WriteData(out, value));
+        break;
+    }
+
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_STRUCT:
+      case NANOARROW_TYPE_LIST:
+      case NANOARROW_TYPE_LARGE_LIST:
+      case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      case NANOARROW_TYPE_DENSE_UNION:
+      case NANOARROW_TYPE_SPARSE_UNION:
+        out << R"(, "children": )";
+        NANOARROW_RETURN_NOT_OK(WriteChildren(out, field, value));
+        break;
+      default:
+        break;
+    }
+
+    out << "}";
+    return NANOARROW_OK;
+  }
+
+ private:
+  void WriteBitmap(std::ostream& out, const uint8_t* bits, int64_t length) {
+    if (length == 0) {
+      out << "[]";
+      return;
+    }
+
+    out << "[";
+
+    if (bits == nullptr) {
+      out << "1";
+      for (int64_t i = 1; i < length; i++) {
+        out << ", 1";
+      }
+    } else {
+      out << static_cast<int32_t>(ArrowBitGet(bits, 0));
+      for (int64_t i = 1; i < length; i++) {
+        out << ", " << static_cast<int32_t>(ArrowBitGet(bits, i));
+      }
+    }
+
+    out << "]";
+  }
+
+  /// \brief Write a buffer of offsets or union type ids as a JSON array
+  ///
+  /// The buffer is interpreted as content.size_bytes / sizeof(T) values of type T.
+  /// Values of 64-bit types are written as quoted strings (e.g., "123456");
+  /// narrower values are written as plain JSON numbers (e.g., 123456). A buffer
+  /// that does not contain at least one complete value is written as `[]`.
+  /// Always returns NANOARROW_OK.
+  template <typename T>
+  ArrowErrorCode WriteOffsetOrTypeID(std::ostream& out, ArrowBufferView content) {
+    // Count whole elements first so that a buffer shorter than one T is treated as
+    // empty instead of reading values[0] past the end of the buffer.
+    const int64_t n_values = content.size_bytes / static_cast<int64_t>(sizeof(T));
+    if (n_values <= 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    const T* values = reinterpret_cast<const T*>(content.data.data);
+
+    out << "[";
+
+    if (sizeof(T) == sizeof(int64_t)) {
+      // Ensure int64s are quoted (i.e, "123456")
+      out << R"(")" << static_cast<int64_t>(values[0]) << R"(")";
+      for (int64_t i = 1; i < n_values; i++) {
+        out << R"(, ")" << static_cast<int64_t>(values[i]) << R"(")";
+      }
+    } else {
+      // No need to quote smaller ints (i.e., 123456). Every element, including the
+      // first, is widened before streaming: an int8_t type id would otherwise be
+      // inserted into the stream as a character rather than as a number.
+      out << static_cast<int64_t>(values[0]);
+      for (int64_t i = 1; i < n_values; i++) {
+        out << ", " << static_cast<int64_t>(values[i]);
+      }
+    }
+
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode WriteData(std::ostream& out, ArrowArrayView* value) {
+    if (value->length == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    out << "[";
+
+    switch (value->storage_type) {
+      case NANOARROW_TYPE_BOOL:
+      case NANOARROW_TYPE_INT8:
+      case NANOARROW_TYPE_UINT8:
+      case NANOARROW_TYPE_INT16:
+      case NANOARROW_TYPE_UINT16:
+      case NANOARROW_TYPE_INT32:
+      case NANOARROW_TYPE_UINT32:
+        // Regular JSON integers (i.e., 123456)
+        out << ArrowArrayViewGetIntUnsafe(value, 0);
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", " << ArrowArrayViewGetIntUnsafe(value, i);
+        }
+        break;
+      case NANOARROW_TYPE_INT64:
+        // Quoted integers to avoid overflow (i.e., "123456")
+        out << R"(")" << ArrowArrayViewGetIntUnsafe(value, 0) << R"(")";
+        for (int64_t i = 1; i < value->length; i++) {
+          out << R"(, ")" << ArrowArrayViewGetIntUnsafe(value, i) << R"(")";
+        }
+        break;
+      case NANOARROW_TYPE_UINT64:
+        // Quoted integers to avoid overflow (i.e., "123456")
+        out << R"(")" << ArrowArrayViewGetUIntUnsafe(value, 0) << R"(")";
+        for (int64_t i = 1; i < value->length; i++) {
+          out << R"(, ")" << ArrowArrayViewGetUIntUnsafe(value, i) << R"(")";
+        }
+        break;
+
+      case NANOARROW_TYPE_FLOAT:
+      case NANOARROW_TYPE_DOUBLE: {
+        // JSON number to 3 decimal places
+        LocalizedStream local_stream_opt(out);
+        local_stream_opt.SetFixed(3);
+
+        out << ArrowArrayViewGetDoubleUnsafe(value, 0);
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", " << ArrowArrayViewGetDoubleUnsafe(value, i);
+        }
+        break;
+      }
+
+      case NANOARROW_TYPE_STRING:
+      case NANOARROW_TYPE_LARGE_STRING:
+        NANOARROW_RETURN_NOT_OK(
+            WriteString(out, ArrowArrayViewGetStringUnsafe(value, 0)));
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", ";
+          NANOARROW_RETURN_NOT_OK(
+              WriteString(out, ArrowArrayViewGetStringUnsafe(value, i)));
+        }
+        break;
+
+      case NANOARROW_TYPE_BINARY:
+      case NANOARROW_TYPE_LARGE_BINARY:
+      case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
+        NANOARROW_RETURN_NOT_OK(WriteBytes(out, 
ArrowArrayViewGetBytesUnsafe(value, 0)));
+        for (int64_t i = 1; i < value->length; i++) {
+          out << ", ";
+          NANOARROW_RETURN_NOT_OK(
+              WriteBytes(out, ArrowArrayViewGetBytesUnsafe(value, i)));
+        }
+        break;
+      }
+
+      default:
+        // Not supported
+        return ENOTSUP;
+    }
+
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  /// \brief Write a string value as a quoted JSON string
+  ///
+  /// Escapes the double quote and the backslash, and writes ASCII control
+  /// characters (bytes below 0x20) as six-character unicode escapes. Returns
+  /// ENOTSUP when a byte outside the ASCII range is encountered (multibyte unicode
+  /// is not supported yet); in that case the opening quote and any preceding
+  /// characters have already been written to out. Returns NANOARROW_OK otherwise.
+  ArrowErrorCode WriteString(std::ostream& out, ArrowStringView value) {
+    out << R"(")";
+
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      char c = value.data[i];
+      // Classify the byte as unsigned so the range checks below do not depend on
+      // whether plain char is signed on the target platform.
+      unsigned char byte = static_cast<unsigned char>(c);
+      if (c == '"') {
+        out << R"(\")";
+      } else if (c == '\\') {
+        out << R"(\\)";
+      } else if (byte >= 0x80) {
+        // Not supporting multibyte unicode yet
+        return ENOTSUP;
+      } else if (byte < 0x20) {
+        // JSON requires every control character below U+0020 to be escaped. Data in
+        // the arrow-testing repo has a lot of content that requires escaping in this
+        // way (\uXXXX).
+        char utf16_esc[7];
+        utf16_esc[6] = '\0';
+        snprintf(utf16_esc, sizeof(utf16_esc), R"(\u%04x)",
+                 static_cast<unsigned int>(byte));
+        out << utf16_esc;
+      } else {
+        out << c;
+      }
+    }
+
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode WriteBytes(std::ostream& out, ArrowBufferView value) {
+    out << R"(")";
+    char hex[3];
+    hex[2] = '\0';
+
+    for (int64_t i = 0; i < value.size_bytes; i++) {
+      snprintf(hex, sizeof(hex), "%02X", 
static_cast<int>(value.data.as_uint8[i]));
+      out << hex;
+    }
+    out << R"(")";
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode WriteChildren(std::ostream& out, const ArrowSchema* field,
+                               ArrowArrayView* value) {
+    if (field->n_children == 0) {
+      out << "[]";
+      return NANOARROW_OK;
+    }
+
+    out << "[";
+    NANOARROW_RETURN_NOT_OK(WriteColumn(out, field->children[0], 
value->children[0]));
+    for (int64_t i = 1; i < field->n_children; i++) {
+      out << ", ";
+      NANOARROW_RETURN_NOT_OK(WriteColumn(out, field->children[i], 
value->children[i]));
+    }
+    out << "]";
+    return NANOARROW_OK;
+  }
+
+  class LocalizedStream {
+   public:
+    LocalizedStream(std::ostream& out) : out_(out) {
+      previous_locale_ = out.imbue(std::locale::classic());
+      previous_precision_ = out.precision();
+      fmt_flags_ = out.flags();
+      out.setf(out.fixed);
+    }
+
+    void SetFixed(int precision) { out_.precision(precision); }
+
+    ~LocalizedStream() {
+      out_.flags(fmt_flags_);
+      out_.precision(previous_precision_);
+      out_.imbue(previous_locale_);
+    }
+
+   private:
+    std::ostream& out_;
+    std::locale previous_locale_;
+    std::ios::fmtflags fmt_flags_;
+    std::streamsize previous_precision_;
+  };
+};
+
+/// @}
+
+}  // namespace testing
+}  // namespace nanoarrow
+
+#endif
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc 
b/src/nanoarrow/nanoarrow_testing_test.cc
new file mode 100644
index 0000000..bde776c
--- /dev/null
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -0,0 +1,290 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <functional>
+#include <sstream>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "nanoarrow/nanoarrow_testing.hpp"
+
+using nanoarrow::testing::TestingJSONWriter;
+
+ArrowErrorCode WriteBatchJSON(std::ostream& out, const ArrowSchema* schema,
+                              ArrowArrayView* array_view) {
+  TestingJSONWriter writer;
+  return writer.WriteBatch(out, schema, array_view);
+}
+
+ArrowErrorCode WriteColumnJSON(std::ostream& out, const ArrowSchema* schema,
+                               ArrowArrayView* array_view) {
+  TestingJSONWriter writer;
+  return writer.WriteColumn(out, schema, array_view);
+}
+
+void TestColumn(std::function<ArrowErrorCode(ArrowSchema*)> type_expr,
+                std::function<ArrowErrorCode(ArrowArray*)> append_expr,
+                ArrowErrorCode (*test_expr)(std::ostream&, const ArrowSchema*,
+                                            ArrowArrayView*),
+                const std::string& expected_json) {
+  std::stringstream ss;
+
+  nanoarrow::UniqueSchema schema;
+  ASSERT_EQ(type_expr(schema.get()), NANOARROW_OK);
+  nanoarrow::UniqueArray array;
+  ASSERT_EQ(ArrowArrayInitFromSchema(array.get(), schema.get(), nullptr), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(array.get()), NANOARROW_OK);
+  ASSERT_EQ(append_expr(array.get()), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayFinishBuildingDefault(array.get(), nullptr), 
NANOARROW_OK);
+
+  nanoarrow::UniqueArrayView array_view;
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), 
nullptr),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), 
NANOARROW_OK);
+
+  ASSERT_EQ(test_expr(ss, schema.get(), array_view.get()), NANOARROW_OK);
+  EXPECT_EQ(ss.str(), expected_json);
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnNull) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_NA);
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": null, "count": 0})");
+
+  TestColumn(
+      [](ArrowSchema* schema) {
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, 
NANOARROW_TYPE_NA));
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema, "colname"));
+        return NANOARROW_OK;
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": "colname", "count": 0})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnInt) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": null, "count": 0, "VALIDITY": [], "DATA": []})");
+
+  // Without a null value
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [0, 1, 
0]})");
+
+  // With two null values
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array, 2));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 3, "VALIDITY": [0, 0, 1], "DATA": [0, 0, 
1]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnInt64) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT64);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["0", "1", 
"0"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnUInt64) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_UINT64);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 1));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array, 0));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["0", "1", 
"0"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnFloat) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_FLOAT);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendDouble(array, 0.1234));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendDouble(array, 1.2345));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 2, "VALIDITY": [1, 1], "DATA": [0.123, 
1.235]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnString) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("abc")));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("def")));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+      R"("OFFSET": [0, 3, 6], "DATA": ["abc", "def"]})");
+
+  // Check a string that requires escaping of characters \ and "
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView(R"("\)")));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 1, "VALIDITY": [1], )"
+      R"("OFFSET": [0, 2], "DATA": ["\"\\"]})");
+
+  // Check a string that requires unicode escape
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("\u0001")));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 1, "VALIDITY": [1], )"
+      R"("OFFSET": [0, 1], "DATA": ["\u0001"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnLargeString) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_LARGE_STRING);
+      },
+      [](ArrowArray* array) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("abc")));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("def")));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+      R"("OFFSET": ["0", "3", "6"], "DATA": ["abc", "def"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnBinary) {
+  TestColumn(
+      [](ArrowSchema* schema) {
+        return ArrowSchemaInitFromType(schema, NANOARROW_TYPE_BINARY);
+      },
+      [](ArrowArray* array) {
+        uint8_t value[] = {0x00, 0x01, 0xff};
+        ArrowBufferView value_view;
+        value_view.data.as_uint8 = value;
+        value_view.size_bytes = sizeof(value);
+
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendString(array, 
ArrowCharView("abc")));
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendBytes(array, value_view));
+        return NANOARROW_OK;
+      },
+      &WriteColumnJSON,
+      R"({"name": null, "count": 2, "VALIDITY": [1, 1], )"
+      R"("OFFSET": [0, 3, 6], "DATA": ["616263", "0001FF"]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnStruct) {
+  // Empty struct
+  TestColumn(
+      [](ArrowSchema* schema) {
+        ArrowSchemaInit(schema);
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 0));
+        return NANOARROW_OK;
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": null, "count": 0, "VALIDITY": [], "children": []})");
+
+  // Non-empty struct
+  TestColumn(
+      [](ArrowSchema* schema) {
+        ArrowSchemaInit(schema);
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 2));
+        NANOARROW_RETURN_NOT_OK(
+            ArrowSchemaSetType(schema->children[0], NANOARROW_TYPE_NA));
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], 
"col1"));
+        NANOARROW_RETURN_NOT_OK(
+            ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_NA));
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1], 
"col2"));
+        return NANOARROW_OK;
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": null, "count": 0, "VALIDITY": [], "children": [)"
+      R"({"name": "col1", "count": 0}, {"name": "col2", "count": 0}]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestColumnDenseUnion) {
+  // Empty union
+  TestColumn(
+      [](ArrowSchema* schema) {
+        ArrowSchemaInit(schema);
+        NANOARROW_RETURN_NOT_OK(
+            ArrowSchemaSetTypeUnion(schema, NANOARROW_TYPE_DENSE_UNION, 0));
+        return NANOARROW_OK;
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteColumnJSON,
+      R"({"name": null, "count": 0, "TYPE_ID": [], "OFFSET": [], "children": 
[]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestBatch) {
+  // Empty batch
+  TestColumn(
+      [](ArrowSchema* schema) {
+        ArrowSchemaInit(schema);
+        NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, 0));
+        return NANOARROW_OK;
+      },
+      [](ArrowArray* array) { return NANOARROW_OK; }, &WriteBatchJSON,
+      R"({"count": 0, "columns": []})");
+}

Reply via email to