This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e0329f4  feat: Add integration testing reader for Column (#325)
e0329f4 is described below

commit e0329f4e9188d198680439cba74006173b7c02aa
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Nov 29 15:37:43 2023 -0400

    feat: Add integration testing reader for Column (#325)
---
 .../src/nanoarrow/nanoarrow_device.c               |   2 +-
 .../src/nanoarrow/nanoarrow_ipc_decoder.c          |   4 +-
 python/.gitignore                                  |   1 +
 python/bootstrap.py                                |   5 +
 src/nanoarrow/array.c                              |  33 +-
 src/nanoarrow/array_inline.h                       |   6 +-
 src/nanoarrow/array_test.cc                        |   4 +-
 src/nanoarrow/nanoarrow_testing.hpp                | 451 ++++++++++++++++++++-
 src/nanoarrow/nanoarrow_testing_test.cc            | 182 +++++++--
 src/nanoarrow/nanoarrow_types.h                    |  20 +-
 10 files changed, 640 insertions(+), 68 deletions(-)

diff --git a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
index 4be7a93..c4df3d1 100644
--- a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
+++ b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
@@ -408,7 +408,7 @@ static ArrowErrorCode ArrowDeviceArrayViewCopyInternal(struct ArrowDevice* devic
   dst->offset = src->offset;
   dst->null_count = src->null_count;
 
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     if (src->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
       break;
     }
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
index 2fac3c7..9e4a6c9 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
@@ -1155,7 +1155,7 @@ static void ArrowIpcDecoderInitFields(struct ArrowIpcField* fields,
   field->array = array;
   field->buffer_offset = *n_buffers;
 
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     *n_buffers += array_view->layout.buffer_type[i] != NANOARROW_BUFFER_TYPE_NONE;
   }
 
@@ -1524,7 +1524,7 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
   array_view->null_count = ns(FieldNode_null_count(field));
   setter->field_i += 1;
 
-  for (int64_t i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
       break;
     }
diff --git a/python/.gitignore b/python/.gitignore
index d30e198..0927980 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -19,6 +19,7 @@
 src/nanoarrow/nanoarrow.c
 src/nanoarrow/nanoarrow.h
 src/nanoarrow/nanoarrow_device.h
+src/nanoarrow/nanoarrow_testing.hpp
 src/nanoarrow/nanoarrow_c.pxd
 src/nanoarrow/*.c
 
diff --git a/python/bootstrap.py b/python/bootstrap.py
index 9e54cb7..bbb5d66 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -36,6 +36,9 @@ class NanoarrowPxdGenerator:
         # Strip comments
         content = self.re_comment.sub("", content)
 
+        # Replace NANOARROW_MAX_FIXED_BUFFERS with its value
+        content = self.re_max_buffers.sub("3", content)
+
         # Find types and function definitions
         types = self._find_types(content)
         func_defs = self._find_func_defs(content)
@@ -59,6 +62,7 @@ class NanoarrowPxdGenerator:
             output.write(b"\n")
             output.write(b"    ctypedef int ArrowErrorCode\n")
             output.write(b"    cdef int NANOARROW_OK\n")
+            output.write(b"    cdef int NANOARROW_MAX_FIXED_BUFFERS\n")
             output.write(b"\n")
 
             for type in types_cython:
@@ -71,6 +75,7 @@ class NanoarrowPxdGenerator:
 
     def _define_regexes(self):
         self.re_comment = re.compile(r"\s*//[^\n]*")
+        self.re_max_buffers = re.compile(r"NANOARROW_MAX_FIXED_BUFFERS")
         self.re_type = re.compile(
             r"(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}"
         )
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 1e59777..a0e711e 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -437,7 +437,7 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
   struct ArrowArrayPrivateData* private_data =
       (struct ArrowArrayPrivateData*)array->private_data;
 
-  for (int64_t i = 0; i < 3; i++) {
+  for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
   }
 
@@ -621,7 +621,7 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
 }
 
 void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length) {
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
 
     switch (array_view->layout.buffer_type[i]) {
@@ -671,26 +671,13 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
 static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
                                           struct ArrowArray* array,
                                           struct ArrowError* error) {
-  // Check length and offset
-  if (array->offset < 0) {
-    ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld",
-                  (long)array->offset);
-    return EINVAL;
-  }
-
-  if (array->length < 0) {
-    ArrowErrorSet(error, "Expected array length >= 0 but found array length of %ld",
-                  (long)array->length);
-    return EINVAL;
-  }
-
   array_view->array = array;
   array_view->offset = array->offset;
   array_view->length = array->length;
   array_view->null_count = array->null_count;
 
   int64_t buffers_required = 0;
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
       break;
     }
@@ -749,6 +736,18 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
 
 static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
                                          struct ArrowError* error) {
+  if (array_view->length < 0) {
+    ArrowErrorSet(error, "Expected length >= 0 but found length %ld",
+                  (long)array_view->length);
+    return EINVAL;
+  }
+
+  if (array_view->offset < 0) {
+    ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld",
+                  (long)array_view->offset);
+    return EINVAL;
+  }
+
   // Calculate buffer sizes that do not require buffer access. If marked as
   // unknown, assign the buffer size; otherwise, validate it.
   int64_t offset_plus_length = array_view->offset + array_view->length;
@@ -1103,7 +1102,7 @@ static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
 
 static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
                                       struct ArrowError* error) {
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     switch (array_view->layout.buffer_type[i]) {
       case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
         if (array_view->layout.element_size_bits[i] == 32) {
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 96fdf57..c089d2b 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -140,7 +140,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
   }
 
   // Initialize any data offset buffer with a single zero
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
         private_data->layout.element_size_bits[i] == 64) {
       NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0));
@@ -163,7 +163,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
 }
 
 static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
-  for (int64_t i = 0; i < 3; i++) {
+  for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
     NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
   }
@@ -278,7 +278,7 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a
   struct ArrowBuffer* buffer;
   int64_t size_bytes;
 
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
     buffer = ArrowArrayBuffer(array, i);
     size_bytes = private_data->layout.element_size_bits[i] / 8;
 
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 6585957..6a22008 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -1589,12 +1589,12 @@ TEST(ArrayTest, ArrayViewTestBasic) {
   // Expect error for bad offset + length
   array.length = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
-  EXPECT_STREQ(error.message, "Expected array length >= 0 but found array length of -1");
+  EXPECT_STREQ(error.message, "Expected length >= 0 but found length -1");
   array.length = 3;
 
   array.offset = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
-  EXPECT_STREQ(error.message, "Expected array offset >= 0 but found array offset of -1");
+  EXPECT_STREQ(error.message, "Expected offset >= 0 but found offset -1");
   array.offset = 0;
 
   // Expect error for the wrong number of buffers
diff --git a/src/nanoarrow/nanoarrow_testing.hpp b/src/nanoarrow/nanoarrow_testing.hpp
index 454a6da..103f22e 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <iostream>
+#include <limits>
 #include <sstream>
 #include <string>
 
@@ -439,7 +440,7 @@ class TestingJSONWriter {
       }
     } else {
       // No need to quote smaller ints (i.e., 123456)
-      out << values[0];
+      out << static_cast<int64_t>(values[0]);
       for (int64_t i = 1; i < n_values; i++) {
         out << ", " << static_cast<int64_t>(values[i]);
       }
@@ -621,36 +622,76 @@ class TestingJSONReader {
   using json = nlohmann::json;
 
  public:
-  ArrowErrorCode ReadSchema(const std::string& value, ArrowSchema* out,
+  /// \brief Read JSON representing a Schema
+  ///
+  /// Reads a JSON object in the form `{"fields": [...], "metadata": [...]}`,
+  /// propagating `out` on success.
+  ArrowErrorCode ReadSchema(const std::string& schema_json, ArrowSchema* out,
                             ArrowError* error = nullptr) {
     try {
-      auto obj = json::parse(value);
+      auto obj = json::parse(schema_json);
       nanoarrow::UniqueSchema schema;
 
       NANOARROW_RETURN_NOT_OK(SetSchema(schema.get(), obj, error));
       ArrowSchemaMove(schema.get(), out);
       return NANOARROW_OK;
-    } catch (std::exception& e) {
+    } catch (json::exception& e) {
       ArrowErrorSet(error, "Exception in TestingJSONReader::ReadSchema(): %s", e.what());
       return EINVAL;
     }
   }
 
-  ArrowErrorCode ReadField(const std::string& value, ArrowSchema* out,
+  /// \brief Read JSON representing a Field
+  ///
+  /// Read a JSON object in the form `{"name" : "col", "type": {...}, ...}`,
+  /// propagating `out` on success.
+  ArrowErrorCode ReadField(const std::string& field_json, ArrowSchema* out,
                            ArrowError* error = nullptr) {
     try {
-      auto obj = json::parse(value);
+      auto obj = json::parse(field_json);
       nanoarrow::UniqueSchema schema;
 
       NANOARROW_RETURN_NOT_OK(SetField(schema.get(), obj, error));
       ArrowSchemaMove(schema.get(), out);
       return NANOARROW_OK;
-    } catch (std::exception& e) {
+    } catch (json::exception& e) {
       ArrowErrorSet(error, "Exception in TestingJSONReader::ReadField(): %s", e.what());
       return EINVAL;
     }
   }
 
+  /// \brief Read JSON representing a Column
+  ///
+  /// Read a JSON object in the form
+  /// `{"name": "col", "count": 123, "VALIDITY": [...], ...}`, propagating
+  /// `out` on success.
+  ArrowErrorCode ReadColumn(const std::string& column_json, const ArrowSchema* schema,
+                            ArrowArray* out, ArrowError* error = nullptr) {
+    try {
+      auto obj = json::parse(column_json);
+
+      // ArrowArrayView to enable validation
+      nanoarrow::UniqueArrayView array_view;
+      NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(
+          array_view.get(), const_cast<ArrowSchema*>(schema), error));
+
+      // ArrowArray to hold memory
+      nanoarrow::UniqueArray array;
+      NANOARROW_RETURN_NOT_OK(
+          ArrowArrayInitFromSchema(array.get(), const_cast<ArrowSchema*>(schema), error));
+
+      // Parse the JSON into the array
+      NANOARROW_RETURN_NOT_OK(SetArrayColumn(obj, array_view.get(), array.get(), error));
+
+      // Return the result
+      ArrowArrayMove(array.get(), out);
+      return NANOARROW_OK;
+    } catch (json::exception& e) {
+      ArrowErrorSet(error, "Exception in TestingJSONReader::ReadColumn(): %s", e.what());
+      return EINVAL;
+    }
+  }
+
  private:
   ArrowErrorCode SetSchema(ArrowSchema* schema, const json& value, ArrowError* error) {
     NANOARROW_RETURN_NOT_OK(
@@ -1053,6 +1094,402 @@ class TestingJSONReader {
     return NANOARROW_OK;
   }
 
+  ArrowErrorCode SetArrayColumn(const json& value, ArrowArrayView* array_view,
+                                ArrowArray* array, ArrowError* error,
+                                const std::string& parent_error_prefix = "") {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_object(), error, "Expected Column to be a JSON object"));
+
+    // Check + resolve name early to generate better error messages
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("name"), error, "Column missing key 'name'"));
+
+    const auto& name = value["name"];
+    NANOARROW_RETURN_NOT_OK(Check(name.is_null() || name.is_string(), error,
+                                  "Column name must be string or null"));
+
+    std::string error_prefix;
+    if (name.is_string()) {
+      error_prefix = parent_error_prefix + "-> Column '" + name.get<std::string>() + "' ";
+    } else {
+      error_prefix = parent_error_prefix + "-> Column <name is null> ";
+    }
+
+    // Check, resolve, and recurse children
+    NANOARROW_RETURN_NOT_OK(
+        Check(array_view->n_children == 0 || value.contains("children"), error,
+              error_prefix + "missing key children"));
+
+    if (value.contains("children")) {
+      const auto& children = value["children"];
+      NANOARROW_RETURN_NOT_OK(
+          Check(children.is_array(), error, error_prefix + "children must be array"));
+      NANOARROW_RETURN_NOT_OK(Check(children.size() == array_view->n_children, error,
+                                    error_prefix + "children has incorrect size"));
+
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        NANOARROW_RETURN_NOT_OK(SetArrayColumn(children[i], array_view->children[i],
+                                               array->children[i], error, error_prefix));
+      }
+    }
+
+    // Build buffers
+    for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+      NANOARROW_RETURN_NOT_OK(
+          PrefixError(SetArrayColumnBuffers(value, array_view, array, i, error), error,
+                      error_prefix));
+    }
+
+    // Check + resolve count
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.contains("count"), error, error_prefix + "missing key 'count'"));
+    const auto& count = value["count"];
+    NANOARROW_RETURN_NOT_OK(
+        Check(count.is_number_integer(), error, error_prefix + "count must be integer"));
+    array_view->length = count.get<int64_t>();
+
+    // Set ArrayView buffer views. This is because ArrowArrayInitFromSchema() doesn't
+    // support custom type ids for unions but the ArrayView does (otherwise
+    // ArrowArrayFinishBuilding() would work).
+    for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+      ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+      ArrowBufferView* buffer_view = array_view->buffer_views + i;
+      buffer_view->data.as_uint8 = buffer->data;
+      buffer_view->size_bytes = buffer->size_bytes;
+    }
+
+    // Validate the array view
+    NANOARROW_RETURN_NOT_OK(PrefixError(
+        ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL, error), error,
+        error_prefix + "failed to validate: "));
+
+    // Flush length and buffer pointers to the Array
+    array->length = array_view->length;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE, nullptr), error);
+
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView* array_view,
+                                       ArrowArray* array, int buffer_i,
+                                       ArrowError* error) {
+    ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+    switch (array_view->layout.buffer_type[buffer_i]) {
+      case NANOARROW_BUFFER_TYPE_VALIDITY: {
+        NANOARROW_RETURN_NOT_OK(
+            Check(value.contains("VALIDITY"), error, "missing key 'VALIDITY'"));
+        const auto& validity = value["VALIDITY"];
+        NANOARROW_RETURN_NOT_OK(
+            SetBufferBitmap(validity, ArrowArrayValidityBitmap(array), error));
+        break;
+      }
+      case NANOARROW_BUFFER_TYPE_TYPE_ID: {
+        NANOARROW_RETURN_NOT_OK(
+            Check(value.contains("TYPE_ID"), error, "missing key 'TYPE_ID'"));
+        const auto& type_id = value["TYPE_ID"];
+        NANOARROW_RETURN_NOT_OK(SetBufferInt<int8_t>(type_id, buffer, error));
+        break;
+      }
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET: {
+        NANOARROW_RETURN_NOT_OK(
+            Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+        const auto& offset = value["OFFSET"];
+        NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+        break;
+      }
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {
+        NANOARROW_RETURN_NOT_OK(
+            Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+        const auto& offset = value["OFFSET"];
+
+        if (array_view->layout.element_size_bits[buffer_i] == 32) {
+          NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+        } else {
+          NANOARROW_RETURN_NOT_OK(SetBufferInt<int64_t>(offset, buffer, error));
+        }
+        break;
+      }
+
+      case NANOARROW_BUFFER_TYPE_DATA: {
+        NANOARROW_RETURN_NOT_OK(
+            Check(value.contains("DATA"), error, "missing key 'DATA'"));
+        const auto& data = value["DATA"];
+
+        switch (array_view->storage_type) {
+          case NANOARROW_TYPE_BOOL: {
+            nanoarrow::UniqueBitmap bitmap;
+            NANOARROW_RETURN_NOT_OK(SetBufferBitmap(data, bitmap.get(), error));
+            ArrowBufferMove(&bitmap->buffer, buffer);
+            return NANOARROW_OK;
+          }
+          case NANOARROW_TYPE_INT8:
+            return SetBufferInt<int8_t>(data, buffer, error);
+          case NANOARROW_TYPE_UINT8:
+            return SetBufferInt<uint8_t>(data, buffer, error);
+          case NANOARROW_TYPE_INT16:
+            return SetBufferInt<int16_t>(data, buffer, error);
+          case NANOARROW_TYPE_UINT16:
+            return SetBufferInt<uint16_t>(data, buffer, error);
+          case NANOARROW_TYPE_INT32:
+            return SetBufferInt<int32_t>(data, buffer, error);
+          case NANOARROW_TYPE_UINT32:
+            return SetBufferInt<uint32_t>(data, buffer, error);
+          case NANOARROW_TYPE_INT64:
+            return SetBufferInt<int64_t>(data, buffer, error);
+          case NANOARROW_TYPE_UINT64:
+            return SetBufferInt<uint64_t, uint64_t>(data, buffer, error);
+
+          case NANOARROW_TYPE_FLOAT:
+            return SetBufferFloatingPoint<float>(data, buffer, error);
+          case NANOARROW_TYPE_DOUBLE:
+            return SetBufferFloatingPoint<double>(data, buffer, error);
+
+          case NANOARROW_TYPE_STRING:
+            return SetBufferString<int32_t>(data, ArrowArrayBuffer(array, buffer_i - 1),
+                                            buffer, error);
+          case NANOARROW_TYPE_LARGE_STRING:
+            return SetBufferString<int64_t>(data, ArrowArrayBuffer(array, buffer_i - 1),
+                                            buffer, error);
+          case NANOARROW_TYPE_BINARY:
+            return SetBufferBinary<int32_t>(data, ArrowArrayBuffer(array, buffer_i - 1),
+                                            buffer, error);
+          case NANOARROW_TYPE_LARGE_BINARY:
+            return SetBufferBinary<int64_t>(data, ArrowArrayBuffer(array, buffer_i - 1),
+                                            buffer, error);
+          case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+            return SetBufferFixedSizeBinary(
+                data, buffer, array_view->layout.element_size_bits[buffer_i] / 8, error);
+
+          default:
+            ArrowErrorSet(error, "storage type %s DATA buffer not supported",
+                          ArrowTypeString(array_view->storage_type));
+            return ENOTSUP;
+        }
+        break;
+      }
+      case NANOARROW_BUFFER_TYPE_NONE:
+        break;
+    }
+
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode SetBufferBitmap(const json& value, ArrowBitmap* bitmap,
+                                 ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_array(), error, "bitmap buffer must be array"));
+
+    for (const auto& item : value) {
+      // Some example files write bitmaps as [true, false, true] but the documentation
+      // says [1, 0, 1]. Accept both for simplicity.
+      NANOARROW_RETURN_NOT_OK(Check(item.is_boolean() || item.is_number_integer(), error,
+                                    "bitmap item must be bool or integer"));
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBitmapAppend(bitmap, item.get<int>(), 1),
+                                         error);
+    }
+
+    return NANOARROW_OK;
+  }
+
+  template <typename T, typename BiggerT = int64_t>
+  ArrowErrorCode SetBufferInt(const json& value, ArrowBuffer* buffer, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(Check(value.is_array(), error, "int buffer must be array"));
+
+    for (const auto& item : value) {
+      // NANOARROW_RETURN_NOT_OK() interacts poorly with multiple template args
+      ArrowErrorCode result = SetBufferIntItem<T, BiggerT>(item, buffer, error);
+      NANOARROW_RETURN_NOT_OK(result);
+    }
+
+    return NANOARROW_OK;
+  }
+
+  template <typename T, typename BiggerT = int64_t>
+  ArrowErrorCode SetBufferIntItem(const json& item, ArrowBuffer* buffer,
+                                  ArrowError* error) {
+    if (item.is_string()) {
+      try {
+        // The JSON parser here can handle up to 2^64 - 1
+        auto item_int = json::parse(item.get<std::string>());
+        return SetBufferIntItem<T, BiggerT>(item_int, buffer, error);
+      } catch (json::parse_error& e) {
+        ArrowErrorSet(error,
+                      "integer buffer item encoded as string must parse as integer: %s",
+                      item.dump().c_str());
+        return EINVAL;
+      }
+    }
+
+    NANOARROW_RETURN_NOT_OK(
+        Check(item.is_number_integer(), error,
+              "integer buffer item must be integer number or string"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(std::numeric_limits<T>::is_signed || item.is_number_unsigned(), error,
+              "expected unsigned integer buffer item but found signed integer '" +
+                  item.dump() + "'"));
+
+    auto item_int = item.get<BiggerT>();
+
+    NANOARROW_RETURN_NOT_OK(
+        Check(item_int >= std::numeric_limits<T>::lowest() &&
+                  item_int <= std::numeric_limits<T>::max(),
+              error, "integer buffer item '" + item.dump() + "' outside type limits"));
+
+    T buffer_value = item_int;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+
+    return NANOARROW_OK;
+  }
+
+  template <typename T>
+  ArrowErrorCode SetBufferFloatingPoint(const json& value, ArrowBuffer* buffer,
+                                        ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_array(), error, "floatingpoint buffer must be array"));
+
+    for (const auto& item : value) {
+      NANOARROW_RETURN_NOT_OK(
+          Check(item.is_number(), error, "floatingpoint buffer item must be number"));
+      double item_dbl = item.get<double>();
+
+      NANOARROW_RETURN_NOT_OK(Check(
+          item_dbl >= std::numeric_limits<T>::lowest() &&
+              item_dbl <= std::numeric_limits<T>::max(),
+          error, "floatingpoint buffer item '" + item.dump() + "' outside type limits"));
+
+      T buffer_value = item_dbl;
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+    }
+
+    return NANOARROW_OK;
+  }
+
+  template <typename T>
+  ArrowErrorCode SetBufferString(const json& value, ArrowBuffer* offsets,
+                                 ArrowBuffer* data, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_array(), error, "utf8 data buffer must be array"));
+
+    // Check offsets against values
+    const T* expected_offset = reinterpret_cast<const T*>(offsets->data);
+    NANOARROW_RETURN_NOT_OK(Check(
+        offsets->size_bytes == ((value.size() + 1) * sizeof(T)), error,
+        "Expected offset buffer with " + std::to_string(value.size()) + " elements"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(*expected_offset++ == 0, error, "first offset must be zero"));
+
+    int64_t last_offset = 0;
+
+    for (const auto& item : value) {
+      NANOARROW_RETURN_NOT_OK(
+          Check(item.is_string(), error, "utf8 data buffer item must be string"));
+      auto item_str = item.get<std::string>();
+
+      // Append data
+      NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+          ArrowBufferAppend(data, reinterpret_cast<const uint8_t*>(item_str.data()),
+                            item_str.size()),
+          error);
+
+      // Check offset
+      last_offset += item_str.size();
+      NANOARROW_RETURN_NOT_OK(Check(*expected_offset++ == last_offset, error,
+                                    "Expected offset value " +
+                                        std::to_string(last_offset) +
+                                        " at utf8 data buffer item " + item.dump()));
+    }
+
+    return NANOARROW_OK;
+  }
+
+  template <typename T>
+  ArrowErrorCode SetBufferBinary(const json& value, ArrowBuffer* offsets,
+                                 ArrowBuffer* data, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_array(), error, "binary data buffer must be array"));
+
+    // Check offsets against values if not fixed size
+    const T* expected_offset = reinterpret_cast<const T*>(offsets->data);
+    NANOARROW_RETURN_NOT_OK(Check(
+        offsets->size_bytes == ((value.size() + 1) * sizeof(T)), error,
+        "Expected offset buffer with " + std::to_string(value.size()) + " elements"));
+    NANOARROW_RETURN_NOT_OK(
+        Check(*expected_offset++ == 0, error, "first offset must be zero"));
+
+    for (const auto& item : value) {
+      NANOARROW_RETURN_NOT_OK(AppendBinaryElement(item, data, error));
+
+      // Check offset
+      NANOARROW_RETURN_NOT_OK(Check(*expected_offset++ == data->size_bytes, error,
+                                    "Expected offset value " +
+                                        std::to_string(data->size_bytes) +
+                                        " at binary data buffer item " + item.dump()));
+    }
+
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode SetBufferFixedSizeBinary(const json& value, ArrowBuffer* data,
+                                          int64_t fixed_size, ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(value.is_array(), error, "binary data buffer must be array"));
+
+    int64_t last_offset = 0;
+
+    for (const auto& item : value) {
+      NANOARROW_RETURN_NOT_OK(AppendBinaryElement(item, data, error));
+      int64_t item_size_bytes = data->size_bytes - last_offset;
+
+      NANOARROW_RETURN_NOT_OK(Check(item_size_bytes == fixed_size, error,
+                                    "Expected fixed size binary value of size " +
+                                        std::to_string(fixed_size) +
+                                        " at binary data buffer item " + item.dump()));
+      last_offset = data->size_bytes;
+    }
+
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode AppendBinaryElement(const json& item, ArrowBuffer* data,
+                                     ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(
+        Check(item.is_string(), error, "binary data buffer item must be string"));
+    auto item_str = item.get<std::string>();
+
+    int64_t item_size_bytes = item_str.size() / 2;
+    NANOARROW_RETURN_NOT_OK(Check((item_size_bytes * 2) == item_str.size(), error,
+                                  "binary data buffer item must have even size"));
+
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(data, item_size_bytes), error);
+    for (int64_t i = 0; i < item_str.size(); i += 2) {
+      std::string byte_hex = item_str.substr(i, 2);
+      char* end_ptr;
+      uint8_t byte = std::strtoul(byte_hex.data(), &end_ptr, 16);
+      NANOARROW_RETURN_NOT_OK(
+          Check(end_ptr == (byte_hex.data() + 2), error,
+                "binary data buffer item must contain a valid hex-encoded byte string"));
+
+      data->data[data->size_bytes] = byte;
+      data->size_bytes++;
+    }
+
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorCode PrefixError(ArrowErrorCode value, ArrowError* error,
+                             const std::string& prefix) {
+    if (value != NANOARROW_OK && error != nullptr) {
+      std::string msg = prefix + error->message;
+      ArrowErrorSet(error, "%s", msg.c_str());
+    }
+
+    return value;
+  }
+
   ArrowErrorCode Check(bool value, ArrowError* error, const std::string& err) {
     if (value) {
       return NANOARROW_OK;
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc b/src/nanoarrow/nanoarrow_testing_test.cc
index 73c8f79..0b8f733 100644
--- a/src/nanoarrow/nanoarrow_testing_test.cc
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -747,26 +747,87 @@ TEST(NanoarrowTestingTest, 
NanoarrowTestingTestReadFieldNested) {
   EXPECT_STREQ(schema->children[0]->format, "n");
 }
 
-void TestFieldRoundtrip(const std::string& field_json) {
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadColumnBasic) {
+  nanoarrow::UniqueSchema schema;
+  nanoarrow::UniqueArray array;
+  ArrowError error;
+  error.message[0] = '\0';  // empty message so the failure output below is well-defined
+
+  TestingJSONReader reader;
+
+  ASSERT_EQ(
+      reader.ReadField(
+          R"({"name": null, "nullable": true, "type": {"name": "null"}, 
"children": [], "metadata": null})",
+          schema.get()),
+      NANOARROW_OK);
+
+  ASSERT_EQ(reader.ReadColumn(R"({"name": null, "count": 2})", schema.get(), 
array.get(),
+                              &error),
+            NANOARROW_OK)
+      << error.message;
+  EXPECT_EQ(array->length, 2);
+
+  // Check invalid JSON: an unterminated object must be reported as EINVAL
+  EXPECT_EQ(reader.ReadColumn(R"({)", schema.get(), array.get()), EINVAL);
+
+  // Check at least one failed Check(): the input is a top-level JSON string
+  EXPECT_EQ(
+      reader.ReadColumn(R"("this is not a JSON object")", schema.get(), 
array.get()),
+      EINVAL);
+
+  // Check at least one failed PrefixError(): message must carry the column prefix
+  EXPECT_EQ(reader.ReadColumn(R"({"name": "colname", "count": "not an 
integer"})",
+                              schema.get(), array.get(), &error),
+            EINVAL);
+  EXPECT_STREQ(error.message, "-> Column 'colname' count must be integer");
+
+  // Check that the column is validated: a count of -1 must be reported as EINVAL
+  EXPECT_EQ(
+      reader.ReadColumn(R"({"name": null, "count": -1})", schema.get(), 
array.get()),
+      EINVAL);
+}
+
+void TestFieldRoundtrip(const std::string& field_json,
+                        const std::string& column_json = "") {
   nanoarrow::UniqueSchema schema;
   TestingJSONReader reader;
   TestingJSONWriter writer;
   ArrowError error;
   error.message[0] = '\0';
 
-  int result = reader.ReadField(field_json, schema.get(), &error);
-  ASSERT_EQ(result, NANOARROW_OK) << "Error: " << error.message;
+  ASSERT_EQ(reader.ReadField(field_json, schema.get(), &error), NANOARROW_OK)
+      << "Error: " << error.message;
 
-  std::stringstream field_json_roundtrip;
-  ASSERT_EQ(writer.WriteField(field_json_roundtrip, schema.get()), 
NANOARROW_OK);
-  EXPECT_EQ(field_json_roundtrip.str(), field_json);
+  std::stringstream json_roundtrip;
+  ASSERT_EQ(writer.WriteField(json_roundtrip, schema.get()), NANOARROW_OK);
+  EXPECT_EQ(json_roundtrip.str(), field_json);
+
+  if (column_json == "") {
+    return;
+  }
+
+  nanoarrow::UniqueArray array;
+  ASSERT_EQ(reader.ReadColumn(column_json, schema.get(), array.get(), &error),
+            NANOARROW_OK)
+      << error.message;
+
+  nanoarrow::UniqueArrayView array_view;
+  ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), 
nullptr),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), 
NANOARROW_OK);
+
+  json_roundtrip.str("");
+  ASSERT_EQ(writer.WriteColumn(json_roundtrip, schema.get(), array_view.get()),
+            NANOARROW_OK);
+  EXPECT_EQ(json_roundtrip.str(), column_json);
 }
 
-void TestTypeRoundtrip(const std::string& type_json) {
+void TestTypeRoundtrip(const std::string& type_json,
+                       const std::string& column_json = "") {
   std::stringstream field_json_builder;
   field_json_builder << R"({"name": null, "nullable": true, "type": )" << 
type_json
                      << R"(, "children": [], "metadata": null})";
-  TestFieldRoundtrip(field_json_builder.str());
+  TestFieldRoundtrip(field_json_builder.str(), column_json);
 }
 
 void TestFieldError(const std::string& field_json, const std::string& msg,
@@ -788,33 +849,65 @@ void TestTypeError(const std::string& type_json, const 
std::string& msg,
   TestFieldError(field_json_builder.str(), msg, code);
 }
 
-TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldPrimitive) {
-  TestTypeRoundtrip(R"({"name": "null"})");
-  TestTypeRoundtrip(R"({"name": "bool"})");
-  TestTypeRoundtrip(R"({"name": "utf8"})");
-  TestTypeRoundtrip(R"({"name": "largeutf8"})");
-  TestTypeRoundtrip(R"({"name": "binary"})");
-  TestTypeRoundtrip(R"({"name": "largebinary"})");
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldNull) {
+  TestTypeRoundtrip(R"({"name": "null"})", R"({"name": null, "count": 2})");
 
   TestTypeError(R"({"name": "an unsupported type"})",
                 "Unsupported Type name: 'an unsupported type'", ENOTSUP);
 }
 
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldBool) {  // bool type and a 3-element column must survive a JSON read/write round trip
+  TestTypeRoundtrip(
+      R"({"name": "bool"})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 1, 
0]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldString) {  // utf8/binary and their large variants; the large cases spell OFFSET values as JSON strings
+  TestTypeRoundtrip(
+      R"({"name": "utf8"})",
+      R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": [0, 3, 3], 
"DATA": ["abc", ""]})");
+  TestTypeRoundtrip(
+      R"({"name": "largeutf8"})",
+      R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": ["0", "3", 
"3"], "DATA": ["abc", ""]})");
+  TestTypeRoundtrip(
+      R"({"name": "binary"})",
+      R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": [0, 3, 3], 
"DATA": ["00FFA0", ""]})");
+  TestTypeRoundtrip(
+      R"({"name": "largebinary"})",
+      R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": ["0", "3", 
"3"], "DATA": ["00FFA0", ""]})");
+}
+
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldInt) {
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 8, "isSigned": true})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 16, "isSigned": true})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 32, "isSigned": true})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 64, "isSigned": true})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 8, "isSigned": true})",
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-128, 0, 
127]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 16, "isSigned": true})",
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-129, 0, 
127]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 32, "isSigned": true})",
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-130, 0, 
127]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 64, "isSigned": true})",
+      R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["-131", 
"0", "127"]})");
 
   TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": true})",
                 "Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
 }
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUInt) {
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 8, "isSigned": false})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 16, "isSigned": false})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 32, "isSigned": false})");
-  TestTypeRoundtrip(R"({"name": "int", "bitWidth": 64, "isSigned": false})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 8, "isSigned": false})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0, 
255]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 16, "isSigned": false})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0, 
256]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 32, "isSigned": false})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0, 
257]})");
+  TestTypeRoundtrip(
+      R"({"name": "int", "bitWidth": 64, "isSigned": false})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0", 
"258"]})");
 
   TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": false})",
                 "Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
@@ -822,8 +915,12 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUInt) {
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFloatingPoint) {
   TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "HALF"})");
-  TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "SINGLE"})");
-  TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "DOUBLE"})");
+  TestTypeRoundtrip(
+      R"({"name": "floatingpoint", "precision": "SINGLE"})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0.000, 
1.230, 4.560]})");
+  TestTypeRoundtrip(
+      R"({"name": "floatingpoint", "precision": "DOUBLE"})",
+      R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0.000, 
1.230, 4.560]})");
 
   TestTypeError(
       R"({"name": "floatingpoint", "precision": "NOT_A_PRECISION"})",
@@ -831,7 +928,9 @@ TEST(NanoarrowTestingTest, 
NanoarrowTestingTestFieldFloatingPoint) {
 }
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFixedSizeBinary) {
-  TestTypeRoundtrip(R"({"name": "fixedsizebinary", "byteWidth": 123})");
+  TestTypeRoundtrip(
+      R"({"name": "fixedsizebinary", "byteWidth": 3})",
+      R"({"name": null, "count": 2, "VALIDITY": [1, 0], "DATA": ["00FFA0", 
"000000"]})");
 }
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldDecimal) {
@@ -868,7 +967,8 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldStruct) 
{
   // Empty
   TestFieldRoundtrip(
       R"({"name": null, "nullable": true, "type": {"name": "struct"}, 
"children": [)"
-      R"(], "metadata": null})");
+      R"(], "metadata": null})",
+      R"({"name": null, "count": 0, "VALIDITY": [], "children": []})");
 
   // Non-empty
   TestFieldRoundtrip(
@@ -897,17 +997,39 @@ TEST(NanoarrowTestingTest, 
NanoarrowTestingTestFieldFixedSizeList) {
 }
 
 TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUnion) {
+  // Empty unions
+  TestFieldRoundtrip(
+      R"({"name": null, "nullable": true, "type": {"name": "union", "mode": 
"DENSE", "typeIds": []}, "children": [], "metadata": null})",
+      R"({"name": null, "count": 0, "TYPE_ID": [], "OFFSET": [], "children": 
[]})");
+  TestFieldRoundtrip(
+      R"({"name": null, "nullable": true, "type": {"name": "union", "mode": 
"SPARSE", "typeIds": []}, "children": [], "metadata": null})",
+      R"({"name": null, "count": 0, "TYPE_ID": [], "children": []})");
+
   TestFieldRoundtrip(
       R"({"name": null, "nullable": true, "type": {"name": "union", "mode": 
"DENSE", "typeIds": [10,20]}, "children": [)"
       R"({"name": null, "nullable": true, "type": {"name": "null"}, 
"children": [], "metadata": null}, )"
       R"({"name": null, "nullable": true, "type": {"name": "utf8"}, 
"children": [], "metadata": null})"
       R"(], "metadata": null})");
 
+  // Non-empty unions (null, "abc")
   TestFieldRoundtrip(
       R"({"name": null, "nullable": true, "type": {"name": "union", "mode": 
"SPARSE", "typeIds": [10,20]}, "children": [)"
-      R"({"name": null, "nullable": true, "type": {"name": "null"}, 
"children": [], "metadata": null}, )"
-      R"({"name": null, "nullable": true, "type": {"name": "utf8"}, 
"children": [], "metadata": null})"
-      R"(], "metadata": null})");
+      R"({"name": "nulls", "nullable": true, "type": {"name": "null"}, 
"children": [], "metadata": null}, )"
+      R"({"name": "strings", "nullable": true, "type": {"name": "utf8"}, 
"children": [], "metadata": null})"
+      R"(], "metadata": null})",
+      R"({"name": null, "count": 2, "TYPE_ID": [20, 10], "children": [)"
+      R"({"name": "nulls", "count": 2}, )"
+      R"({"name": "strings", "count": 2, "VALIDITY": [1, 1], "OFFSET": [0, 3, 
3], "DATA": ["abc", ""]})"
+      R"(]})");
+  TestFieldRoundtrip(
+      R"({"name": null, "nullable": true, "type": {"name": "union", "mode": 
"DENSE", "typeIds": [10,20]}, "children": [)"
+      R"({"name": "nulls", "nullable": true, "type": {"name": "null"}, 
"children": [], "metadata": null}, )"
+      R"({"name": "strings", "nullable": true, "type": {"name": "utf8"}, 
"children": [], "metadata": null})"
+      R"(], "metadata": null})",
+      R"({"name": null, "count": 2, "TYPE_ID": [20, 10], "OFFSET": [0, 0], 
"children": [)"
+      R"({"name": "nulls", "count": 1}, )"
+      R"({"name": "strings", "count": 1, "VALIDITY": [1], "OFFSET": [0, 3], 
"DATA": ["abc"]})"
+      R"(]})");
 
   TestTypeError(R"({"name": "union", "mode": "NOT_A_MODE", "typeIds": []})",
                 "Type[name=='union'] mode must be 'DENSE' or 'SPARSE'");
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index e96207e..2ea16b2 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -449,6 +449,14 @@ enum ArrowBufferType {
   NANOARROW_BUFFER_TYPE_DATA
 };
 
+/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
+/// \ingroup nanoarrow-array-view
+///
+/// All currently supported types have 3 buffers or fewer; however, future 
types
+/// may involve a variable number of buffers (e.g., string view). These buffers
+/// will be represented by separate members of the ArrowArrayView or 
ArrowLayout.
+#define NANOARROW_MAX_FIXED_BUFFERS 3
+
 /// \brief A non-owning view of a string
 /// \ingroup nanoarrow-utils
 struct ArrowStringView {
@@ -561,13 +569,13 @@ struct ArrowBitmap {
 /// the length and offset of the array.
 struct ArrowLayout {
   /// \brief The function of each buffer
-  enum ArrowBufferType buffer_type[3];
+  enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS];
 
   /// \brief The data type of each buffer
-  enum ArrowType buffer_data_type[3];
+  enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS];
 
   /// \brief The size of an element in each buffer or 0 if this size is variable 
or unknown
-  int64_t element_size_bits[3];
+  int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS];
 
   /// \brief The number of elements in the child array per element in this 
array for a
   /// fixed-size list
@@ -609,7 +617,7 @@ struct ArrowArrayView {
   struct ArrowLayout layout;
 
   /// \brief This Array's buffers as ArrowBufferView objects
-  struct ArrowBufferView buffer_views[3];
+  struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS];
 
   /// \brief The number of children of this view
   int64_t n_children;
@@ -637,12 +645,12 @@ struct ArrowArrayPrivateData {
   struct ArrowBitmap bitmap;
 
   // Holder for additional buffers as required
-  struct ArrowBuffer buffers[2];
+  struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1];
 
   // The array of pointers to buffers. This must be updated after a sequence
   // of appends to synchronize its values with the actual buffer addresses
   // (which may have been reallocated during that time)
-  const void* buffer_data[3];
+  const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
 
   // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
   enum ArrowType storage_type;


Reply via email to