This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e0329f4 feat: Add integration testing reader for Column (#325)
e0329f4 is described below
commit e0329f4e9188d198680439cba74006173b7c02aa
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Nov 29 15:37:43 2023 -0400
feat: Add integration testing reader for Column (#325)
---
.../src/nanoarrow/nanoarrow_device.c | 2 +-
.../src/nanoarrow/nanoarrow_ipc_decoder.c | 4 +-
python/.gitignore | 1 +
python/bootstrap.py | 5 +
src/nanoarrow/array.c | 33 +-
src/nanoarrow/array_inline.h | 6 +-
src/nanoarrow/array_test.cc | 4 +-
src/nanoarrow/nanoarrow_testing.hpp | 451 ++++++++++++++++++++-
src/nanoarrow/nanoarrow_testing_test.cc | 182 +++++++--
src/nanoarrow/nanoarrow_types.h | 20 +-
10 files changed, 640 insertions(+), 68 deletions(-)
diff --git a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
index 4be7a93..c4df3d1 100644
--- a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
+++ b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c
@@ -408,7 +408,7 @@ static ArrowErrorCode ArrowDeviceArrayViewCopyInternal(struct ArrowDevice* devic
dst->offset = src->offset;
dst->null_count = src->null_count;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (src->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
index 2fac3c7..9e4a6c9 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
@@ -1155,7 +1155,7 @@ static void ArrowIpcDecoderInitFields(struct ArrowIpcField* fields,
field->array = array;
field->buffer_offset = *n_buffers;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
*n_buffers += array_view->layout.buffer_type[i] !=
NANOARROW_BUFFER_TYPE_NONE;
}
@@ -1524,7 +1524,7 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
array_view->null_count = ns(FieldNode_null_count(field));
setter->field_i += 1;
- for (int64_t i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
diff --git a/python/.gitignore b/python/.gitignore
index d30e198..0927980 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -19,6 +19,7 @@
src/nanoarrow/nanoarrow.c
src/nanoarrow/nanoarrow.h
src/nanoarrow/nanoarrow_device.h
+src/nanoarrow/nanoarrow_testing.hpp
src/nanoarrow/nanoarrow_c.pxd
src/nanoarrow/*.c
diff --git a/python/bootstrap.py b/python/bootstrap.py
index 9e54cb7..bbb5d66 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -36,6 +36,9 @@ class NanoarrowPxdGenerator:
# Strip comments
content = self.re_comment.sub("", content)
+ # Replace NANOARROW_MAX_FIXED_BUFFERS with its value
+ content = self.re_max_buffers.sub("3", content)
+
# Find types and function definitions
types = self._find_types(content)
func_defs = self._find_func_defs(content)
@@ -59,6 +62,7 @@ class NanoarrowPxdGenerator:
output.write(b"\n")
output.write(b" ctypedef int ArrowErrorCode\n")
output.write(b" cdef int NANOARROW_OK\n")
+ output.write(b" cdef int NANOARROW_MAX_FIXED_BUFFERS\n")
output.write(b"\n")
for type in types_cython:
@@ -71,6 +75,7 @@ class NanoarrowPxdGenerator:
def _define_regexes(self):
self.re_comment = re.compile(r"\s*//[^\n]*")
+ self.re_max_buffers = re.compile(r"NANOARROW_MAX_FIXED_BUFFERS")
self.re_type = re.compile(
r"(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+)
{(?P<body>[^}]*)}"
)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 1e59777..a0e711e 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -437,7 +437,7 @@ static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- for (int64_t i = 0; i < 3; i++) {
+ for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
}
@@ -621,7 +621,7 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) {
}
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t
length) {
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
switch (array_view->layout.buffer_type[i]) {
@@ -671,26 +671,13 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
struct ArrowArray* array,
struct ArrowError* error) {
- // Check length and offset
- if (array->offset < 0) {
- ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of
%ld",
- (long)array->offset);
- return EINVAL;
- }
-
- if (array->length < 0) {
- ArrowErrorSet(error, "Expected array length >= 0 but found array length of
%ld",
- (long)array->length);
- return EINVAL;
- }
-
array_view->array = array;
array_view->offset = array->offset;
array_view->length = array->length;
array_view->null_count = array->null_count;
int64_t buffers_required = 0;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
@@ -749,6 +736,18 @@ static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
struct ArrowError* error) {
+ if (array_view->length < 0) {
+ ArrowErrorSet(error, "Expected length >= 0 but found length %ld",
+ (long)array_view->length);
+ return EINVAL;
+ }
+
+ if (array_view->offset < 0) {
+ ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld",
+ (long)array_view->offset);
+ return EINVAL;
+ }
+
// Calculate buffer sizes that do not require buffer access. If marked as
// unknown, assign the buffer size; otherwise, validate it.
int64_t offset_plus_length = array_view->offset + array_view->length;
@@ -1103,7 +1102,7 @@ static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
struct ArrowError* error) {
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
if (array_view->layout.element_size_bits[i] == 32) {
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 96fdf57..c089d2b 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -140,7 +140,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
}
// Initialize any data offset buffer with a single zero
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (private_data->layout.buffer_type[i] ==
NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
private_data->layout.element_size_bits[i] == 64) {
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array,
i), 0));
@@ -163,7 +163,7 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array)
}
static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
- for (int64_t i = 0; i < 3; i++) {
+ for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
}
@@ -278,7 +278,7 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a
struct ArrowBuffer* buffer;
int64_t size_bytes;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
buffer = ArrowArrayBuffer(array, i);
size_bytes = private_data->layout.element_size_bits[i] / 8;
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 6585957..6a22008 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -1589,12 +1589,12 @@ TEST(ArrayTest, ArrayViewTestBasic) {
// Expect error for bad offset + length
array.length = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
- EXPECT_STREQ(error.message, "Expected array length >= 0 but found array
length of -1");
+ EXPECT_STREQ(error.message, "Expected length >= 0 but found length -1");
array.length = 3;
array.offset = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
- EXPECT_STREQ(error.message, "Expected array offset >= 0 but found array
offset of -1");
+ EXPECT_STREQ(error.message, "Expected offset >= 0 but found offset -1");
array.offset = 0;
// Expect error for the wrong number of buffers
diff --git a/src/nanoarrow/nanoarrow_testing.hpp b/src/nanoarrow/nanoarrow_testing.hpp
index 454a6da..103f22e 100644
--- a/src/nanoarrow/nanoarrow_testing.hpp
+++ b/src/nanoarrow/nanoarrow_testing.hpp
@@ -16,6 +16,7 @@
// under the License.
#include <iostream>
+#include <limits>
#include <sstream>
#include <string>
@@ -439,7 +440,7 @@ class TestingJSONWriter {
}
} else {
// No need to quote smaller ints (i.e., 123456)
- out << values[0];
+ out << static_cast<int64_t>(values[0]);
for (int64_t i = 1; i < n_values; i++) {
out << ", " << static_cast<int64_t>(values[i]);
}
@@ -621,36 +622,76 @@ class TestingJSONReader {
using json = nlohmann::json;
public:
- ArrowErrorCode ReadSchema(const std::string& value, ArrowSchema* out,
+ /// \brief Read JSON representing a Schema
+ ///
+ /// Reads a JSON object in the form `{"fields": [...], "metadata": [...]}`,
+ /// propagating `out` on success.
+ ArrowErrorCode ReadSchema(const std::string& schema_json, ArrowSchema* out,
ArrowError* error = nullptr) {
try {
- auto obj = json::parse(value);
+ auto obj = json::parse(schema_json);
nanoarrow::UniqueSchema schema;
NANOARROW_RETURN_NOT_OK(SetSchema(schema.get(), obj, error));
ArrowSchemaMove(schema.get(), out);
return NANOARROW_OK;
- } catch (std::exception& e) {
+ } catch (json::exception& e) {
ArrowErrorSet(error, "Exception in TestingJSONReader::ReadSchema(): %s",
e.what());
return EINVAL;
}
}
- ArrowErrorCode ReadField(const std::string& value, ArrowSchema* out,
+ /// \brief Read JSON representing a Field
+ ///
+ /// Read a JSON object in the form `{"name" : "col", "type": {...}, ...}`,
+ /// propagating `out` on success.
+ ArrowErrorCode ReadField(const std::string& field_json, ArrowSchema* out,
ArrowError* error = nullptr) {
try {
- auto obj = json::parse(value);
+ auto obj = json::parse(field_json);
nanoarrow::UniqueSchema schema;
NANOARROW_RETURN_NOT_OK(SetField(schema.get(), obj, error));
ArrowSchemaMove(schema.get(), out);
return NANOARROW_OK;
- } catch (std::exception& e) {
+ } catch (json::exception& e) {
ArrowErrorSet(error, "Exception in TestingJSONReader::ReadField(): %s",
e.what());
return EINVAL;
}
}
+ /// \brief Read JSON representing a Column
+ ///
+ /// Read a JSON object in the form
+ /// `{"name": "col", "count": 123, "VALIDITY": [...], ...}`, propagating
+ /// `out` on success.
+ ArrowErrorCode ReadColumn(const std::string& column_json, const ArrowSchema*
schema,
+ ArrowArray* out, ArrowError* error = nullptr) {
+ try {
+ auto obj = json::parse(column_json);
+
+ // ArrowArrayView to enable validation
+ nanoarrow::UniqueArrayView array_view;
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(
+ array_view.get(), const_cast<ArrowSchema*>(schema), error));
+
+ // ArrowArray to hold memory
+ nanoarrow::UniqueArray array;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayInitFromSchema(array.get(),
const_cast<ArrowSchema*>(schema), error));
+
+ // Parse the JSON into the array
+ NANOARROW_RETURN_NOT_OK(SetArrayColumn(obj, array_view.get(),
array.get(), error));
+
+ // Return the result
+ ArrowArrayMove(array.get(), out);
+ return NANOARROW_OK;
+ } catch (json::exception& e) {
+ ArrowErrorSet(error, "Exception in TestingJSONReader::ReadColumn(): %s",
e.what());
+ return EINVAL;
+ }
+ }
+
private:
ArrowErrorCode SetSchema(ArrowSchema* schema, const json& value, ArrowError*
error) {
NANOARROW_RETURN_NOT_OK(
@@ -1053,6 +1094,402 @@ class TestingJSONReader {
return NANOARROW_OK;
}
+ ArrowErrorCode SetArrayColumn(const json& value, ArrowArrayView* array_view,
+ ArrowArray* array, ArrowError* error,
+ const std::string& parent_error_prefix = "") {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_object(), error, "Expected Column to be a JSON
object"));
+
+ // Check + resolve name early to generate better error messages
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("name"), error, "Column missing key 'name'"));
+
+ const auto& name = value["name"];
+ NANOARROW_RETURN_NOT_OK(Check(name.is_null() || name.is_string(), error,
+ "Column name must be string or null"));
+
+ std::string error_prefix;
+ if (name.is_string()) {
+ error_prefix = parent_error_prefix + "-> Column '" +
name.get<std::string>() + "' ";
+ } else {
+ error_prefix = parent_error_prefix + "-> Column <name is null> ";
+ }
+
+ // Check, resolve, and recurse children
+ NANOARROW_RETURN_NOT_OK(
+ Check(array_view->n_children == 0 || value.contains("children"), error,
+ error_prefix + "missing key children"));
+
+ if (value.contains("children")) {
+ const auto& children = value["children"];
+ NANOARROW_RETURN_NOT_OK(
+ Check(children.is_array(), error, error_prefix + "children must be
array"));
+ NANOARROW_RETURN_NOT_OK(Check(children.size() == array_view->n_children,
error,
+ error_prefix + "children has incorrect
size"));
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(SetArrayColumn(children[i],
array_view->children[i],
+ array->children[i], error,
error_prefix));
+ }
+ }
+
+ // Build buffers
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ NANOARROW_RETURN_NOT_OK(
+ PrefixError(SetArrayColumnBuffers(value, array_view, array, i,
error), error,
+ error_prefix));
+ }
+
+ // Check + resolve count
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("count"), error, error_prefix + "missing key
'count'"));
+ const auto& count = value["count"];
+ NANOARROW_RETURN_NOT_OK(
+ Check(count.is_number_integer(), error, error_prefix + "count must be
integer"));
+ array_view->length = count.get<int64_t>();
+
+ // Set ArrayView buffer views. This is because ArrowArrayInitFromSchema()
doesn't
+ // support custom type ids for unions but the ArrayView does (otherwise
+ // ArrowArrayFinishBuilding() would work).
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ ArrowBufferView* buffer_view = array_view->buffer_views + i;
+ buffer_view->data.as_uint8 = buffer->data;
+ buffer_view->size_bytes = buffer->size_bytes;
+ }
+
+ // Validate the array view
+ NANOARROW_RETURN_NOT_OK(PrefixError(
+ ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL,
error), error,
+ error_prefix + "failed to validate: "));
+
+ // Flush length and buffer pointers to the Array
+ array->length = array_view->length;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE,
nullptr), error);
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView*
array_view,
+ ArrowArray* array, int buffer_i,
+ ArrowError* error) {
+ ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+ switch (array_view->layout.buffer_type[buffer_i]) {
+ case NANOARROW_BUFFER_TYPE_VALIDITY: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("VALIDITY"), error, "missing key
'VALIDITY'"));
+ const auto& validity = value["VALIDITY"];
+ NANOARROW_RETURN_NOT_OK(
+ SetBufferBitmap(validity, ArrowArrayValidityBitmap(array), error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_TYPE_ID: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("TYPE_ID"), error, "missing key 'TYPE_ID'"));
+ const auto& type_id = value["TYPE_ID"];
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int8_t>(type_id, buffer, error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+ const auto& offset = value["OFFSET"];
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("OFFSET"), error, "missing key 'OFFSET'"));
+ const auto& offset = value["OFFSET"];
+
+ if (array_view->layout.element_size_bits[buffer_i] == 32) {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer,
error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int64_t>(offset, buffer,
error));
+ }
+ break;
+ }
+
+ case NANOARROW_BUFFER_TYPE_DATA: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("DATA"), error, "missing key 'DATA'"));
+ const auto& data = value["DATA"];
+
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_BOOL: {
+ nanoarrow::UniqueBitmap bitmap;
+ NANOARROW_RETURN_NOT_OK(SetBufferBitmap(data, bitmap.get(),
error));
+ ArrowBufferMove(&bitmap->buffer, buffer);
+ return NANOARROW_OK;
+ }
+ case NANOARROW_TYPE_INT8:
+ return SetBufferInt<int8_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT8:
+ return SetBufferInt<uint8_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT16:
+ return SetBufferInt<int16_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT16:
+ return SetBufferInt<uint16_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT32:
+ return SetBufferInt<int32_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT32:
+ return SetBufferInt<uint32_t>(data, buffer, error);
+ case NANOARROW_TYPE_INT64:
+ return SetBufferInt<int64_t>(data, buffer, error);
+ case NANOARROW_TYPE_UINT64:
+ return SetBufferInt<uint64_t, uint64_t>(data, buffer, error);
+
+ case NANOARROW_TYPE_FLOAT:
+ return SetBufferFloatingPoint<float>(data, buffer, error);
+ case NANOARROW_TYPE_DOUBLE:
+ return SetBufferFloatingPoint<double>(data, buffer, error);
+
+ case NANOARROW_TYPE_STRING:
+ return SetBufferString<int32_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_LARGE_STRING:
+ return SetBufferString<int64_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_BINARY:
+ return SetBufferBinary<int32_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_LARGE_BINARY:
+ return SetBufferBinary<int64_t>(data, ArrowArrayBuffer(array,
buffer_i - 1),
+ buffer, error);
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ return SetBufferFixedSizeBinary(
+ data, buffer, array_view->layout.element_size_bits[buffer_i] /
8, error);
+
+ default:
+ ArrowErrorSet(error, "storage type %s DATA buffer not supported",
+ ArrowTypeString(array_view->storage_type));
+ return ENOTSUP;
+ }
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_NONE:
+ break;
+ }
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode SetBufferBitmap(const json& value, ArrowBitmap* bitmap,
+ ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_array(), error, "bitmap buffer must be array"));
+
+ for (const auto& item : value) {
+ // Some example files write bitmaps as [true, false, true] but the
documentation
+ // says [1, 0, 1]. Accept both for simplicity.
+ NANOARROW_RETURN_NOT_OK(Check(item.is_boolean() ||
item.is_number_integer(), error,
+ "bitmap item must be bool or integer"));
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBitmapAppend(bitmap,
item.get<int>(), 1),
+ error);
+ }
+
+ return NANOARROW_OK;
+ }
+
+ template <typename T, typename BiggerT = int64_t>
+ ArrowErrorCode SetBufferInt(const json& value, ArrowBuffer* buffer,
ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(Check(value.is_array(), error, "int buffer must be
array"));
+
+ for (const auto& item : value) {
+ // NANOARROW_RETURN_NOT_OK() interacts poorly with multiple template args
+ ArrowErrorCode result = SetBufferIntItem<T, BiggerT>(item, buffer,
error);
+ NANOARROW_RETURN_NOT_OK(result);
+ }
+
+ return NANOARROW_OK;
+ }
+
+ template <typename T, typename BiggerT = int64_t>
+ ArrowErrorCode SetBufferIntItem(const json& item, ArrowBuffer* buffer,
+ ArrowError* error) {
+ if (item.is_string()) {
+ try {
+ // The JSON parser here can handle up to 2^64 - 1
+ auto item_int = json::parse(item.get<std::string>());
+ return SetBufferIntItem<T, BiggerT>(item_int, buffer, error);
+ } catch (json::parse_error& e) {
+ ArrowErrorSet(error,
+ "integer buffer item encoded as string must parse as
integer: %s",
+ item.dump().c_str());
+ return EINVAL;
+ }
+ }
+
+ NANOARROW_RETURN_NOT_OK(
+ Check(item.is_number_integer(), error,
+ "integer buffer item must be integer number or string"));
+ NANOARROW_RETURN_NOT_OK(
+ Check(std::numeric_limits<T>::is_signed || item.is_number_unsigned(),
error,
+ "expected unsigned integer buffer item but found signed integer
'" +
+ item.dump() + "'"));
+
+ auto item_int = item.get<BiggerT>();
+
+ NANOARROW_RETURN_NOT_OK(
+ Check(item_int >= std::numeric_limits<T>::lowest() &&
+ item_int <= std::numeric_limits<T>::max(),
+ error, "integer buffer item '" + item.dump() + "' outside type
limits"));
+
+ T buffer_value = item_int;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+
+ return NANOARROW_OK;
+ }
+
+ template <typename T>
+ ArrowErrorCode SetBufferFloatingPoint(const json& value, ArrowBuffer* buffer,
+ ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_array(), error, "floatingpoint buffer must be array"));
+
+ for (const auto& item : value) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(item.is_number(), error, "floatingpoint buffer item must be
number"));
+ double item_dbl = item.get<double>();
+
+ NANOARROW_RETURN_NOT_OK(Check(
+ item_dbl >= std::numeric_limits<T>::lowest() &&
+ item_dbl <= std::numeric_limits<T>::max(),
+ error, "floatingpoint buffer item '" + item.dump() + "' outside type
limits"));
+
+ T buffer_value = item_dbl;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+ }
+
+ return NANOARROW_OK;
+ }
+
+ template <typename T>
+ ArrowErrorCode SetBufferString(const json& value, ArrowBuffer* offsets,
+ ArrowBuffer* data, ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_array(), error, "utf8 data buffer must be array"));
+
+ // Check offsets against values
+ const T* expected_offset = reinterpret_cast<const T*>(offsets->data);
+ NANOARROW_RETURN_NOT_OK(Check(
+ offsets->size_bytes == ((value.size() + 1) * sizeof(T)), error,
+ "Expected offset buffer with " + std::to_string(value.size()) + "
elements"));
+ NANOARROW_RETURN_NOT_OK(
+ Check(*expected_offset++ == 0, error, "first offset must be zero"));
+
+ int64_t last_offset = 0;
+
+ for (const auto& item : value) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(item.is_string(), error, "utf8 data buffer item must be
string"));
+ auto item_str = item.get<std::string>();
+
+ // Append data
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowBufferAppend(data, reinterpret_cast<const
uint8_t*>(item_str.data()),
+ item_str.size()),
+ error);
+
+ // Check offset
+ last_offset += item_str.size();
+ NANOARROW_RETURN_NOT_OK(Check(*expected_offset++ == last_offset, error,
+ "Expected offset value " +
+ std::to_string(last_offset) +
+ " at utf8 data buffer item " +
item.dump()));
+ }
+
+ return NANOARROW_OK;
+ }
+
+ template <typename T>
+ ArrowErrorCode SetBufferBinary(const json& value, ArrowBuffer* offsets,
+ ArrowBuffer* data, ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_array(), error, "binary data buffer must be array"));
+
+ // Check offsets against values if not fixed size
+ const T* expected_offset = reinterpret_cast<const T*>(offsets->data);
+ NANOARROW_RETURN_NOT_OK(Check(
+ offsets->size_bytes == ((value.size() + 1) * sizeof(T)), error,
+ "Expected offset buffer with " + std::to_string(value.size()) + "
elements"));
+ NANOARROW_RETURN_NOT_OK(
+ Check(*expected_offset++ == 0, error, "first offset must be zero"));
+
+ for (const auto& item : value) {
+ NANOARROW_RETURN_NOT_OK(AppendBinaryElement(item, data, error));
+
+ // Check offset
+ NANOARROW_RETURN_NOT_OK(Check(*expected_offset++ == data->size_bytes,
error,
+ "Expected offset value " +
+ std::to_string(data->size_bytes) +
+ " at binary data buffer item " +
item.dump()));
+ }
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode SetBufferFixedSizeBinary(const json& value, ArrowBuffer* data,
+ int64_t fixed_size, ArrowError*
error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.is_array(), error, "binary data buffer must be array"));
+
+ int64_t last_offset = 0;
+
+ for (const auto& item : value) {
+ NANOARROW_RETURN_NOT_OK(AppendBinaryElement(item, data, error));
+ int64_t item_size_bytes = data->size_bytes - last_offset;
+
+ NANOARROW_RETURN_NOT_OK(Check(item_size_bytes == fixed_size, error,
+ "Expected fixed size binary value of size
" +
+ std::to_string(fixed_size) +
+ " at binary data buffer item " +
item.dump()));
+ last_offset = data->size_bytes;
+ }
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode AppendBinaryElement(const json& item, ArrowBuffer* data,
+ ArrowError* error) {
+ NANOARROW_RETURN_NOT_OK(
+ Check(item.is_string(), error, "binary data buffer item must be
string"));
+ auto item_str = item.get<std::string>();
+
+ int64_t item_size_bytes = item_str.size() / 2;
+ NANOARROW_RETURN_NOT_OK(Check((item_size_bytes * 2) == item_str.size(),
error,
+ "binary data buffer item must have even
size"));
+
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(data,
item_size_bytes), error);
+ for (int64_t i = 0; i < item_str.size(); i += 2) {
+ std::string byte_hex = item_str.substr(i, 2);
+ char* end_ptr;
+ uint8_t byte = std::strtoul(byte_hex.data(), &end_ptr, 16);
+ NANOARROW_RETURN_NOT_OK(
+ Check(end_ptr == (byte_hex.data() + 2), error,
+ "binary data buffer item must contain a valid hex-encoded byte
string"));
+
+ data->data[data->size_bytes] = byte;
+ data->size_bytes++;
+ }
+
+ return NANOARROW_OK;
+ }
+
+ ArrowErrorCode PrefixError(ArrowErrorCode value, ArrowError* error,
+ const std::string& prefix) {
+ if (value != NANOARROW_OK && error != nullptr) {
+ std::string msg = prefix + error->message;
+ ArrowErrorSet(error, "%s", msg.c_str());
+ }
+
+ return value;
+ }
+
ArrowErrorCode Check(bool value, ArrowError* error, const std::string& err) {
if (value) {
return NANOARROW_OK;
diff --git a/src/nanoarrow/nanoarrow_testing_test.cc b/src/nanoarrow/nanoarrow_testing_test.cc
index 73c8f79..0b8f733 100644
--- a/src/nanoarrow/nanoarrow_testing_test.cc
+++ b/src/nanoarrow/nanoarrow_testing_test.cc
@@ -747,26 +747,87 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestReadFieldNested) {
EXPECT_STREQ(schema->children[0]->format, "n");
}
-void TestFieldRoundtrip(const std::string& field_json) {
+TEST(NanoarrowTestingTest, NanoarrowTestingTestReadColumnBasic) {
+ nanoarrow::UniqueSchema schema;
+ nanoarrow::UniqueArray array;
+ ArrowError error;
+ error.message[0] = '\0';
+
+ TestingJSONReader reader;
+
+ ASSERT_EQ(
+ reader.ReadField(
+ R"({"name": null, "nullable": true, "type": {"name": "null"},
"children": [], "metadata": null})",
+ schema.get()),
+ NANOARROW_OK);
+
+ ASSERT_EQ(reader.ReadColumn(R"({"name": null, "count": 2})", schema.get(),
array.get(),
+ &error),
+ NANOARROW_OK)
+ << error.message;
+ EXPECT_EQ(array->length, 2);
+
+ // Check invalid JSON
+ EXPECT_EQ(reader.ReadColumn(R"({)", schema.get(), array.get()), EINVAL);
+
+ // Check at least one failed Check()
+ EXPECT_EQ(
+ reader.ReadColumn(R"("this is not a JSON object")", schema.get(),
array.get()),
+ EINVAL);
+
+ // Check at least one failed PrefixError()
+ EXPECT_EQ(reader.ReadColumn(R"({"name": "colname", "count": "not an
integer"})",
+ schema.get(), array.get(), &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "-> Column 'colname' count must be integer");
+
+ // Check that field is validated
+ EXPECT_EQ(
+ reader.ReadColumn(R"({"name": null, "count": -1})", schema.get(),
array.get()),
+ EINVAL);
+}
+
+void TestFieldRoundtrip(const std::string& field_json,
+ const std::string& column_json = "") {
nanoarrow::UniqueSchema schema;
TestingJSONReader reader;
TestingJSONWriter writer;
ArrowError error;
error.message[0] = '\0';
- int result = reader.ReadField(field_json, schema.get(), &error);
- ASSERT_EQ(result, NANOARROW_OK) << "Error: " << error.message;
+ ASSERT_EQ(reader.ReadField(field_json, schema.get(), &error), NANOARROW_OK)
+ << "Error: " << error.message;
- std::stringstream field_json_roundtrip;
- ASSERT_EQ(writer.WriteField(field_json_roundtrip, schema.get()),
NANOARROW_OK);
- EXPECT_EQ(field_json_roundtrip.str(), field_json);
+ std::stringstream json_roundtrip;
+ ASSERT_EQ(writer.WriteField(json_roundtrip, schema.get()), NANOARROW_OK);
+ EXPECT_EQ(json_roundtrip.str(), field_json);
+
+ if (column_json == "") {
+ return;
+ }
+
+ nanoarrow::UniqueArray array;
+ ASSERT_EQ(reader.ReadColumn(column_json, schema.get(), array.get(), &error),
+ NANOARROW_OK)
+ << error.message;
+
+ nanoarrow::UniqueArrayView array_view;
+ ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(),
nullptr),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr),
NANOARROW_OK);
+
+ json_roundtrip.str("");
+ ASSERT_EQ(writer.WriteColumn(json_roundtrip, schema.get(), array_view.get()),
+ NANOARROW_OK);
+ EXPECT_EQ(json_roundtrip.str(), column_json);
}
-void TestTypeRoundtrip(const std::string& type_json) {
+void TestTypeRoundtrip(const std::string& type_json,
+ const std::string& column_json = "") {
std::stringstream field_json_builder;
field_json_builder << R"({"name": null, "nullable": true, "type": )" <<
type_json
<< R"(, "children": [], "metadata": null})";
- TestFieldRoundtrip(field_json_builder.str());
+ TestFieldRoundtrip(field_json_builder.str(), column_json);
}
void TestFieldError(const std::string& field_json, const std::string& msg,
@@ -788,33 +849,65 @@ void TestTypeError(const std::string& type_json, const std::string& msg,
TestFieldError(field_json_builder.str(), msg, code);
}
-TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldPrimitive) {
- TestTypeRoundtrip(R"({"name": "null"})");
- TestTypeRoundtrip(R"({"name": "bool"})");
- TestTypeRoundtrip(R"({"name": "utf8"})");
- TestTypeRoundtrip(R"({"name": "largeutf8"})");
- TestTypeRoundtrip(R"({"name": "binary"})");
- TestTypeRoundtrip(R"({"name": "largebinary"})");
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldNull) {
+ TestTypeRoundtrip(R"({"name": "null"})", R"({"name": null, "count": 2})");
TestTypeError(R"({"name": "an unsupported type"})",
"Unsupported Type name: 'an unsupported type'", ENOTSUP);
}
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldBool) {
+ TestTypeRoundtrip(
+ R"({"name": "bool"})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 1,
0]})");
+}
+
+TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldString) {
+ TestTypeRoundtrip(
+ R"({"name": "utf8"})",
+ R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": [0, 3, 3],
"DATA": ["abc", ""]})");
+ TestTypeRoundtrip(
+ R"({"name": "largeutf8"})",
+ R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": ["0", "3",
"3"], "DATA": ["abc", ""]})");
+ TestTypeRoundtrip(
+ R"({"name": "binary"})",
+ R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": [0, 3, 3],
"DATA": ["00FFA0", ""]})");
+ TestTypeRoundtrip(
+ R"({"name": "largebinary"})",
+ R"({"name": null, "count": 2, "VALIDITY": [1, 0], "OFFSET": ["0", "3",
"3"], "DATA": ["00FFA0", ""]})");
+}
+
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldInt) {
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 8, "isSigned": true})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 16, "isSigned": true})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 32, "isSigned": true})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 64, "isSigned": true})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 8, "isSigned": true})",
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-128, 0,
127]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 16, "isSigned": true})",
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-129, 0,
127]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 32, "isSigned": true})",
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": [-130, 0,
127]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 64, "isSigned": true})",
+ R"({"name": null, "count": 3, "VALIDITY": [1, 1, 1], "DATA": ["-131",
"0", "127"]})");
TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": true})",
"Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
}
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUInt) {
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 8, "isSigned": false})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 16, "isSigned": false})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 32, "isSigned": false})");
- TestTypeRoundtrip(R"({"name": "int", "bitWidth": 64, "isSigned": false})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 8, "isSigned": false})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0,
255]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 16, "isSigned": false})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0,
256]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 32, "isSigned": false})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0, 0,
257]})");
+ TestTypeRoundtrip(
+ R"({"name": "int", "bitWidth": 64, "isSigned": false})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": ["0", "0",
"258"]})");
TestTypeError(R"({"name": "int", "bitWidth": 1, "isSigned": false})",
"Type[name=='int'] bitWidth must be 8, 16, 32, or 64");
@@ -822,8 +915,12 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUInt) {
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFloatingPoint) {
TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "HALF"})");
- TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "SINGLE"})");
- TestTypeRoundtrip(R"({"name": "floatingpoint", "precision": "DOUBLE"})");
+ TestTypeRoundtrip(
+ R"({"name": "floatingpoint", "precision": "SINGLE"})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0.000, 1.230, 4.560]})");
+ TestTypeRoundtrip(
+ R"({"name": "floatingpoint", "precision": "DOUBLE"})",
+ R"({"name": null, "count": 3, "VALIDITY": [0, 1, 1], "DATA": [0.000, 1.230, 4.560]})");
TestTypeError(
R"({"name": "floatingpoint", "precision": "NOT_A_PRECISION"})",
@@ -831,7 +928,9 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFloatingPoint) {
}
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFixedSizeBinary) {
- TestTypeRoundtrip(R"({"name": "fixedsizebinary", "byteWidth": 123})");
+ TestTypeRoundtrip(
+ R"({"name": "fixedsizebinary", "byteWidth": 3})",
+ R"({"name": null, "count": 2, "VALIDITY": [1, 0], "DATA": ["00FFA0", "000000"]})");
}
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldDecimal) {
@@ -868,7 +967,8 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldStruct) {
// Empty
TestFieldRoundtrip(
R"({"name": null, "nullable": true, "type": {"name": "struct"}, "children": [)"
- R"(], "metadata": null})");
+ R"(], "metadata": null})",
+ R"({"name": null, "count": 0, "VALIDITY": [], "children": []})");
// Non-empty
TestFieldRoundtrip(
@@ -897,17 +997,39 @@ TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldFixedSizeList) {
}
TEST(NanoarrowTestingTest, NanoarrowTestingTestFieldUnion) {
+ // Empty unions
+ TestFieldRoundtrip(
+ R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "DENSE", "typeIds": []}, "children": [], "metadata": null})",
+ R"({"name": null, "count": 0, "TYPE_ID": [], "OFFSET": [], "children": []})");
+ TestFieldRoundtrip(
+ R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "SPARSE", "typeIds": []}, "children": [], "metadata": null})",
+ R"({"name": null, "count": 0, "TYPE_ID": [], "children": []})");
+
TestFieldRoundtrip(
R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "DENSE", "typeIds": [10,20]}, "children": [)"
R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
R"({"name": null, "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
R"(], "metadata": null})");
+ // Non-empty unions (null, "abc")
TestFieldRoundtrip(
R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "SPARSE", "typeIds": [10,20]}, "children": [)"
- R"({"name": null, "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
- R"({"name": null, "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
- R"(], "metadata": null})");
+ R"({"name": "nulls", "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
+ R"({"name": "strings", "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
+ R"(], "metadata": null})",
+ R"({"name": null, "count": 2, "TYPE_ID": [20, 10], "children": [)"
+ R"({"name": "nulls", "count": 2}, )"
+ R"({"name": "strings", "count": 2, "VALIDITY": [1, 1], "OFFSET": [0, 3, 3], "DATA": ["abc", ""]})"
+ R"(]})");
+ TestFieldRoundtrip(
+ R"({"name": null, "nullable": true, "type": {"name": "union", "mode": "DENSE", "typeIds": [10,20]}, "children": [)"
+ R"({"name": "nulls", "nullable": true, "type": {"name": "null"}, "children": [], "metadata": null}, )"
+ R"({"name": "strings", "nullable": true, "type": {"name": "utf8"}, "children": [], "metadata": null})"
+ R"(], "metadata": null})",
+ R"({"name": null, "count": 2, "TYPE_ID": [20, 10], "OFFSET": [0, 0], "children": [)"
+ R"({"name": "nulls", "count": 1}, )"
+ R"({"name": "strings", "count": 1, "VALIDITY": [1], "OFFSET": [0, 3], "DATA": ["abc"]})"
+ R"(]})");
TestTypeError(R"({"name": "union", "mode": "NOT_A_MODE", "typeIds": []})",
"Type[name=='union'] mode must be 'DENSE' or 'SPARSE'");
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index e96207e..2ea16b2 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -449,6 +449,14 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_DATA
};
+/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
+/// \ingroup nanoarrow-array-view
+///
+/// All currently supported types have 3 buffers or fewer; however, future types
+/// may involve a variable number of buffers (e.g., string view). These buffers
+/// will be represented by separate members of the ArrowArrayView or ArrowLayout.
+#define NANOARROW_MAX_FIXED_BUFFERS 3
+
/// \brief An non-owning view of a string
/// \ingroup nanoarrow-utils
struct ArrowStringView {
@@ -561,13 +569,13 @@ struct ArrowBitmap {
/// the length and offset of the array.
struct ArrowLayout {
/// \brief The function of each buffer
- enum ArrowBufferType buffer_type[3];
+ enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The data type of each buffer
- enum ArrowType buffer_data_type[3];
+ enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The size of an element each buffer or 0 if this size is variable or unknown
- int64_t element_size_bits[3];
+ int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of elements in the child array per element in this array for a
/// fixed-size list
@@ -609,7 +617,7 @@ struct ArrowArrayView {
struct ArrowLayout layout;
/// \brief This Array's buffers as ArrowBufferView objects
- struct ArrowBufferView buffer_views[3];
+ struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of children of this view
int64_t n_children;
@@ -637,12 +645,12 @@ struct ArrowArrayPrivateData {
struct ArrowBitmap bitmap;
// Holder for additional buffers as required
- struct ArrowBuffer buffers[2];
+ struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1];
// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have ben reallocated uring that time)
- const void* buffer_data[3];
+ const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;