This is an automated email from the ASF dual-hosted git repository.
willayd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e77a6567 feat: Implement LIST_VIEW and LARGE_LIST_VIEW support (#710)
e77a6567 is described below
commit e77a656728c6be5ba13e58f3a7ac9c4e13182fc5
Author: William Ayd <[email protected]>
AuthorDate: Tue Mar 4 16:44:22 2025 -0500
feat: Implement LIST_VIEW and LARGE_LIST_VIEW support (#710)
---
src/nanoarrow/common/array.c | 60 +++-
src/nanoarrow/common/array_test.cc | 526 +++++++++++++++++++++++++++++++++++-
src/nanoarrow/common/inline_array.h | 36 ++-
src/nanoarrow/common/inline_types.h | 15 +-
src/nanoarrow/common/schema.c | 26 ++
src/nanoarrow/common/schema_test.cc | 61 +++++
src/nanoarrow/common/utils.c | 18 ++
src/nanoarrow/testing/testing.cc | 23 ++
8 files changed, 759 insertions(+), 6 deletions(-)
diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c
index 53cd4c65..63926634 100644
--- a/src/nanoarrow/common/array.c
+++ b/src/nanoarrow/common/array.c
@@ -123,6 +123,8 @@ static ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array,
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
+ case NANOARROW_TYPE_LIST_VIEW:
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
array->n_buffers = 3;
break;
@@ -169,6 +171,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array,
private_data->n_variadic_buffers = 0;
private_data->variadic_buffers = NULL;
private_data->variadic_buffer_sizes = NULL;
+ private_data->list_view_offset = 0;
array->private_data = private_data;
array->buffers = (const void**)(private_data->buffer_data);
@@ -700,6 +703,8 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length)
continue;
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
+ case NANOARROW_BUFFER_TYPE_SIZE:
array_view->buffer_views[i].size_bytes = element_size_bytes * length;
continue;
case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
@@ -856,12 +861,19 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
break;
+ case NANOARROW_BUFFER_TYPE_SIZE:
+ min_buffer_size_bytes = element_size_bytes * offset_plus_length;
+ break;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
// Probably don't want/need to rely on the producer to have allocated an
// offsets buffer of length 1 for a zero-size array
min_buffer_size_bytes =
(offset_plus_length != 0) * element_size_bytes * (offset_plus_length + 1);
break;
+ case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
+ min_buffer_size_bytes =
+ (offset_plus_length != 0) * element_size_bytes * offset_plus_length;
+ break;
case NANOARROW_BUFFER_TYPE_DATA:
min_buffer_size_bytes =
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] *
@@ -898,6 +910,8 @@ static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_MAP:
+ case NANOARROW_TYPE_LIST_VIEW:
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
if (array_view->n_children != 1) {
ArrowErrorSet(error,
"Expected 1 child of %s array but found %" PRId64 " child arrays",
@@ -1177,10 +1191,11 @@ static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(error,
- "Expected child of large list array to have length >= %" PRId64
+ "Expected child of %s array to have length >= %" PRId64
" but found array "
"with length %" PRId64,
- last_offset, array_view->children[0]->length);
+ ArrowTypeString(array_view->storage_type), last_offset,
+ array_view->children[0]->length);
return EINVAL;
}
}
@@ -1423,6 +1438,47 @@ static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
}
}
+ if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW ||
+ array_view->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) {
+ int64_t child_len = array_view->children[0]->length;
+
+ struct ArrowBufferView offsets, sizes;
+ offsets.data.data = array_view->buffer_views[1].data.data;
+ sizes.data.data = array_view->buffer_views[2].data.data;
+
+ for (int64_t i = array_view->offset; i < array_view->length + array_view->offset;
+ i++) {
+ int64_t offset, size;
+ if (array_view->storage_type == NANOARROW_TYPE_LIST_VIEW) {
+ offset = offsets.data.as_int32[i];
+ size = sizes.data.as_int32[i];
+ } else {
+ offset = offsets.data.as_int64[i];
+ size = sizes.data.as_int64[i];
+ }
+
+ if (offset < 0) {
+ ArrowErrorSet(error, "Invalid negative offset %" PRId64 " at index %" PRId64,
+ offset, i);
+ return EINVAL;
+ }
+
+ if (size < 0) {
+ ArrowErrorSet(error, "Invalid negative size %" PRId64 " at index %" PRId64, size,
+ i);
+ return EINVAL;
+ }
+
+ if ((offset + size) > child_len) {
+ ArrowErrorSet(error,
+ "Offset: %" PRId64 " + size: %" PRId64 " at index: %" PRId64
+ " exceeds length of child view: %" PRId64,
+ offset, size, i, child_len);
+ return EINVAL;
+ }
+ }
+ }
+
// Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
diff --git a/src/nanoarrow/common/array_test.cc b/src/nanoarrow/common/array_test.cc
index dddc779d..f8739307 100644
--- a/src/nanoarrow/common/array_test.cc
+++ b/src/nanoarrow/common/array_test.cc
@@ -1525,7 +1525,7 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(
ArrowErrorMessage(&error),
- "Expected child of large list array to have length >= 3 but found array
with "
+ "Expected child of large_list array to have length >= 3 but found array
with "
"length 2");
array.children[0]->length = array.children[0]->length + 1;
@@ -1555,6 +1555,147 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
#endif
}
+TEST(ArrayTest, ArrayTestAppendToListViewArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively without erroring
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 1 child of list_view array but found 0 child arrays");
+ array.n_children = 1;
+
+ // Make sure size + offset is always within bounds of child array
+ array.children[0]->length = array.children[0]->length - 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Offset: 1 + size: 2 at index: 2 exceeds length of child view:
2");
+ array.children[0]->length = array.children[0]->length + 1;
+
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+
+ constexpr size_t nelems = 4;
+ const std::array<int32_t, nelems> offsets = {0, 1, 1, 3};
+ const std::array<int32_t, nelems> sizes = {1, 0, 2, 0};
+ const std::array<uint8_t, nelems> valid_bytes = {1, 0, 1, 1};
+
+ auto child_builder = std::make_shared<Int64Builder>();
+ auto builder =
+ ListViewBuilder(default_memory_pool(), child_builder,
list_view(int64()));
+ ARROW_EXPECT_OK(
+ builder.AppendValues(offsets.data(), sizes.data(), nelems,
valid_bytes.data()));
+ ARROW_EXPECT_OK(child_builder->Append(123));
+ ARROW_EXPECT_OK(child_builder->Append(456));
+ ARROW_EXPECT_OK(child_builder->Append(789));
+ auto expected_array = builder.Finish();
+ ARROW_EXPECT_OK(expected_array);
+
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
+#else
+ ArrowSchemaRelease(&schema);
+ ArrowArrayRelease(&array);
+#endif
+}
+
+TEST(ArrayTest, ArrayTestAppendToLargeListViewArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LARGE_LIST_VIEW),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively without erroring
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 1 child of large_list_view array but found 0 child
arrays");
+ array.n_children = 1;
+
+ // Make sure size + offset is always within bounds of child array
+ array.children[0]->length = array.children[0]->length - 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Offset: 1 + size: 2 at index: 2 exceeds length of child view:
2");
+ array.children[0]->length = array.children[0]->length + 1;
+
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+
+ constexpr size_t nelems = 4;
+ const std::array<int64_t, nelems> offsets = {0, 1, 1, 3};
+ const std::array<int64_t, nelems> sizes = {1, 0, 2, 0};
+ const std::array<uint8_t, nelems> valid_bytes = {1, 0, 1, 1};
+
+ auto child_builder = std::make_shared<Int64Builder>();
+ auto builder =
+ LargeListViewBuilder(default_memory_pool(), child_builder,
list_view(int64()));
+ ARROW_EXPECT_OK(
+ builder.AppendValues(offsets.data(), sizes.data(), nelems,
valid_bytes.data()));
+ ARROW_EXPECT_OK(child_builder->Append(123));
+ ARROW_EXPECT_OK(child_builder->Append(456));
+ ARROW_EXPECT_OK(child_builder->Append(789));
+ auto expected_array = builder.Finish();
+ ARROW_EXPECT_OK(expected_array);
+
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
+#else
+ ArrowSchemaRelease(&schema);
+ ArrowArrayRelease(&array);
+#endif
+}
+
TEST(ArrayTest, ArrayTestAppendToMapArray) {
struct ArrowArray array;
struct ArrowSchema schema;
@@ -3061,6 +3202,389 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
ArrowArrayViewReset(&array_view);
}
+TEST(ArrayTest, ArrayViewTestListView) {
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+ ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_LIST_VIEW);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LIST_VIEW);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_VIEW_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 8 *
sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
NANOARROW_BUFFER_TYPE_SIZE);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 8 *
sizeof(int32_t));
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ // Can't assume the offsets buffer exists for length == 0
+ ArrowArrayViewSetLength(&array_view, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+
+ // This should pass validation even if all buffers are empty
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 5 *
sizeof(int32_t));
+
+ // Build a valid array ([[1234], []])
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LIST_VIEW),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
+
+ // Expect error for offsets that will cause bad access
+ auto offsets = const_cast<int32_t*>(reinterpret_cast<const
int32_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative offset -1 at index 0");
+
+ // For a sliced array, this can still pass validation
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ // Check for negative element sizes
+ array.offset = 0;
+ array.length = 2;
+ offsets[0] = 0;
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative offset -1 at index 1");
+
+ // Sliced array should also fail validation because the first element is
negative
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ error.message,
+ "Invalid negative offset -1 at index 1"); // TODO: should the index
include offset?
+
+ // ...but the array should be valid if we do not include the negative offset
+ array.offset = 0;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+ offsets[1] = 1;
+ array.length = 2;
+
+ // Expect error for invalid sizes
+ auto sizes = const_cast<int32_t*>(reinterpret_cast<const
int32_t*>(array.buffers[2]));
+
+ sizes[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative size -1 at index 0");
+
+ // For a sliced array, this can still pass validation
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ // Check for negative element sizes
+ array.offset = 0;
+ array.length = 2;
+ sizes[0] = 0;
+ sizes[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative size -1 at index 1");
+
+ // Sliced array should also fail validation because the first element is
negative
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ error.message,
+ "Invalid negative size -1 at index 1"); // TODO: should the index
include offset?
+
+ // ...but the array should be valid if we do not include the negative offset
+ array.offset = 0;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+ sizes[0] = 1;
+ sizes[1] = 0;
+ array.length = 2;
+
+ // Make sure size + offset is always within bounds of child array
+ offsets[0] = 42;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message,
+ "Offset: 42 + size: 1 at index: 0 exceeds length of child view:
1");
+
+ offsets[0] = 0;
+ sizes[1] = 42;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message,
+ "Offset: 1 + size: 42 at index: 1 exceeds length of child view:
1");
+
+ ArrowArrayRelease(&array);
+ ArrowArrayViewReset(&array_view);
+}
+
+TEST(ArrayTest, ArrayViewTestListViewGet) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_LIST_VIEW);
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
+
+ // Build a valid array
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LIST_VIEW),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 42), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 0);
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 2), 1);
+
+ ArrowArrayRelease(&array);
+ ArrowArrayViewReset(&array_view);
+}
+
+TEST(ArrayTest, ArrayViewTestLargeListView) {
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+ ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_LARGE_LIST_VIEW);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LARGE_LIST_VIEW);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_VIEW_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 8 *
sizeof(int64_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
NANOARROW_BUFFER_TYPE_SIZE);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 8 *
sizeof(int64_t));
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ // Can't assume the offsets buffer exists for length == 0
+ ArrowArrayViewSetLength(&array_view, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+
+ // This should pass validation even if all buffers are empty
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 5 *
sizeof(int64_t));
+
+ // Build a valid array ([[1234], []])
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LARGE_LIST_VIEW),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
+
+ // Expect error for offsets that will cause bad access
+ auto offsets = const_cast<int64_t*>(reinterpret_cast<const
int64_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative offset -1 at index 0");
+
+ // For a sliced array, this can still pass validation
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ // Check for negative element sizes
+ array.offset = 0;
+ array.length = 2;
+ offsets[0] = 0;
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative offset -1 at index 1");
+
+ // Sliced array should also fail validation because the first element is
negative
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ error.message,
+ "Invalid negative offset -1 at index 1"); // TODO: should the index
include offset?
+
+ // ...but the array should be valid if we do not include the negative offset
+ array.offset = 0;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+ offsets[1] = 1;
+ array.length = 2;
+
+ // Expect error for invalid sizes
+ auto sizes = const_cast<int64_t*>(reinterpret_cast<const
int64_t*>(array.buffers[2]));
+
+ sizes[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative size -1 at index 0");
+
+ // For a sliced array, this can still pass validation
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+
+ // Check for negative element sizes
+ array.offset = 0;
+ array.length = 2;
+ sizes[0] = 0;
+ sizes[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message, "Invalid negative size -1 at index 1");
+
+ // Sliced array should also fail validation because the first element is
negative
+ array.offset = 1;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ error.message,
+ "Invalid negative size -1 at index 1"); // TODO: should the index
include offset?
+
+ // ...but the array should be valid if we do not include the negative offset
+ array.offset = 0;
+ array.length = 1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
+ sizes[0] = 1;
+ sizes[1] = 0;
+ array.length = 2;
+
+ // Make sure size + offset is always within bounds of child array
+ offsets[0] = 42;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message,
+ "Offset: 42 + size: 1 at index: 0 exceeds length of child view:
1");
+
+ offsets[0] = 0;
+ sizes[1] = 42;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(error.message,
+ "Offset: 1 + size: 42 at index: 1 exceeds length of child view:
1");
+ ArrowArrayRelease(&array);
+ ArrowArrayViewReset(&array_view);
+}
+
+TEST(ArrayTest, ArrayViewTestLargeListViewGet) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_LARGE_LIST_VIEW);
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
+
+ // Build a valid array
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LARGE_LIST_VIEW),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 42), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 0);
+ EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 2), 1);
+
+ ArrowArrayRelease(&array);
+ ArrowArrayViewReset(&array_view);
+}
+
TEST(ArrayTest, ArrayViewTestFixedSizeList) {
struct ArrowArrayView array_view;
ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_FIXED_SIZE_LIST);
diff --git a/src/nanoarrow/common/inline_array.h b/src/nanoarrow/common/inline_array.h
index 9fe5e0b7..8e2d3b93 100644
--- a/src/nanoarrow/common/inline_array.h
+++ b/src/nanoarrow/common/inline_array.h
@@ -290,6 +290,9 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a
case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_VALIDITY:
continue;
+ case NANOARROW_BUFFER_TYPE_SIZE:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
+ continue;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
// Append the current value at the end of the offset buffer for each element
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
@@ -310,7 +313,10 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
}
continue;
-
+ case NANOARROW_BUFFER_TYPE_VIEW_OFFSET:
+ NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
+ continue;
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
// These cases return above
@@ -757,6 +763,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
if (child_length > INT32_MAX) {
return EOVERFLOW;
}
+
NANOARROW_RETURN_NOT_OK(
ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length));
break;
@@ -772,6 +779,31 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
return EINVAL;
}
break;
+ case NANOARROW_TYPE_LIST_VIEW: {
+ child_length = array->children[0]->length;
+ if (child_length > INT32_MAX) {
+ return EOVERFLOW;
+ }
+
+ const int32_t last_valid_offset = (int32_t)private_data->list_view_offset;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), last_valid_offset));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(
+ ArrowArrayBuffer(array, 2), (int32_t)child_length - last_valid_offset));
+ private_data->list_view_offset = child_length;
+ break;
+ }
+ case NANOARROW_TYPE_LARGE_LIST_VIEW: {
+ child_length = array->children[0]->length;
+ const int64_t last_valid_offset = private_data->list_view_offset;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), last_valid_offset));
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, 2),
+ child_length - last_valid_offset));
+ private_data->list_view_offset = child_length;
+ break;
+ }
+
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array->n_children; i++) {
child_length = array->children[i]->length;
@@ -1046,8 +1078,10 @@ static inline int64_t ArrowArrayViewListChildOffset(
const struct ArrowArrayView* array_view, int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LIST_VIEW:
return array_view->buffer_views[1].data.as_int32[i];
case NANOARROW_TYPE_LARGE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
return array_view->buffer_views[1].data.as_int64[i];
default:
return -1;
diff --git a/src/nanoarrow/common/inline_types.h b/src/nanoarrow/common/inline_types.h
index d4fdfba9..3c0d9579 100644
--- a/src/nanoarrow/common/inline_types.h
+++ b/src/nanoarrow/common/inline_types.h
@@ -455,7 +455,9 @@ enum ArrowType {
NANOARROW_TYPE_BINARY_VIEW,
NANOARROW_TYPE_STRING_VIEW,
NANOARROW_TYPE_DECIMAL32,
- NANOARROW_TYPE_DECIMAL64
+ NANOARROW_TYPE_DECIMAL64,
+ NANOARROW_TYPE_LIST_VIEW,
+ NANOARROW_TYPE_LARGE_LIST_VIEW,
};
/// \brief Get a string value of an enum ArrowType value
@@ -552,6 +554,10 @@ static inline const char* ArrowTypeString(enum ArrowType type) {
return "binary_view";
case NANOARROW_TYPE_STRING_VIEW:
return "string_view";
+ case NANOARROW_TYPE_LIST_VIEW:
+ return "list_view";
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
+ return "large_list_view";
default:
return NULL;
}
@@ -630,7 +636,9 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
NANOARROW_BUFFER_TYPE_DATA,
NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
- NANOARROW_BUFFER_TYPE_VARIADIC_SIZE
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
+ NANOARROW_BUFFER_TYPE_VIEW_OFFSET,
+ NANOARROW_BUFFER_TYPE_SIZE,
};
/// \brief The maximum number of fixed buffers in an ArrowArrayView or ArrowLayout
@@ -864,6 +872,9 @@ struct ArrowArrayPrivateData {
// Size of each variadic buffer in bytes
int64_t* variadic_buffer_sizes;
+
+ // The current offset used to build list views
+ int64_t list_view_offset;
};
/// \brief A representation of an interval.
diff --git a/src/nanoarrow/common/schema.c b/src/nanoarrow/common/schema.c
index b0e538c0..b6cf1b00 100644
--- a/src/nanoarrow/common/schema.c
+++ b/src/nanoarrow/common/schema.c
@@ -125,6 +125,10 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType type) {
return "+l";
case NANOARROW_TYPE_LARGE_LIST:
return "+L";
+ case NANOARROW_TYPE_LIST_VIEW:
+ return "+vl";
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
+ return "+vL";
case NANOARROW_TYPE_STRUCT:
return "+s";
case NANOARROW_TYPE_MAP:
@@ -143,6 +147,8 @@ static int ArrowSchemaInitChildrenIfNeeded(struct ArrowSchema* schema,
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ case NANOARROW_TYPE_LIST_VIEW:
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 1));
ArrowSchemaInit(schema->children[0]);
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], "item"));
@@ -871,6 +877,24 @@ static ArrowErrorCode ArrowSchemaViewParse(struct ArrowSchemaView* schema_view,
return EINVAL;
}
+ // views
+ case 'v':
+ switch (format[2]) {
+ case 'l':
+ schema_view->storage_type = NANOARROW_TYPE_LIST_VIEW;
+ schema_view->type = NANOARROW_TYPE_LIST_VIEW;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ case 'L':
+ schema_view->storage_type = NANOARROW_TYPE_LARGE_LIST_VIEW;
+ schema_view->type = NANOARROW_TYPE_LARGE_LIST_VIEW;
+ *format_end_out = format + 3;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(
+ error, "Expected view format string +vl or +vL but found '%s'", format);
+ return EINVAL;
+ }
default:
ArrowErrorSet(error, "Expected nested type format string but found '%s'",
format);
@@ -1201,7 +1225,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct ArrowSchemaView* schema_vie
return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LIST_VIEW:
case NANOARROW_TYPE_LARGE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
diff --git a/src/nanoarrow/common/schema_test.cc b/src/nanoarrow/common/schema_test.cc
index 03a32499..3a12a0b3 100644
--- a/src/nanoarrow/common/schema_test.cc
+++ b/src/nanoarrow/common/schema_test.cc
@@ -155,6 +155,35 @@ TEST(SchemaTest, SchemaTestInitNestedList) {
#endif
}
+TEST(SchemaTest, SchemaTestInitListView) {
+ struct ArrowSchema schema;
+
+ EXPECT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW), NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "+vl");
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+ auto arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(list_view(int32())));
+#else
+ ArrowSchemaRelease(&schema);
+#endif
+
+ EXPECT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LARGE_LIST_VIEW),
+ NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "+vL");
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+ arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(large_list_view(int32())));
+#else
+ ArrowSchemaRelease(&schema);
+#endif
+}
+
TEST(SchemaTest, SchemaTestInitNestedStruct) {
struct ArrowSchema schema;
@@ -1347,6 +1376,38 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
EXPECT_EQ(ArrowSchemaToStdString(&schema), "large_list<item: int32>");
ArrowSchemaRelease(&schema);
+ ARROW_EXPECT_OK(ExportType(*list_view(int32()), &schema));
+ EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(schema_view.type, NANOARROW_TYPE_LIST_VIEW);
+ EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_LIST_VIEW);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_VIEW_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_SIZE);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 32);
+ EXPECT_EQ(ArrowSchemaToStdString(&schema), "list_view<item: int32>");
+ ArrowSchemaRelease(&schema);
+
+ ARROW_EXPECT_OK(ExportType(*large_list_view(int32()), &schema));
+ EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
+ EXPECT_EQ(schema_view.type, NANOARROW_TYPE_LARGE_LIST_VIEW);
+ EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_LARGE_LIST_VIEW);
+ EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_VIEW_OFFSET);
+ EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_SIZE);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT64);
+ EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_INT64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
+ EXPECT_EQ(schema_view.layout.element_size_bits[2], 64);
+ EXPECT_EQ(ArrowSchemaToStdString(&schema), "large_list_view<item: int32>");
+ ArrowSchemaRelease(&schema);
+
ARROW_EXPECT_OK(ExportType(*fixed_size_list(int32(), 123), &schema));
EXPECT_EQ(ArrowSchemaViewInit(&schema_view, &schema, &error), NANOARROW_OK);
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_FIXED_SIZE_LIST);
diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c
index 400625f2..d78950b5 100644
--- a/src/nanoarrow/common/utils.c
+++ b/src/nanoarrow/common/utils.c
@@ -190,6 +190,24 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
layout->element_size_bits[1] = 128;
+ break;
+
+ case NANOARROW_TYPE_LIST_VIEW:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
+ layout->element_size_bits[1] = 32;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE;
+ layout->buffer_data_type[2] = NANOARROW_TYPE_INT32;
+ layout->element_size_bits[2] = 32;
+ break;
+ case NANOARROW_TYPE_LARGE_LIST_VIEW:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_VIEW_OFFSET;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
+ layout->element_size_bits[1] = 64;
+ layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_SIZE;
+ layout->buffer_data_type[2] = NANOARROW_TYPE_INT64;
+ layout->element_size_bits[2] = 64;
+ break;
default:
break;
diff --git a/src/nanoarrow/testing/testing.cc b/src/nanoarrow/testing/testing.cc
index aba4b11f..fb588df1 100644
--- a/src/nanoarrow/testing/testing.cc
+++ b/src/nanoarrow/testing/testing.cc
@@ -1969,6 +1969,29 @@ ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView* array_vi
}
break;
}
+ case NANOARROW_BUFFER_TYPE_VIEW_OFFSET: {
+ NANOARROW_RETURN_NOT_OK(
+ Check(value.contains("VIEW_OFFSET"), error, "missing key 'VIEW_OFFSET'"));
+ const auto& offset = value["VIEW_OFFSET"];
+
+ if (array_view->layout.element_size_bits[buffer_i] == 32) {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int64_t>(offset, buffer, error));
+ }
+ break;
+ }
+ case NANOARROW_BUFFER_TYPE_SIZE: {
+ NANOARROW_RETURN_NOT_OK(Check(value.contains("SIZE"), error, "missing key 'SIZE'"));
+ const auto& offset = value["SIZE"];
+
+ if (array_view->layout.element_size_bits[buffer_i] == 32) {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(offset, buffer, error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(SetBufferInt<int64_t>(offset, buffer, error));
+ }
+ break;
+ }
case NANOARROW_BUFFER_TYPE_DATA: {
NANOARROW_RETURN_NOT_OK(Check(value.contains("DATA"), error, "missing key 'DATA'"));