This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new e52ff0de feat: Add ArrowArrayView accessors to inspect buffer
properties (#638)
e52ff0de is described below
commit e52ff0deb0c362b2af435350cd570f4921c387e0
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Oct 1 12:16:19 2024 -0500
feat: Add ArrowArrayView accessors to inspect buffer properties (#638)
This PR adds accessors for the buffer view, buffer type, buffer data
type, and element bit width of each buffer in an `ArrowArrayView`.
Before string/binary view support was added, these were obtained by
directly accessing the `layout` and `buffer_views` members; however,
for string/binary view arrays this required special-casing and some
duplicated code in the R/Python bindings.
This PR also removes the dependence on the `ArrowArrayView::array`
member, since this member is optional (i.e., the data backing an
`ArrowArrayView` need not be related to an actual `ArrowArray`).
---
python/src/nanoarrow/_array.pyx | 55 +++------
python/tests/test_c_array.py | 2 +-
r/src/array.c | 36 +-----
r/src/buffer.c | 7 +-
r/tests/testthat/_snaps/array.md | 10 +-
src/nanoarrow/common/array.c | 8 +-
src/nanoarrow/common/array_test.cc | 239 ++++++++++++++++++++++++------------
src/nanoarrow/common/inline_array.h | 132 +++++++++++++++++++-
src/nanoarrow/common/inline_types.h | 6 +-
src/nanoarrow/common/schema_test.cc | 4 +-
src/nanoarrow/common/utils.c | 4 +-
src/nanoarrow/nanoarrow.h | 42 +++++++
src/nanoarrow/testing/testing.cc | 3 +-
13 files changed, 381 insertions(+), 167 deletions(-)
diff --git a/python/src/nanoarrow/_array.pyx b/python/src/nanoarrow/_array.pyx
index 0bd3a961..869087b5 100644
--- a/python/src/nanoarrow/_array.pyx
+++ b/python/src/nanoarrow/_array.pyx
@@ -46,8 +46,13 @@ from nanoarrow_c cimport (
ArrowArrayViewComputeNullCount,
ArrowArrayViewInitFromSchema,
ArrowArrayViewIsNull,
- ArrowArrayViewGetStringUnsafe,
ArrowArrayViewGetBytesUnsafe,
+ ArrowArrayViewGetBufferDataType,
+ ArrowArrayViewGetBufferElementSizeBits,
+ ArrowArrayViewGetBufferType,
+ ArrowArrayViewGetBufferView,
+ ArrowArrayViewGetNumBuffers,
+ ArrowArrayViewGetStringUnsafe,
ArrowArrayViewSetArray,
ArrowArrayViewSetArrayMinimal,
ArrowBitCountSet,
@@ -62,7 +67,8 @@ from nanoarrow_c cimport (
ArrowValidationLevel,
NANOARROW_BUFFER_TYPE_DATA,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
- NANOARROW_BUFFER_TYPE_DATA_VIEW,
+ NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
NANOARROW_BUFFER_TYPE_TYPE_ID,
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_VALIDITY,
@@ -84,7 +90,6 @@ from nanoarrow._device cimport Device, CSharedSyncEvent
from nanoarrow._buffer cimport CBuffer, CBufferView
from nanoarrow._schema cimport CSchema, CLayout
-from nanoarrow cimport _types
from nanoarrow._utils cimport (
alloc_c_array,
alloc_c_device_array,
@@ -196,44 +201,20 @@ cdef class CArrayView:
@property
def n_buffers(self):
- if _types.is_data_view(self._ptr.storage_type):
- return 2 + self._ptr.n_variadic_buffers + 1
-
- return self.layout.n_buffers
+ return ArrowArrayViewGetNumBuffers(self._ptr)
def _buffer_info(self, int64_t i):
if i < 0 or i >= self.n_buffers:
raise IndexError(f"{i} out of range [0, {self.n_buffers}]")
- if (
- _types.is_data_view(self._ptr.storage_type)
- and i == (2 + self._ptr.n_variadic_buffers)
- ):
- return (
- NANOARROW_BUFFER_TYPE_DATA,
- _types.INT64,
- 64,
- <uintptr_t>self._ptr.array.buffers[i],
- (self._ptr.n_variadic_buffers) * 8
- )
- elif (
- _types.is_data_view(self._ptr.storage_type)
- and i >= 2
- ):
- return (
- NANOARROW_BUFFER_TYPE_DATA,
- _types.STRING if int(self._ptr.storage_type) ==
_types.STRING_VIEW else _types.BINARY,
- 0,
- <uintptr_t>self._ptr.array.buffers[i],
- (<int64_t*>self._ptr.array.buffers[2 +
self._ptr.n_variadic_buffers])[i - 2]
- )
+ cdef ArrowBufferView view = ArrowArrayViewGetBufferView(self._ptr, i)
return (
- self._ptr.layout.buffer_type[i],
- self._ptr.layout.buffer_data_type[i],
- self._ptr.layout.element_size_bits[i],
- <uintptr_t>self._ptr.buffer_views[i].data.data,
- self._ptr.buffer_views[i].size_bytes
+ ArrowArrayViewGetBufferType(self._ptr, i),
+ ArrowArrayViewGetBufferDataType(self._ptr, i),
+ ArrowArrayViewGetBufferElementSizeBits(self._ptr, i),
+ <uintptr_t>view.data.data,
+ view.size_bytes
)
def buffer_type(self, int64_t i):
@@ -248,8 +229,10 @@ cdef class CArrayView:
return "data_offset"
elif buffer_type == NANOARROW_BUFFER_TYPE_DATA:
return "data"
- elif buffer_type == NANOARROW_BUFFER_TYPE_DATA_VIEW:
- return "data_view"
+ elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ return "variadic_data"
+ elif buffer_type == NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
+ return "variadic_size"
else:
return "none"
diff --git a/python/tests/test_c_array.py b/python/tests/test_c_array.py
index d2993ec6..ae7b80a4 100644
--- a/python/tests/test_c_array.py
+++ b/python/tests/test_c_array.py
@@ -337,7 +337,7 @@ def test_c_array_from_iterable_bytes():
na.c_array([buf_2d], na.binary())
-def test_c_array_from_iterable__view():
+def test_c_array_from_iterable_view():
string = na.c_array(
[b"abc", None, b"a string longer than 12 bytes"], na.binary_view()
)
diff --git a/r/src/array.c b/r/src/array.c
index 5ae53ddd..c4603b11 100644
--- a/r/src/array.c
+++ b/r/src/array.c
@@ -370,38 +370,10 @@ static SEXP borrow_buffer(struct ArrowArrayView*
array_view, int64_t i, SEXP she
SEXP buffer_class = PROTECT(Rf_allocVector(STRSXP, 2));
SET_STRING_ELT(buffer_class, 1, Rf_mkChar("nanoarrow_buffer"));
- struct ArrowBufferView view;
- enum ArrowBufferType buffer_type;
- enum ArrowType data_type;
- int64_t element_size_bits;
- if ((array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
- array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) &&
- i >= NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
- view.data.data = array_view->array->buffers[i];
-
- if (i == (array_view->n_variadic_buffers +
NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
- view.size_bytes = array_view->n_variadic_buffers * sizeof(int64_t);
- buffer_type = NANOARROW_BUFFER_TYPE_DATA;
- data_type = NANOARROW_TYPE_INT64;
- element_size_bits = 64;
- } else {
- view.size_bytes =
- array_view->variadic_buffer_sizes[i -
NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
- buffer_type = NANOARROW_BUFFER_TYPE_DATA;
-
- if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
- data_type = NANOARROW_TYPE_STRING;
- } else {
- data_type = NANOARROW_TYPE_BINARY;
- }
- element_size_bits = 0;
- }
- } else {
- view = array_view->buffer_views[i];
- buffer_type = array_view->layout.buffer_type[i];
- data_type = array_view->layout.buffer_data_type[i];
- element_size_bits = array_view->layout.element_size_bits[i];
- }
+ struct ArrowBufferView view = ArrowArrayViewGetBufferView(array_view, i);
+ enum ArrowBufferType buffer_type = ArrowArrayViewGetBufferType(array_view,
i);
+ enum ArrowType data_type = ArrowArrayViewGetBufferDataType(array_view, i);
+ int64_t element_size_bits =
ArrowArrayViewGetBufferElementSizeBits(array_view, i);
SEXP buffer_xptr =
PROTECT(buffer_borrowed_xptr(view.data.data, view.size_bytes, shelter));
diff --git a/r/src/buffer.c b/r/src/buffer.c
index 20dd79dc..e1ab50da 100644
--- a/r/src/buffer.c
+++ b/r/src/buffer.c
@@ -163,8 +163,11 @@ SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr) {
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
buffer_type_string = "union_offset";
break;
- case NANOARROW_BUFFER_TYPE_DATA_VIEW:
- buffer_type_string = "data_view";
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ buffer_type_string = "variadic_data";
+ break;
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
+ buffer_type_string = "variadic_size";
break;
default:
buffer_type_string = "unknown";
diff --git a/r/tests/testthat/_snaps/array.md b/r/tests/testthat/_snaps/array.md
index 9af9870b..ccc2d1ef 100644
--- a/r/tests/testthat/_snaps/array.md
+++ b/r/tests/testthat/_snaps/array.md
@@ -9,8 +9,8 @@
$ offset : int 0
$ buffers :List of 3
..$ :<nanoarrow_buffer validity<bool>[null] ``
- ..$ :<nanoarrow_buffer data_view<string_view>[26][416 b]>`
- ..$ :<nanoarrow_buffer data<int64>[null] ``
+ ..$ :<nanoarrow_buffer data<string_view>[26][416 b]>`
+ ..$ :<nanoarrow_buffer variadic_size<int64>[null] ``
$ dictionary: NULL
$ children : list()
@@ -25,9 +25,9 @@
$ offset : int 0
$ buffers :List of 4
..$ :<nanoarrow_buffer validity<bool>[null] ``
- ..$ :<nanoarrow_buffer data_view<string_view>[1][16 b]>`
- ..$ :<nanoarrow_buffer data<string>[35 b]> `this string is longer than
12 ...`
- ..$ :<nanoarrow_buffer data<int64>[1][8 b]> `35`
+ ..$ :<nanoarrow_buffer data<string_view>[1][16 b]>`
+ ..$ :<nanoarrow_buffer variadic_data<string>[35 b]> `this string is
longer...`
+ ..$ :<nanoarrow_buffer variadic_size<int64>[1][8 b]> `35`
$ dictionary: NULL
$ children : list()
diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c
index dbb902df..9e32ac89 100644
--- a/src/nanoarrow/common/array.c
+++ b/src/nanoarrow/common/array.c
@@ -696,11 +696,12 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length)
_ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i]
* length) /
8;
continue;
- case NANOARROW_BUFFER_TYPE_DATA_VIEW:
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
array_view->buffer_views[i].size_bytes = element_size_bytes * length;
continue;
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_NONE:
array_view->buffer_views[i].size_bytes = 0;
continue;
@@ -734,6 +735,7 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
array_view->length = array->length;
array_view->null_count = array->null_count;
array_view->variadic_buffer_sizes = NULL;
+ array_view->variadic_buffers = NULL;
array_view->n_variadic_buffers = 0;
int64_t buffers_required = 0;
@@ -767,6 +769,7 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1);
array_view->n_variadic_buffers = nvariadic_buf;
buffers_required += nvariadic_buf + 1;
+ array_view->variadic_buffers = array->buffers +
NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers -
1];
}
@@ -863,9 +866,10 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
break;
case NANOARROW_BUFFER_TYPE_TYPE_ID:
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
- case NANOARROW_BUFFER_TYPE_DATA_VIEW:
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
break;
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_NONE:
continue;
}
diff --git a/src/nanoarrow/common/array_test.cc
b/src/nanoarrow/common/array_test.cc
index 128c7ea4..37443cd4 100644
--- a/src/nanoarrow/common/array_test.cc
+++ b/src/nanoarrow/common/array_test.cc
@@ -1889,14 +1889,17 @@ TEST(ArrayTest, ArrayViewTestBasic) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_INT32);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
- EXPECT_EQ(array_view.layout.element_size_bits[1], 32);
+ EXPECT_EQ(ArrowArrayViewGetNumBuffers(&array_view), 2);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 0),
NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 1),
NANOARROW_TYPE_INT32);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 32);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 5 * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 5 *
sizeof(int32_t));
struct ArrowArray array;
@@ -1912,11 +1915,11 @@ TEST(ArrayTest, ArrayViewTestBasic) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
- EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
- EXPECT_EQ(array_view.buffer_views[1].data.as_int32[1], 12);
- EXPECT_EQ(array_view.buffer_views[1].data.as_int32[2], 13);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 3 *
sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).data.as_int32[0], 11);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).data.as_int32[1], 12);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).data.as_int32[2], 13);
// Build with validity buffer
ASSERT_EQ(ArrowBitmapAppend(ArrowArrayValidityBitmap(&array), 1, 3),
NANOARROW_OK);
@@ -1926,8 +1929,8 @@ TEST(ArrayTest, ArrayViewTestBasic) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 3 *
sizeof(int32_t));
// Expect error for bad offset + length
array.length = -1;
@@ -2247,27 +2250,29 @@ TEST(ArrayTest, ArrayViewTestString) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_STRING);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
- EXPECT_EQ(array_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
- EXPECT_EQ(array_view.layout.element_size_bits[1], 32);
- EXPECT_EQ(array_view.layout.element_size_bits[2], 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 32);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 0);
// Can't assume offset buffer size > 0 if length == 0
ArrowArrayViewSetLength(&array_view, 0);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
// This should pass validation even if all buffers are empty
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int32_t));
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (5 + 1) * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
struct ArrowArray array;
@@ -2278,9 +2283,9 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
ArrowArrayRelease(&array);
// Build + check zero length
@@ -2289,9 +2294,9 @@ TEST(ArrayTest, ArrayViewTestString) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
// Build non-zero length (the array ["abcd", "efg"])
ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 0),
NANOARROW_OK);
@@ -2307,9 +2312,10 @@ TEST(ArrayTest, ArrayViewTestString) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + array.length) *
sizeof(int32_t));
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 7);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (1 + array.length) * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 7);
// Expect error for offsets that will cause bad access
int32_t* offsets =
@@ -2378,27 +2384,29 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LARGE_STRING);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
- EXPECT_EQ(array_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
- EXPECT_EQ(array_view.layout.element_size_bits[1], 64);
- EXPECT_EQ(array_view.layout.element_size_bits[2], 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 64);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 0);
// Can't assume offset buffer size > 0 if length == 0
ArrowArrayViewSetLength(&array_view, 0);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
// This should pass validation even if all buffers are empty
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int64_t));
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (5 + 1) * sizeof(int64_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
struct ArrowArray array;
@@ -2409,9 +2417,9 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
ArrowArrayRelease(&array);
// Build + check zero length
@@ -2420,9 +2428,9 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
// Build non-zero length (the array ["abcd", "efg"])
ASSERT_EQ(ArrowBufferAppendInt64(ArrowArrayBuffer(&array, 1), 0),
NANOARROW_OK);
@@ -2437,9 +2445,10 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 2) * sizeof(int64_t));
- EXPECT_EQ(array_view.buffer_views[2].size_bytes, 7);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (1 + 2) * sizeof(int64_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 7);
// Expect error for offsets that will cause bad access
int64_t* offsets =
@@ -2491,14 +2500,80 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ArrowArrayViewReset(&array_view);
}
+void TestArrowArrayViewBinaryView(enum ArrowType type, enum ArrowType
buffer_data_type) {
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+ ArrowArrayViewInitFromType(&array_view, type);
+
+ // Check buffer properties for an empty view
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, type);
+ EXPECT_EQ(ArrowArrayViewGetNumBuffers(&array_view), 3);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 0),
NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 1), type);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 2),
NANOARROW_TYPE_INT64);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 128);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 64);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes, 0);
+
+ // Build a valid non-empty array with at least one variadic buffer
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, type), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendString(&array, "longer than 12 bytes"_asv),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+
+ // Check buffer properties
+ EXPECT_EQ(ArrowArrayViewGetNumBuffers(&array_view), 4);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
NANOARROW_BUFFER_TYPE_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 2),
+ NANOARROW_BUFFER_TYPE_VARIADIC_DATA);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 3),
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 0),
NANOARROW_TYPE_BOOL);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 1), type);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 2), buffer_data_type);
+ EXPECT_EQ(ArrowArrayViewGetBufferDataType(&array_view, 3),
NANOARROW_TYPE_INT64);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 128);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 2), 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 3), 64);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 16);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 2).size_bytes,
+ strlen("longer than 12 bytes"));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 3).size_bytes,
sizeof(int64_t));
+
+ array.release(&array);
+}
+
+TEST(ArrayTest, ArrayViewTestStringView) {
+ ASSERT_NO_FATAL_FAILURE(
+ TestArrowArrayViewBinaryView(NANOARROW_TYPE_STRING_VIEW,
NANOARROW_TYPE_STRING));
+}
+
+TEST(ArrayTest, ArrayViewTestBinaryView) {
+ ASSERT_NO_FATAL_FAILURE(
+ TestArrowArrayViewBinaryView(NANOARROW_TYPE_BINARY_VIEW,
NANOARROW_TYPE_BINARY));
+}
+
TEST(ArrayTest, ArrayViewTestStruct) {
struct ArrowArrayView array_view;
ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_STRUCT);
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_STRUCT);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
// Expect error for out-of-memory
#if !defined(__SANITIZE_ADDRESS__)
@@ -2515,7 +2590,7 @@ TEST(ArrayTest, ArrayViewTestStruct) {
EXPECT_EQ(array_view.children[1]->storage_type, NANOARROW_TYPE_NA);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 5 *
sizeof(int32_t));
// Except error for attempting to allocate a children array that already
exists
@@ -2531,10 +2606,11 @@ TEST(ArrayTest, ArrayViewTestList) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LIST);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
- EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
- EXPECT_EQ(array_view.layout.element_size_bits[1], 8 * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 8 *
sizeof(int32_t));
EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
EXPECT_EQ(array_view.n_children, 1);
@@ -2543,16 +2619,17 @@ TEST(ArrayTest, ArrayViewTestList) {
// Can't assume the offsets buffer exists for length == 0
ArrowArrayViewSetLength(&array_view, 0);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
// This should pass validation even if all buffers are empty
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (5 + 1) * sizeof(int32_t));
// Build a valid array ([[1234], []])
struct ArrowArray array;
@@ -2692,10 +2769,11 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LARGE_LIST);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
- EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
- EXPECT_EQ(array_view.layout.element_size_bits[1], 8 * sizeof(int64_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 1),
+ NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 1), 8 *
sizeof(int64_t));
EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
EXPECT_EQ(array_view.n_children, 1);
@@ -2704,16 +2782,17 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
// Can't assume the offsets buffer exists for length == 0
ArrowArrayViewSetLength(&array_view, 0);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 0);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 0);
// This should pass validation even if all buffers are empty
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
NANOARROW_OK);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int64_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes,
+ (5 + 1) * sizeof(int64_t));
// Build a valid array ([[1234], []])
struct ArrowArray array;
@@ -2787,8 +2866,8 @@ TEST(ArrayTest, ArrayViewTestFixedSizeList) {
EXPECT_EQ(array_view.array, nullptr);
EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_FIXED_SIZE_LIST);
- EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferType(&array_view, 0),
NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(ArrowArrayViewGetBufferElementSizeBits(&array_view, 0), 1);
EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
EXPECT_EQ(array_view.n_children, 1);
@@ -2796,7 +2875,7 @@ TEST(ArrayTest, ArrayViewTestFixedSizeList) {
EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
ArrowArrayViewSetLength(&array_view, 5);
- EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).size_bytes, 1);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 15 *
sizeof(int32_t));
ArrowArrayViewReset(&array_view);
@@ -2916,7 +2995,7 @@ TEST(ArrayTest, ArrayViewTestDictionary) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(array_view.buffer_views[1].size_bytes, 2 * sizeof(int32_t));
+ EXPECT_EQ(ArrowArrayViewGetBufferView(&array_view, 1).size_bytes, 2 *
sizeof(int32_t));
EXPECT_EQ(array_view.dictionary->buffer_views[2].size_bytes, 6);
EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
@@ -3240,7 +3319,7 @@ void TestGetFromNumericArrayView() {
NANOARROW_OK);
// We're trying to test behavior with no validity buffer, so make sure
that's true
- ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
+ ASSERT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).data.data, nullptr);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 1), 0);
@@ -3319,7 +3398,7 @@ TEST(ArrayViewTest, ArrayViewTestGetFloat16) {
NANOARROW_OK);
// We're trying to test behavior with no validity buffer, so make sure
that's true
- ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
+ ASSERT_EQ(ArrowArrayViewGetBufferView(&array_view, 0).data.data, nullptr);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 1), 0);
diff --git a/src/nanoarrow/common/inline_array.h
b/src/nanoarrow/common/inline_array.h
index 415e7d92..a397335b 100644
--- a/src/nanoarrow/common/inline_array.h
+++ b/src/nanoarrow/common/inline_array.h
@@ -286,6 +286,8 @@ static inline ArrowErrorCode
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
switch (private_data->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_NONE:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_VALIDITY:
continue;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
@@ -301,7 +303,6 @@ static inline ArrowErrorCode
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
i++;
continue;
case NANOARROW_BUFFER_TYPE_DATA:
- case NANOARROW_BUFFER_TYPE_DATA_VIEW:
// Zero out the next bit of memory
if (private_data->layout.element_size_bits[i] % 8 == 0) {
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes
* n));
@@ -826,6 +827,132 @@ static inline void ArrowArrayViewMove(struct
ArrowArrayView* src,
ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED);
}
+// Count the buffers exposed by array_view.
+//
+// Binary view and string view storage reports the fixed buffers, every
+// variadic data buffer, and one extra trailing buffer. All other storage
+// types report the number of leading layout slots whose buffer type is not
+// NANOARROW_BUFFER_TYPE_NONE.
+static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view) {
+  if (array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ||
+      array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
+    return NANOARROW_BINARY_VIEW_FIXED_BUFFERS + array_view->n_variadic_buffers + 1;
+  }
+
+  // Walk the fixed layout slots and stop at the first unused one
+  int64_t count = 0;
+  while (count < NANOARROW_MAX_FIXED_BUFFERS &&
+         array_view->layout.buffer_type[count] != NANOARROW_BUFFER_TYPE_NONE) {
+    count++;
+  }
+
+  return count;
+}
+
+// Return a view (pointer plus size in bytes) of buffer i of array_view.
+//
+// For binary view and string view storage:
+//   - i below NANOARROW_BINARY_VIEW_FIXED_BUFFERS returns the stored fixed
+//     buffer view;
+//   - i at or past the last variadic data buffer returns the array of
+//     variadic buffer sizes;
+//   - anything in between returns the corresponding variadic data buffer,
+//     sized from variadic_buffer_sizes.
+// For every other storage type, i at or beyond NANOARROW_MAX_FIXED_BUFFERS
+// yields an empty view (NULL data, zero bytes); otherwise the stored buffer
+// view is returned. Negative i is not checked.
+static inline struct ArrowBufferView ArrowArrayViewGetBufferView(
+    struct ArrowArrayView* array_view, int64_t i) {
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY_VIEW:
+    case NANOARROW_TYPE_STRING_VIEW:
+      if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+        return array_view->buffer_views[i];
+      } else if (i >=
+                 (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+        // The sizes array holds one int64_t per variadic buffer, so its byte
+        // length is computed from int64_t (not double) in signed arithmetic.
+        struct ArrowBufferView view;
+        view.data.as_int64 = array_view->variadic_buffer_sizes;
+        view.size_bytes = array_view->n_variadic_buffers * (int64_t)sizeof(int64_t);
+        return view;
+      } else {
+        struct ArrowBufferView view;
+        view.data.data =
+            array_view->variadic_buffers[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
+        view.size_bytes =
+            array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
+        return view;
+      }
+    default:
+      // We need this check to avoid -Warray-bounds from complaining
+      if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+        struct ArrowBufferView view;
+        view.data.data = NULL;
+        view.size_bytes = 0;
+        return view;
+      } else {
+        return array_view->buffer_views[i];
+      }
+  }
+}
+
+// Return the role of buffer i of array_view.
+//
+// For binary view and string view storage, fixed buffers report the type
+// recorded in the layout, the buffer immediately after the last variadic data
+// buffer reports NANOARROW_BUFFER_TYPE_VARIADIC_SIZE, and every other index
+// reports NANOARROW_BUFFER_TYPE_VARIADIC_DATA. For every other storage type,
+// i at or beyond NANOARROW_MAX_FIXED_BUFFERS reports
+// NANOARROW_BUFFER_TYPE_NONE.
+//
+// Defined static inline like the other accessors in this header so that the
+// definition never acquires external linkage when included from several
+// translation units.
+static inline enum ArrowBufferType ArrowArrayViewGetBufferType(
+    struct ArrowArrayView* array_view, int64_t i) {
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY_VIEW:
+    case NANOARROW_TYPE_STRING_VIEW:
+      if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+        return array_view->layout.buffer_type[i];
+      } else if (i ==
+                 (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+        return NANOARROW_BUFFER_TYPE_VARIADIC_SIZE;
+      } else {
+        return NANOARROW_BUFFER_TYPE_VARIADIC_DATA;
+      }
+    default:
+      // We need this check to avoid -Warray-bounds from complaining
+      if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+        return NANOARROW_BUFFER_TYPE_NONE;
+      } else {
+        return array_view->layout.buffer_type[i];
+      }
+  }
+}
+
+// Return the element data type of buffer i of array_view.
+//
+// For binary view and string view storage, fixed buffers report the data type
+// recorded in the layout, indices at or past the end of the variadic data
+// buffers report NANOARROW_TYPE_INT64, and variadic data buffers report
+// NANOARROW_TYPE_BINARY or NANOARROW_TYPE_STRING to match the storage type.
+// For every other storage type, i at or beyond NANOARROW_MAX_FIXED_BUFFERS
+// reports NANOARROW_TYPE_UNINITIALIZED.
+static inline enum ArrowType ArrowArrayViewGetBufferDataType(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int is_view_storage = array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ||
+                              array_view->storage_type == NANOARROW_TYPE_STRING_VIEW;
+
+  if (!is_view_storage) {
+    // The explicit bound keeps -Warray-bounds from complaining
+    if (i < NANOARROW_MAX_FIXED_BUFFERS) {
+      return array_view->layout.buffer_data_type[i];
+    }
+
+    return NANOARROW_TYPE_UNINITIALIZED;
+  }
+
+  if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+    return array_view->layout.buffer_data_type[i];
+  }
+
+  // First index past the variadic data buffers
+  const int64_t first_past_data =
+      array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
+  if (i >= first_past_data) {
+    return NANOARROW_TYPE_INT64;
+  }
+
+  return array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ? NANOARROW_TYPE_BINARY
+                                                                : NANOARROW_TYPE_STRING;
+}
+
+// Return the element width, in bits, of buffer i of array_view.
+//
+// For binary view and string view storage, fixed buffers report the width
+// recorded in the layout, indices at or past the end of the variadic data
+// buffers report the width of an int64_t, and variadic data buffers report 0.
+// For every other storage type, i at or beyond NANOARROW_MAX_FIXED_BUFFERS
+// reports 0.
+static inline int64_t ArrowArrayViewGetBufferElementSizeBits(
+    struct ArrowArrayView* array_view, int64_t i) {
+  if (array_view->storage_type != NANOARROW_TYPE_BINARY_VIEW &&
+      array_view->storage_type != NANOARROW_TYPE_STRING_VIEW) {
+    // The explicit bound keeps -Warray-bounds from complaining
+    if (i < NANOARROW_MAX_FIXED_BUFFERS) {
+      return array_view->layout.element_size_bits[i];
+    }
+
+    return 0;
+  }
+
+  if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+    return array_view->layout.element_size_bits[i];
+  }
+
+  if (i < (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+    // Variadic data buffers have no fixed element width
+    return 0;
+  }
+
+  return sizeof(int64_t) * 8;
+}
+
static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView*
array_view,
int64_t i) {
const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
@@ -921,8 +1048,7 @@ static struct ArrowBufferView
ArrowArrayViewGetBytesFromViewArrayUnsafe(
return out;
}
- const int32_t buf_index = bv->ref.buffer_index +
NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
- out.data.data = array_view->array->buffers[buf_index];
+ out.data.data = array_view->variadic_buffers[bv->ref.buffer_index];
out.data.as_uint8 += bv->ref.offset;
return out;
}
diff --git a/src/nanoarrow/common/inline_types.h
b/src/nanoarrow/common/inline_types.h
index 6f2d1103..0a602618 100644
--- a/src/nanoarrow/common/inline_types.h
+++ b/src/nanoarrow/common/inline_types.h
@@ -623,7 +623,8 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
NANOARROW_BUFFER_TYPE_DATA,
- NANOARROW_BUFFER_TYPE_DATA_VIEW
+ NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE
};
/// \brief The maximum number of fixed buffers in an ArrowArrayView or
ArrowLayout
@@ -816,6 +817,9 @@ struct ArrowArrayView {
/// \brief Number of variadic buffers
int32_t n_variadic_buffers;
+ /// \brief Pointers to variadic buffers of binary/string_view arrays
+ const void** variadic_buffers;
+
/// \brief Size of each variadic buffer
int64_t* variadic_buffer_sizes;
};
diff --git a/src/nanoarrow/common/schema_test.cc
b/src/nanoarrow/common/schema_test.cc
index 3371632b..df0d1aba 100644
--- a/src/nanoarrow/common/schema_test.cc
+++ b/src/nanoarrow/common/schema_test.cc
@@ -918,7 +918,7 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(schema_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_VIEW);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
EXPECT_EQ(schema_view.layout.buffer_data_type[1],
NANOARROW_TYPE_STRING_VIEW);
EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
@@ -931,7 +931,7 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
EXPECT_EQ(schema_view.type, NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.storage_type, NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
- EXPECT_EQ(schema_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_VIEW);
+ EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
EXPECT_EQ(schema_view.layout.buffer_data_type[1],
NANOARROW_TYPE_BINARY_VIEW);
EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
diff --git a/src/nanoarrow/common/utils.c b/src/nanoarrow/common/utils.c
index 70d5da77..93e4d4b7 100644
--- a/src/nanoarrow/common/utils.c
+++ b/src/nanoarrow/common/utils.c
@@ -180,12 +180,12 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum
ArrowType storage_type) {
break;
case NANOARROW_TYPE_BINARY_VIEW:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_VIEW;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW;
layout->element_size_bits[1] = 128;
break;
case NANOARROW_TYPE_STRING_VIEW:
- layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_VIEW;
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
layout->element_size_bits[1] = 128;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index f65d053a..312ecbec 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -1053,6 +1053,48 @@ ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct
ArrowArrayView* array_view,
const struct ArrowArray* array,
struct ArrowError* error);
+/// \brief Get the number of buffers
+///
+/// The number of buffers referred to by this ArrowArrayView. In many cases
this can also
+/// be calculated from the ArrowLayout member of the ArrowArrayView or
ArrowSchemaView;
+/// however, for binary view and string view types, the number of total
buffers depends on
+/// the number of variadic buffers.
+static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView*
array_view);
+
+/// \brief Get a view of a specific buffer from an ArrowArrayView
+///
+/// This is the ArrowArrayView equivalent of ArrowArray::buffers[i] that
includes
+/// size information (if known).
+static inline struct ArrowBufferView ArrowArrayViewGetBufferView(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the function of a specific buffer in an ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the function of each buffer may be different between two arrays of the
same type
+/// depending on the number of variadic buffers.
+static inline enum ArrowBufferType ArrowArrayViewGetBufferType(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the data type of a specific buffer in an ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the data type of each buffer may be different between two arrays of the
same type
+/// depending on the number of variadic buffers.
+static inline enum ArrowType ArrowArrayViewGetBufferDataType(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the element size (in bits) of a specific buffer in an
ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the element width of each buffer may be different between two arrays of
the same type
+/// depending on the number of variadic buffers.
+static inline int64_t ArrowArrayViewGetBufferElementSizeBits(
+ struct ArrowArrayView* array_view, int64_t i);
+
/// \brief Performs checks on the content of an ArrowArrayView
///
/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
diff --git a/src/nanoarrow/testing/testing.cc b/src/nanoarrow/testing/testing.cc
index ce453b9c..59f3617c 100644
--- a/src/nanoarrow/testing/testing.cc
+++ b/src/nanoarrow/testing/testing.cc
@@ -1932,7 +1932,8 @@ ArrowErrorCode SetArrayColumnBuffers(const json& value,
ArrowArrayView* array_vi
}
break;
}
- case NANOARROW_BUFFER_TYPE_DATA_VIEW:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
return ENOTSUP;
case NANOARROW_BUFFER_TYPE_NONE:
break;