This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new be3b2b1 feat: Implement `ArrowArrayViewValidateFull()` (#174)
be3b2b1 is described below
commit be3b2b13a7e2ad977f6a1cad9a157e617bf42dcc
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Mar 29 08:05:50 2023 -0400
feat: Implement `ArrowArrayViewValidateFull()` (#174)
This is required for IPC reading because corrupted offset and/or union
type ID buffers could result in consumers accessing out-of-bounds
elements. `ArrowArrayViewSetArray()` already checked the *last* element
of offset buffers against lengths but didn't check the first element and
didn't check for decreasing consecutive offsets (i.e., negative element
sizes).
---------
Co-authored-by: Will Jones <[email protected]>
---
src/nanoarrow/array.c | 179 ++++++++++++++++++++++++++++++++++++++++
src/nanoarrow/array_inline.h | 14 +++-
src/nanoarrow/array_test.cc | 140 +++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 6 ++
src/nanoarrow/nanoarrow_types.h | 3 +-
5 files changed, 337 insertions(+), 5 deletions(-)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 976b39e..81675bb 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -622,6 +622,21 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
struct ArrowArray* array,
struct ArrowError* error) {
array_view->array = array;
+
+ // Check length and offset
+ if (array->offset < 0) {
+ ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of %ld",
+ (long)array->offset);
+ return EINVAL;
+ }
+
+ if (array->length < 0) {
+ ArrowErrorSet(error, "Expected array length >= 0 but found array length of %ld",
+ (long)array->length);
+ return EINVAL;
+ }
+
+ // First pass setting lengths that do not depend on the data buffer
ArrowArrayViewSetLength(array_view, array->offset + array->length);
int64_t buffers_required = 0;
@@ -648,15 +663,25 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->n_children != array->n_children) {
+ ArrowErrorSet(error, "Expected %ld children but found %ld children",
+ (long)array_view->n_children, (long)array->n_children);
return EINVAL;
}
// Check child sizes and calculate sizes that depend on data in the array buffers
+ int64_t first_offset;
int64_t last_offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int32[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
array_view->buffer_views[2].size_bytes = last_offset;
@@ -665,6 +690,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int64[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
array_view->buffer_views[2].size_bytes = last_offset;
@@ -694,6 +726,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int32[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
if (array->children[0]->length < last_offset) {
@@ -716,6 +755,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int64[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
if (array->children[0]->length < last_offset) {
@@ -758,3 +804,136 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return NANOARROW_OK;
}
+
+static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
+ struct ArrowError* error) {
+ if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
+ return NANOARROW_OK;
+ }
+
+ for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
+ int32_t diff = view.data.as_int32[i] - view.data.as_int32[i - 1];
+ if (diff < 0) {
+ ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
+ (long)i, (long)diff);
+ return EINVAL;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowAssertIncreasingInt64(struct ArrowBufferView view,
+ struct ArrowError* error) {
+ if (view.size_bytes <= (int64_t)sizeof(int64_t)) {
+ return NANOARROW_OK;
+ }
+
+ for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
+ int64_t diff = view.data.as_int64[i] - view.data.as_int64[i - 1];
+ if (diff < 0) {
+ ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
+ (long)i, (long)diff);
+ return EINVAL;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value,
+ int8_t max_value, struct ArrowError* error) {
+ for (int64_t i = 0; i < view.size_bytes; i++) {
+ if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) {
+ ArrowErrorSet(error,
+ "[%ld] Expected buffer value between %d and %d but found value %d",
+ (long)i, (int)min_value, (int)max_value, (int)view.data.as_int8[i]);
+ return EINVAL;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
+ int64_t n_values, struct ArrowError* error) {
+ for (int64_t i = 0; i < view.size_bytes; i++) {
+ int item_found = 0;
+ for (int64_t j = 0; j < n_values; j++) {
+ if (view.data.as_int8[i] == values[j]) {
+ item_found = 1;
+ break;
+ }
+ }
+
+ if (!item_found) {
+ ArrowErrorSet(error, "[%ld] Unexpected buffer value %d", (long)i,
+ (int)view.data.as_int8[i]);
+ return EINVAL;
+ }
+ }
+
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ for (int i = 0; i < 3; i++) {
+ switch (array_view->layout.buffer_type[i]) {
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ if (array_view->layout.element_size_bits[i] == 32) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowAssertIncreasingInt64(array_view->buffer_views[i], error));
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
+ array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
+ // Check that we have valid type ids
+ if (array_view->union_type_id_map == NULL ||
+
_ArrowParsedUnionTypeIdsWillEqualChildIndices(array_view->union_type_id_map,
+ array_view->n_children,
+ array_view->n_children))
{
+
NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
+ array_view->n_children - 1,
error));
+ } else {
+ NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
+ array_view->union_type_id_map
+ 128,
+ array_view->n_children,
error));
+ }
+ }
+
+ if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
+ // Check that offsets refer to child elements that actually exist
+ for (int64_t i = 0; i < array_view->array->length; i++) {
+ int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
+ int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
+ int64_t child_length = array_view->array->children[child_id]->length;
+ if (offset < 0 || offset > child_length) {
+ ArrowErrorSet(
+ error,
+ "[%ld] Expected union offset for child id %d to be between 0 and %ld but "
+ "found offset value %ld",
+ (long)i, (int)child_id, (long)child_length, offset);
+ return EINVAL;
+ }
+ }
+ }
+
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i],
error));
+ }
+
+ return NANOARROW_OK;
+}
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index c6f569a..bcb2fe7 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -94,10 +94,9 @@ static inline int8_t _ArrowParseUnionTypeIds(const char*
type_ids, int8_t* out)
return -1;
}
-static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char*
type_id_str,
- int64_t
n_children) {
- int8_t type_ids[128];
- int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids);
+static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const
int8_t* type_ids,
+ int64_t
n_type_ids,
+ int64_t
n_children) {
if (n_type_ids != n_children) {
return 0;
}
@@ -111,6 +110,13 @@ static inline int8_t
_ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id
return 1;
}
+static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char*
type_id_str,
+ int64_t
n_children) {
+ int8_t type_ids[128];
+ int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids);
+ return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids,
n_children);
+}
+
static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray*
array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 2a3fd2e..ce07da2 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -1267,6 +1267,7 @@ TEST(ArrayTest, ArrayViewTestBasic) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
@@ -1279,9 +1280,21 @@ TEST(ArrayTest, ArrayViewTestBasic) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
+ // Expect error for bad offset + length
+ array.length = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected array length >= 0 but found array length of -1");
+ array.length = 3;
+
+ array.offset = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected array offset >= 0 but found array offset of -1");
+ array.offset = 0;
+
// Expect error for the wrong number of buffers
ArrowArrayViewReset(&array_view);
ArrowArrayViewInitFromType(&array_view, NANOARROW_TYPE_STRING);
@@ -1338,6 +1351,7 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING),
NANOARROW_OK);
array.null_count = 0;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1351,10 +1365,25 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
+ // Expect error for offsets that will cause bad access
+ int32_t* offsets =
+ const_cast<int32_t*>(reinterpret_cast<const int32_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected first offset >= 0 but found -1");
+ offsets[0] = 0;
+
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found element size -1");
+
array.release(&array);
ArrowArrayViewReset(&array_view);
}
@@ -1390,6 +1419,7 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING),
NANOARROW_OK);
array.null_count = 0;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1403,10 +1433,25 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int64_t));
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
+ // Expect error for offsets that will cause bad access
+ int64_t* offsets =
+ const_cast<int64_t*>(reinterpret_cast<const int64_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected first offset >= 0 but found -1");
+ offsets[0] = 0;
+
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found element size -1");
+
array.release(&array);
ArrowArrayViewReset(&array_view);
}
@@ -1462,6 +1507,36 @@ TEST(ArrayTest, ArrayViewTestList) {
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int32_t));
+ // Build a valid array
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LIST), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+
+ // Expect error for offsets that will cause bad access
+ struct ArrowError error;
+ int32_t* offsets =
+ const_cast<int32_t*>(reinterpret_cast<const int32_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected first offset >= 0 but found -1");
+ offsets[0] = 0;
+
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found element size -1");
+
+ array.release(&array);
ArrowArrayViewReset(&array_view);
}
@@ -1485,6 +1560,36 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (5 + 1) * sizeof(int64_t));
+ // Build a valid array
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_LARGE_LIST),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromType(array.children[0], NANOARROW_TYPE_INT32),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 1234), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+
+ // Expect error for offsets that will cause bad access
+ struct ArrowError error;
+ int64_t* offsets =
+ const_cast<int64_t*>(reinterpret_cast<const int64_t*>(array.buffers[1]));
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
+ EXPECT_STREQ(error.message, "Expected first offset >= 0 but found -1");
+ offsets[0] = 0;
+
+ offsets[1] = -1;
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found element size -1");
+
+ array.release(&array);
ArrowArrayViewReset(&array_view);
}
@@ -1544,6 +1649,7 @@ TEST(ArrayTest, ArrayViewTestStructArray) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes,
sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
@@ -1586,6 +1692,7 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
ASSERT_EQ(ArrowArrayFinishBuilding(&array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 3 *
sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
@@ -1622,6 +1729,7 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
ArrowArrayViewInitFromType(array_view.children[1], NANOARROW_TYPE_STRING);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -1633,12 +1741,32 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
// The test schema explicitly sets the type_ids 0,1 and this should work too
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 1), 1);
+ // Check that bad type ids/offset are caught by validate full
+ struct ArrowError error;
+ int8_t* type_ids =
+ const_cast<int8_t*>(reinterpret_cast<const int8_t*>(array.buffers[0]));
+ int32_t* offsets =
+ const_cast<int32_t*>(reinterpret_cast<const int32_t*>(array.buffers[1]));
+ type_ids[0] = -1;
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message,
+ "[0] Expected buffer value between 0 and 1 but found value -1");
+ type_ids[0] = 0;
+
+ offsets[0] = -1;
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message,
+ "[0] Expected union offset for child id 0 to be between 0 and 1 but found "
+ "offset value -1");
+ offsets[0] = 0;
+
ArrowArrayViewReset(&array_view);
// Reversing the type ids should result in the same type ids but
@@ -1646,12 +1774,19 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
ASSERT_EQ(ArrowSchemaSetFormat(&schema, "+ud:1,0"), NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 1);
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 1), 0);
+ // Check that bad type ids are caught by validate full
+ type_ids[0] = -1;
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_STREQ(error.message, "[0] Unexpected buffer value -1");
+ type_ids[0] = 0;
+
ArrowArrayViewReset(&array_view);
// Check the raw mapping in the array view for numbers that are easier to check
@@ -1692,6 +1827,7 @@ TEST(ArrayTest, ArrayViewTestDenseUnionGet) {
// Initialize the array view
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
// Check the values that will be used to index into children
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -1736,6 +1872,7 @@ TEST(ArrayTest, ArrayViewTestSparseUnionGet) {
// Initialize the array view
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
// Check the values that will be used to index into children
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -1776,6 +1913,7 @@ void TestGetFromNumericArrayView() {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
@@ -1806,6 +1944,7 @@ void TestGetFromNumericArrayView() {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
// We're trying to test behavior with no validity buffer, so make sure that's true
ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
@@ -1852,6 +1991,7 @@ void TestGetFromBinary(BuilderClass& builder) {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 1d2a721..558d3d1 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -112,6 +112,8 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
+#define ArrowArrayViewValidateFull \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -893,6 +895,10 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length);
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
struct ArrowArray* array, struct
ArrowError* error);
+/// \brief Performs extra checks on the array that was set via ArrowArrayViewSetArray()
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+ struct ArrowError* error);
+
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 752c6d6..b9af6bb 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -503,7 +503,8 @@ struct ArrowArrayView {
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
/// such that child_index == union_type_id_map[type_id] and
- /// type_id == union_type_id_map[128 + child_index]
+ /// type_id == union_type_id_map[128 + child_index]. This value may be
+ /// NULL in the case where child_id == type_id.
int8_t* union_type_id_map;
};