This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new d9f428d refactor: Unify `ArrowArrayView` and `ArrowArray` validation
(#201)
d9f428d is described below
commit d9f428dec04f1a610b1c76b58f454be560a004df
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed May 24 21:35:03 2023 -0400
refactor: Unify `ArrowArrayView` and `ArrowArray` validation (#201)
The initial motivation for this refactor was to make it possible to go
straight from Arrow IPC to ArrowArrayView with zero heap allocations and
improve IPC validation; however, the changes needed for that (basically,
give every ArrowArrayView its own offset, length, and null_count)
allowed unifying the validation that we do when building arrays and the
validation we do when wrapping a foreign ArrowArray in an
ArrowArrayView. This PR ensures that no internal ArrowArrayView
functions actually refer to the `array` member since there is no
guarantee that an ArrowArray backs the buffers. Along the way, error
messages and documentation were improved.
---
r/R/as-array.R | 8 +-
src/nanoarrow/array.c | 392 +++++++++++++++++++++++++---------------
src/nanoarrow/array_inline.h | 14 +-
src/nanoarrow/array_test.cc | 118 +++++++-----
src/nanoarrow/nanoarrow.h | 20 +-
src/nanoarrow/nanoarrow_types.h | 47 +++--
6 files changed, 380 insertions(+), 219 deletions(-)
diff --git a/r/R/as-array.R b/r/R/as-array.R
index a9781b1..68ed939 100644
--- a/r/R/as-array.R
+++ b/r/R/as-array.R
@@ -267,8 +267,12 @@ union_array_from_data_frame <- function(x, schema) {
"sparse_union" = {
struct_schema <- na_struct(schema$children)
array <- as_nanoarrow_array(x, array = struct_schema)
- nanoarrow_array_set_schema(array, schema, validate = FALSE)
- array$buffers[[1]] <- as.raw(child_index)
+ array <- nanoarrow_array_modify(
+ array,
+ list(buffers = list(as.raw(child_index))),
+ validate = FALSE
+ )
+ nanoarrow_array_set_schema(array, schema, validate = TRUE)
array
},
stop("Attempt to create union from non-union array type")
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index ec321e6..c669114 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -296,6 +296,16 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct
ArrowArrayView* array_v
ArrowArrayViewInitFromType(array_view, private_data->storage_type);
array_view->layout = private_data->layout;
array_view->array = array;
+ array_view->length = array->length;
+ array_view->offset = array->offset;
+ array_view->null_count = array->null_count;
+
+ array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data;
+ array_view->buffer_views[0].size_bytes =
private_data->bitmap.buffer.size_bytes;
+ array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data;
+ array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes;
+ array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data;
+ array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes;
int result = ArrowArrayViewAllocateChildren(array_view, array->n_children);
if (result != NANOARROW_OK) {
@@ -399,39 +409,6 @@ static void ArrowArrayFlushInternalPointers(struct
ArrowArray* array) {
}
}
-static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
- struct ArrowArray* array, struct ArrowArrayView* array_view, char
set_length,
- struct ArrowError* error) {
- if (set_length) {
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
- }
-
- for (int64_t i = 0; i < array->n_buffers; i++) {
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
- continue;
- }
-
- int64_t expected_size = array_view->buffer_views[i].size_bytes;
- int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
-
- if (actual_size < expected_size) {
- ArrowErrorSet(
- error,
- "Expected buffer %d to size >= %ld bytes but found buffer with %ld
bytes",
- (int)i, (long)expected_size, (long)actual_size);
- return EINVAL;
- }
- }
-
- for (int64_t i = 0; i < array->n_children; i++) {
- NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
- array->children[i], array_view->children[i], set_length, error));
- }
-
- return NANOARROW_OK;
-}
-
ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
enum ArrowValidationLevel
validation_level,
struct ArrowError* error) {
@@ -450,45 +427,11 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct
ArrowArray* array,
return NANOARROW_OK;
}
- // Check buffer sizes to make sure we are not sending an ArrowArray
- // into the wild that is going to segfault
+ // For validation, initialize an ArrowArrayView with our known buffer sizes
struct ArrowArrayView array_view;
-
NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view,
array),
error);
-
- // Check buffer sizes once without using internal buffer data since
- // ArrowArrayViewSetArray() assumes that all the buffers are long enough
- // and issues invalid reads on offset buffers if they are not
- int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1,
error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- if (validation_level == NANOARROW_VALIDATION_LEVEL_MINIMAL) {
- ArrowArrayViewReset(&array_view);
- return NANOARROW_OK;
- }
-
- result = ArrowArrayViewSetArray(&array_view, array, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- return result;
- }
-
- if (validation_level == NANOARROW_VALIDATION_LEVEL_DEFAULT) {
- ArrowArrayViewReset(&array_view);
- return NANOARROW_OK;
- }
-
- result = ArrowArrayViewValidateFull(&array_view, error);
+ int result = ArrowArrayViewValidate(&array_view, validation_level, error);
ArrowArrayViewReset(&array_view);
return result;
}
@@ -604,7 +547,6 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view)
{
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t
length) {
for (int i = 0; i < 3; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
- array_view->buffer_views[i].data.data = NULL;
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_VALIDITY:
@@ -648,11 +590,11 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length)
}
}
-ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
- struct ArrowError* error) {
- array_view->array = array;
-
+// This version recursively extracts information from the array and stores it
+// in the array view, performing any checks that require the original array.
+static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error) {
// Check length and offset
if (array->offset < 0) {
ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of
%ld",
@@ -666,8 +608,10 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return EINVAL;
}
- // First pass setting lengths that do not depend on the data buffer
- ArrowArrayViewSetLength(array_view, array->offset + array->length);
+ array_view->array = array;
+ array_view->offset = array->offset;
+ array_view->length = array->length;
+ array_view->null_count = array->null_count;
int64_t buffers_required = 0;
for (int i = 0; i < 3; i++) {
@@ -677,28 +621,165 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
buffers_required++;
- // If the null_count is 0, the validity buffer can be NULL
- if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
- array->null_count == 0 && array->buffers[i] == NULL) {
+ // Set buffer pointer
+ array_view->buffer_views[i].data.data = array->buffers[i];
+
+ // If non-null, set buffer size to unknown.
+ if (array->buffers[i] == NULL) {
array_view->buffer_views[i].size_bytes = 0;
+ } else {
+ array_view->buffer_views[i].size_bytes = -1;
}
-
- array_view->buffer_views[i].data.data = array->buffers[i];
}
+ // Check the number of buffers
if (buffers_required != array->n_buffers) {
ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d
buffer(s)",
(int)buffers_required, (int)array->n_buffers);
return EINVAL;
}
+ // Check number of children
if (array_view->n_children != array->n_children) {
ArrowErrorSet(error, "Expected %ld children but found %ld children",
(long)array_view->n_children, (long)array->n_children);
return EINVAL;
}
- // Check child sizes and calculate sizes that depend on data in the array
buffers
+ // Recurse for children
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+
NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i],
+ array->children[i],
error));
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ // Calculate buffer sizes that do not require buffer access. If marked as
+ // unknown, assign the buffer size; otherwise, validate it.
+ int64_t offset_plus_length = array_view->offset + array_view->length;
+
+ // Only loop over the first two buffers because the size of the third buffer
+ // is always data dependent for all current Arrow types.
+ for (int i = 0; i < 2; i++) {
+ int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+ int64_t min_buffer_size_bytes;
+
+ switch (array_view->layout.buffer_type[i]) {
+ case NANOARROW_BUFFER_TYPE_VALIDITY:
+ if (array_view->null_count == 0 &&
array_view->buffer_views[i].size_bytes == 0) {
+ continue;
+ }
+
+ min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ // Probably don't want/need to rely on the producer to have allocated
an
+ // offsets buffer of length 1 for a zero-size array
+ min_buffer_size_bytes =
+ (offset_plus_length != 0) * element_size_bytes *
(offset_plus_length + 1);
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA:
+ min_buffer_size_bytes =
+ _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i]
*
+ offset_plus_length) /
+ 8;
+ break;
+ case NANOARROW_BUFFER_TYPE_TYPE_ID:
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ min_buffer_size_bytes = element_size_bytes * offset_plus_length;
+ break;
+ case NANOARROW_BUFFER_TYPE_NONE:
+ continue;
+ }
+
+ // Assign or validate buffer size
+ if (array_view->buffer_views[i].size_bytes == -1) {
+ array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
+ } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes)
{
+ ArrowErrorSet(error,
+ "Expected %s array buffer %d to have size >= %ld bytes but
found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type), (int)i,
+ (long)min_buffer_size_bytes,
+ (long)array_view->buffer_views[i].size_bytes);
+ return EINVAL;
+ }
+ }
+
+ // For list, fixed-size list and map views, we can validate the number of
children
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST:
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ case NANOARROW_TYPE_MAP:
+ if (array_view->n_children != 1) {
+ ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child
arrays",
+ ArrowTypeString(array_view->storage_type),
+ (long)array_view->n_children);
+ return EINVAL;
+ }
+ default:
+ break;
+ }
+
+ // For struct, the sparse union, and the fixed-size list views, we can
validate child
+ // lengths.
+ int64_t child_min_length;
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_SPARSE_UNION:
+ case NANOARROW_TYPE_STRUCT:
+ child_min_length = (array_view->offset + array_view->length);
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ if (array_view->children[i]->length < child_min_length) {
+ ArrowErrorSet(
+ error,
+ "Expected struct child %d to have length >= %ld but found child
with "
+ "length %ld",
+ (int)(i + 1), (long)(child_min_length),
+ (long)array_view->children[i]->length);
+ return EINVAL;
+ }
+ }
+ break;
+
+ case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ child_min_length = (array_view->offset + array_view->length) *
+ array_view->layout.child_size_elements;
+ if (array_view->children[0]->length < child_min_length) {
+ ArrowErrorSet(error,
+ "Expected child of fixed_size_list array to have length
>= %ld but "
+ "found array with length %ld",
+ (long)child_min_length,
(long)array_view->children[0]->length);
+ return EINVAL;
+ }
+ break;
+ default:
+ break;
+ }
+
+ // Recurse for children
+ for (int64_t i = 0; i < array_view->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewValidateMinimal(array_view->children[i], error));
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
+ // Perform minimal validation. This will validate or assign
+ // buffer sizes as long as buffer access is not required.
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));
+
+ // Calculate buffer sizes or child lengths that require accessing the offsets
+ // buffer. Where appropriate, validate that the first offset is >= 0.
+ // If a buffer size is marked as unknown, assign it; otherwise, validate it.
+ int64_t offset_plus_length = array_view->offset + array_view->length;
+
int64_t first_offset;
int64_t last_offset;
switch (array_view->storage_type) {
@@ -712,11 +793,22 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
- array_view->buffer_views[2].size_bytes = last_offset;
+ last_offset =
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+
+ // If the data buffer size is unknown, assign it; otherwise, check it
+ if (array_view->buffer_views[2].size_bytes == -1) {
+ array_view->buffer_views[2].size_bytes = last_offset;
+ } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+ ArrowErrorSet(error,
+ "Expected %s array buffer 2 to have size >= %ld bytes
but found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type),
(long)last_offset,
+ (long)array_view->buffer_views[2].size_bytes);
+ return EINVAL;
+ }
}
break;
+
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
@@ -727,34 +819,38 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
- array_view->buffer_views[2].size_bytes = last_offset;
+ last_offset =
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+
+ // If the data buffer size is unknown, assign it; otherwise, check it
+ if (array_view->buffer_views[2].size_bytes == -1) {
+ array_view->buffer_views[2].size_bytes = last_offset;
+ } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+ ArrowErrorSet(error,
+ "Expected %s array buffer 2 to have size >= %ld bytes
but found "
+ "buffer with %ld bytes",
+ ArrowTypeString(array_view->storage_type),
(long)last_offset,
+ (long)array_view->buffer_views[2].size_bytes);
+ return EINVAL;
+ }
}
break;
+
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array_view->n_children; i++) {
- if (array->children[i]->length < (array->offset + array->length)) {
+ if (array_view->children[i]->length < offset_plus_length) {
ArrowErrorSet(
error,
"Expected struct child %d to have length >= %ld but found child
with "
"length %ld",
- (int)(i + 1), (long)(array->offset + array->length),
- (long)array->children[i]->length);
+ (int)(i + 1), (long)offset_plus_length,
+ (long)array_view->children[i]->length);
return EINVAL;
}
}
break;
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_MAP: {
- const char* type_name =
- array_view->storage_type == NANOARROW_TYPE_LIST ? "list" : "map";
- if (array->n_children != 1) {
- ArrowErrorSet(error, "Expected 1 child of %s array but found %d child
arrays",
- type_name, (int)array->n_children);
- return EINVAL;
- }
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_MAP:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int32[0];
if (first_offset < 0) {
@@ -763,27 +859,20 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
- if (array->children[0]->length < last_offset) {
+ last_offset =
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+ if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
- "Expected child of %s array with length >= %ld but found array
with "
+ "Expected child of %s array to have length >= %ld but found
array with "
"length %ld",
- type_name, (long)last_offset, (long)array->children[0]->length);
+ ArrowTypeString(array_view->storage_type), (long)last_offset,
+ (long)array_view->children[0]->length);
return EINVAL;
}
}
break;
- }
- case NANOARROW_TYPE_LARGE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of large list array but found %d child
arrays",
- (int)array->n_children);
- return EINVAL;
- }
+ case NANOARROW_TYPE_LARGE_LIST:
if (array_view->buffer_views[1].size_bytes != 0) {
first_offset = array_view->buffer_views[1].data.as_int64[0];
if (first_offset < 0) {
@@ -792,49 +881,43 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return EINVAL;
}
- last_offset =
- array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
- if (array->children[0]->length < last_offset) {
+ last_offset =
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+ if (array_view->children[0]->length < last_offset) {
ArrowErrorSet(
error,
- "Expected child of large list array with length >= %ld but found
array "
+ "Expected child of large list array to have length >= %ld but
found array "
"with length %ld",
- (long)last_offset, (long)array->children[0]->length);
+ (long)last_offset, (long)array_view->children[0]->length);
return EINVAL;
}
}
break;
- case NANOARROW_TYPE_FIXED_SIZE_LIST:
- if (array->n_children != 1) {
- ArrowErrorSet(error,
- "Expected 1 child of fixed-size array but found %d child
arrays",
- (int)array->n_children);
- return EINVAL;
- }
-
- last_offset =
- (array->offset + array->length) *
array_view->layout.child_size_elements;
- if (array->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of fixed-size list array with length >= %ld but
found array "
- "with length %ld",
- (long)last_offset, (long)array->children[0]->length);
- return EINVAL;
- }
- break;
default:
break;
}
+ // Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(
- ArrowArrayViewSetArray(array_view->children[i], array->children[i],
error));
+ ArrowArrayViewValidateDefault(array_view->children[i], error));
}
return NANOARROW_OK;
}
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+ struct ArrowArray* array,
+ struct ArrowError* error) {
+ // Extract information from the array into the array view
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array,
error));
+
+ // Run default validation. Because we've marked all non-NULL buffers as
having unknown
+ // size, validation will also update the buffer sizes as it goes.
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));
+
+ return NANOARROW_OK;
+}
+
static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
struct ArrowError* error) {
if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
@@ -906,8 +989,8 @@ static int ArrowAssertInt8In(struct ArrowBufferView view,
const int8_t* values,
return NANOARROW_OK;
}
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
- struct ArrowError* error) {
+static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+ struct ArrowError* error) {
for (int i = 0; i < 3; i++) {
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
@@ -935,8 +1018,8 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
} else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices(
array_view->union_type_id_map, array_view->n_children,
array_view->n_children)) {
-
NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
-
(int8_t)(array_view->n_children - 1), error));
+ NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(
+ array_view->buffer_views[0], 0, (int8_t)(array_view->n_children -
1), error));
} else {
NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
array_view->union_type_id_map
+ 128,
@@ -947,10 +1030,10 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION &&
array_view->union_type_id_map != NULL) {
// Check that offsets refer to child elements that actually exist
- for (int64_t i = 0; i < array_view->array->length; i++) {
+ for (int64_t i = 0; i < array_view->length; i++) {
int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
- int64_t child_length = array_view->array->children[child_id]->length;
+ int64_t child_length = array_view->children[child_id]->length;
if (offset < 0 || offset > child_length) {
ArrowErrorSet(
error,
@@ -968,3 +1051,22 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
return NANOARROW_OK;
}
+
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+ enum ArrowValidationLevel
validation_level,
+ struct ArrowError* error) {
+ switch (validation_level) {
+ case NANOARROW_VALIDATION_LEVEL_NONE:
+ return NANOARROW_OK;
+ case NANOARROW_VALIDATION_LEVEL_MINIMAL:
+ return ArrowArrayViewValidateMinimal(array_view, error);
+ case NANOARROW_VALIDATION_LEVEL_DEFAULT:
+ return ArrowArrayViewValidateDefault(array_view, error);
+ case NANOARROW_VALIDATION_LEVEL_FULL:
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view,
error));
+ return ArrowArrayViewValidateFull(array_view, error);
+ }
+
+ ArrowErrorSet(error, "validation_level not recognized");
+ return EINVAL;
+}
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 98b4f3e..dda4634 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -654,7 +654,7 @@ static inline void ArrowArrayViewMove(struct
ArrowArrayView* src,
static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view,
int64_t i) {
const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
- i += array_view->array->offset;
+ i += array_view->offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_NA:
return 0x01;
@@ -715,7 +715,7 @@ static inline int64_t ArrowArrayViewListChildOffset(struct
ArrowArrayView* array
static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView*
array_view,
int64_t i) {
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
- i += array_view->array->offset;
+ i += array_view->offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
return data_view->data.as_int64[i];
@@ -746,7 +746,7 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct
ArrowArrayView* array_vi
static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView*
array_view,
int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
@@ -778,7 +778,7 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct
ArrowArrayView* array_
static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView*
array_view,
int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
@@ -810,7 +810,7 @@ static inline double ArrowArrayViewGetDoubleUnsafe(struct
ArrowArrayView* array_
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
struct ArrowArrayView* array_view, int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const char* data_view = array_view->buffer_views[2].data.as_char;
@@ -843,7 +843,7 @@ static inline struct ArrowStringView
ArrowArrayViewGetStringUnsafe(
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
struct ArrowArrayView* array_view, int64_t i) {
- i += array_view->array->offset;
+ i += array_view->offset;
struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8;
@@ -877,7 +877,7 @@ static inline struct ArrowBufferView
ArrowArrayViewGetBytesUnsafe(
static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView*
array_view,
int64_t i, struct
ArrowDecimal* out) {
- i += array_view->array->offset;
+ i += array_view->offset;
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
switch (array_view->storage_type) {
case NANOARROW_TYPE_DECIMAL128:
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 5dbac0d..ea16c57 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -226,14 +226,16 @@ TEST(ArrayTest, ArrayTestBuildByBuffer) {
array.length = 8;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected buffer 1 to size >= 36 bytes but found buffer with 32
bytes");
+ "Expected string array buffer 1 to have size >= 36 bytes but
found buffer "
+ "with 32 bytes");
array.length = 7;
int32_t* offsets_buffer =
reinterpret_cast<int32_t*>(ArrowArrayBuffer(&array, 1)->data);
offsets_buffer[7] = offsets_buffer[7] + 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected buffer 2 to size >= 11 bytes but found buffer with 10
bytes");
+ "Expected string array buffer 2 to have size >= 11 bytes but
found buffer "
+ "with 10 bytes");
array.release(&array);
}
@@ -289,7 +291,8 @@ TEST(ArrayTest, ArrayTestExplicitValidationLevel) {
EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
EINVAL);
EXPECT_STREQ(error.message,
- "Expected buffer 1 to size >= 12 bytes but found buffer with 0
bytes");
+ "Expected string array buffer 1 to have size >= 12 bytes but
found buffer "
+ "with 0 bytes");
array.release(&array);
}
@@ -992,7 +995,7 @@ TEST(ArrayTest, ArrayTestAppendToListArray) {
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(
ArrowErrorMessage(&error),
- "Expected child of list array with length >= 3 but found array with
length 2");
+ "Expected child of list array to have length >= 3 but found array with
length 2");
array.children[0]->length = array.children[0]->length + 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1045,15 +1048,16 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
array.n_children = 0;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected 1 child of large list array but found 0 child
arrays");
+ "Expected 1 child of large_list array but found 0 child
arrays");
array.n_children = 1;
// Make sure final child size is checked at finish
array.children[0]->length = array.children[0]->length - 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
- EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected child of large list array with length >= 3 but found
array with "
- "length 2");
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Expected child of large list array to have length >= 3 but found array
with "
+ "length 2");
array.children[0]->length = array.children[0]->length + 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1118,7 +1122,7 @@ TEST(ArrayTest, ArrayTestAppendToMapArray) {
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(
ArrowErrorMessage(&error),
- "Expected child of map array with length >= 1 but found array with
length 0");
+ "Expected child of map array to have length >= 1 but found array with
length 0");
array.children[0]->length = array.children[0]->length + 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1177,15 +1181,16 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
array.n_children = 0;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected 1 child of fixed-size array but found 0 child
arrays");
+ "Expected 1 child of fixed_size_list array but found 0 child
arrays");
array.n_children = 1;
// Make sure final child size is checked at finish
array.children[0]->length = array.children[0]->length - 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
- EXPECT_STREQ(ArrowErrorMessage(&error),
- "Expected child of fixed-size list array with length >= 8 but
found array "
- "with length 7");
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Expected child of fixed_size_list array to have length >= 8 but found
array "
+ "with length 7");
array.children[0]->length = array.children[0]->length + 1;
EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
@@ -1431,7 +1436,8 @@ TEST(ArrayTest, ArrayViewTestBasic) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
@@ -1444,7 +1450,8 @@ TEST(ArrayTest, ArrayViewTestBasic) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
@@ -1515,7 +1522,8 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING),
NANOARROW_OK);
array.null_count = 0;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1529,7 +1537,8 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
@@ -1545,7 +1554,8 @@ TEST(ArrayTest, ArrayViewTestString) {
offsets[1] = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found
element size -1");
array.release(&array);
@@ -1583,7 +1593,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING),
NANOARROW_OK);
array.null_count = 0;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1597,7 +1608,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int64_t));
EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
@@ -1613,7 +1625,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
offsets[1] = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found
element size -1");
array.release(&array);
@@ -1683,7 +1696,8 @@ TEST(ArrayTest, ArrayViewTestList) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
// Expect error for offsets that will cause bad access
struct ArrowError error;
@@ -1697,7 +1711,8 @@ TEST(ArrayTest, ArrayViewTestList) {
offsets[1] = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found
element size -1");
array.release(&array);
@@ -1725,7 +1740,8 @@ TEST(ArrayTest, ArrayViewTestListGet) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 1);
@@ -1757,7 +1773,8 @@ TEST(ArrayTest, ArrayViewTestLargeListGet) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 1);
@@ -1800,7 +1817,8 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
// Expect error for offsets that will cause bad access
struct ArrowError error;
@@ -1814,7 +1832,8 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
offsets[1] = -1;
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found
element size -1");
array.release(&array);
@@ -1877,7 +1896,8 @@ TEST(ArrayTest, ArrayViewTestStructArray) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes,
sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
@@ -1920,7 +1940,8 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 3 *
sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
@@ -1957,7 +1978,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
ArrowArrayViewInitFromType(array_view.children[1], NANOARROW_TYPE_STRING);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -1969,7 +1991,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
// The test schema explicitly sets the type_ids 0,1 and this should work too
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -1983,13 +2006,15 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
int32_t* offsets =
const_cast<int32_t*>(reinterpret_cast<const int32_t*>(array.buffers[1]));
type_ids[0] = -1;
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message,
"[0] Expected buffer value between 0 and 1 but found value -1");
type_ids[0] = 0;
offsets[0] = -1;
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message,
"[0] Expected union offset for child id 0 to be between 0 and 1
but found "
"offset value -1");
@@ -2002,7 +2027,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
ASSERT_EQ(ArrowSchemaSetFormat(&schema, "+ud:1,0"), NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -2011,7 +2037,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
// Check that bad type ids are caught by validate full
type_ids[0] = -1;
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
EXPECT_STREQ(error.message, "[0] Unexpected buffer value -1");
type_ids[0] = 0;
@@ -2055,7 +2082,8 @@ TEST(ArrayTest, ArrayViewTestDenseUnionGet) {
// Initialize the array view
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
// Check the values that will be used to index into children
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -2100,7 +2128,8 @@ TEST(ArrayTest, ArrayViewTestSparseUnionGet) {
// Initialize the array view
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr),
NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+ NANOARROW_OK);
// Check the values that will be used to index into children
EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -2141,7 +2170,8 @@ void TestGetFromNumericArrayView() {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
@@ -2172,7 +2202,8 @@ void TestGetFromNumericArrayView() {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
// We're trying to test behavior with no validity buffer, so make sure
that's true
ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
@@ -2219,7 +2250,8 @@ void TestGetFromBinary(BuilderClass& builder) {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
@@ -2274,7 +2306,8 @@ TEST(ArrayViewTest, ArrayViewTestGetDecimal128) {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
ArrowDecimal decimal;
ArrowDecimalInit(&decimal, 128, 10, 3);
@@ -2310,7 +2343,8 @@ TEST(ArrayViewTest, ArrayViewTestGetDecimal256) {
ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error),
NANOARROW_OK);
ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewValidate(&array_view,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ NANOARROW_OK);
ArrowDecimal decimal;
ArrowDecimalInit(&decimal, 256, 10, 3);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 7b11825..c591b68 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -114,8 +114,8 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
-#define ArrowArrayViewValidateFull \
- NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
+#define ArrowArrayViewValidate \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -909,7 +909,7 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray*
array,
/// \defgroup nanoarrow-array-view Reading arrays
///
-/// These functions read and validate the contents ArrowArray structures
+/// These functions read and validate the contents of ArrowArray structures.
///
/// @{
@@ -942,9 +942,17 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length);
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
struct ArrowArray* array, struct
ArrowError* error);
-/// \brief Performs extra checks on the array that was set via
ArrowArrayViewSetArray()
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
- struct ArrowError* error);
+/// \brief Performs checks on the content of an ArrowArrayView
+///
+/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
+/// the buffer sizes and some content (first and last offset) have already
+/// been validated at the "default" level. If setting the buffer pointers
+/// and sizes otherwise, you may wish to perform checks at a different level.
See
+/// documentation for ArrowValidationLevel for the details of checks performed
+/// at each level.
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+ enum ArrowValidationLevel
validation_level,
+ struct ArrowError* error);
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 58582ab..2fac404 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -471,26 +471,28 @@ static inline struct ArrowStringView ArrowCharView(const
char* value) {
return out;
}
+union ArrowBufferViewData {
+ const void* data;
+ const int8_t* as_int8;
+ const uint8_t* as_uint8;
+ const int16_t* as_int16;
+ const uint16_t* as_uint16;
+ const int32_t* as_int32;
+ const uint32_t* as_uint32;
+ const int64_t* as_int64;
+ const uint64_t* as_uint64;
+ const double* as_double;
+ const float* as_float;
+ const char* as_char;
+};
+
/// \brief An non-owning view of a buffer
/// \ingroup nanoarrow-utils
struct ArrowBufferView {
/// \brief A pointer to the start of the buffer
///
/// If size_bytes is 0, this value may be NULL.
- union {
- const void* data;
- const int8_t* as_int8;
- const uint8_t* as_uint8;
- const int16_t* as_int16;
- const uint16_t* as_uint16;
- const int32_t* as_int32;
- const uint32_t* as_uint32;
- const int64_t* as_int64;
- const uint64_t* as_uint64;
- const double* as_double;
- const float* as_float;
- const char* as_char;
- } data;
+ union ArrowBufferViewData data;
/// \brief The size of the buffer in bytes
int64_t size_bytes;
@@ -566,12 +568,23 @@ struct ArrowLayout {
/// This data structure provides access to the values contained within
/// an ArrowArray with fields provided in a more readily-extractible
/// form. You can re-use an ArrowArrayView for multiple ArrowArrays
-/// with the same storage type, or use it to represent a hypothetical
-/// ArrowArray that does not exist yet.
+/// with the same storage type, use it to represent a hypothetical
+/// ArrowArray that does not exist yet, or use it to validate the buffers
+/// of a future ArrowArray.
struct ArrowArrayView {
- /// \brief The underlying ArrowArray or NULL if it has not been set
+ /// \brief The underlying ArrowArray or NULL if it has not been set or
+ /// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
struct ArrowArray* array;
+ /// \brief The number of elements from the physical start of the buffers.
+ int64_t offset;
+
+ /// \brief The number of elements in this view.
+ int64_t length;
+
+ /// \brief A cached null count or -1 to indicate that this value is unknown.
+ int64_t null_count;
+
/// \brief The type used to store values in this array
///
/// This type represents only the minimum required information to