This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 8d53f8f Update dist/ for commit
be3b2b13a7e2ad977f6a1cad9a157e617bf42dcc
8d53f8f is described below
commit 8d53f8fa7c4f1285af2cebed45e57ef2210d0174
Author: GitHub Actions <[email protected]>
AuthorDate: Wed Mar 29 12:13:21 2023 +0000
Update dist/ for commit be3b2b13a7e2ad977f6a1cad9a157e617bf42dcc
---
dist/nanoarrow.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
dist/nanoarrow.h | 23 +++++--
2 files changed, 197 insertions(+), 5 deletions(-)
diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 8fb7994..8fa103b 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -2349,6 +2349,21 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
struct ArrowArray* array,
struct ArrowError* error) {
array_view->array = array;
+
+ // Check length and offset
+ if (array->offset < 0) {
+ ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of
%ld",
+ (long)array->offset);
+ return EINVAL;
+ }
+
+ if (array->length < 0) {
+ ArrowErrorSet(error, "Expected array length >= 0 but found array length of
%ld",
+ (long)array->length);
+ return EINVAL;
+ }
+
+ // First pass setting lengths that do not depend on the data buffer
ArrowArrayViewSetLength(array_view, array->offset + array->length);
int64_t buffers_required = 0;
@@ -2375,15 +2390,25 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->n_children != array->n_children) {
+ ArrowErrorSet(error, "Expected %ld children but found %ld children",
+ (long)array_view->n_children, (long)array->n_children);
return EINVAL;
}
// Check child sizes and calculate sizes that depend on data in the array
buffers
+ int64_t first_offset;
int64_t last_offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int32[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
array_view->buffer_views[2].size_bytes = last_offset;
@@ -2392,6 +2417,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int64[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
array_view->buffer_views[2].size_bytes = last_offset;
@@ -2421,6 +2453,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int32[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
if (array->children[0]->length < last_offset) {
@@ -2443,6 +2482,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (array_view->buffer_views[1].size_bytes != 0) {
+ first_offset = array_view->buffer_views[1].data.as_int64[0];
+ if (first_offset < 0) {
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+ (long)first_offset);
+ return EINVAL;
+ }
+
last_offset =
array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
if (array->children[0]->length < last_offset) {
@@ -2485,6 +2531,139 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
return NANOARROW_OK;
}
+
+// Check that consecutive int32 values in view never decrease.
+//
+// A buffer holding zero or one element passes trivially. On the first pair
+// whose difference is negative, error is populated with the element index and
+// the difference and EINVAL is returned; otherwise NANOARROW_OK is returned.
+static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
+                                      struct ArrowError* error) {
+  if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
+    return NANOARROW_OK;
+  }
+
+  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
+    // Widen before subtracting: this function exists to reject corrupt buffers,
+    // and the difference of two arbitrary int32 values does not always fit in
+    // an int32 (signed overflow is undefined behaviour).
+    int64_t diff =
+        (int64_t)view.data.as_int32[i] - (int64_t)view.data.as_int32[i - 1];
+    if (diff < 0) {
+      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
+                    (long)i, (long)diff);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Check that consecutive int64 values in view never decrease.
+//
+// A buffer holding zero or one element passes trivially. On the first element
+// that is smaller than its predecessor, error is populated with the element
+// index and the difference and EINVAL is returned; otherwise NANOARROW_OK is
+// returned.
+static int ArrowAssertIncreasingInt64(struct ArrowBufferView view,
+                                      struct ArrowError* error) {
+  if (view.size_bytes <= (int64_t)sizeof(int64_t)) {
+    return NANOARROW_OK;
+  }
+
+  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
+    int64_t previous = view.data.as_int64[i - 1];
+    int64_t current = view.data.as_int64[i];
+
+    // Compare directly rather than testing the sign of (current - previous):
+    // for corrupt input the signed subtraction can overflow, which is undefined
+    // behaviour and could let a decreasing pair slip through.
+    if (current < previous) {
+      // Unsigned subtraction is well-defined on wraparound. For every pair
+      // whose true difference fits in an int64 this is the exact difference;
+      // for the extreme pairs where it does not, the reported value is wrapped.
+      int64_t diff = (int64_t)((uint64_t)current - (uint64_t)previous);
+      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element size %ld",
+                    (long)i, (long)diff);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Check that every int8 element of view lies within [min_value, max_value]
+// (both bounds inclusive).
+//
+// Returns NANOARROW_OK when all elements are in range. On the first element
+// outside the range, error is populated with its index and value and EINVAL is
+// returned.
+static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value,
+                                int8_t max_value, struct ArrowError* error) {
+  const int64_t n_elements = view.size_bytes;  // one byte per int8 element
+
+  int64_t pos = 0;
+  while (pos < n_elements) {
+    const int8_t value = view.data.as_int8[pos];
+    const int in_range = value >= min_value && value <= max_value;
+    if (!in_range) {
+      ArrowErrorSet(error,
+                    "[%ld] Expected buffer value between %d and %d but found value %d",
+                    (long)pos, (int)min_value, (int)max_value, (int)value);
+      return EINVAL;
+    }
+    pos++;
+  }
+
+  return NANOARROW_OK;
+}
+
+// Check that every int8 element of view is equal to one of the first n_values
+// entries of values.
+//
+// Returns NANOARROW_OK when all elements are found. On the first element with
+// no match, error is populated with its index and value and EINVAL is returned.
+static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
+                             int64_t n_values, struct ArrowError* error) {
+  for (int64_t pos = 0; pos < view.size_bytes; pos++) {
+    const int8_t needle = view.data.as_int8[pos];
+
+    // Linear scan of the allowed values; stops at the first match.
+    int64_t k = 0;
+    while (k < n_values && values[k] != needle) {
+      k++;
+    }
+
+    // Running off the end of the candidates means the element was not found.
+    if (k >= n_values) {
+      ArrowErrorSet(error, "[%ld] Unexpected buffer value %d", (long)pos, (int)needle);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Perform the data-dependent checks on array_view and, recursively, on all of
+// its children.
+//
+// Checks performed, in order:
+//   1. Union offset buffers and data offset buffers never decrease.
+//   2. For union types, every value in buffer 0 is a valid type id.
+//   3. For dense unions, every offset falls within the referenced child.
+//   4. Every child view passes the same checks.
+//
+// Returns NANOARROW_OK on success. On the first failed check, error is
+// populated and the failing check's error code (EINVAL) is returned.
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+                                          struct ArrowError* error) {
+  for (int i = 0; i < 3; i++) {
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        NANOARROW_RETURN_NOT_OK(
+            ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Offset width is taken from the layout: 32 bits, otherwise 64 bits
+        if (array_view->layout.element_size_bits[i] == 32) {
+          NANOARROW_RETURN_NOT_OK(
+              ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+        } else {
+          NANOARROW_RETURN_NOT_OK(
+              ArrowAssertIncreasingInt64(array_view->buffer_views[i], error));
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
+      array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
+    // Check that we have valid type ids. When there is no type id map, or the
+    // map's type ids equal the child indices, a range check is sufficient;
+    // otherwise each value must be one of the type ids stored in the second
+    // half of the map (starting at index 128).
+    if (array_view->union_type_id_map == NULL ||
+        _ArrowParsedUnionTypeIdsWillEqualChildIndices(array_view->union_type_id_map,
+                                                      array_view->n_children,
+                                                      array_view->n_children)) {
+      NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
+                                                   array_view->n_children - 1, error));
+    } else {
+      NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
+                                                array_view->union_type_id_map + 128,
+                                                array_view->n_children, error));
+    }
+  }
+
+  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
+    // Check that offsets refer to child elements that actually exist
+    for (int64_t i = 0; i < array_view->array->length; i++) {
+      int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
+      int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
+      int64_t child_length = array_view->array->children[child_id]->length;
+      // NOTE(review): offset == child_length is accepted here although it looks
+      // like it is one past the last child element -- confirm whether the upper
+      // bound is meant to be inclusive.
+      if (offset < 0 || offset > child_length) {
+        // Every int64 argument is cast to long so that it matches its %ld
+        // conversion on platforms where long is narrower than int64_t.
+        ArrowErrorSet(
+            error,
+            "[%ld] Expected union offset for child id %d to be between 0 and %ld but "
+            "found offset value %ld",
+            (long)i, (int)child_id, (long)child_length, (long)offset);
+        return EINVAL;
+      }
+    }
+  }
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], error));
+  }
+
+  return NANOARROW_OK;
+}
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index 0eaec34..59d5dcf 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -535,7 +535,8 @@ struct ArrowArrayView {
///
/// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
/// such that child_index == union_type_id_map[type_id] and
- /// type_id == union_type_id_map[128 + child_index]
+ /// type_id == union_type_id_map[128 + child_index]. This value may be
+ /// NULL in the case where child_id == type_id.
int8_t* union_type_id_map;
};
@@ -685,6 +686,8 @@ struct ArrowArrayPrivateData {
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
#define ArrowArrayViewSetArray \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
+#define ArrowArrayViewValidateFull \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -1466,6 +1469,10 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length);
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
struct ArrowArray* array, struct
ArrowError* error);
+/// \brief Performs extra checks on the array that was set via
ArrowArrayViewSetArray()
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+ struct ArrowError* error);
+
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
@@ -2155,10 +2162,9 @@ static inline int8_t _ArrowParseUnionTypeIds(const char*
type_ids, int8_t* out)
return -1;
}
-static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char*
type_id_str,
- int64_t
n_children) {
- int8_t type_ids[128];
- int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids);
+static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const
int8_t* type_ids,
+ int64_t
n_type_ids,
+ int64_t
n_children) {
if (n_type_ids != n_children) {
return 0;
}
@@ -2172,6 +2178,13 @@ static inline int8_t
_ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id
return 1;
}
+// Convenience wrapper around _ArrowParsedUnionTypeIdsWillEqualChildIndices()
+// for callers that hold the type ids as a string: type_id_str is parsed into
+// a local 128-element buffer with _ArrowParseUnionTypeIds(), and the parsed
+// ids and their count are forwarded together with n_children.
+static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str,
+                                                             int64_t n_children) {
+  int8_t parsed_type_ids[128];
+  const int8_t n_parsed = _ArrowParseUnionTypeIds(type_id_str, parsed_type_ids);
+
+  return _ArrowParsedUnionTypeIdsWillEqualChildIndices(parsed_type_ids, n_parsed,
+                                                       n_children);
+}
+
static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray*
array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;