This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d9f428d  refactor: Unify `ArrowArrayView` and `ArrowArray` validation 
(#201)
d9f428d is described below

commit d9f428dec04f1a610b1c76b58f454be560a004df
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed May 24 21:35:03 2023 -0400

    refactor: Unify `ArrowArrayView` and `ArrowArray` validation (#201)
    
    The initial motivation for this refactor was to make it possible to go
    straight from Arrow IPC to ArrowArrayView with zero heap allocations and
    improve IPC validation; however, the changes needed for that (basically,
    give every ArrowArrayView its own offset, length, and null_count)
    allowed unifying the validation that we do when building arrays and the
    validation we do when wrapping a foregin ArrowArray in an
    ArrowArrayView. This PR ensures that no internal ArrowArrayView
    functions actually refer to the `array` member since there is no
    guarantee that an ArrowArray backs the buffers. Along the way, error
    messages and documentation were improved.
---
 r/R/as-array.R                  |   8 +-
 src/nanoarrow/array.c           | 392 +++++++++++++++++++++++++---------------
 src/nanoarrow/array_inline.h    |  14 +-
 src/nanoarrow/array_test.cc     | 118 +++++++-----
 src/nanoarrow/nanoarrow.h       |  20 +-
 src/nanoarrow/nanoarrow_types.h |  47 +++--
 6 files changed, 380 insertions(+), 219 deletions(-)

diff --git a/r/R/as-array.R b/r/R/as-array.R
index a9781b1..68ed939 100644
--- a/r/R/as-array.R
+++ b/r/R/as-array.R
@@ -267,8 +267,12 @@ union_array_from_data_frame <- function(x, schema) {
     "sparse_union" = {
       struct_schema <- na_struct(schema$children)
       array <- as_nanoarrow_array(x, array = struct_schema)
-      nanoarrow_array_set_schema(array, schema, validate = FALSE)
-      array$buffers[[1]] <- as.raw(child_index)
+      array <- nanoarrow_array_modify(
+        array,
+        list(buffers = list(as.raw(child_index))),
+        validate = FALSE
+      )
+      nanoarrow_array_set_schema(array, schema, validate = TRUE)
       array
     },
     stop("Attempt to create union from non-union array type")
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index ec321e6..c669114 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -296,6 +296,16 @@ static ArrowErrorCode ArrowArrayViewInitFromArray(struct 
ArrowArrayView* array_v
   ArrowArrayViewInitFromType(array_view, private_data->storage_type);
   array_view->layout = private_data->layout;
   array_view->array = array;
+  array_view->length = array->length;
+  array_view->offset = array->offset;
+  array_view->null_count = array->null_count;
+
+  array_view->buffer_views[0].data.as_uint8 = private_data->bitmap.buffer.data;
+  array_view->buffer_views[0].size_bytes = 
private_data->bitmap.buffer.size_bytes;
+  array_view->buffer_views[1].data.as_uint8 = private_data->buffers[0].data;
+  array_view->buffer_views[1].size_bytes = private_data->buffers[0].size_bytes;
+  array_view->buffer_views[2].data.as_uint8 = private_data->buffers[1].data;
+  array_view->buffer_views[2].size_bytes = private_data->buffers[1].size_bytes;
 
   int result = ArrowArrayViewAllocateChildren(array_view, array->n_children);
   if (result != NANOARROW_OK) {
@@ -399,39 +409,6 @@ static void ArrowArrayFlushInternalPointers(struct 
ArrowArray* array) {
   }
 }
 
-static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
-    struct ArrowArray* array, struct ArrowArrayView* array_view, char 
set_length,
-    struct ArrowError* error) {
-  if (set_length) {
-    ArrowArrayViewSetLength(array_view, array->offset + array->length);
-  }
-
-  for (int64_t i = 0; i < array->n_buffers; i++) {
-    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
-        array->null_count == 0 && array->buffers[i] == NULL) {
-      continue;
-    }
-
-    int64_t expected_size = array_view->buffer_views[i].size_bytes;
-    int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
-
-    if (actual_size < expected_size) {
-      ArrowErrorSet(
-          error,
-          "Expected buffer %d to size >= %ld bytes but found buffer with %ld 
bytes",
-          (int)i, (long)expected_size, (long)actual_size);
-      return EINVAL;
-    }
-  }
-
-  for (int64_t i = 0; i < array->n_children; i++) {
-    NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
-        array->children[i], array_view->children[i], set_length, error));
-  }
-
-  return NANOARROW_OK;
-}
-
 ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
                                         enum ArrowValidationLevel 
validation_level,
                                         struct ArrowError* error) {
@@ -450,45 +427,11 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct 
ArrowArray* array,
     return NANOARROW_OK;
   }
 
-  // Check buffer sizes to make sure we are not sending an ArrowArray
-  // into the wild that is going to segfault
+  // For validation, initialize an ArrowArrayView with our known buffer sizes
   struct ArrowArrayView array_view;
-
   NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowArrayViewInitFromArray(&array_view, 
array),
                                      error);
-
-  // Check buffer sizes once without using internal buffer data since
-  // ArrowArrayViewSetArray() assumes that all the buffers are long enough
-  // and issues invalid reads on offset buffers if they are not
-  int result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 1, 
error);
-  if (result != NANOARROW_OK) {
-    ArrowArrayViewReset(&array_view);
-    return result;
-  }
-
-  if (validation_level == NANOARROW_VALIDATION_LEVEL_MINIMAL) {
-    ArrowArrayViewReset(&array_view);
-    return NANOARROW_OK;
-  }
-
-  result = ArrowArrayViewSetArray(&array_view, array, error);
-  if (result != NANOARROW_OK) {
-    ArrowArrayViewReset(&array_view);
-    return result;
-  }
-
-  result = ArrowArrayCheckInternalBufferSizes(array, &array_view, 0, error);
-  if (result != NANOARROW_OK) {
-    ArrowArrayViewReset(&array_view);
-    return result;
-  }
-
-  if (validation_level == NANOARROW_VALIDATION_LEVEL_DEFAULT) {
-    ArrowArrayViewReset(&array_view);
-    return NANOARROW_OK;
-  }
-
-  result = ArrowArrayViewValidateFull(&array_view, error);
+  int result = ArrowArrayViewValidate(&array_view, validation_level, error);
   ArrowArrayViewReset(&array_view);
   return result;
 }
@@ -604,7 +547,6 @@ void ArrowArrayViewReset(struct ArrowArrayView* array_view) 
{
 void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t 
length) {
   for (int i = 0; i < 3; i++) {
     int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
-    array_view->buffer_views[i].data.data = NULL;
 
     switch (array_view->layout.buffer_type[i]) {
       case NANOARROW_BUFFER_TYPE_VALIDITY:
@@ -648,11 +590,11 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* 
array_view, int64_t length)
   }
 }
 
-ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
-                                      struct ArrowArray* array,
-                                      struct ArrowError* error) {
-  array_view->array = array;
-
+// This version recursively extracts information from the array and stores it
+// in the array view, performing any checks that require the original array.
+static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
+                                          struct ArrowArray* array,
+                                          struct ArrowError* error) {
   // Check length and offset
   if (array->offset < 0) {
     ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of 
%ld",
@@ -666,8 +608,10 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
     return EINVAL;
   }
 
-  // First pass setting lengths that do not depend on the data buffer
-  ArrowArrayViewSetLength(array_view, array->offset + array->length);
+  array_view->array = array;
+  array_view->offset = array->offset;
+  array_view->length = array->length;
+  array_view->null_count = array->null_count;
 
   int64_t buffers_required = 0;
   for (int i = 0; i < 3; i++) {
@@ -677,28 +621,165 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
 
     buffers_required++;
 
-    // If the null_count is 0, the validity buffer can be NULL
-    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
-        array->null_count == 0 && array->buffers[i] == NULL) {
+    // Set buffer pointer
+    array_view->buffer_views[i].data.data = array->buffers[i];
+
+    // If non-null, set buffer size to unknown.
+    if (array->buffers[i] == NULL) {
       array_view->buffer_views[i].size_bytes = 0;
+    } else {
+      array_view->buffer_views[i].size_bytes = -1;
     }
-
-    array_view->buffer_views[i].data.data = array->buffers[i];
   }
 
+  // Check the number of buffers
   if (buffers_required != array->n_buffers) {
     ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d 
buffer(s)",
                   (int)buffers_required, (int)array->n_buffers);
     return EINVAL;
   }
 
+  // Check number of children
   if (array_view->n_children != array->n_children) {
     ArrowErrorSet(error, "Expected %ld children but found %ld children",
                   (long)array_view->n_children, (long)array->n_children);
     return EINVAL;
   }
 
-  // Check child sizes and calculate sizes that depend on data in the array 
buffers
+  // Recurse for children
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    
NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view->children[i],
+                                                           array->children[i], 
error));
+  }
+
+  return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
+                                         struct ArrowError* error) {
+  // Calculate buffer sizes that do not require buffer access. If marked as
+  // unknown, assign the buffer size; otherwise, validate it.
+  int64_t offset_plus_length = array_view->offset + array_view->length;
+
+  // Only loop over the first two buffers because the size of the third buffer
+  // is always data dependent for all current Arrow types.
+  for (int i = 0; i < 2; i++) {
+    int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
+    int64_t min_buffer_size_bytes;
+
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_VALIDITY:
+        if (array_view->null_count == 0 && 
array_view->buffer_views[i].size_bytes == 0) {
+          continue;
+        }
+
+        min_buffer_size_bytes = _ArrowBytesForBits(offset_plus_length);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Probably don't want/need to rely on the producer to have allocated 
an
+        // offsets buffer of length 1 for a zero-size array
+        min_buffer_size_bytes =
+            (offset_plus_length != 0) * element_size_bytes * 
(offset_plus_length + 1);
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA:
+        min_buffer_size_bytes =
+            _ArrowRoundUpToMultipleOf8(array_view->layout.element_size_bits[i] 
*
+                                       offset_plus_length) /
+            8;
+        break;
+      case NANOARROW_BUFFER_TYPE_TYPE_ID:
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        min_buffer_size_bytes = element_size_bytes * offset_plus_length;
+        break;
+      case NANOARROW_BUFFER_TYPE_NONE:
+        continue;
+    }
+
+    // Assign or validate buffer size
+    if (array_view->buffer_views[i].size_bytes == -1) {
+      array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
+    } else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes) 
{
+      ArrowErrorSet(error,
+                    "Expected %s array buffer %d to have size >= %ld bytes but 
found "
+                    "buffer with %ld bytes",
+                    ArrowTypeString(array_view->storage_type), (int)i,
+                    (long)min_buffer_size_bytes,
+                    (long)array_view->buffer_views[i].size_bytes);
+      return EINVAL;
+    }
+  }
+
+  // For list, fixed-size list and map views, we can validate the number of 
children
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_LIST:
+    case NANOARROW_TYPE_LARGE_LIST:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+    case NANOARROW_TYPE_MAP:
+      if (array_view->n_children != 1) {
+        ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child 
arrays",
+                      ArrowTypeString(array_view->storage_type),
+                      (long)array_view->n_children);
+        return EINVAL;
+      }
+    default:
+      break;
+  }
+
+  // For struct, the sparse union, and the fixed-size list views, we can 
validate child
+  // lengths.
+  int64_t child_min_length;
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_SPARSE_UNION:
+    case NANOARROW_TYPE_STRUCT:
+      child_min_length = (array_view->offset + array_view->length);
+      for (int64_t i = 0; i < array_view->n_children; i++) {
+        if (array_view->children[i]->length < child_min_length) {
+          ArrowErrorSet(
+              error,
+              "Expected struct child %d to have length >= %ld but found child 
with "
+              "length %ld",
+              (int)(i + 1), (long)(child_min_length),
+              (long)array_view->children[i]->length);
+          return EINVAL;
+        }
+      }
+      break;
+
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      child_min_length = (array_view->offset + array_view->length) *
+                         array_view->layout.child_size_elements;
+      if (array_view->children[0]->length < child_min_length) {
+        ArrowErrorSet(error,
+                      "Expected child of fixed_size_list array to have length 
>= %ld but "
+                      "found array with length %ld",
+                      (long)child_min_length, 
(long)array_view->children[0]->length);
+        return EINVAL;
+      }
+      break;
+    default:
+      break;
+  }
+
+  // Recurse for children
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewValidateMinimal(array_view->children[i], error));
+  }
+
+  return NANOARROW_OK;
+}
+
+static int ArrowArrayViewValidateDefault(struct ArrowArrayView* array_view,
+                                         struct ArrowError* error) {
+  // Perform minimal validation. This will validate or assign
+  // buffer sizes as long as buffer access is not required.
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateMinimal(array_view, error));
+
+  // Calculate buffer sizes or child lengths that require accessing the offsets
+  // buffer. Where appropriate, validate that the first offset is >= 0.
+  // If a buffer size is marked as unknown, assign it; otherwise, validate it.
+  int64_t offset_plus_length = array_view->offset + array_view->length;
+
   int64_t first_offset;
   int64_t last_offset;
   switch (array_view->storage_type) {
@@ -712,11 +793,22 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
           return EINVAL;
         }
 
-        last_offset =
-            array_view->buffer_views[1].data.as_int32[array->offset + 
array->length];
-        array_view->buffer_views[2].size_bytes = last_offset;
+        last_offset = 
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+
+        // If the data buffer size is unknown, assign it; otherwise, check it
+        if (array_view->buffer_views[2].size_bytes == -1) {
+          array_view->buffer_views[2].size_bytes = last_offset;
+        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+          ArrowErrorSet(error,
+                        "Expected %s array buffer 2 to have size >= %ld bytes 
but found "
+                        "buffer with %ld bytes",
+                        ArrowTypeString(array_view->storage_type), 
(long)last_offset,
+                        (long)array_view->buffer_views[2].size_bytes);
+          return EINVAL;
+        }
       }
       break;
+
     case NANOARROW_TYPE_LARGE_STRING:
     case NANOARROW_TYPE_LARGE_BINARY:
       if (array_view->buffer_views[1].size_bytes != 0) {
@@ -727,34 +819,38 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
           return EINVAL;
         }
 
-        last_offset =
-            array_view->buffer_views[1].data.as_int64[array->offset + 
array->length];
-        array_view->buffer_views[2].size_bytes = last_offset;
+        last_offset = 
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+
+        // If the data buffer size is unknown, assign it; otherwise, check it
+        if (array_view->buffer_views[2].size_bytes == -1) {
+          array_view->buffer_views[2].size_bytes = last_offset;
+        } else if (array_view->buffer_views[2].size_bytes < last_offset) {
+          ArrowErrorSet(error,
+                        "Expected %s array buffer 2 to have size >= %ld bytes 
but found "
+                        "buffer with %ld bytes",
+                        ArrowTypeString(array_view->storage_type), 
(long)last_offset,
+                        (long)array_view->buffer_views[2].size_bytes);
+          return EINVAL;
+        }
       }
       break;
+
     case NANOARROW_TYPE_STRUCT:
       for (int64_t i = 0; i < array_view->n_children; i++) {
-        if (array->children[i]->length < (array->offset + array->length)) {
+        if (array_view->children[i]->length < offset_plus_length) {
           ArrowErrorSet(
               error,
               "Expected struct child %d to have length >= %ld but found child 
with "
               "length %ld",
-              (int)(i + 1), (long)(array->offset + array->length),
-              (long)array->children[i]->length);
+              (int)(i + 1), (long)offset_plus_length,
+              (long)array_view->children[i]->length);
           return EINVAL;
         }
       }
       break;
-    case NANOARROW_TYPE_LIST:
-    case NANOARROW_TYPE_MAP: {
-      const char* type_name =
-          array_view->storage_type == NANOARROW_TYPE_LIST ? "list" : "map";
-      if (array->n_children != 1) {
-        ArrowErrorSet(error, "Expected 1 child of %s array but found %d child 
arrays",
-                      type_name, (int)array->n_children);
-        return EINVAL;
-      }
 
+    case NANOARROW_TYPE_LIST:
+    case NANOARROW_TYPE_MAP:
       if (array_view->buffer_views[1].size_bytes != 0) {
         first_offset = array_view->buffer_views[1].data.as_int32[0];
         if (first_offset < 0) {
@@ -763,27 +859,20 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
           return EINVAL;
         }
 
-        last_offset =
-            array_view->buffer_views[1].data.as_int32[array->offset + 
array->length];
-        if (array->children[0]->length < last_offset) {
+        last_offset = 
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+        if (array_view->children[0]->length < last_offset) {
           ArrowErrorSet(
               error,
-              "Expected child of %s array with length >= %ld but found array 
with "
+              "Expected child of %s array to have length >= %ld but found 
array with "
               "length %ld",
-              type_name, (long)last_offset, (long)array->children[0]->length);
+              ArrowTypeString(array_view->storage_type), (long)last_offset,
+              (long)array_view->children[0]->length);
           return EINVAL;
         }
       }
       break;
-    }
-    case NANOARROW_TYPE_LARGE_LIST:
-      if (array->n_children != 1) {
-        ArrowErrorSet(error,
-                      "Expected 1 child of large list array but found %d child 
arrays",
-                      (int)array->n_children);
-        return EINVAL;
-      }
 
+    case NANOARROW_TYPE_LARGE_LIST:
       if (array_view->buffer_views[1].size_bytes != 0) {
         first_offset = array_view->buffer_views[1].data.as_int64[0];
         if (first_offset < 0) {
@@ -792,49 +881,43 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
           return EINVAL;
         }
 
-        last_offset =
-            array_view->buffer_views[1].data.as_int64[array->offset + 
array->length];
-        if (array->children[0]->length < last_offset) {
+        last_offset = 
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+        if (array_view->children[0]->length < last_offset) {
           ArrowErrorSet(
               error,
-              "Expected child of large list array with length >= %ld but found 
array "
+              "Expected child of large list array to have length >= %ld but 
found array "
               "with length %ld",
-              (long)last_offset, (long)array->children[0]->length);
+              (long)last_offset, (long)array_view->children[0]->length);
           return EINVAL;
         }
       }
       break;
-    case NANOARROW_TYPE_FIXED_SIZE_LIST:
-      if (array->n_children != 1) {
-        ArrowErrorSet(error,
-                      "Expected 1 child of fixed-size array but found %d child 
arrays",
-                      (int)array->n_children);
-        return EINVAL;
-      }
-
-      last_offset =
-          (array->offset + array->length) * 
array_view->layout.child_size_elements;
-      if (array->children[0]->length < last_offset) {
-        ArrowErrorSet(
-            error,
-            "Expected child of fixed-size list array with length >= %ld but 
found array "
-            "with length %ld",
-            (long)last_offset, (long)array->children[0]->length);
-        return EINVAL;
-      }
-      break;
     default:
       break;
   }
 
+  // Recurse for children
   for (int64_t i = 0; i < array_view->n_children; i++) {
     NANOARROW_RETURN_NOT_OK(
-        ArrowArrayViewSetArray(array_view->children[i], array->children[i], 
error));
+        ArrowArrayViewValidateDefault(array_view->children[i], error));
   }
 
   return NANOARROW_OK;
 }
 
+ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
+                                      struct ArrowArray* array,
+                                      struct ArrowError* error) {
+  // Extract information from the array into the array view
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array, 
error));
+
+  // Run default validation. Because we've marked all non-NULL buffers as 
having unknown
+  // size, validation will also update the buffer sizes as it goes.
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, error));
+
+  return NANOARROW_OK;
+}
+
 static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
                                       struct ArrowError* error) {
   if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
@@ -906,8 +989,8 @@ static int ArrowAssertInt8In(struct ArrowBufferView view, 
const int8_t* values,
   return NANOARROW_OK;
 }
 
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
-                                          struct ArrowError* error) {
+static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+                                      struct ArrowError* error) {
   for (int i = 0; i < 3; i++) {
     switch (array_view->layout.buffer_type[i]) {
       case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
@@ -935,8 +1018,8 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
     } else if (_ArrowParsedUnionTypeIdsWillEqualChildIndices(
                    array_view->union_type_id_map, array_view->n_children,
                    array_view->n_children)) {
-      
NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
-                                                   
(int8_t)(array_view->n_children - 1), error));
+      NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(
+          array_view->buffer_views[0], 0, (int8_t)(array_view->n_children - 
1), error));
     } else {
       NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
                                                 array_view->union_type_id_map 
+ 128,
@@ -947,10 +1030,10 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
   if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION &&
       array_view->union_type_id_map != NULL) {
     // Check that offsets refer to child elements that actually exist
-    for (int64_t i = 0; i < array_view->array->length; i++) {
+    for (int64_t i = 0; i < array_view->length; i++) {
       int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
       int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
-      int64_t child_length = array_view->array->children[child_id]->length;
+      int64_t child_length = array_view->children[child_id]->length;
       if (offset < 0 || offset > child_length) {
         ArrowErrorSet(
             error,
@@ -968,3 +1051,22 @@ ArrowErrorCode ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
 
   return NANOARROW_OK;
 }
+
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+                                      enum ArrowValidationLevel 
validation_level,
+                                      struct ArrowError* error) {
+  switch (validation_level) {
+    case NANOARROW_VALIDATION_LEVEL_NONE:
+      return NANOARROW_OK;
+    case NANOARROW_VALIDATION_LEVEL_MINIMAL:
+      return ArrowArrayViewValidateMinimal(array_view, error);
+    case NANOARROW_VALIDATION_LEVEL_DEFAULT:
+      return ArrowArrayViewValidateDefault(array_view, error);
+    case NANOARROW_VALIDATION_LEVEL_FULL:
+      NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateDefault(array_view, 
error));
+      return ArrowArrayViewValidateFull(array_view, error);
+  }
+
+  ArrowErrorSet(error, "validation_level not recognized");
+  return EINVAL;
+}
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 98b4f3e..dda4634 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -654,7 +654,7 @@ static inline void ArrowArrayViewMove(struct 
ArrowArrayView* src,
 
 static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, 
int64_t i) {
   const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
-  i += array_view->array->offset;
+  i += array_view->offset;
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_NA:
       return 0x01;
@@ -715,7 +715,7 @@ static inline int64_t ArrowArrayViewListChildOffset(struct 
ArrowArrayView* array
 static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* 
array_view,
                                                  int64_t i) {
   struct ArrowBufferView* data_view = &array_view->buffer_views[1];
-  i += array_view->array->offset;
+  i += array_view->offset;
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_INT64:
       return data_view->data.as_int64[i];
@@ -746,7 +746,7 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct 
ArrowArrayView* array_vi
 
 static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* 
array_view,
                                                    int64_t i) {
-  i += array_view->array->offset;
+  i += array_view->offset;
   struct ArrowBufferView* data_view = &array_view->buffer_views[1];
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_INT64:
@@ -778,7 +778,7 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct 
ArrowArrayView* array_
 
 static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* 
array_view,
                                                    int64_t i) {
-  i += array_view->array->offset;
+  i += array_view->offset;
   struct ArrowBufferView* data_view = &array_view->buffer_views[1];
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_INT64:
@@ -810,7 +810,7 @@ static inline double ArrowArrayViewGetDoubleUnsafe(struct 
ArrowArrayView* array_
 
 static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
     struct ArrowArrayView* array_view, int64_t i) {
-  i += array_view->array->offset;
+  i += array_view->offset;
   struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
   const char* data_view = array_view->buffer_views[2].data.as_char;
 
@@ -843,7 +843,7 @@ static inline struct ArrowStringView 
ArrowArrayViewGetStringUnsafe(
 
 static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
     struct ArrowArrayView* array_view, int64_t i) {
-  i += array_view->array->offset;
+  i += array_view->offset;
   struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
   const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8;
 
@@ -877,7 +877,7 @@ static inline struct ArrowBufferView 
ArrowArrayViewGetBytesUnsafe(
 
 static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* 
array_view,
                                                   int64_t i, struct 
ArrowDecimal* out) {
-  i += array_view->array->offset;
+  i += array_view->offset;
   const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_DECIMAL128:
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 5dbac0d..ea16c57 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -226,14 +226,16 @@ TEST(ArrayTest, ArrayTestBuildByBuffer) {
   array.length = 8;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected buffer 1 to size >= 36 bytes but found buffer with 32 
bytes");
+               "Expected string array buffer 1 to have size >= 36 bytes but 
found buffer "
+               "with 32 bytes");
 
   array.length = 7;
   int32_t* offsets_buffer = 
reinterpret_cast<int32_t*>(ArrowArrayBuffer(&array, 1)->data);
   offsets_buffer[7] = offsets_buffer[7] + 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected buffer 2 to size >= 11 bytes but found buffer with 10 
bytes");
+               "Expected string array buffer 2 to have size >= 11 bytes but 
found buffer "
+               "with 10 bytes");
 
   array.release(&array);
 }
@@ -289,7 +291,8 @@ TEST(ArrayTest, ArrayTestExplicitValidationLevel) {
   EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL, 
&error),
             EINVAL);
   EXPECT_STREQ(error.message,
-               "Expected buffer 1 to size >= 12 bytes but found buffer with 0 
bytes");
+               "Expected string array buffer 1 to have size >= 12 bytes but 
found buffer "
+               "with 0 bytes");
 
   array.release(&array);
 }
@@ -992,7 +995,7 @@ TEST(ArrayTest, ArrayTestAppendToListArray) {
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(
       ArrowErrorMessage(&error),
-      "Expected child of list array with length >= 3 but found array with 
length 2");
+      "Expected child of list array to have length >= 3 but found array with 
length 2");
 
   array.children[0]->length = array.children[0]->length + 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1045,15 +1048,16 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
   array.n_children = 0;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected 1 child of large list array but found 0 child 
arrays");
+               "Expected 1 child of large_list array but found 0 child 
arrays");
   array.n_children = 1;
 
   // Make sure final child size is checked at finish
   array.children[0]->length = array.children[0]->length - 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
-  EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected child of large list array with length >= 3 but found 
array with "
-               "length 2");
+  EXPECT_STREQ(
+      ArrowErrorMessage(&error),
+      "Expected child of large list array to have length >= 3 but found array 
with "
+      "length 2");
 
   array.children[0]->length = array.children[0]->length + 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1118,7 +1122,7 @@ TEST(ArrayTest, ArrayTestAppendToMapArray) {
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(
       ArrowErrorMessage(&error),
-      "Expected child of map array with length >= 1 but found array with 
length 0");
+      "Expected child of map array to have length >= 1 but found array with 
length 0");
 
   array.children[0]->length = array.children[0]->length + 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
@@ -1177,15 +1181,16 @@ TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
   array.n_children = 0;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
   EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected 1 child of fixed-size array but found 0 child 
arrays");
+               "Expected 1 child of fixed_size_list array but found 0 child 
arrays");
   array.n_children = 1;
 
   // Make sure final child size is checked at finish
   array.children[0]->length = array.children[0]->length - 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
-  EXPECT_STREQ(ArrowErrorMessage(&error),
-               "Expected child of fixed-size list array with length >= 8 but 
found array "
-               "with length 7");
+  EXPECT_STREQ(
+      ArrowErrorMessage(&error),
+      "Expected child of fixed_size_list array to have length >= 8 but found 
array "
+      "with length 7");
 
   array.children[0]->length = array.children[0]->length + 1;
   EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
@@ -1431,7 +1436,8 @@ TEST(ArrayTest, ArrayViewTestBasic) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
   EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
@@ -1444,7 +1450,8 @@ TEST(ArrayTest, ArrayViewTestBasic) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 1);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, 3 * sizeof(int32_t));
 
@@ -1515,7 +1522,8 @@ TEST(ArrayTest, ArrayViewTestString) {
   ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING), 
NANOARROW_OK);
   array.null_count = 0;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1529,7 +1537,8 @@ TEST(ArrayTest, ArrayViewTestString) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int32_t));
   EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
@@ -1545,7 +1554,8 @@ TEST(ArrayTest, ArrayViewTestString) {
 
   offsets[1] = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found 
element size -1");
 
   array.release(&array);
@@ -1583,7 +1593,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
   ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING), 
NANOARROW_OK);
   array.null_count = 0;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[2].size_bytes, 0);
@@ -1597,7 +1608,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.buffer_views[0].size_bytes, 0);
   EXPECT_EQ(array_view.buffer_views[1].size_bytes, (1 + 1) * sizeof(int64_t));
   EXPECT_EQ(array_view.buffer_views[2].size_bytes, 4);
@@ -1613,7 +1625,8 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
 
   offsets[1] = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found 
element size -1");
 
   array.release(&array);
@@ -1683,7 +1696,8 @@ TEST(ArrayTest, ArrayViewTestList) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   // Expect error for offsets that will cause bad access
   struct ArrowError error;
@@ -1697,7 +1711,8 @@ TEST(ArrayTest, ArrayViewTestList) {
 
   offsets[1] = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found 
element size -1");
 
   array.release(&array);
@@ -1725,7 +1740,8 @@ TEST(ArrayTest, ArrayViewTestListGet) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
   EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 1);
@@ -1757,7 +1773,8 @@ TEST(ArrayTest, ArrayViewTestLargeListGet) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 0), 0);
   EXPECT_EQ(ArrowArrayViewListChildOffset(&array_view, 1), 1);
@@ -1800,7 +1817,8 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   // Expect error for offsets that will cause bad access
   struct ArrowError error;
@@ -1814,7 +1832,8 @@ TEST(ArrayTest, ArrayViewTestLargeList) {
 
   offsets[1] = -1;
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message, "[1] Expected element size >= 0 but found 
element size -1");
 
   array.release(&array);
@@ -1877,7 +1896,8 @@ TEST(ArrayTest, ArrayViewTestStructArray) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 
sizeof(int32_t));
   EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
 
@@ -1920,7 +1940,8 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
   EXPECT_EQ(array_view.children[0]->buffer_views[1].size_bytes, 3 * 
sizeof(int32_t));
   EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
 
@@ -1957,7 +1978,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
   ArrowArrayViewInitFromType(array_view.children[0], NANOARROW_TYPE_INT32);
   ArrowArrayViewInitFromType(array_view.children[1], NANOARROW_TYPE_STRING);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -1969,7 +1991,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
   // The test schema explicitly sets the type_ids 0,1 and this should work too
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -1983,13 +2006,15 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
   int32_t* offsets =
       const_cast<int32_t*>(reinterpret_cast<const int32_t*>(array.buffers[1]));
   type_ids[0] = -1;
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message,
                "[0] Expected buffer value between 0 and 1 but found value -1");
   type_ids[0] = 0;
 
   offsets[0] = -1;
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message,
                "[0] Expected union offset for child id 0 to be between 0 and 1 
but found "
                "offset value -1");
@@ -2002,7 +2027,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
   ASSERT_EQ(ArrowSchemaSetFormat(&schema, "+ud:1,0"), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 0), 0);
   EXPECT_EQ(ArrowArrayViewUnionTypeId(&array_view, 1), 1);
@@ -2011,7 +2037,8 @@ TEST(ArrayTest, ArrayViewTestUnionChildIndices) {
 
   // Check that bad type ids are caught by validate full
   type_ids[0] = -1;
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), EINVAL);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            EINVAL);
   EXPECT_STREQ(error.message, "[0] Unexpected buffer value -1");
   type_ids[0] = 0;
 
@@ -2055,7 +2082,8 @@ TEST(ArrayTest, ArrayViewTestDenseUnionGet) {
   // Initialize the array view
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   // Check the values that will be used to index into children
   EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -2100,7 +2128,8 @@ TEST(ArrayTest, ArrayViewTestSparseUnionGet) {
   // Initialize the array view
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, nullptr), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, nullptr), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, nullptr),
+            NANOARROW_OK);
 
   // Check the values that will be used to index into children
   EXPECT_EQ(ArrowArrayViewUnionChildIndex(&array_view, 0), 0);
@@ -2141,7 +2170,8 @@ void TestGetFromNumericArrayView() {
   ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
   EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
@@ -2172,7 +2202,8 @@ void TestGetFromNumericArrayView() {
   ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
 
   // We're trying to test behavior with no validity buffer, so make sure 
that's true
   ASSERT_EQ(array_view.buffer_views[0].data.data, nullptr);
@@ -2219,7 +2250,8 @@ void TestGetFromBinary(BuilderClass& builder) {
   ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
 
   EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 2), 1);
   EXPECT_EQ(ArrowArrayViewIsNull(&array_view, 3), 0);
@@ -2274,7 +2306,8 @@ TEST(ArrayViewTest, ArrayViewTestGetDecimal128) {
   ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
 
   ArrowDecimal decimal;
   ArrowDecimalInit(&decimal, 128, 10, 3);
@@ -2310,7 +2343,8 @@ TEST(ArrayViewTest, ArrayViewTestGetDecimal256) {
   ARROW_EXPECT_OK(ExportArray(*arrow_array, &array, &schema));
   ASSERT_EQ(ArrowArrayViewInitFromSchema(&array_view, &schema, &error), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
-  EXPECT_EQ(ArrowArrayViewValidateFull(&array_view, &error), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayViewValidate(&array_view, 
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+            NANOARROW_OK);
 
   ArrowDecimal decimal;
   ArrowDecimalInit(&decimal, 256, 10, 3);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 7b11825..c591b68 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -114,8 +114,8 @@
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
 #define ArrowArrayViewSetArray \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
-#define ArrowArrayViewValidateFull \
-  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
+#define ArrowArrayViewValidate \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
 #define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, 
ArrowArrayViewReset)
 #define ArrowBasicArrayStreamInit \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -909,7 +909,7 @@ ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* 
array,
 
 /// \defgroup nanoarrow-array-view Reading arrays
 ///
-/// These functions read and validate the contents ArrowArray structures
+/// These functions read and validate the contents ArrowArray structures.
 ///
 /// @{
 
@@ -942,9 +942,17 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* 
array_view, int64_t length);
 ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
                                       struct ArrowArray* array, struct 
ArrowError* error);
 
-/// \brief Performs extra checks on the array that was set via 
ArrowArrayViewSetArray()
-ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
-                                          struct ArrowError* error);
+/// \brief Performs checks on the content of an ArrowArrayView
+///
+/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
+/// the buffer sizes and some content (fist and last offset) have already
+/// been validated at the "default" level. If setting the buffer pointers
+/// and sizes otherwise, you may wish to perform checks at a different level. 
See
+/// documentation for ArrowValidationLevel for the details of checks performed
+/// at each level.
+ArrowErrorCode ArrowArrayViewValidate(struct ArrowArrayView* array_view,
+                                      enum ArrowValidationLevel 
validation_level,
+                                      struct ArrowError* error);
 
 /// \brief Reset the contents of an ArrowArrayView and frees resources
 void ArrowArrayViewReset(struct ArrowArrayView* array_view);
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 58582ab..2fac404 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -471,26 +471,28 @@ static inline struct ArrowStringView ArrowCharView(const 
char* value) {
   return out;
 }
 
+union ArrowBufferViewData {
+  const void* data;
+  const int8_t* as_int8;
+  const uint8_t* as_uint8;
+  const int16_t* as_int16;
+  const uint16_t* as_uint16;
+  const int32_t* as_int32;
+  const uint32_t* as_uint32;
+  const int64_t* as_int64;
+  const uint64_t* as_uint64;
+  const double* as_double;
+  const float* as_float;
+  const char* as_char;
+};
+
 /// \brief An non-owning view of a buffer
 /// \ingroup nanoarrow-utils
 struct ArrowBufferView {
   /// \brief A pointer to the start of the buffer
   ///
   /// If size_bytes is 0, this value may be NULL.
-  union {
-    const void* data;
-    const int8_t* as_int8;
-    const uint8_t* as_uint8;
-    const int16_t* as_int16;
-    const uint16_t* as_uint16;
-    const int32_t* as_int32;
-    const uint32_t* as_uint32;
-    const int64_t* as_int64;
-    const uint64_t* as_uint64;
-    const double* as_double;
-    const float* as_float;
-    const char* as_char;
-  } data;
+  union ArrowBufferViewData data;
 
   /// \brief The size of the buffer in bytes
   int64_t size_bytes;
@@ -566,12 +568,23 @@ struct ArrowLayout {
 /// This data structure provides access to the values contained within
 /// an ArrowArray with fields provided in a more readily-extractible
 /// form. You can re-use an ArrowArrayView for multiple ArrowArrays
-/// with the same storage type, or use it to represent a hypothetical
-/// ArrowArray that does not exist yet.
+/// with the same storage type, use it to represent a hypothetical
+/// ArrowArray that does not exist yet, or use it to validate the buffers
+/// of a future ArrowArray.
 struct ArrowArrayView {
-  /// \brief The underlying ArrowArray or NULL if it has not been set
+  /// \brief The underlying ArrowArray or NULL if it has not been set or
+  /// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
   struct ArrowArray* array;
 
+  /// \brief The number of elements from the physical start of the buffers.
+  int64_t offset;
+
+  /// \brief The number of elements in this view.
+  int64_t length;
+
+  /// \brief A cached null count or -1 to indicate that this value is unknown.
+  int64_t null_count;
+
   /// \brief The type used to store values in this array
   ///
   /// This type represents only the minimum required information to

Reply via email to