This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8d53f8f  Update dist/ for commit 
be3b2b13a7e2ad977f6a1cad9a157e617bf42dcc
8d53f8f is described below

commit 8d53f8fa7c4f1285af2cebed45e57ef2210d0174
Author: GitHub Actions <[email protected]>
AuthorDate: Wed Mar 29 12:13:21 2023 +0000

    Update dist/ for commit be3b2b13a7e2ad977f6a1cad9a157e617bf42dcc
---
 dist/nanoarrow.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dist/nanoarrow.h |  23 +++++--
 2 files changed, 197 insertions(+), 5 deletions(-)

diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 8fb7994..8fa103b 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -2349,6 +2349,21 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
                                       struct ArrowArray* array,
                                       struct ArrowError* error) {
   array_view->array = array;
+
+  // Check length and offset
+  if (array->offset < 0) {
+    ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of 
%ld",
+                  (long)array->offset);
+    return EINVAL;
+  }
+
+  if (array->length < 0) {
+    ArrowErrorSet(error, "Expected array length >= 0 but found array length of 
%ld",
+                  (long)array->length);
+    return EINVAL;
+  }
+
+  // First pass setting lengths that do not depend on the data buffer
   ArrowArrayViewSetLength(array_view, array->offset + array->length);
 
   int64_t buffers_required = 0;
@@ -2375,15 +2390,25 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
   }
 
   if (array_view->n_children != array->n_children) {
+    ArrowErrorSet(error, "Expected %ld children but found %ld children",
+                  (long)array_view->n_children, (long)array->n_children);
     return EINVAL;
   }
 
   // Check child sizes and calculate sizes that depend on data in the array 
buffers
+  int64_t first_offset;
   int64_t last_offset;
   switch (array_view->storage_type) {
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_BINARY:
       if (array_view->buffer_views[1].size_bytes != 0) {
+        first_offset = array_view->buffer_views[1].data.as_int32[0];
+        if (first_offset < 0) {
+          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+                        (long)first_offset);
+          return EINVAL;
+        }
+
         last_offset =
             array_view->buffer_views[1].data.as_int32[array->offset + 
array->length];
         array_view->buffer_views[2].size_bytes = last_offset;
@@ -2392,6 +2417,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
     case NANOARROW_TYPE_LARGE_STRING:
     case NANOARROW_TYPE_LARGE_BINARY:
       if (array_view->buffer_views[1].size_bytes != 0) {
+        first_offset = array_view->buffer_views[1].data.as_int64[0];
+        if (first_offset < 0) {
+          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+                        (long)first_offset);
+          return EINVAL;
+        }
+
         last_offset =
             array_view->buffer_views[1].data.as_int64[array->offset + 
array->length];
         array_view->buffer_views[2].size_bytes = last_offset;
@@ -2421,6 +2453,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
       }
 
       if (array_view->buffer_views[1].size_bytes != 0) {
+        first_offset = array_view->buffer_views[1].data.as_int32[0];
+        if (first_offset < 0) {
+          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+                        (long)first_offset);
+          return EINVAL;
+        }
+
         last_offset =
             array_view->buffer_views[1].data.as_int32[array->offset + 
array->length];
         if (array->children[0]->length < last_offset) {
@@ -2443,6 +2482,13 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
       }
 
       if (array_view->buffer_views[1].size_bytes != 0) {
+        first_offset = array_view->buffer_views[1].data.as_int64[0];
+        if (first_offset < 0) {
+          ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
+                        (long)first_offset);
+          return EINVAL;
+        }
+
         last_offset =
             array_view->buffer_views[1].data.as_int64[array->offset + 
array->length];
         if (array->children[0]->length < last_offset) {
@@ -2485,6 +2531,139 @@ ArrowErrorCode ArrowArrayViewSetArray(struct 
ArrowArrayView* array_view,
 
   return NANOARROW_OK;
 }
+
+static int ArrowAssertIncreasingInt32(struct ArrowBufferView view,
+                                      struct ArrowError* error) {
+  if (view.size_bytes <= (int64_t)sizeof(int32_t)) {
+    return NANOARROW_OK;
+  }
+
+  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
+    int32_t diff = view.data.as_int32[i] - view.data.as_int32[i - 1];
+    if (diff < 0) {
+      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element 
size %ld",
+                    (long)i, (long)diff);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+static int ArrowAssertIncreasingInt64(struct ArrowBufferView view,
+                                      struct ArrowError* error) {
+  if (view.size_bytes <= (int64_t)sizeof(int64_t)) {
+    return NANOARROW_OK;
+  }
+
+  for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
+    int64_t diff = view.data.as_int64[i] - view.data.as_int64[i - 1];
+    if (diff < 0) {
+      ArrowErrorSet(error, "[%ld] Expected element size >= 0 but found element 
size %ld",
+                    (long)i, (long)diff);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+static int ArrowAssertRangeInt8(struct ArrowBufferView view, int8_t min_value,
+                                int8_t max_value, struct ArrowError* error) {
+  for (int64_t i = 0; i < view.size_bytes; i++) {
+    if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) {
+      ArrowErrorSet(error,
+                    "[%ld] Expected buffer value between %d and %d but found 
value %d",
+                    (long)i, (int)min_value, (int)max_value, 
(int)view.data.as_int8[i]);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+static int ArrowAssertInt8In(struct ArrowBufferView view, const int8_t* values,
+                             int64_t n_values, struct ArrowError* error) {
+  for (int64_t i = 0; i < view.size_bytes; i++) {
+    int item_found = 0;
+    for (int64_t j = 0; j < n_values; j++) {
+      if (view.data.as_int8[i] == values[j]) {
+        item_found = 1;
+        break;
+      }
+    }
+
+    if (!item_found) {
+      ArrowErrorSet(error, "[%ld] Unexpected buffer value %d", (long)i,
+                    (int)view.data.as_int8[i]);
+      return EINVAL;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+                                          struct ArrowError* error) {
+  for (int i = 0; i < 3; i++) {
+    switch (array_view->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        NANOARROW_RETURN_NOT_OK(
+            ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+        break;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        if (array_view->layout.element_size_bits[i] == 32) {
+          NANOARROW_RETURN_NOT_OK(
+              ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+        } else {
+          NANOARROW_RETURN_NOT_OK(
+              ArrowAssertIncreasingInt64(array_view->buffer_views[i], error));
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
+      array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
+    // Check that we have valid type ids
+    if (array_view->union_type_id_map == NULL ||
+        
_ArrowParsedUnionTypeIdsWillEqualChildIndices(array_view->union_type_id_map,
+                                                      array_view->n_children,
+                                                      array_view->n_children)) 
{
+      
NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(array_view->buffer_views[0], 0,
+                                                   array_view->n_children - 1, 
error));
+    } else {
+      NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
+                                                array_view->union_type_id_map 
+ 128,
+                                                array_view->n_children, 
error));
+    }
+  }
+
+  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION) {
+    // Check that offsets refer to child elements that actually exist
+    for (int64_t i = 0; i < array_view->array->length; i++) {
+      int8_t child_id = ArrowArrayViewUnionChildIndex(array_view, i);
+      int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
+      int64_t child_length = array_view->array->children[child_id]->length;
+      if (offset < 0 || offset > child_length) {
+        ArrowErrorSet(
+            error,
+            "[%ld] Expected union offset for child id %d to be between 0 and 
%ld but "
+            "found offset value %ld",
+            (long)i, (int)child_id, (long)child_length, offset);
+        return EINVAL;
+      }
+    }
+  }
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], 
error));
+  }
+
+  return NANOARROW_OK;
+}
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index 0eaec34..59d5dcf 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -535,7 +535,8 @@ struct ArrowArrayView {
   ///
   /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer
   /// such that child_index == union_type_id_map[type_id] and
-  /// type_id == union_type_id_map[128 + child_index]
+  /// type_id == union_type_id_map[128 + child_index]. This value may be
+  /// NULL in the case where child_id == type_id.
   int8_t* union_type_id_map;
 };
 
@@ -685,6 +686,8 @@ struct ArrowArrayPrivateData {
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength)
 #define ArrowArrayViewSetArray \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray)
+#define ArrowArrayViewValidateFull \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull)
 #define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, 
ArrowArrayViewReset)
 #define ArrowBasicArrayStreamInit \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -1466,6 +1469,10 @@ void ArrowArrayViewSetLength(struct ArrowArrayView* 
array_view, int64_t length);
 ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
                                       struct ArrowArray* array, struct 
ArrowError* error);
 
+/// \brief Performs extra checks on the array that was set via 
ArrowArrayViewSetArray()
+ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
+                                          struct ArrowError* error);
+
 /// \brief Reset the contents of an ArrowArrayView and frees resources
 void ArrowArrayViewReset(struct ArrowArrayView* array_view);
 
@@ -2155,10 +2162,9 @@ static inline int8_t _ArrowParseUnionTypeIds(const char* 
type_ids, int8_t* out)
   return -1;
 }
 
-static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* 
type_id_str,
-                                                             int64_t 
n_children) {
-  int8_t type_ids[128];
-  int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids);
+static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const 
int8_t* type_ids,
+                                                                   int64_t 
n_type_ids,
+                                                                   int64_t 
n_children) {
   if (n_type_ids != n_children) {
     return 0;
   }
@@ -2172,6 +2178,13 @@ static inline int8_t 
_ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id
   return 1;
 }
 
+static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* 
type_id_str,
+                                                             int64_t 
n_children) {
+  int8_t type_ids[128];
+  int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids);
+  return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, 
n_children);
+}
+
 static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* 
array) {
   struct ArrowArrayPrivateData* private_data =
       (struct ArrowArrayPrivateData*)array->private_data;

Reply via email to