paleolimbot commented on code in PR #710:
URL: https://github.com/apache/arrow-nanoarrow/pull/710#discussion_r1945373831


##########
src/nanoarrow/common/inline_types.h:
##########
@@ -864,6 +871,9 @@ struct ArrowArrayPrivateData {
 
   // Size of each variadic buffer in bytes
   int64_t* variadic_buffer_sizes;
+
+  // The current offset used to build list views
+  int64_t list_view_offset;

Review Comment:
   Would it be possible to just use the length of the child array instead of 
add a parameter here?



##########
src/nanoarrow/common/array_test.cc:
##########
@@ -1555,6 +1555,149 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
 #endif
 }
 
+TEST(ArrayTest, ArrayTestAppendToListViewArray) {
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowError error;
+
+  ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+  // Check that we can reserve recursively without erroring
+  ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+
+  // Make sure number of children is checked at finish
+  array.n_children = 0;
+  EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
+  EXPECT_STREQ(ArrowErrorMessage(&error),
+               "Expected 1 child of list_view array but found 0 child arrays");
+  array.n_children = 1;
+
+  // Make sure final child size is checked at finish

Review Comment:
   I think we probably want the check, but because the check requires iterating 
along every item it should be checked in the `ValidateFull` rather than the 
`ValidateDefault`. That check is important for IPC because it prevents (if done 
correctly) out-of-bounds buffer access!



##########
src/nanoarrow/common/inline_types.h:
##########
@@ -630,7 +636,8 @@ enum ArrowBufferType {
   NANOARROW_BUFFER_TYPE_DATA_OFFSET,
   NANOARROW_BUFFER_TYPE_DATA,
   NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
-  NANOARROW_BUFFER_TYPE_VARIADIC_SIZE
+  NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
+  NANOARROW_BUFFER_TYPE_SIZE,

Review Comment:
   Nope...this is nanoarrow native! I think a new buffer type is a good idea 
(for the view offsets as well!)



##########
src/nanoarrow/common/inline_array.h:
##########
@@ -772,6 +787,59 @@ static inline ArrowErrorCode 
ArrowArrayFinishElement(struct ArrowArray* array) {
         return EINVAL;
       }
       break;
+    case NANOARROW_TYPE_LIST_VIEW: {
+      child_length = array->children[0]->length;
+      if (child_length > INT32_MAX) {
+        return EOVERFLOW;
+      }
+
+      const int32_t current_offset = (int32_t)private_data->list_view_offset;
+      int32_t offset;
+      if (current_offset == 0) {
+        offset = 0;
+      } else {
+        struct ArrowBuffer offsets_buf, sizes_buf;
+        offsets_buf = *ArrowArrayBuffer(array, 1);
+        sizes_buf = *ArrowArrayBuffer(array, 2);
+        NANOARROW_DCHECK(offsets_buf.size_bytes == sizes_buf.size_bytes);
+        struct ArrowBufferView offsets, sizes;
+        offsets.data.data = offsets_buf.data;
+        sizes.data.data = sizes_buf.data;
+        const int32_t prev_offset = offsets.data.as_int32[current_offset - 1];
+        const int32_t prev_size = sizes.data.as_int32[current_offset - 1];
+        offset = prev_offset + prev_size;
+      }
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, 
1), offset));
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, 
2),
+                                                     (int32_t)child_length - 
offset));
+      private_data->list_view_offset = (int32_t)child_length;
+      break;
+    }
+    case NANOARROW_TYPE_LARGE_LIST_VIEW: {
+      child_length = array->children[0]->length;
+      const int64_t current_offset = private_data->list_view_offset;
+      int64_t offset;
+      if (current_offset == 0) {
+        offset = 0;
+      } else {
+        struct ArrowBuffer offsets_buf, sizes_buf;
+        offsets_buf = *ArrowArrayBuffer(array, 1);
+        sizes_buf = *ArrowArrayBuffer(array, 2);

Review Comment:
   I think these should be pointers as well



##########
src/nanoarrow/common/inline_array.h:
##########
@@ -290,18 +293,27 @@ static inline ArrowErrorCode 
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
       case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
       case NANOARROW_BUFFER_TYPE_VALIDITY:
         continue;
-      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
-        // Append the current value at the end of the offset buffer for each 
element
-        NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
-
-        for (int64_t j = 0; j < n; j++) {
-          ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * 
(array->length + j),
-                                  size_bytes);
+      case NANOARROW_BUFFER_TYPE_SIZE:
+        NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * 
n));
+        continue;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {

Review Comment:
   I think we want a new buffer type for this one because it has different 
rules than the existing `DATA_OFFSET` buffer (maybe 
`NANOARROW_BUFFER_TYPE_VIEW_OFFSET`?)



##########
src/nanoarrow/common/inline_array.h:
##########
@@ -772,6 +787,59 @@ static inline ArrowErrorCode 
ArrowArrayFinishElement(struct ArrowArray* array) {
         return EINVAL;
       }
       break;
+    case NANOARROW_TYPE_LIST_VIEW: {
+      child_length = array->children[0]->length;
+      if (child_length > INT32_MAX) {
+        return EOVERFLOW;
+      }
+
+      const int32_t current_offset = (int32_t)private_data->list_view_offset;
+      int32_t offset;
+      if (current_offset == 0) {
+        offset = 0;
+      } else {
+        struct ArrowBuffer offsets_buf, sizes_buf;
+        offsets_buf = *ArrowArrayBuffer(array, 1);
+        sizes_buf = *ArrowArrayBuffer(array, 2);

Review Comment:
   I don't think you want to dereference a buffer pointer here (that will mean 
that the changes that you make to `offsets_buf` won't be syncrhonized back to 
the array!). I think `struct ArrowBuffer* offsets_buf` will work here?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to