Re: [PR] feat: Implement LIST_VIEW and LARGE_LIST_VIEW support [arrow-nanoarrow]

via GitHub Mon, 03 Feb 2025 17:34:42 -0800


WillAyd commented on code in PR #710:
URL: https://github.com/apache/arrow-nanoarrow/pull/710#discussion_r1940350829



##########
src/nanoarrow/common/inline_types.h:
##########
@@ -630,7 +636,8 @@ enum ArrowBufferType {
   NANOARROW_BUFFER_TYPE_DATA_OFFSET,
   NANOARROW_BUFFER_TYPE_DATA,
   NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
-  NANOARROW_BUFFER_TYPE_VARIADIC_SIZE
+  NANOARROW_BUFFER_TYPE_VARIADIC_SIZE,
+  NANOARROW_BUFFER_TYPE_SIZE,

Review Comment:
   Not sure if we wanted to add a new buffer type for this or not - is there an 
official document upstream we follow for this?



##########
src/nanoarrow/common/inline_array.h:
##########
@@ -290,18 +293,27 @@ static inline ArrowErrorCode 
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
       case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
       case NANOARROW_BUFFER_TYPE_VALIDITY:
         continue;
-      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
-        // Append the current value at the end of the offset buffer for each 
element
-        NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
-
-        for (int64_t j = 0; j < n; j++) {
-          ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * 
(array->length + j),
-                                  size_bytes);
+      case NANOARROW_BUFFER_TYPE_SIZE:
+        NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * 
n));
+        continue;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {

Review Comment:
   The existing offset type is tightly bound to the idea that you have n+1 
elements in the buffer to provide all open and closed intervals. The list views 
do not require that, so I've specialized this here. But we could alternatively 
make a new buffer enum member for offsets that aren't double ended (?)



##########
src/nanoarrow/common/array_test.cc:
##########
@@ -1555,6 +1555,149 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
 #endif
 }
 
+TEST(ArrayTest, ArrayTestAppendToListViewArray) {
+  struct ArrowArray array;
+  struct ArrowSchema schema;
+  struct ArrowError error;
+
+  ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), 
NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+  // Check that we can reserve recursively without erroring
+  ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+
+  // Make sure number of children is checked at finish
+  array.n_children = 0;
+  EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
+  EXPECT_STREQ(ArrowErrorMessage(&error),
+               "Expected 1 child of list_view array but found 0 child arrays");
+  array.n_children = 1;
+
+  // Make sure final child size is checked at finish

Review Comment:
   This was just copied from the existing list types, where it is trivial to 
check the last offsets against the size of the array. The list view check I 
don't think is as simplistic - maybe we should just remove this from the 
validation?



##########
src/nanoarrow/common/array_test.cc:
##########
@@ -1555,6 +1555,149 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
 #endif
 }
 
+TEST(ArrayTest, ArrayTestAppendToListViewArray) {

Review Comment:
   These tests were copy/paste from their list counterparts; in a follow up 
commit or PR I'd like to try and parametrize these



##########
src/nanoarrow/common/inline_array.h:
##########
@@ -752,19 +763,43 @@ static inline ArrowErrorCode 
ArrowArrayFinishElement(struct ArrowArray* array) {
 
   switch (private_data->storage_type) {
     case NANOARROW_TYPE_LIST:
-    case NANOARROW_TYPE_MAP:
+    case NANOARROW_TYPE_LIST_VIEW:
+    case NANOARROW_TYPE_MAP: {
       child_length = array->children[0]->length;
       if (child_length > INT32_MAX) {
         return EOVERFLOW;
       }
       NANOARROW_RETURN_NOT_OK(
           ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), 
(int32_t)child_length));
+
+      if (private_data->storage_type == NANOARROW_TYPE_LIST_VIEW) {
+        struct ArrowBufferView buf_view;
+        buf_view.data.data = ArrowArrayBuffer(array, 1)->data;
+        const int32_t array_len = array->length;
+        const int32_t prev_offset =
+            array_len > 0 ? buf_view.data.as_int32[array_len - 1] : 0;
+        NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(
+            ArrowArrayBuffer(array, 2), (int32_t)child_length - prev_offset));
+      }
       break;
+    }
     case NANOARROW_TYPE_LARGE_LIST:
+    case NANOARROW_TYPE_LARGE_LIST_VIEW: {
       child_length = array->children[0]->length;
       NANOARROW_RETURN_NOT_OK(
           ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length));
+      if (private_data->storage_type == NANOARROW_TYPE_LARGE_LIST_VIEW) {

Review Comment:
   Generally not sure how much we want the implementation of the list views 
tied to their respective plain list types. Here I've kept the cases group 
together and specialize within the branch, but also happy to move the view 
types to their own case statements



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: Implement LIST_VIEW and LARGE_LIST_VIEW support [arrow-nanoarrow]

Reply via email to