paleolimbot commented on code in PR #710: URL: https://github.com/apache/arrow-nanoarrow/pull/710#discussion_r1945373831
########## src/nanoarrow/common/inline_types.h: ########## @@ -864,6 +871,9 @@ struct ArrowArrayPrivateData { // Size of each variadic buffer in bytes int64_t* variadic_buffer_sizes; + + // The current offset used to build list views + int64_t list_view_offset; Review Comment: Would it be possible to just use the length of the child array instead of add a parameter here? ########## src/nanoarrow/common/array_test.cc: ########## @@ -1555,6 +1555,149 @@ TEST(ArrayTest, ArrayTestAppendToLargeListArray) { #endif } +TEST(ArrayTest, ArrayTestAppendToListViewArray) { + struct ArrowArray array; + struct ArrowSchema schema; + struct ArrowError error; + + ASSERT_EQ(ArrowSchemaInitFromType(&schema, NANOARROW_TYPE_LIST_VIEW), NANOARROW_OK); + ASSERT_EQ(ArrowSchemaSetType(schema.children[0], NANOARROW_TYPE_INT64), NANOARROW_OK); + ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK); + + // Check that we can reserve recursively without erroring + ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK); + EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK); + + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK); + ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK); + EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK); + + EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK); + + // Make sure number of children is checked at finish + array.n_children = 0; + EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL); + EXPECT_STREQ(ArrowErrorMessage(&error), + "Expected 1 child of list_view array but found 0 child arrays"); + array.n_children = 1; + + // Make sure final child size is checked at finish Review Comment: I think we probably want the check, but because the check requires iterating along every item it should be checked in the `ValidateFull` rather than the `ValidateDefault`. That check is important for IPC because it prevents (if done correctly) out-of-bounds buffer access! ########## src/nanoarrow/common/inline_types.h: ########## @@ -630,7 +636,8 @@ enum ArrowBufferType { NANOARROW_BUFFER_TYPE_DATA_OFFSET, NANOARROW_BUFFER_TYPE_DATA, NANOARROW_BUFFER_TYPE_VARIADIC_DATA, - NANOARROW_BUFFER_TYPE_VARIADIC_SIZE + NANOARROW_BUFFER_TYPE_VARIADIC_SIZE, + NANOARROW_BUFFER_TYPE_SIZE, Review Comment: Nope...this is nanoarrow native! I think a new buffer type is a good idea (for the view offsets as well!) ########## src/nanoarrow/common/inline_array.h: ########## @@ -772,6 +787,59 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { return EINVAL; } break; + case NANOARROW_TYPE_LIST_VIEW: { + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EOVERFLOW; + } + + const int32_t current_offset = (int32_t)private_data->list_view_offset; + int32_t offset; + if (current_offset == 0) { + offset = 0; + } else { + struct ArrowBuffer offsets_buf, sizes_buf; + offsets_buf = *ArrowArrayBuffer(array, 1); + sizes_buf = *ArrowArrayBuffer(array, 2); + NANOARROW_DCHECK(offsets_buf.size_bytes == sizes_buf.size_bytes); + struct ArrowBufferView offsets, sizes; + offsets.data.data = offsets_buf.data; + sizes.data.data = sizes_buf.data; + const int32_t prev_offset = offsets.data.as_int32[current_offset - 1]; + const int32_t prev_size = sizes.data.as_int32[current_offset - 1]; + offset = prev_offset + prev_size; + } + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), offset)); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, 2), + (int32_t)child_length - offset)); + private_data->list_view_offset = (int32_t)child_length; + break; + } + case NANOARROW_TYPE_LARGE_LIST_VIEW: { + child_length = array->children[0]->length; + const int64_t current_offset = private_data->list_view_offset; + int64_t offset; + if (current_offset == 0) { + offset = 0; + } else { + struct ArrowBuffer offsets_buf, sizes_buf; + offsets_buf = *ArrowArrayBuffer(array, 1); + sizes_buf = *ArrowArrayBuffer(array, 2); Review Comment: I think these should be pointers as well ########## src/nanoarrow/common/inline_array.h: ########## @@ -290,18 +293,27 @@ static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* a case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE: case NANOARROW_BUFFER_TYPE_VALIDITY: continue; - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - // Append the current value at the end of the offset buffer for each element - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); - - for (int64_t j = 0; j < n; j++) { - ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), - size_bytes); + case NANOARROW_BUFFER_TYPE_SIZE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: { Review Comment: I think we want a new buffer type for this one because it has different rules than the existing `DATA_OFFSET` buffer (maybe `NANOARROW_BUFFER_TYPE_VIEW_OFFSET`?) ########## src/nanoarrow/common/inline_array.h: ########## @@ -772,6 +787,59 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { return EINVAL; } break; + case NANOARROW_TYPE_LIST_VIEW: { + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { + return EOVERFLOW; + } + + const int32_t current_offset = (int32_t)private_data->list_view_offset; + int32_t offset; + if (current_offset == 0) { + offset = 0; + } else { + struct ArrowBuffer offsets_buf, sizes_buf; + offsets_buf = *ArrowArrayBuffer(array, 1); + sizes_buf = *ArrowArrayBuffer(array, 2); Review Comment: I don't think you want to dereference a buffer pointer here (that will mean that the changes that you make to `offsets_buf` won't be syncrhonized back to the array!). I think `struct ArrowBuffer* offsets_buf` will work here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org