bkietz commented on code in PR #596:
URL: https://github.com/apache/arrow-nanoarrow/pull/596#discussion_r1771796038


##########
src/nanoarrow/common/array_test.cc:
##########
@@ -895,6 +895,144 @@ TEST(ArrayTest, ArrayTestAppendToLargeStringArray) {
   ArrowArrayRelease(&array);
 }
 
+TEST(ArrayTest, ArrayTestAppendToStringViewArray) {
+  struct ArrowArray array;
+
+  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_STRING_VIEW), 
NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+  // Check that we can reserve
+  ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes,
+            5 * sizeof(union ArrowBinaryView));
+
+  std::string str1{"this_is_a_relatively_long_string"};
+  std::string filler(NANOARROW_BINARY_VIEW_BLOCK_SIZE - 34, 'x');
+  std::string str2{"goes_into_second_variadic_buffer"};
+
+  EXPECT_EQ(ArrowArrayAppendString(&array, "1234"_asv), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+  EXPECT_EQ(
+      ArrowArrayAppendString(&array, {{str1.c_str()}, 
static_cast<int64_t>(str1.size())}),
+      NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendString(
+                &array, {{filler.c_str()}, 
static_cast<int64_t>(filler.size())}),
+            NANOARROW_OK);
+  EXPECT_EQ(
+      ArrowArrayAppendString(&array, {{str2.c_str()}, 
static_cast<int64_t>(str2.size())}),
+      NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+  EXPECT_EQ(array.length, 7);
+  EXPECT_EQ(array.null_count, 2);
+  auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+  auto inline_buffer = reinterpret_cast<const union 
ArrowBinaryView*>(array.buffers[1]);
+  auto vbuf1 = reinterpret_cast<const char*>(array.buffers[2]);
+  auto vbuf2 = reinterpret_cast<const char*>(array.buffers[3]);
+  auto sizes_buffer = reinterpret_cast<const int64_t*>(array.buffers[4]);
+
+  EXPECT_EQ(validity_buffer[0], 0b01111001);
+  EXPECT_EQ(memcmp(inline_buffer[0].inlined.data, "1234", 4), 0);
+  EXPECT_EQ(inline_buffer[0].inlined.size, 4);
+  EXPECT_EQ(memcmp(inline_buffer[3].ref.prefix, str1.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[3].ref.size, str1.size());
+  EXPECT_EQ(inline_buffer[3].ref.buffer_index, 0);
+  EXPECT_EQ(inline_buffer[3].ref.offset, 0);
+
+  EXPECT_EQ(memcmp(inline_buffer[5].ref.prefix, filler.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[5].ref.size, filler.size());
+  EXPECT_EQ(inline_buffer[5].ref.buffer_index, 0);
+  EXPECT_EQ(inline_buffer[5].ref.offset, str1.size());
+
+  EXPECT_EQ(memcmp(inline_buffer[6].ref.prefix, str2.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[6].ref.size, str2.size());
+  EXPECT_EQ(inline_buffer[6].ref.buffer_index, 1);
+  EXPECT_EQ(inline_buffer[6].ref.offset, 0);
+
+  EXPECT_EQ(memcmp(vbuf1, str1.c_str(), str1.size()), 0);
+  EXPECT_EQ(sizes_buffer[0], str1.size() + filler.size());
+
+  EXPECT_EQ(memcmp(vbuf2, str2.c_str(), str2.size()), 0);
+  EXPECT_EQ(sizes_buffer[1], str2.size());
+
+  // TODO: need to add overload for ViewArrayAsBytes
+  /*
+  EXPECT_THAT(nanoarrow::ViewArrayAsBytes<64>(&array),
+              ElementsAre("1234"_asv, NA, NA, "56789"_asv, ""_asv));
+  */
+  ArrowArrayRelease(&array);
+}
+
+TEST(ArrayTest, ArrayTestAppendToBinaryViewArray) {
+  struct ArrowArray array;
+
+  ASSERT_EQ(ArrowArrayInitFromType(&array, NANOARROW_TYPE_BINARY_VIEW), 
NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+  // Check that we can reserve
+  ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes,
+            5 * sizeof(union ArrowBinaryView));
+
+  std::string str1{"this_is_a_relatively_long_string"};
+  std::string filler(NANOARROW_BINARY_VIEW_BLOCK_SIZE - 34, 'x');
+  std::string str2{"goes_into_second_variadic_buffer"};
+
+  EXPECT_EQ(ArrowArrayAppendBytes(&array, {{"1234"}, 4}), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+  EXPECT_EQ(
+      ArrowArrayAppendBytes(&array, {{str1.c_str()}, 
static_cast<int64_t>(str1.size())}),
+      NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendEmpty(&array, 1), NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayAppendBytes(
+                &array, {{filler.c_str()}, 
static_cast<int64_t>(filler.size())}),
+            NANOARROW_OK);
+  EXPECT_EQ(
+      ArrowArrayAppendBytes(&array, {{str2.c_str()}, 
static_cast<int64_t>(str2.size())}),
+      NANOARROW_OK);
+  EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
+
+  EXPECT_EQ(array.length, 7);
+  EXPECT_EQ(array.null_count, 2);
+  auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+  auto inline_buffer = reinterpret_cast<const union 
ArrowBinaryView*>(array.buffers[1]);
+  auto vbuf1 = reinterpret_cast<const char*>(array.buffers[2]);
+  auto vbuf2 = reinterpret_cast<const char*>(array.buffers[3]);
+  auto sizes_buffer = reinterpret_cast<const int64_t*>(array.buffers[4]);
+
+  EXPECT_EQ(validity_buffer[0], 0b01111001);
+  EXPECT_EQ(memcmp(inline_buffer[0].inlined.data, "1234", 4), 0);
+  EXPECT_EQ(inline_buffer[0].inlined.size, 4);
+  EXPECT_EQ(memcmp(inline_buffer[3].ref.prefix, str1.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[3].ref.size, str1.size());
+  EXPECT_EQ(inline_buffer[3].ref.buffer_index, 0);
+  EXPECT_EQ(inline_buffer[3].ref.offset, 0);
+
+  EXPECT_EQ(memcmp(inline_buffer[5].ref.prefix, filler.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[5].ref.size, filler.size());
+  EXPECT_EQ(inline_buffer[5].ref.buffer_index, 0);
+  EXPECT_EQ(inline_buffer[5].ref.offset, str1.size());
+
+  EXPECT_EQ(memcmp(inline_buffer[6].ref.prefix, str2.data(), 4), 0);
+  EXPECT_EQ(inline_buffer[6].ref.size, str2.size());
+  EXPECT_EQ(inline_buffer[6].ref.buffer_index, 1);
+  EXPECT_EQ(inline_buffer[6].ref.offset, 0);
+
+  EXPECT_EQ(memcmp(vbuf1, str1.c_str(), str1.size()), 0);
+  EXPECT_EQ(sizes_buffer[0], str1.size() + filler.size());
+
+  EXPECT_EQ(memcmp(vbuf2, str2.c_str(), str2.size()), 0);
+  EXPECT_EQ(sizes_buffer[1], str2.size());
+
+  // TODO: need to add overload for ViewArrayAsBytes

Review Comment:
   Could you open a follow up issue and link it here? I'll write it as soon as 
this PR merges



##########
src/nanoarrow/common/array.c:
##########
@@ -720,7 +759,18 @@ static int ArrowArrayViewSetArrayInternal(struct 
ArrowArrayView* array_view,
     }
   }
 
-  // Check the number of buffers
+  if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+      array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+    const int64_t n_buffers = array->n_buffers;
+    const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
+
+    int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1);
+    nvariadic_buf = (nvariadic_buf < 0) ? 0 : nvariadic_buf;
+    array_view->n_variadic_buffers = nvariadic_buf;
+    buffers_required += nvariadic_buf + 1;
+    array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers - 
1];

Review Comment:
   nit
   
   ```suggestion
       if (nvariadic_buf > 0) {
         array_view->n_variadic_buffers = nvariadic_buf;
         buffers_required += nvariadic_buf + 1;
         array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers 
- 1];
       }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to