This is an automated email from the ASF dual-hosted git repository.

willayd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 76fb7eee Implement ViewBinaryViewArrayAsBytes (#709)
76fb7eee is described below

commit 76fb7eee23a017545d9ef29c08f0b33f7e809e71
Author: William Ayd <[email protected]>
AuthorDate: Thu Jan 30 13:00:21 2025 -0500

    Implement ViewBinaryViewArrayAsBytes (#709)
---
 src/nanoarrow/hpp/view.hpp     | 54 +++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/hpp/view_test.cc | 58 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 112 insertions(+)

diff --git a/src/nanoarrow/hpp/view.hpp b/src/nanoarrow/hpp/view.hpp
index 0b235c5a..b9197583 100644
--- a/src/nanoarrow/hpp/view.hpp
+++ b/src/nanoarrow/hpp/view.hpp
@@ -234,6 +234,60 @@ class ViewArrayAsBytes {
   value_type operator[](int64_t i) const { return range_.get(i); }
 };
 
+class ViewBinaryViewArrayAsBytes {
+ private:
+  struct Get {
+    const uint8_t* validity;
+    const union ArrowBinaryView* inline_data;
+    const void** variadic_buffers;
+
+    internal::Maybe<ArrowStringView> operator()(int64_t i) const {
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        const union ArrowBinaryView* bv = &inline_data[i];
+        if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+          return ArrowStringView{reinterpret_cast<const char*>(bv->inlined.data),
+                                 bv->inlined.size};
+        }
+
+        return ArrowStringView{
+            reinterpret_cast<const char*>(variadic_buffers[bv->ref.buffer_index]) +
+                bv->ref.offset,
+            bv->ref.size};
+      }
+      return NA;
+    }
+  };
+
+  internal::RandomAccessRange<Get> range_;
+
+ public:
+  ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view)
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_binary_view,
+                array_view->variadic_buffers,
+            },
+            array_view->offset,
+            array_view->length,
+        } {}
+
+  ViewBinaryViewArrayAsBytes(const ArrowArray* array)
+      : range_{
+            Get{static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const union ArrowBinaryView*>(array->buffers[1]),
+                array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS},
+            array->offset,
+            array->length,
+        } {}
+
+  using value_type = typename internal::RandomAccessRange<Get>::value_type;
+  using const_iterator = typename internal::RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
 /// \brief A range-for compatible wrapper for ArrowArray of fixed size binary
 ///
 /// Provides a sequence of optional<ArrowStringView> referencing each non-null
diff --git a/src/nanoarrow/hpp/view_test.cc b/src/nanoarrow/hpp/view_test.cc
index e4cfa499..37a9342e 100644
--- a/src/nanoarrow/hpp/view_test.cc
+++ b/src/nanoarrow/hpp/view_test.cc
@@ -82,6 +82,64 @@ TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsBytesTest) {
   }
 }
 
+class BinaryViewTypeTestFixture
+    : public ::testing::TestWithParam<std::tuple<int, enum ArrowType>> {
+ protected:
+  enum ArrowType data_type;
+};
+
+TEST_P(BinaryViewTypeTestFixture, NanoarrowHppViewBinaryViewArrayAsBytesTest) {
+  using namespace nanoarrow::literals;
+
+  nanoarrow::UniqueArray array{};
+  const auto [offset, type] = GetParam();
+  ASSERT_EQ(ArrowArrayInitFromType(array.get(), type), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(array.get()), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "foo"_asv), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "this_string_is_longer_than_inline"_asv),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayFinishBuildingDefault(array.get(), nullptr), NANOARROW_OK);
+  array->offset = offset;
+  array->length = array->length - offset;
+
+  ArrowStringView expected[] = {"foo"_asv, ""_asv,
+                                "this_string_is_longer_than_inline"_asv, ""_asv,
+                                "here_is_another_string"_asv};
+
+  nanoarrow::UniqueArrayView array_view{};
+  ArrowArrayViewInitFromType(array_view.get(), type);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK);
+
+  int i = offset;
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array.get())) {
+    if (i == 1 || i == 3) {
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+
+  i = offset;
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array_view.get())) {
+    if (i == 1 || i == 3) {
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    NanoarrowHppTest, BinaryViewTypeTestFixture,
+    ::testing::Combine(::testing::Values(0, 2),
+                       ::testing::Values(NANOARROW_TYPE_BINARY_VIEW,
+                                         NANOARROW_TYPE_STRING_VIEW)));
+
 TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsFixedSizeBytesTest) {
   using namespace nanoarrow::literals;
 

Reply via email to