This is an automated email from the ASF dual-hosted git repository.
willayd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 76fb7eee Implement ViewBinaryViewArrayAsBytes (#709)
76fb7eee is described below
commit 76fb7eee23a017545d9ef29c08f0b33f7e809e71
Author: William Ayd <[email protected]>
AuthorDate: Thu Jan 30 13:00:21 2025 -0500
Implement ViewBinaryViewArrayAsBytes (#709)
---
src/nanoarrow/hpp/view.hpp | 54 +++++++++++++++++++++++++++++++++++++++
src/nanoarrow/hpp/view_test.cc | 58 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 112 insertions(+)
diff --git a/src/nanoarrow/hpp/view.hpp b/src/nanoarrow/hpp/view.hpp
index 0b235c5a..b9197583 100644
--- a/src/nanoarrow/hpp/view.hpp
+++ b/src/nanoarrow/hpp/view.hpp
@@ -234,6 +234,60 @@ class ViewArrayAsBytes {
value_type operator[](int64_t i) const { return range_.get(i); }
};
+class ViewBinaryViewArrayAsBytes {  // range-for compatible wrapper over a binary/string view array
+ private:
+  struct Get {  // functor mapping a slot index to an optional ArrowStringView
+    const uint8_t* validity;  // validity bitmap; nullptr is treated as "every slot valid"
+    const union ArrowBinaryView* inline_data;  // one view struct per slot
+    const void** variadic_buffers;  // out-of-line data, indexed by ref.buffer_index
+
+    internal::Maybe<ArrowStringView> operator()(int64_t i) const {
+      if (validity == nullptr || ArrowBitGet(validity, i)) {
+        const union ArrowBinaryView* bv = &inline_data[i];
+        if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {  // short: bytes are in the view
+          return ArrowStringView{reinterpret_cast<const char*>(bv->inlined.data),
+                                 bv->inlined.size};
+        }
+
+        return ArrowStringView{  // long: bytes start at ref.offset inside a variadic buffer
+            reinterpret_cast<const char*>(variadic_buffers[bv->ref.buffer_index]) +
+                bv->ref.offset,
+            bv->ref.size};
+      }
+      return NA;  // validity bit is unset: the slot is null
+    }
+  };
+
+  internal::RandomAccessRange<Get> range_;
+
+ public:
+  ViewBinaryViewArrayAsBytes(const ArrowArrayView* array_view)  // reads buffers from the view
+      : range_{
+            Get{
+                array_view->buffer_views[0].data.as_uint8,
+                array_view->buffer_views[1].data.as_binary_view,
+                array_view->variadic_buffers,
+            },
+            array_view->offset,
+            array_view->length,
+        } {}
+
+  ViewBinaryViewArrayAsBytes(const ArrowArray* array)  // reads buffers from the raw ArrowArray
+      : range_{
+            Get{static_cast<const uint8_t*>(array->buffers[0]),
+                static_cast<const union ArrowBinaryView*>(array->buffers[1]),
+                array->buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS},  // skip the fixed buffers
+            array->offset,
+            array->length,
+        } {}
+
+  using value_type = typename internal::RandomAccessRange<Get>::value_type;
+  using const_iterator = typename internal::RandomAccessRange<Get>::const_iterator;
+  const_iterator begin() const { return range_.begin(); }
+  const_iterator end() const { return range_.end(); }
+  value_type operator[](int64_t i) const { return range_.get(i); }
+};
+
/// \brief A range-for compatible wrapper for ArrowArray of fixed size binary
///
/// Provides a sequence of optional<ArrowStringView> referencing each non-null
diff --git a/src/nanoarrow/hpp/view_test.cc b/src/nanoarrow/hpp/view_test.cc
index e4cfa499..37a9342e 100644
--- a/src/nanoarrow/hpp/view_test.cc
+++ b/src/nanoarrow/hpp/view_test.cc
@@ -82,6 +82,64 @@ TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsBytesTest) {
}
}
+class BinaryViewTypeTestFixture  // parameterized fixture; the parameter is an (int, ArrowType) tuple
+ : public ::testing::TestWithParam<std::tuple<int, enum ArrowType>> {
+ protected:
+ enum ArrowType data_type;  // NOTE(review): not referenced by the test shown in this patch
+};
+
+TEST_P(BinaryViewTypeTestFixture, NanoarrowHppViewBinaryViewArrayAsBytesTest) {
+  using namespace nanoarrow::literals;
+
+  nanoarrow::UniqueArray array{};  // filled below with: "foo", null, long string, null
+  const auto [offset, type] = GetParam();
+  ASSERT_EQ(ArrowArrayInitFromType(array.get(), type), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayStartAppending(array.get()), NANOARROW_OK);
+
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "foo"_asv), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendString(array.get(), "this_string_is_longer_than_inline"_asv),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayAppendNull(array.get(), 1), NANOARROW_OK);
+  ASSERT_EQ(ArrowArrayFinishBuildingDefault(array.get(), nullptr), NANOARROW_OK);
+  array->offset = offset;  // turn the finished array into a slice starting at `offset`
+  array->length = array->length - offset;
+
+  ArrowStringView expected[] = {"foo"_asv, ""_asv,
+                                "this_string_is_longer_than_inline"_asv, ""_asv,
+                                "here_is_another_string"_asv};
+
+  nanoarrow::UniqueArrayView array_view{};
+  ArrowArrayViewInitFromType(array_view.get(), type);
+  ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), nullptr), NANOARROW_OK);
+
+  int i = offset;  // index into `expected` of the first slot the slice exposes
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array.get())) {  // ArrowArray ctor
+    if (i == 1 || i == 3) {  // positions of the two appended nulls
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+
+  i = offset;  // same checks again, this time through the ArrowArrayView constructor
+  for (auto slot : nanoarrow::ViewBinaryViewArrayAsBytes(array_view.get())) {
+    if (i == 1 || i == 3) {  // positions of the two appended nulls
+      EXPECT_EQ(slot, nanoarrow::NA);
+    } else {
+      EXPECT_EQ(slot, expected[i]);
+    }
+    ++i;
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(  // runs the fixture's tests for each combination of the values below
+ NanoarrowHppTest, BinaryViewTypeTestFixture,
+ ::testing::Combine(::testing::Values(0, 2),  // first tuple element: the slice offset
+ ::testing::Values(NANOARROW_TYPE_BINARY_VIEW,  // second tuple element: the array type
+ NANOARROW_TYPE_STRING_VIEW)));
+
TEST(NanoarrowHppTest, NanoarrowHppViewArrayAsFixedSizeBytesTest) {
using namespace nanoarrow::literals;