This is an automated email from the ASF dual-hosted git repository.

bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 94fc124288 GH-36099: [C++] Add Utf8View and BinaryView to the c ABI (#38443)
94fc124288 is described below

commit 94fc124288a0b925c26ea3800ec380020abe9e42
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Wed Nov 29 12:22:20 2023 -0500

    GH-36099: [C++] Add Utf8View and BinaryView to the c ABI (#38443)
    
    
    
    ### Rationale for this change
    
    Utf8View and BinaryView should be added to the c ABI spec and to the c++ library's importer/exporter.
    
    ### Are these changes tested?
    
    Yes, minimally
    
    ### Are there any user-facing changes?
    
    View arrays will be importable/exportable through the c ABI in c++
    
    * Closes: #36099
    
    Authored-by: Benjamin Kietzman <[email protected]>
    Signed-off-by: Benjamin Kietzman <[email protected]>
---
 cpp/src/arrow/c/bridge.cc                  |  87 +++++++++++++++---
 cpp/src/arrow/c/bridge_test.cc             | 137 ++++++++++++++++++++++++-----
 dev/archery/archery/cli.py                 |   2 +-
 dev/archery/archery/integration/datagen.py |   4 +-
 docs/source/format/CDataInterface.rst      |  12 +++
 5 files changed, 203 insertions(+), 39 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index eeec75f2f4..238afb0328 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -20,6 +20,7 @@
 #include <algorithm>
 #include <cerrno>
 #include <cstring>
+#include <memory>
 #include <string>
 #include <string_view>
 #include <utility>
@@ -41,6 +42,7 @@
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/range.h"
 #include "arrow/util/small_vector.h"
 #include "arrow/util/string.h"
 #include "arrow/util/value_parsing.h"
@@ -260,7 +262,7 @@ struct SchemaExporter {
       // Dictionary type: parent struct describes index type,
       // child dictionary struct describes value type.
       RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), this));
-      dict_exporter_.reset(new SchemaExporter());
+      dict_exporter_ = std::make_unique<SchemaExporter>();
       RETURN_NOT_OK(dict_exporter_->ExportType(*dict_type.value_type()));
     } else {
       RETURN_NOT_OK(VisitTypeInline(type, this));
@@ -357,10 +359,14 @@ struct SchemaExporter {
 
   Status Visit(const LargeBinaryType& type) { return SetFormat("Z"); }
 
+  Status Visit(const BinaryViewType& type) { return SetFormat("vz"); }
+
   Status Visit(const StringType& type) { return SetFormat("u"); }
 
   Status Visit(const LargeStringType& type) { return SetFormat("U"); }
 
+  Status Visit(const StringViewType& type) { return SetFormat("vu"); }
+
   Status Visit(const Date32Type& type) { return SetFormat("tdD"); }
 
   Status Visit(const Date64Type& type) { return SetFormat("tdm"); }
@@ -521,13 +527,14 @@ namespace {
 
 struct ExportedArrayPrivateData : PoolAllocationMixin<ExportedArrayPrivateData> {
   // The buffers are owned by the ArrayData member
-  StaticVector<const void*, 3> buffers_;
+  SmallVector<const void*, 3> buffers_;
   struct ArrowArray dictionary_;
   SmallVector<struct ArrowArray, 1> children_;
   SmallVector<struct ArrowArray*, 4> child_pointers_;
 
   std::shared_ptr<ArrayData> data_;
   std::shared_ptr<Device::SyncEvent> sync_;
+  std::vector<int64_t> variadic_buffer_sizes_;
 
   ExportedArrayPrivateData() = default;
   ARROW_DEFAULT_MOVE_AND_ASSIGN(ExportedArrayPrivateData);
@@ -570,15 +577,32 @@ struct ArrayExporter {
       --n_buffers;
       ++buffers_begin;
     }
+
+    bool need_variadic_buffer_sizes =
+        data->type->id() == Type::BINARY_VIEW || data->type->id() == Type::STRING_VIEW;
+    if (need_variadic_buffer_sizes) {
+      ++n_buffers;
+    }
+
     export_.buffers_.resize(n_buffers);
     std::transform(buffers_begin, data->buffers.end(), export_.buffers_.begin(),
                    [](const std::shared_ptr<Buffer>& buffer) -> const void* {
                      return buffer ? buffer->data() : nullptr;
                    });
 
+    if (need_variadic_buffer_sizes) {
+      auto variadic_buffers = util::span(data->buffers).subspan(2);
+      export_.variadic_buffer_sizes_.resize(variadic_buffers.size());
+      size_t i = 0;
+      for (const auto& buf : variadic_buffers) {
+        export_.variadic_buffer_sizes_[i++] = buf->size();
+      }
+      export_.buffers_.back() = export_.variadic_buffer_sizes_.data();
+    }
+
     // Export dictionary
     if (data->dictionary != nullptr) {
-      dict_exporter_.reset(new ArrayExporter());
+      dict_exporter_ = std::make_unique<ArrayExporter>();
       RETURN_NOT_OK(dict_exporter_->Export(data->dictionary));
     }
 
@@ -795,7 +819,7 @@ Status InvalidFormatString(std::string_view v) {
 
 class FormatStringParser {
  public:
-  FormatStringParser() {}
+  FormatStringParser() = default;
 
   explicit FormatStringParser(std::string_view v) : view_(v), index_(0) {}
 
@@ -941,8 +965,6 @@ Result<DecodedMetadata> DecodeMetadata(const char* metadata) {
 }
 
 struct SchemaImporter {
-  SchemaImporter() : c_struct_(nullptr), guard_(nullptr) {}
-
   Status Import(struct ArrowSchema* src) {
     if (ArrowSchemaIsReleased(src)) {
       return Status::Invalid("Cannot import released ArrowSchema");
@@ -1068,6 +1090,8 @@ struct SchemaImporter {
         return ProcessPrimitive(binary());
       case 'Z':
         return ProcessPrimitive(large_binary());
+      case 'v':
+        return ProcessBinaryView();
       case 'w':
         return ProcessFixedSizeBinary();
       case 'd':
@@ -1080,6 +1104,17 @@ struct SchemaImporter {
     return f_parser_.Invalid();
   }
 
+  Status ProcessBinaryView() {
+    RETURN_NOT_OK(f_parser_.CheckHasNext());
+    switch (f_parser_.Next()) {
+      case 'z':
+        return ProcessPrimitive(binary_view());
+      case 'u':
+        return ProcessPrimitive(utf8_view());
+    }
+    return f_parser_.Invalid();
+  }
+
   Status ProcessTemporal() {
     RETURN_NOT_OK(f_parser_.CheckHasNext());
     switch (f_parser_.Next()) {
@@ -1360,8 +1395,8 @@ struct SchemaImporter {
     return Status::OK();
   }
 
-  struct ArrowSchema* c_struct_;
-  SchemaExportGuard guard_;
+  struct ArrowSchema* c_struct_{nullptr};
+  SchemaExportGuard guard_{nullptr};
   FormatStringParser f_parser_;
   int64_t recursion_level_;
   std::vector<SchemaImporter> child_importers_;
@@ -1429,7 +1464,7 @@ class ImportedBuffer : public Buffer {
                  std::shared_ptr<ImportedArrayData> import)
       : Buffer(data, size, mm, nullptr, device_type), import_(std::move(import)) {}
 
-  ~ImportedBuffer() override {}
+  ~ImportedBuffer() override = default;
 
   std::shared_ptr<Device::SyncEvent> device_sync_event() override {
     return import_->device_sync_;
@@ -1441,9 +1476,7 @@ class ImportedBuffer : public Buffer {
 
 struct ArrayImporter {
   explicit ArrayImporter(const std::shared_ptr<DataType>& type)
-      : type_(type),
-        zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)),
-        device_type_(DeviceAllocationType::kCPU) {}
+      : type_(type), zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)) {}
 
   Status Import(struct ArrowDeviceArray* src, const DeviceMemoryMapper& mapper) {
     ARROW_ASSIGN_OR_RAISE(memory_mgr_, mapper(src->device_type, src->device_id));
@@ -1591,6 +1624,10 @@ struct ArrayImporter {
 
   Status Visit(const LargeBinaryType& type) { return ImportStringLike(type); }
 
+  Status Visit(const StringViewType& type) { return ImportBinaryView(type); }
+
+  Status Visit(const BinaryViewType& type) { return ImportBinaryView(type); }
+
   Status Visit(const ListType& type) { return ImportListLike(type); }
 
   Status Visit(const LargeListType& type) { return ImportListLike(type); }
@@ -1673,6 +1710,28 @@ struct ArrayImporter {
     return Status::OK();
   }
 
+  Status ImportBinaryView(const BinaryViewType&) {
+    RETURN_NOT_OK(CheckNoChildren());
+    if (c_struct_->n_buffers < 3) {
+      return Status::Invalid("Expected at least 3 buffers for imported type ",
+                             type_->ToString(), ", ArrowArray struct has ",
+                             c_struct_->n_buffers);
+    }
+    RETURN_NOT_OK(AllocateArrayData());
+    RETURN_NOT_OK(ImportNullBitmap());
+    RETURN_NOT_OK(ImportFixedSizeBuffer(1, BinaryViewType::kSize));
+
+    // The last C data buffer stores buffer sizes, and shouldn't be imported
+    auto* buffer_sizes =
+        static_cast<const int64_t*>(c_struct_->buffers[c_struct_->n_buffers - 1]);
+
+    for (int32_t buffer_id = 2; buffer_id < c_struct_->n_buffers - 1; ++buffer_id) {
+      RETURN_NOT_OK(ImportBuffer(buffer_id, buffer_sizes[buffer_id - 2]));
+    }
+    data_->buffers.pop_back();
+    return Status::OK();
+  }
+
   template <typename StringType>
   Status ImportStringLike(const StringType& type) {
     RETURN_NOT_OK(CheckNoChildren());
@@ -1836,7 +1895,7 @@ struct ArrayImporter {
   std::shared_ptr<Buffer> zero_size_buffer_;
 
   std::shared_ptr<MemoryManager> memory_mgr_;
-  DeviceAllocationType device_type_;
+  DeviceAllocationType device_type_{DeviceAllocationType::kCPU};
 };
 
 }  // namespace
@@ -2042,7 +2101,7 @@ class ArrayStreamBatchReader : public RecordBatchReader {
     DCHECK(!ArrowArrayStreamIsReleased(&stream_));
   }
 
-  ~ArrayStreamBatchReader() {
+  ~ArrayStreamBatchReader() override {
     if (!ArrowArrayStreamIsReleased(&stream_)) {
       ArrowArrayStreamRelease(&stream_);
     }
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 362df83378..326c67f5ec 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -38,11 +38,13 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/matchers.h"
 #include "arrow/testing/util.h"
+#include "arrow/util/binary_view_util.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/endian.h"
 #include "arrow/util/key_value_metadata.h"
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/range.h"
 
 // TODO(GH-37221): Remove these ifdef checks when compute dependency is removed
 #ifdef ARROW_COMPUTE
@@ -58,6 +60,7 @@ using internal::ArrayStreamExportTraits;
 using internal::checked_cast;
 using internal::SchemaExportGuard;
 using internal::SchemaExportTraits;
+using internal::Zip;
 
 template <typename T>
 struct ExportTraits {};
@@ -91,7 +94,7 @@ class ReleaseCallback {
  public:
   using CType = typename Traits::CType;
 
-  explicit ReleaseCallback(CType* c_struct) : called_(false) {
+  explicit ReleaseCallback(CType* c_struct) {
     orig_release_ = c_struct->release;
     orig_private_data_ = c_struct->private_data;
     c_struct->release = StaticRelease;
@@ -123,7 +126,7 @@ class ReleaseCallback {
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(ReleaseCallback);
 
-  bool called_;
+  bool called_{false};
   void (*orig_release_)(CType*);
   void* orig_private_data_;
 };
@@ -238,8 +241,7 @@ struct SchemaExportChecker {
             flattened_flags.empty()
                 ? std::vector<int64_t>(flattened_formats_.size(), kDefaultFlags)
                 : std::move(flattened_flags)),
-        flattened_metadata_(std::move(flattened_metadata)),
-        flattened_index_(0) {}
+        flattened_metadata_(std::move(flattened_metadata)) {}
 
   void operator()(struct ArrowSchema* c_export, bool inner = false) {
     ASSERT_LT(flattened_index_, flattened_formats_.size());
@@ -288,7 +290,7 @@ struct SchemaExportChecker {
   const std::vector<std::string> flattened_names_;
   std::vector<int64_t> flattened_flags_;
   const std::vector<std::string> flattened_metadata_;
-  size_t flattened_index_;
+  size_t flattened_index_{0};
 };
 
 class TestSchemaExport : public ::testing::Test {
@@ -354,6 +356,8 @@ TEST_F(TestSchemaExport, Primitive) {
   TestPrimitive(large_binary(), "Z");
   TestPrimitive(utf8(), "u");
   TestPrimitive(large_utf8(), "U");
+  TestPrimitive(binary_view(), "vz");
+  TestPrimitive(utf8_view(), "vu");
 
   TestPrimitive(decimal(16, 4), "d:16,4");
   TestPrimitive(decimal256(16, 4), "d:16,4,256");
@@ -565,12 +569,24 @@ struct ArrayExportChecker {
       --expected_n_buffers;
       ++expected_buffers;
     }
-    ASSERT_EQ(c_export->n_buffers, expected_n_buffers);
+    bool has_variadic_buffer_sizes = expected_data.type->id() == Type::STRING_VIEW ||
+                                     expected_data.type->id() == Type::BINARY_VIEW;
+    ASSERT_EQ(c_export->n_buffers, expected_n_buffers + has_variadic_buffer_sizes);
     ASSERT_NE(c_export->buffers, nullptr);
-    for (int64_t i = 0; i < c_export->n_buffers; ++i) {
+
+    for (int64_t i = 0; i < expected_n_buffers; ++i) {
       auto expected_ptr = expected_buffers[i] ? expected_buffers[i]->data() : nullptr;
       ASSERT_EQ(c_export->buffers[i], expected_ptr);
     }
+    if (has_variadic_buffer_sizes) {
+      auto variadic_buffers = util::span(expected_data.buffers).subspan(2);
+      auto variadic_buffer_sizes = util::span(
+          static_cast<const int64_t*>(c_export->buffers[c_export->n_buffers - 1]),
+          variadic_buffers.size());
+      for (auto [buf, size] : Zip(variadic_buffers, variadic_buffer_sizes)) {
+        ASSERT_EQ(buf->size(), size);
+      }
+    }
 
     if (expected_data.dictionary != nullptr) {
       // Recurse into dictionary
@@ -883,6 +899,8 @@ TEST_F(TestArrayExport, Primitive) {
   TestPrimitive(large_binary(), R"(["foo", "bar", null])");
   TestPrimitive(utf8(), R"(["foo", "bar", null])");
   TestPrimitive(large_utf8(), R"(["foo", "bar", null])");
+  TestPrimitive(binary_view(), R"(["foo", "bar", null])");
+  TestPrimitive(utf8_view(), R"(["foo", "bar", null])");
 
   TestPrimitive(decimal(16, 4), R"(["1234.5670", null])");
   TestPrimitive(decimal256(16, 4), R"(["1234.5670", null])");
@@ -896,6 +914,39 @@ TEST_F(TestArrayExport, PrimitiveSliced) {
   TestPrimitive(factory);
 }
 
+constexpr std::string_view binary_view_buffer_content0 = "12345foo bar baz quux",
+                           binary_view_buffer_content1 = "BinaryViewMultipleBuffers";
+
+static const BinaryViewType::c_type binary_view_buffer1[] = {
+    util::ToBinaryView(binary_view_buffer_content0, 0, 0),
+    util::ToInlineBinaryView("foo"),
+    util::ToBinaryView(binary_view_buffer_content1, 1, 0),
+    util::ToInlineBinaryView("bar"),
+    util::ToBinaryView(binary_view_buffer_content0.substr(5), 0, 5),
+    util::ToInlineBinaryView("baz"),
+    util::ToBinaryView(binary_view_buffer_content1.substr(6, 13), 1, 6),
+    util::ToInlineBinaryView("quux"),
+};
+
+static auto MakeBinaryViewArrayWithMultipleDataBuffers() {
+  static const auto kLength = static_cast<int64_t>(std::size(binary_view_buffer1));
+  return std::make_shared<BinaryViewArray>(
+      binary_view(), kLength,
+      Buffer::FromVector(std::vector(binary_view_buffer1, binary_view_buffer1 + kLength)),
+      BufferVector{
+          Buffer::FromString(std::string{binary_view_buffer_content0}),
+          Buffer::FromString(std::string{binary_view_buffer_content1}),
+      });
+}
+
+TEST_F(TestArrayExport, BinaryViewMultipleBuffers) {
+  TestPrimitive(MakeBinaryViewArrayWithMultipleDataBuffers);
+  TestPrimitive([&] {
+    auto arr = MakeBinaryViewArrayWithMultipleDataBuffers();
+    return arr->Slice(1, arr->length() - 2);
+  });
+}
+
 TEST_F(TestArrayExport, Null) {
   TestPrimitive(null(), "[null, null, null]");
   TestPrimitive(null(), "[]");
@@ -1220,13 +1271,16 @@ TEST_F(TestArrayExport, ExportRecordBatch) {
 
 static const char kMyDeviceTypeName[] = "arrowtest::MyDevice";
 static const ArrowDeviceType kMyDeviceType = ARROW_DEVICE_EXT_DEV;
-static const void* kMyEventPtr = reinterpret_cast<void*>(uintptr_t(0xBAADF00D));
+static const void* kMyEventPtr =
+    reinterpret_cast<void*>(static_cast<uintptr_t>(0xBAADF00D));
 
 class MyBuffer final : public MutableBuffer {
  public:
   using MutableBuffer::MutableBuffer;
 
-  ~MyBuffer() { default_memory_pool()->Free(const_cast<uint8_t*>(data_), size_); }
+  ~MyBuffer() override {
+    default_memory_pool()->Free(const_cast<uint8_t*>(data_), size_);
+  }
 
   std::shared_ptr<Device::SyncEvent> device_sync_event() override { return device_sync_; }
 
@@ -1256,7 +1310,7 @@ class MyDevice : public Device {
     explicit MySyncEvent(void* sync_event, release_fn_t release_sync_event)
         : Device::SyncEvent(sync_event, release_sync_event) {}
 
-    virtual ~MySyncEvent() = default;
+    ~MySyncEvent() override = default;
     Status Wait() override { return Status::OK(); }
     Status Record(const Device::Stream&) override { return Status::OK(); }
   };
@@ -1966,6 +2020,10 @@ TEST_F(TestSchemaImport, String) {
   CheckImport(large_utf8());
   FillPrimitive("Z");
   CheckImport(large_binary());
+  FillPrimitive("vu");
+  CheckImport(utf8_view());
+  FillPrimitive("vz");
+  CheckImport(binary_view());
 
   FillPrimitive("w:3");
   CheckImport(fixed_size_binary(3));
@@ -2419,6 +2477,16 @@ static const void* large_string_buffers_no_nulls1[3] = {
 static const void* large_string_buffers_omitted[3] = {
     nullptr, large_string_offsets_buffer1, nullptr};
 
+constexpr int64_t binary_view_buffer_sizes1[] = {binary_view_buffer_content0.size(),
+                                                 binary_view_buffer_content1.size()};
+static const void* binary_view_buffers_no_nulls1[] = {
+    nullptr,
+    binary_view_buffer1,
+    binary_view_buffer_content0.data(),
+    binary_view_buffer_content1.data(),
+    binary_view_buffer_sizes1,
+};
+
 static const int32_t list_offsets_buffer1[] = {0, 2, 2, 5, 6, 8};
 static const void* list_buffers_no_nulls1[2] = {nullptr, list_offsets_buffer1};
 static const void* list_buffers_nulls1[2] = {bits_buffer1, list_offsets_buffer1};
@@ -2510,6 +2578,16 @@ class TestArrayImport : public ::testing::Test {
     c->buffers = buffers;
   }
 
+  void FillStringViewLike(struct ArrowArray* c, int64_t length, int64_t null_count,
+                          int64_t offset, const void** buffers,
+                          int32_t data_buffer_count) {
+    c->length = length;
+    c->null_count = null_count;
+    c->offset = offset;
+    c->n_buffers = 2 + data_buffer_count + 1;
+    c->buffers = buffers;
+  }
+
   void FillListLike(struct ArrowArray* c, int64_t length, int64_t null_count,
                     int64_t offset, const void** buffers) {
     c->length = length;
@@ -2583,6 +2661,12 @@ class TestArrayImport : public ::testing::Test {
     FillStringLike(&c_struct_, length, null_count, offset, buffers);
   }
 
+  void FillStringViewLike(int64_t length, int64_t null_count, int64_t offset,
+                          const void** buffers, int32_t data_buffer_count) {
+    FillStringViewLike(&c_struct_, length, null_count, offset, buffers,
+                       data_buffer_count);
+  }
+
   void FillListLike(int64_t length, int64_t null_count, int64_t offset,
                     const void** buffers) {
     FillListLike(&c_struct_, length, null_count, offset, buffers);
@@ -2834,6 +2918,10 @@ TEST_F(TestArrayImport, String) {
   FillStringLike(4, 0, 0, large_string_buffers_no_nulls1);
   CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])"));
 
+  auto length = static_cast<int64_t>(std::size(binary_view_buffer1));
+  FillStringViewLike(length, 0, 0, binary_view_buffers_no_nulls1, 2);
+  CheckImport(MakeBinaryViewArrayWithMultipleDataBuffers());
+
   // Empty array with null data pointers
   FillStringLike(0, 0, 0, string_buffers_omitted);
   CheckImport(ArrayFromJSON(utf8(), "[]"));
@@ -3530,15 +3618,16 @@ TEST_F(TestSchemaRoundtrip, Primitive) {
   TestWithTypeFactory(boolean);
   TestWithTypeFactory(float16);
 
-  TestWithTypeFactory(std::bind(decimal128, 19, 4));
-  TestWithTypeFactory(std::bind(decimal256, 19, 4));
-  TestWithTypeFactory(std::bind(decimal128, 19, 0));
-  TestWithTypeFactory(std::bind(decimal256, 19, 0));
-  TestWithTypeFactory(std::bind(decimal128, 19, -5));
-  TestWithTypeFactory(std::bind(decimal256, 19, -5));
-  TestWithTypeFactory(std::bind(fixed_size_binary, 3));
+  TestWithTypeFactory([] { return decimal128(19, 4); });
+  TestWithTypeFactory([] { return decimal256(19, 4); });
+  TestWithTypeFactory([] { return decimal128(19, 0); });
+  TestWithTypeFactory([] { return decimal256(19, 0); });
+  TestWithTypeFactory([] { return decimal128(19, -5); });
+  TestWithTypeFactory([] { return decimal256(19, -5); });
+  TestWithTypeFactory([] { return fixed_size_binary(3); });
   TestWithTypeFactory(binary);
   TestWithTypeFactory(large_utf8);
+  TestWithTypeFactory(binary_view);
 }
 
 TEST_F(TestSchemaRoundtrip, Temporal) {
@@ -3546,8 +3635,8 @@ TEST_F(TestSchemaRoundtrip, Temporal) {
   TestWithTypeFactory(day_time_interval);
   TestWithTypeFactory(month_interval);
   TestWithTypeFactory(month_day_nano_interval);
-  TestWithTypeFactory(std::bind(time64, TimeUnit::NANO));
-  TestWithTypeFactory(std::bind(duration, TimeUnit::MICRO));
+  TestWithTypeFactory([] { return time64(TimeUnit::NANO); });
+  TestWithTypeFactory([] { return duration(TimeUnit::MICRO); });
   TestWithTypeFactory([]() { return arrow::timestamp(TimeUnit::MICRO, "Europe/Paris"); });
 }
 
@@ -3803,6 +3892,14 @@ TEST_F(TestArrayRoundtrip, Primitive) {
                      R"([[4, 5, 6], [1, -600, 5000], null, null])");
 }
 
+TEST_F(TestArrayRoundtrip, BinaryViewMultipleBuffers) {
+  TestWithArrayFactory(MakeBinaryViewArrayWithMultipleDataBuffers);
+  TestWithArrayFactory([&] {
+    auto arr = MakeBinaryViewArrayWithMultipleDataBuffers();
+    return arr->Slice(1, arr->length() - 2);
+  });
+}
+
 TEST_F(TestArrayRoundtrip, UnknownNullCount) {
   TestWithArrayFactory([]() -> Result<std::shared_ptr<Array>> {
     auto arr = ArrayFromJSON(int32(), "[0, 1, 2]");
@@ -4205,8 +4302,6 @@ TEST_F(TestDeviceArrayRoundtrip, Primitive) {
   TestWithJSON(mm, int32(), "[4, 5, null]");
 }
 
-// TODO C -> C++ -> C roundtripping tests?
-
 ////////////////////////////////////////////////////////////////////////////
 // Array stream export tests
 
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 7a3b45f978..7211b6c87a 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -42,7 +42,7 @@ logging.basicConfig(level=logging.INFO)
 BOOL = ArrowBool()
 
 
-@click.group()
+@click.group(context_settings={"help_option_names": ["-h", "--help"]})
 @click.option("--debug", type=BOOL, is_flag=True, default=False,
               help="Increase logging with debugging output.")
 @click.option("--pdb", type=BOOL, is_flag=True, default=False,
diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index ff10c0bb03..80cc1c1e76 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1858,9 +1858,7 @@ def get_generated_json_files(tempdir=None):
         .skip_tester('Go')
         .skip_tester('Java')
         .skip_tester('JS')
-        .skip_tester('Rust')
-        .skip_format(SKIP_C_SCHEMA, 'C++')
-        .skip_format(SKIP_C_ARRAY, 'C++'),
+        .skip_tester('Rust'),
 
         generate_extension_case()
         .skip_tester('C#')
diff --git a/docs/source/format/CDataInterface.rst b/docs/source/format/CDataInterface.rst
index 36952ad99e..c9beddabed 100644
--- a/docs/source/format/CDataInterface.rst
+++ b/docs/source/format/CDataInterface.rst
@@ -140,10 +140,14 @@ strings:
 +-----------------+---------------------------------------------------+------------+
 | ``Z``           | large binary                                      |            |
 +-----------------+---------------------------------------------------+------------+
+| ``vz``          | binary view                                       |            |
++-----------------+---------------------------------------------------+------------+
 | ``u``           | utf-8 string                                      |            |
 +-----------------+---------------------------------------------------+------------+
 | ``U``           | large utf-8 string                                |            |
 +-----------------+---------------------------------------------------+------------+
+| ``vu``          | utf-8 view                                        |            |
++-----------------+---------------------------------------------------+------------+
 | ``d:19,10``     | decimal128 [precision 19, scale 10]               |            |
 +-----------------+---------------------------------------------------+------------+
 | ``d:19,10,NNN`` | decimal bitwidth = NNN [precision 19, scale 10]   |            |
@@ -548,6 +552,14 @@ parameterized extension types).
 The ``ArrowArray`` structure exported from an extension array simply points
 to the storage data of the extension array.
 
+Binary view arrays
+------------------
+
+For binary or utf-8 view arrays, an extra buffer is appended which stores
+the lengths of each variadic data buffer as ``int64_t``. This buffer is
+necessary since these buffer lengths are not trivially extractable from
+other data in an array of binary or utf-8 view type.
+
 .. _c-data-interface-semantics:
 
 Semantics

Reply via email to