This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 610c201ff0 GH-46659: [C++] Fix export of extension arrays with binary
view/string view storage (#46660)
610c201ff0 is described below
commit 610c201ff015acf6df201f3c48af67070b23713b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Jun 4 17:15:08 2025 -0500
GH-46659: [C++] Fix export of extension arrays with binary view/string view
storage (#46660)
### Rationale for this change
Extension arrays exported with binary view/string view storage did not
export the variadic sizes buffer which resulted in crashes when reimporting.
### What changes are included in this PR?
The expression that controlled whether the variadic sizes buffer was
written was updated.
### Are these changes tested?
Yes, a test was added
### Are there any user-facing changes?
No
* GitHub Issue: #46659
Lead-authored-by: Dewey Dunnington <[email protected]>
Co-authored-by: Dewey Dunnington <[email protected]>
Co-authored-by: Benjamin Kietzman <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
cpp/src/arrow/c/bridge.cc | 4 ++--
cpp/src/arrow/c/bridge_test.cc | 13 +++++++++++--
cpp/src/arrow/testing/extension_type.h | 20 ++++++++++++++++++++
cpp/src/arrow/testing/gtest_util.cc | 28 ++++++++++++++++++++++++++++
4 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 0c158322d0..9c01300df4 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -586,8 +586,8 @@ struct ArrayExporter {
++buffers_begin;
}
- bool need_variadic_buffer_sizes =
- data->type->id() == Type::BINARY_VIEW || data->type->id() ==
Type::STRING_VIEW;
+ bool need_variadic_buffer_sizes = data->type->storage_id() ==
Type::BINARY_VIEW ||
+ data->type->storage_id() ==
Type::STRING_VIEW;
if (need_variadic_buffer_sizes) {
++n_buffers;
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 75d5d1f428..4bcb821650 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -580,8 +580,10 @@ struct ArrayExportChecker {
--expected_n_buffers;
++expected_buffers;
}
- bool has_variadic_buffer_sizes = expected_data.type->id() ==
Type::STRING_VIEW ||
- expected_data.type->id() ==
Type::BINARY_VIEW;
+
+ bool has_variadic_buffer_sizes =
+ expected_data.type->storage_id() == Type::BINARY_VIEW ||
+ expected_data.type->storage_id() == Type::STRING_VIEW;
ASSERT_EQ(c_export->n_buffers, expected_n_buffers +
has_variadic_buffer_sizes);
ASSERT_NE(c_export->buffers, nullptr);
@@ -960,6 +962,13 @@ TEST_F(TestArrayExport, BinaryViewMultipleBuffers) {
});
}
+TEST_F(TestArrayExport, BinaryViewExtensionWithMultipleBuffers) {
+ TestPrimitive([&] {
+ auto storage = MakeBinaryViewArrayWithMultipleDataBuffers();
+ return BinaryViewExtensionType::WrapArray(binary_view_extension_type(),
storage);
+ });
+}
+
TEST_F(TestArrayExport, Null) {
TestPrimitive(null(), "[null, null, null]");
TestPrimitive(null(), "[]");
diff --git a/cpp/src/arrow/testing/extension_type.h
b/cpp/src/arrow/testing/extension_type.h
index a4526e31c2..9b4492a543 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -132,6 +132,23 @@ class ARROW_TESTING_EXPORT DictExtensionType : public
ExtensionType {
std::string Serialize() const override { return "dict-extension-serialized";
}
};
+class ARROW_TESTING_EXPORT BinaryViewExtensionType : public ExtensionType {
+ public:
+ BinaryViewExtensionType() : ExtensionType(binary_view()) {}
+
+ std::string extension_name() const override { return "binary_view"; }
+
+ bool ExtensionEquals(const ExtensionType& other) const override;
+
+ std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const
override;
+
+ Result<std::shared_ptr<DataType>> Deserialize(
+ std::shared_ptr<DataType> storage_type,
+ const std::string& serialized) const override;
+
+ std::string Serialize() const override { return "binary_view_serialized"; }
+};
+
// A minimal extension type that does not error when passed blank extension
information
class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public
ExtensionType {
public:
@@ -190,6 +207,9 @@ std::shared_ptr<DataType> list_extension_type();
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> dict_extension_type();
+ARROW_TESTING_EXPORT
+std::shared_ptr<DataType> binary_view_extension_type();
+
ARROW_TESTING_EXPORT
std::shared_ptr<DataType> complex128();
diff --git a/cpp/src/arrow/testing/gtest_util.cc
b/cpp/src/arrow/testing/gtest_util.cc
index 99d6cabde9..c49106757e 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -944,6 +944,30 @@ Result<std::shared_ptr<DataType>>
DictExtensionType::Deserialize(
return std::make_shared<DictExtensionType>();
}
+bool BinaryViewExtensionType::ExtensionEquals(const ExtensionType& other)
const {
+ return (other.extension_name() == this->extension_name());
+}
+
+std::shared_ptr<Array> BinaryViewExtensionType::MakeArray(
+ std::shared_ptr<ArrayData> data) const {
+ DCHECK_EQ(data->type->id(), Type::EXTENSION);
+ DCHECK_EQ("binary_view",
+ static_cast<const ExtensionType&>(*data->type).extension_name());
+ return std::make_shared<TinyintArray>(data);
+}
+
+Result<std::shared_ptr<DataType>> BinaryViewExtensionType::Deserialize(
+ std::shared_ptr<DataType> storage_type, const std::string& serialized)
const {
+ if (serialized != "binary_view_serialized") {
+ return Status::Invalid("Type identifier did not match: '", serialized,
"'");
+ }
+ if (!storage_type->Equals(*int16())) {
+ return Status::Invalid("Invalid storage type for BinaryViewExtensionType:
",
+ storage_type->ToString());
+ }
+ return std::make_shared<BinaryViewExtensionType>();
+}
+
bool Complex128Type::ExtensionEquals(const ExtensionType& other) const {
return (other.extension_name() == this->extension_name());
}
@@ -976,6 +1000,10 @@ std::shared_ptr<DataType> list_extension_type() {
return std::make_shared<ListExtensionType>();
}
+std::shared_ptr<DataType> binary_view_extension_type() {
+ return std::make_shared<BinaryViewExtensionType>();
+}
+
std::shared_ptr<DataType> dict_extension_type() {
return std::make_shared<DictExtensionType>();
}