This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b275483274 GH-41741: [C++] Check that extension metadata key is
present before attempting to delete it (#41763)
b275483274 is described below
commit b2754832741d1566900e102e0add330a1d620f27
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 24 09:11:09 2024 -0300
GH-41741: [C++] Check that extension metadata key is present before
attempting to delete it (#41763)
### Rationale for this change
Neither Schema.fbs nor the Arrow C Data interface nor the columnar
specification indicates that the ARROW:extension:metadata key must be present;
however, the `ImportType()` implementation assumes that both
`ARROW:extension:name` and `ARROW:extension:metadata` are both present and
throws an exception if `ARROW:extension:metadata` is missing. This causes
pyarrow to crash (see issue for reproducer).
### What changes are included in this PR?
This PR checks that the extension metadata is present before attempting to
delete it.
### Are these changes tested?
Yes (test added).
### Are there any user-facing changes?
No.
* GitHub Issue: #41741
Authored-by: Dewey Dunnington <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
cpp/src/arrow/c/bridge.cc | 10 ++++++++--
cpp/src/arrow/c/bridge_test.cc | 17 +++++++++++++++++
cpp/src/arrow/testing/extension_type.h | 19 +++++++++++++++++++
3 files changed, 44 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 3e2e04ba0b..afb664c3bc 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1059,8 +1059,14 @@ struct SchemaImporter {
ARROW_ASSIGN_OR_RAISE(
type_, registered_ext_type->Deserialize(std::move(type_),
metadata_.extension_serialized));
- RETURN_NOT_OK(metadata_.metadata->DeleteMany(
- {metadata_.extension_name_index,
metadata_.extension_serialized_index}));
+ // If metadata is present, delete both metadata keys (otherwise, just
remove
+ // the extension name key)
+ if (metadata_.extension_serialized_index >= 0) {
+ RETURN_NOT_OK(metadata_.metadata->DeleteMany(
+ {metadata_.extension_name_index,
metadata_.extension_serialized_index}));
+ } else {
+
RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index));
+ }
}
}
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 0ecfb5a957..e3ec262422 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -4102,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) {
TestWithArrayFactory(NestedFactory(ExampleDictExtension));
}
+TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) {
+ auto ext_type = std::make_shared<MetadataOptionalExtensionType>();
+ ExtensionTypeGuard guard(ext_type);
+
+ auto ext_metadata =
+ KeyValueMetadata::Make({"ARROW:extension:name"},
{ext_type->extension_name()});
+ auto ext_field = field("", ext_type->storage_type(), true,
std::move(ext_metadata));
+
+ struct ArrowSchema c_schema {};
+ SchemaExportGuard schema_guard(&c_schema);
+ ASSERT_OK(ExportField(*ext_field, &c_schema));
+
+ ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema));
+ ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION);
+ AssertTypeEqual(ext_type_roundtrip, ext_type);
+}
+
TEST_F(TestArrayRoundtrip, UnregisteredExtension) {
auto StorageExtractor = [](ArrayFactory factory) {
return [factory]() -> Result<std::shared_ptr<Array>> {
diff --git a/cpp/src/arrow/testing/extension_type.h
b/cpp/src/arrow/testing/extension_type.h
index 846e3c7a16..6515631f20 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public
ExtensionType {
std::string Serialize() const override { return "dict-extension-serialized";
}
};
+// A minimal extension type that does not error when passed blank extension
information
+class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public
ExtensionType {
+ public:
+ MetadataOptionalExtensionType() : ExtensionType(null()) {}
+ std::string extension_name() const override { return "metadata.optional"; }
+ std::string Serialize() const override { return ""; }
+ std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const
override {
+ return nullptr;
+ }
+ bool ExtensionEquals(const ExtensionType& other) const override {
+ return other.extension_name() == extension_name();
+ }
+ Result<std::shared_ptr<DataType>> Deserialize(
+ std::shared_ptr<DataType> storage_type,
+ const std::string& serialized_data) const override {
+ return std::make_shared<MetadataOptionalExtensionType>();
+ }
+};
+
class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
public:
using ExtensionArray::ExtensionArray;