This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b275483274 GH-41741: [C++] Check that extension metadata key is 
present before attempting to delete it (#41763)
b275483274 is described below

commit b2754832741d1566900e102e0add330a1d620f27
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri May 24 09:11:09 2024 -0300

    GH-41741: [C++] Check that extension metadata key is present before 
attempting to delete it (#41763)
    
    ### Rationale for this change
    
    Neither Schema.fbs nor the Arrow C Data interface nor the columnar 
specification indicates that the ARROW:extension:metadata key must be present; 
however, the `ImportType()` implementation assumes that both 
`ARROW:extension:name` and `ARROW:extension:metadata` are both present and 
throws an exception if `ARROW:extension:metadata` is missing. This causes 
pyarrow to crash (see issue for reproducer).
    
    ### What changes are included in this PR?
    
    This PR checks that the extension metadata is present before attempting to 
delete it.
    
    ### Are these changes tested?
    
    Yes (test added).
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #41741
    
    Authored-by: Dewey Dunnington <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 cpp/src/arrow/c/bridge.cc              | 10 ++++++++--
 cpp/src/arrow/c/bridge_test.cc         | 17 +++++++++++++++++
 cpp/src/arrow/testing/extension_type.h | 19 +++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 3e2e04ba0b..afb664c3bc 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -1059,8 +1059,14 @@ struct SchemaImporter {
         ARROW_ASSIGN_OR_RAISE(
             type_, registered_ext_type->Deserialize(std::move(type_),
                                                     
metadata_.extension_serialized));
-        RETURN_NOT_OK(metadata_.metadata->DeleteMany(
-            {metadata_.extension_name_index, 
metadata_.extension_serialized_index}));
+        // If metadata is present, delete both metadata keys (otherwise, just 
remove
+        // the extension name key)
+        if (metadata_.extension_serialized_index >= 0) {
+          RETURN_NOT_OK(metadata_.metadata->DeleteMany(
+              {metadata_.extension_name_index, 
metadata_.extension_serialized_index}));
+        } else {
+          
RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index));
+        }
       }
     }
 
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 0ecfb5a957..e3ec262422 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -4102,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) {
   TestWithArrayFactory(NestedFactory(ExampleDictExtension));
 }
 
+TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) {
+  auto ext_type = std::make_shared<MetadataOptionalExtensionType>();
+  ExtensionTypeGuard guard(ext_type);
+
+  auto ext_metadata =
+      KeyValueMetadata::Make({"ARROW:extension:name"}, 
{ext_type->extension_name()});
+  auto ext_field = field("", ext_type->storage_type(), true, 
std::move(ext_metadata));
+
+  struct ArrowSchema c_schema {};
+  SchemaExportGuard schema_guard(&c_schema);
+  ASSERT_OK(ExportField(*ext_field, &c_schema));
+
+  ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema));
+  ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION);
+  AssertTypeEqual(ext_type_roundtrip, ext_type);
+}
+
 TEST_F(TestArrayRoundtrip, UnregisteredExtension) {
   auto StorageExtractor = [](ArrayFactory factory) {
     return [factory]() -> Result<std::shared_ptr<Array>> {
diff --git a/cpp/src/arrow/testing/extension_type.h 
b/cpp/src/arrow/testing/extension_type.h
index 846e3c7a16..6515631f20 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public 
ExtensionType {
   std::string Serialize() const override { return "dict-extension-serialized"; 
}
 };
 
+// A minimal extension type that does not error when passed blank extension 
information
+class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public 
ExtensionType {
+ public:
+  MetadataOptionalExtensionType() : ExtensionType(null()) {}
+  std::string extension_name() const override { return "metadata.optional"; }
+  std::string Serialize() const override { return ""; }
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override {
+    return nullptr;
+  }
+  bool ExtensionEquals(const ExtensionType& other) const override {
+    return other.extension_name() == extension_name();
+  }
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized_data) const override {
+    return std::make_shared<MetadataOptionalExtensionType>();
+  }
+};
+
 class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray {
  public:
   using ExtensionArray::ExtensionArray;

Reply via email to