Copilot commented on code in PR #49969:
URL: https://github.com/apache/arrow/pull/49969#discussion_r3238261538


##########
c_glib/arrow-glib/basic-data-type.cpp:
##########
@@ -2720,16 +2734,25 @@ 
garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
     type = GARROW_TYPE_DURATION_DATA_TYPE;
     break;
   case arrow::Type::type::EXTENSION:
+    type = GARROW_TYPE_UNKNOWN_EXTENSION_DATA_TYPE;
     {
-      auto g_extension_data_type =
-        std::static_pointer_cast<garrow::GExtensionType>(*arrow_data_type);
-      if (g_extension_data_type) {
-        auto garrow_data_type = g_extension_data_type->garrow_data_type();
-        g_object_ref(garrow_data_type);
-        return GARROW_DATA_TYPE(garrow_data_type);
+      auto arrow_extension_data_type =
+        std::static_pointer_cast<arrow::ExtensionType>(*arrow_data_type);
+      auto name = arrow_extension_data_type->extension_name();
+      if (name == "arrow.fixed_shape_tensor") {
+        type = GARROW_TYPE_FIXED_SHAPE_TENSOR_DATA_TYPE;
+      } else if (name == "arrow.uuid") {
+        type = GARROW_TYPE_UUID_DATA_TYPE;
+      } else {
+        auto g_extension_data_type =
+          std::dynamic_pointer_cast<garrow::GExtensionType>(*arrow_data_type);
+        if (g_extension_data_type) {
+          auto garrow_data_type = g_extension_data_type->garrow_data_type();
+          g_object_ref(garrow_data_type);
+          return GARROW_DATA_TYPE(garrow_data_type);
+        }
       }

Review Comment:
   In the EXTENSION case, the code checks canonical extension_name values 
(fixed_shape_tensor/uuid) before attempting to unwrap garrow::GExtensionType. 
If a GLib-defined custom extension type happens to use one of these reserved 
names, this will incorrectly construct a 
GArrowFixedShapeTensorDataType/GArrowUUIDDataType wrapper around a 
garrow::GExtensionType instance, and later static_pointer_casts inside those 
wrappers (e.g., to arrow::extension::FixedShapeTensorType) can become UB/crash. 
Prefer unwrapping garrow::GExtensionType first (dynamic_pointer_cast and return 
its garrow_data_type), then fall back to name-based mapping for non-GLib 
extension types, and finally to GArrowUnknownExtensionDataType.



##########
c_glib/arrow-glib/basic-data-type.cpp:
##########
@@ -2720,16 +2734,25 @@ 
garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
     type = GARROW_TYPE_DURATION_DATA_TYPE;
     break;
   case arrow::Type::type::EXTENSION:
+    type = GARROW_TYPE_UNKNOWN_EXTENSION_DATA_TYPE;
     {
-      auto g_extension_data_type =
-        std::static_pointer_cast<garrow::GExtensionType>(*arrow_data_type);
-      if (g_extension_data_type) {
-        auto garrow_data_type = g_extension_data_type->garrow_data_type();
-        g_object_ref(garrow_data_type);
-        return GARROW_DATA_TYPE(garrow_data_type);
+      auto arrow_extension_data_type =
+        std::static_pointer_cast<arrow::ExtensionType>(*arrow_data_type);
+      auto name = arrow_extension_data_type->extension_name();
+      if (name == "arrow.fixed_shape_tensor") {
+        type = GARROW_TYPE_FIXED_SHAPE_TENSOR_DATA_TYPE;
+      } else if (name == "arrow.uuid") {
+        type = GARROW_TYPE_UUID_DATA_TYPE;
+      } else {

Review Comment:
   There doesn’t appear to be a regression test covering the newly added 
fallback path for truly unknown extension types (e.g., 
arrow::extension::OpaqueType / "arrow.opaque"), which is the main crash 
scenario described in GH-49956. Consider adding a test that round-trips a 
schema/field containing an unknown extension type through the GLib conversion 
and asserts it doesn’t segfault and produces a GArrowUnknownExtensionDataType 
wrapper.



##########
c_glib/arrow-glib/basic-data-type.cpp:
##########
@@ -2080,6 +2080,20 @@ namespace garrow {
 
 G_BEGIN_DECLS
 
+G_DEFINE_TYPE(GArrowUnknownExtensionDataType,
+              garrow_unknown_extension_data_type,
+              GARROW_TYPE_EXTENSION_DATA_TYPE)
+
+static void
+garrow_unknown_extension_data_type_init(GArrowUnknownExtensionDataType *object)
+{
+}
+
+static void
+garrow_unknown_extension_data_type_class_init(GArrowUnknownExtensionDataTypeClass
 *klass)
+{
+}

Review Comment:
   GArrowUnknownExtensionDataType is a new public GObject type, but it isn’t 
mentioned in the gtk-doc SECTION: basic-data-type overview list at the top of 
this file. Please add it there (and any other relevant public API docs) so 
generated reference docs reflect the new user-facing type.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to