pitrou commented on a change in pull request #12505:
URL: https://github.com/apache/arrow/pull/12505#discussion_r813927745



##########
File path: python/pyarrow/tests/test_extension_type.py
##########
@@ -360,6 +360,64 @@ def test_ext_array_conversion_to_pandas():
     pd.testing.assert_series_equal(result, expected)
 
 
+def test_struct_w_ext_array_to_numpy():
+    # ARROW-15291
+    # Check that we don't segfault when trying to build
+    # a numpy array from a StructArray with a field being
+    # an ExtensionArray
+
+    storage1 = pa.array([1, 2, 3], type=pa.int64())
+    storage2 = pa.array([b"123", b"456", b"789"], type=pa.binary(3))
+    ty1 = IntegerType()
+    ty2 = ParamExtType(3)
+
+    arr1 = pa.ExtensionArray.from_storage(ty1, storage1)
+    arr2 = pa.ExtensionArray.from_storage(ty2, storage2)
+
+    sarr1 = pa.StructArray.from_arrays([arr1], ["f0"])
+    sarr2 = pa.StructArray.from_arrays([arr2], ["f1"])
+
+    result = sarr1.to_numpy(zero_copy_only=False)
+    expected = np.array([{'f0': 1}, {'f0': 2},
+                         {'f0': 3}], dtype=object)
+    np.testing.assert_array_equal(result, expected)
+
+    result = sarr2.to_numpy(zero_copy_only=False)
+    expected = np.array([{'f1': b'123'}, {'f1': b'456'},
+                         {'f1': b'789'}], dtype=object)
+    np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.pandas
+def test_struct_w_ext_array_to_pandas():

Review comment:
       Perhaps you can share most of the code between the two cases?

##########
File path: cpp/src/arrow/python/arrow_to_pandas.cc
##########
@@ -656,8 +656,19 @@ Status ConvertStruct(PandasOptions options, const 
ChunkedArray& data,
     // Convert the struct arrays first
     for (int32_t i = 0; i < num_fields; i++) {
       const auto field = arr->field(static_cast<int>(i));
-      RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
-                                         fields_data[i + 
fields_data_offset].ref()));
+      // In case the field is an extension array, use .storage() to convert to 
Pandas
+      if (field->type()->id() == Type::EXTENSION){
+        // Save the field object as an Extension Array
+        const ExtensionArray& arr_ext = checked_cast<const 
ExtensionArray&>(*field);
+        // Save the storage Array and use it to convert to Pandas
+        const std::shared_ptr<Array> field_ext = arr_ext.storage();
+        RETURN_NOT_OK(ConvertArrayToPandas(options, field_ext, nullptr,
+                                           fields_data[i + 
fields_data_offset].ref()));
+      }
+      else{
+        RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
+                                           fields_data[i + 
fields_data_offset].ref()));
+      }

Review comment:
       You might want to make this terser, e.g. (untested):
   ```suggestion
         // In case the field is an extension array, use .storage() to convert to Pandas
         if (field->type()->id() == Type::EXTENSION){
           const ExtensionArray& arr_ext = checked_cast<const ExtensionArray&>(*field);
           field = arr_ext.storage();
         }
         RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
                                            fields_data[i + fields_data_offset].ref()));
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to