pitrou commented on a change in pull request #12505:
URL: https://github.com/apache/arrow/pull/12505#discussion_r813927745
##########
File path: python/pyarrow/tests/test_extension_type.py
##########
@@ -360,6 +360,64 @@ def test_ext_array_conversion_to_pandas():
pd.testing.assert_series_equal(result, expected)
+def test_struct_w_ext_array_to_numpy():
+ # ARROW-15291
+ # Check that we don't segfault when trying to build
+ # a numpy array from a StructArray with a field being
+ # an ExtensionArray
+
+ storage1 = pa.array([1, 2, 3], type=pa.int64())
+ storage2 = pa.array([b"123", b"456", b"789"], type=pa.binary(3))
+ ty1 = IntegerType()
+ ty2 = ParamExtType(3)
+
+ arr1 = pa.ExtensionArray.from_storage(ty1, storage1)
+ arr2 = pa.ExtensionArray.from_storage(ty2, storage2)
+
+ sarr1 = pa.StructArray.from_arrays([arr1], ["f0"])
+ sarr2 = pa.StructArray.from_arrays([arr2], ["f1"])
+
+ result = sarr1.to_numpy(zero_copy_only=False)
+ expected = np.array([{'f0': 1}, {'f0': 2},
+ {'f0': 3}], dtype=object)
+ np.testing.assert_array_equal(result, expected)
+
+ result = sarr2.to_numpy(zero_copy_only=False)
+ expected = np.array([{'f1': b'123'}, {'f1': b'456'},
+ {'f1': b'789'}], dtype=object)
+ np.testing.assert_array_equal(result, expected)
+
+
+@pytest.mark.pandas
+def test_struct_w_ext_array_to_pandas():
Review comment:
Perhaps you can share most of the code between the two cases?
##########
File path: cpp/src/arrow/python/arrow_to_pandas.cc
##########
@@ -656,8 +656,19 @@ Status ConvertStruct(PandasOptions options, const ChunkedArray& data,
// Convert the struct arrays first
for (int32_t i = 0; i < num_fields; i++) {
const auto field = arr->field(static_cast<int>(i));
- RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
- fields_data[i + fields_data_offset].ref()));
+ // In case the field is an extension array, use .storage() to convert to Pandas
+ if (field->type()->id() == Type::EXTENSION){
+ // Save the field object as an Extension Array
+ const ExtensionArray& arr_ext = checked_cast<const ExtensionArray&>(*field);
+ // Save the storage Array and use it to convert to Pandas
+ const std::shared_ptr<Array> field_ext = arr_ext.storage();
+ RETURN_NOT_OK(ConvertArrayToPandas(options, field_ext, nullptr,
+ fields_data[i + fields_data_offset].ref()));
+ }
+ else{
+ RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
+ fields_data[i + fields_data_offset].ref()));
+ }
Review comment:
You might want to make this terser, e.g. (untested):
```suggestion
// In case the field is an extension array, use .storage() to convert to Pandas
if (field->type()->id() == Type::EXTENSION){
const ExtensionArray& arr_ext = checked_cast<const ExtensionArray&>(*field);
field = arr_ext.storage();
}
RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr,
fields_data[i + fields_data_offset].ref()));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]