AlenkaF commented on code in PR #34559:
URL: https://github.com/apache/arrow/pull/34559#discussion_r1144801496
##########
python/pyarrow/tests/test_extension_type.py:
##########
@@ -1127,3 +1141,45 @@ def test_cpp_extension_in_python(tmpdir):
reconstructed_array = batch.column(0)
assert reconstructed_array.type == uuid_type
assert reconstructed_array == array
+
+
+def test_extension_to_pandas_storage_type(registered_period_type):
+ period_type, _ = registered_period_type
+ np_arr = np.array([1, 2, 3, 4])
+ storage = pa.array([1, 2, 3, 4], pa.int64())
+ arr = pa.ExtensionArray.from_storage(period_type, storage)
+
+ if isinstance(period_type, PeriodTypeWithToPandasDtype):
+ pandas_dtype = period_type.to_pandas_dtype()
+ else:
+ pandas_dtype = np_arr.dtype
+
+ # Test arrays
+ result = arr.to_pandas()
+ assert result.dtype == pandas_dtype
+
+ # Test the change in ConvertChunkedArrayToPandas
+ chunked_arr = pa.chunked_array([arr])
+ result = chunked_arr.to_numpy()
+ assert result.dtype == np_arr.dtype
+
+ result = chunked_arr.to_pandas()
+ # TODO: to_pandas should take use of to_pandas_dtype
+ # if defined!
+ # assert result.dtype == pandas_dtype
+ assert result.dtype == np_arr.dtype
+
+ # Test the change in ConvertTableToPandas
+ data = [
+ pa.array([1, 2, 3, 4]),
+ pa.array(['foo', 'bar', None, None]),
+ pa.array([True, None, True, False]),
+ arr
+ ]
+ my_schema = pa.schema([('f0', pa.int8()),
+ ('f1', pa.string()),
+ ('f2', pa.bool_()),
+ ('ext', period_type)])
+ table = pa.Table.from_arrays(data, schema=my_schema)
+ result = table.to_pandas()
+ assert result["ext"].dtype == pandas_dtype
Review Comment:
True for the callable. I will add that case to the test (a `types_mapper`
that is a callable rather than a dict and that maps to `pd.ArrowDtype`).
It feels a bit out of place that the `types_mapper` argument doesn't really
work for extension columns: the only case in which it does work is when the
mapper is a function rather than a dict and it maps the type to a pandas
`pd.ArrowDtype`. It might be good to add this information to the docs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]