raulcd opened a new issue, #35321: URL: https://github.com/apache/arrow/issues/35321
### Describe the bug, including details regarding any error messages, version, and platform.

The pandas nightly tests and the release verification jobs are failing:

- [test-conda-python-3.8-pandas-latest](https://github.com/ursacomputing/crossbow/actions/runs/4792360361/jobs/8523791672)
- [test-conda-python-3.8-pandas-nightly](https://github.com/ursacomputing/crossbow/actions/runs/4792357427/jobs/8523785745)
- [test-conda-python-3.9-pandas-upstream_devel](https://github.com/ursacomputing/crossbow/actions/runs/4792356978/jobs/8523784707)
- [verify-rc-source-python-linux-almalinux-8-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786502455/jobs/8510514881)
- [verify-rc-source-python-linux-conda-latest-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786507381/jobs/8510525045)
- [verify-rc-source-python-linux-ubuntu-20.04-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786509036/jobs/8510528619)
- [verify-rc-source-python-linux-ubuntu-22.04-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786508280/jobs/8510527050)
- [verify-rc-source-python-macos-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786510209/jobs/8510531034)
- [verify-rc-source-python-macos-arm64](https://github.com/ursacomputing/crossbow/actions/runs/4786502769/jobs/8510515284)
- [verify-rc-source-python-macos-conda-amd64](https://github.com/ursacomputing/crossbow/actions/runs/4786513371/jobs/8510537552)

The failures are caused by the following tests:

```
test_extension_to_pandas_storage_type[registered_period_type0]
test_extension_to_pandas_storage_type[registered_period_type1]
test_extension_to_pandas_storage_type[registered_period_type2]
```

This started happening after pandas 2.0.1 was released: https://pypi.org/project/pandas/#history

The full error:

```
=================================== FAILURES =================================== ________ test_extension_to_pandas_storage_type[registered_period_type0] ________ registered_period_type 
= (PeriodType(DataType(int64)), <class 'pyarrow.lib.ExtensionArray'>) @pytest.mark.pandas def test_extension_to_pandas_storage_type(registered_period_type): period_type, _ = registered_period_type np_arr = np.array([1, 2, 3, 4], dtype='i8') storage = pa.array([1, 2, 3, 4], pa.int64()) arr = pa.ExtensionArray.from_storage(period_type, storage) if isinstance(period_type, PeriodTypeWithToPandasDtype): pandas_dtype = period_type.to_pandas_dtype() else: pandas_dtype = np_arr.dtype # Test arrays result = arr.to_pandas() assert result.dtype == pandas_dtype # Test chunked arrays chunked_arr = pa.chunked_array([arr]) result = chunked_arr.to_numpy() assert result.dtype == np_arr.dtype result = chunked_arr.to_pandas() assert result.dtype == pandas_dtype # Test Table.to_pandas data = [ pa.array([1, 2, 3, 4]), pa.array(['foo', 'bar', None, None]), pa.array([True, None, True, False]), arr ] my_schema = pa.schema([('f0', pa.int8()), ('f1', pa.string()), ('f2', pa.bool_()), ('ext', period_type)]) table = pa.Table.from_arrays(data, schema=my_schema) result = table.to_pandas() assert result["ext"].dtype == pandas_dtype import pandas as pd if Version(pd.__version__) > Version("2.0.0"): # Check the usage of types_mapper > result = table.to_pandas(types_mapper=pd.ArrowDtype) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas ??? pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas ??? 
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp> return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block block = _int.make_block(pd_ext_arr, placement=placement) opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype): opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype return arr_or_dtype.type is Period _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = extension<test.period<PeriodType>>[pyarrow] @property def type(self): """ Returns associated scalar type. 
""" pa_type = self.pyarrow_dtype if pa.types.is_integer(pa_type): return int elif pa.types.is_floating(pa_type): return float elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type): return str elif ( pa.types.is_binary(pa_type) or pa.types.is_fixed_size_binary(pa_type) or pa.types.is_large_binary(pa_type) ): return bytes elif pa.types.is_boolean(pa_type): return bool elif pa.types.is_duration(pa_type): if pa_type.unit == "ns": return Timedelta else: return timedelta elif pa.types.is_timestamp(pa_type): if pa_type.unit == "ns": return Timestamp else: return datetime elif pa.types.is_date(pa_type): return date elif pa.types.is_time(pa_type): return time elif pa.types.is_decimal(pa_type): return Decimal elif pa.types.is_dictionary(pa_type): # TODO: Potentially change this & CategoricalDtype.type to # something more representative of the scalar return CategoricalDtypeType elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): return list elif pa.types.is_map(pa_type): return dict elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? 
return type(pa_type) else: > raise NotImplementedError(pa_type) E NotImplementedError: extension<test.period<PeriodType>> opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError ________ test_extension_to_pandas_storage_type[registered_period_type1] ________ registered_period_type = (PeriodTypeWithClass(DataType(int64)), <class 'pyarrow.tests.test_extension_type.PeriodArray'>) @pytest.mark.pandas def test_extension_to_pandas_storage_type(registered_period_type): period_type, _ = registered_period_type np_arr = np.array([1, 2, 3, 4], dtype='i8') storage = pa.array([1, 2, 3, 4], pa.int64()) arr = pa.ExtensionArray.from_storage(period_type, storage) if isinstance(period_type, PeriodTypeWithToPandasDtype): pandas_dtype = period_type.to_pandas_dtype() else: pandas_dtype = np_arr.dtype # Test arrays result = arr.to_pandas() assert result.dtype == pandas_dtype # Test chunked arrays chunked_arr = pa.chunked_array([arr]) result = chunked_arr.to_numpy() assert result.dtype == np_arr.dtype result = chunked_arr.to_pandas() assert result.dtype == pandas_dtype # Test Table.to_pandas data = [ pa.array([1, 2, 3, 4]), pa.array(['foo', 'bar', None, None]), pa.array([True, None, True, False]), arr ] my_schema = pa.schema([('f0', pa.int8()), ('f1', pa.string()), ('f2', pa.bool_()), ('ext', period_type)]) table = pa.Table.from_arrays(data, schema=my_schema) result = table.to_pandas() assert result["ext"].dtype == pandas_dtype import pandas as pd if Version(pd.__version__) > Version("2.0.0"): # Check the usage of types_mapper > result = table.to_pandas(types_mapper=pd.ArrowDtype) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas ??? pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas ??? 
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp> return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block block = _int.make_block(pd_ext_arr, placement=placement) opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype): opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype return arr_or_dtype.type is Period _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = extension<test.period<PeriodTypeWithClass>>[pyarrow] @property def type(self): """ Returns associated scalar type. 
""" pa_type = self.pyarrow_dtype if pa.types.is_integer(pa_type): return int elif pa.types.is_floating(pa_type): return float elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type): return str elif ( pa.types.is_binary(pa_type) or pa.types.is_fixed_size_binary(pa_type) or pa.types.is_large_binary(pa_type) ): return bytes elif pa.types.is_boolean(pa_type): return bool elif pa.types.is_duration(pa_type): if pa_type.unit == "ns": return Timedelta else: return timedelta elif pa.types.is_timestamp(pa_type): if pa_type.unit == "ns": return Timestamp else: return datetime elif pa.types.is_date(pa_type): return date elif pa.types.is_time(pa_type): return time elif pa.types.is_decimal(pa_type): return Decimal elif pa.types.is_dictionary(pa_type): # TODO: Potentially change this & CategoricalDtype.type to # something more representative of the scalar return CategoricalDtypeType elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): return list elif pa.types.is_map(pa_type): return dict elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? 
return type(pa_type) else: > raise NotImplementedError(pa_type) E NotImplementedError: extension<test.period<PeriodTypeWithClass>> opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError ________ test_extension_to_pandas_storage_type[registered_period_type2] ________ registered_period_type = (PeriodTypeWithToPandasDtype(DataType(int64)), <class 'pyarrow.lib.ExtensionArray'>) @pytest.mark.pandas def test_extension_to_pandas_storage_type(registered_period_type): period_type, _ = registered_period_type np_arr = np.array([1, 2, 3, 4], dtype='i8') storage = pa.array([1, 2, 3, 4], pa.int64()) arr = pa.ExtensionArray.from_storage(period_type, storage) if isinstance(period_type, PeriodTypeWithToPandasDtype): pandas_dtype = period_type.to_pandas_dtype() else: pandas_dtype = np_arr.dtype # Test arrays result = arr.to_pandas() assert result.dtype == pandas_dtype # Test chunked arrays chunked_arr = pa.chunked_array([arr]) result = chunked_arr.to_numpy() assert result.dtype == np_arr.dtype result = chunked_arr.to_pandas() assert result.dtype == pandas_dtype # Test Table.to_pandas data = [ pa.array([1, 2, 3, 4]), pa.array(['foo', 'bar', None, None]), pa.array([True, None, True, False]), arr ] my_schema = pa.schema([('f0', pa.int8()), ('f1', pa.string()), ('f2', pa.bool_()), ('ext', period_type)]) table = pa.Table.from_arrays(data, schema=my_schema) result = table.to_pandas() assert result["ext"].dtype == pandas_dtype import pandas as pd if Version(pd.__version__) > Version("2.0.0"): # Check the usage of types_mapper > result = table.to_pandas(types_mapper=pd.ArrowDtype) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/tests/test_extension_type.py:1302: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ pyarrow/array.pxi:852: in pyarrow.lib._PandasConvertible.to_pandas ??? pyarrow/table.pxi:4114: in pyarrow.lib.Table._to_pandas ??? 
opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:820: in table_to_blockmanager blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in _table_to_blocks return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:1170: in <listcomp> return [_reconstruct_block(item, columns, extension_columns) opt/conda/envs/arrow/lib/python3.8/site-packages/pyarrow/pandas_compat.py:781: in _reconstruct_block block = _int.make_block(pd_ext_arr, placement=placement) opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/internals/api.py:73: in make_block if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype): opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/dtypes/common.py:415: in is_period_dtype return arr_or_dtype.type is Period _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = extension<test.period<PeriodTypeWithToPandasDtype>>[pyarrow] @property def type(self): """ Returns associated scalar type. 
""" pa_type = self.pyarrow_dtype if pa.types.is_integer(pa_type): return int elif pa.types.is_floating(pa_type): return float elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type): return str elif ( pa.types.is_binary(pa_type) or pa.types.is_fixed_size_binary(pa_type) or pa.types.is_large_binary(pa_type) ): return bytes elif pa.types.is_boolean(pa_type): return bool elif pa.types.is_duration(pa_type): if pa_type.unit == "ns": return Timedelta else: return timedelta elif pa.types.is_timestamp(pa_type): if pa_type.unit == "ns": return Timestamp else: return datetime elif pa.types.is_date(pa_type): return date elif pa.types.is_time(pa_type): return time elif pa.types.is_decimal(pa_type): return Decimal elif pa.types.is_dictionary(pa_type): # TODO: Potentially change this & CategoricalDtype.type to # something more representative of the scalar return CategoricalDtypeType elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): return list elif pa.types.is_map(pa_type): return dict elif pa.types.is_null(pa_type): # TODO: None? pd.NA? pa.null? return type(pa_type) else: > raise NotImplementedError(pa_type) E NotImplementedError: extension<test.period<PeriodTypeWithToPandasDtype>> opt/conda/envs/arrow/lib/python3.8/site-packages/pandas/core/arrays/arrow/dtype.py:148: NotImplementedError ``` ### Component(s) Python -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
