kou commented on issue #37480: URL: https://github.com/apache/arrow/issues/37480#issuecomment-1700351376
https://github.com/apache/arrow/actions/runs/6028403835/job/16355661665#step:6:249

```text
=================================== FAILURES ===================================
_________________ TestConvertMetadata.test_binary_column_name __________________

self = <pyarrow.tests.test_pandas.TestConvertMetadata object at 0x118ce1aa0>

    def test_binary_column_name(self):
        if Version("2.0.0") <= Version(pd.__version__) < Version("2.1.0"):
            # TODO: regression in pandas, hopefully fixed in next version
            # https://issues.apache.org/jira/browse/ARROW-18394
            # https://github.com/pandas-dev/pandas/issues/50127
            pytest.skip("Regression in pandas 2.0.0")
        column_data = ['い']
        key = 'あ'.encode()
        data = {key: column_data}
        df = pd.DataFrame(data)

        # we can't use _check_pandas_roundtrip here because our metadata
        # is always decoded as utf8: even if binary goes in, utf8 comes out
        t = pa.Table.from_pandas(df, preserve_index=True)
>       df2 = t.to_pandas()

/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:473:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pyarrow/io.pxi:867: in pyarrow.lib.PythonFile.__cinit__
    ???
pyarrow/table.pxi:4096: in pyarrow.lib.Table._to_pandas
    ???
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:771: in table_to_blockmanager
    columns = _deserialize_column_index(table, all_columns, column_indexes)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:887: in _deserialize_column_index
    columns = _reconstruct_columns_from_metadata(columns, column_indexes)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:1105: in _reconstruct_columns_from_metadata
    level = level.astype(dtype)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/indexes/base.py:1092: in astype
    result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/indexes/base.py:568: in __new__
    klass = cls._dtype_to_subclass(arr.dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'pandas.core.indexes.base.Index'>, dtype = dtype('S3')

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex
                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:3094:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pyarrow/io.pxi:867: in pyarrow.lib.PythonFile.__cinit__
    ???
pyarrow/table.pxi:4096: in pyarrow.lib.Table._to_pandas
    ???
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:771: in table_to_blockmanager
    columns = _deserialize_column_index(table, all_columns, column_indexes)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:887: in _deserialize_column_index
    columns = _reconstruct_columns_from_metadata(columns, column_indexes)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pyarrow/pandas_compat.py:1105: in _reconstruct_columns_from_metadata
    level = level.astype(dtype)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/indexes/base.py:1092: in astype
    result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/indexes/base.py:568: in __new__
    klass = cls._dtype_to_subclass(arr.dtype)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

cls = <class 'pandas.core.indexes.base.Index'>, dtype = dtype('S3')

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex
                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex
                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex
                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex
                return PeriodIndex
            return Index
        if dtype.kind == "M":
            from pandas import DatetimeIndex
            return DatetimeIndex
        elif dtype.kind == "m":
            from pandas import TimedeltaIndex
            return TimedeltaIndex
        elif dtype.kind == "O":
            # NB: assuming away MultiIndex
            return Index
        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
            return Index
>       raise NotImplementedError(dtype)
E       NotImplementedError: |S3

/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pandas/core/indexes/base.py:633: NotImplementedError
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
