HyukjinKwon commented on PR #48769:
URL: https://github.com/apache/arrow/pull/48769#issuecomment-3850642403
I believe these test failures are not related to this PR. Most of them fail as follows:
```
=================================== FAILURES
===================================
_____________ TestConvertMetadata.test_column_index_names_with_tz
______________
self = <pyarrow.tests.test_pandas.TestConvertMetadata object at
0x7f0615b62360>
def test_column_index_names_with_tz(self):
# ARROW-13756
# Bug if index is timezone aware DataTimeIndex
df = pd.DataFrame(
np.random.randn(5, 3),
columns=pd.date_range("2021-01-01", periods=3, freq="50D",
tz="CET")
)
> _check_pandas_roundtrip(df, preserve_index=True)
opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:223:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:124:
in _check_pandas_roundtrip
tm.assert_frame_equal(result, expected, check_dtype=check_dtype,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
left = DatetimeIndex(['2021-01-01 00:00:00+01:00', '2021-02-20
00:00:00+01:00',
'2021-04-11 00:00:00+02:00'],
dtype='datetime64[ns, CET]', freq=None)
right = DatetimeIndex(['2021-01-01 00:00:00+01:00', '2021-02-20
00:00:00+01:00',
'2021-04-11 00:00:00+02:00'],
dtype='datetime64[us, CET]', freq='50D')
obj = 'DataFrame.columns'
def _check_types(left, right, obj: str = "Index") -> None:
if not exact:
return
assert_class_equal(left, right, exact=exact, obj=obj)
assert_attr_equal("inferred_type", left, right, obj=obj)
# Skip exact dtype checking when `check_categorical` is False
if isinstance(left.dtype, CategoricalDtype) and isinstance(
right.dtype, CategoricalDtype
):
if check_categorical:
assert_attr_equal("dtype", left, right, obj=obj)
assert_index_equal(left.categories, right.categories,
exact=exact)
return
> assert_attr_equal("dtype", left, right, obj=obj)
E AssertionError: DataFrame.columns are different
E
E Attribute "dtype" are different
E [left]: datetime64[ns, CET]
E [right]: datetime64[us, CET]
opt/conda/envs/arrow/lib/python3.11/site-packages/pandas/_testing/asserters.py:264:
AssertionError
______________ TestConvertMetadata.test_mismatch_metadata_schema
_______________
self = <pyarrow.tests.test_pandas.TestConvertMetadata object at
0x7f06160d3670>
def test_mismatch_metadata_schema(self):
# ARROW-10511
# It is possible that the metadata and actual schema is not fully
# matching (eg no timezone information for tz-aware column)
# -> to_pandas() conversion should not fail on that
df = pd.DataFrame({"datetime": pd.date_range("2020-01-01",
periods=3)})
# OPTION 1: casting after conversion
table = pa.Table.from_pandas(df)
# cast the "datetime" column to be tz-aware
new_col = table["datetime"].cast(pa.timestamp('ns', tz="UTC"))
new_table1 = table.set_column(
0, pa.field("datetime", new_col.type), new_col
)
# OPTION 2: specify schema during conversion
schema = pa.schema([("datetime", pa.timestamp('ns', tz="UTC"))])
new_table2 = pa.Table.from_pandas(df, schema=schema)
expected = df.copy()
expected["datetime"] = expected["datetime"].dt.tz_localize("UTC")
for new_table in [new_table1, new_table2]:
# ensure the new table still has the pandas metadata
assert new_table.schema.pandas_metadata is not None
# convert to pandas
result = new_table.to_pandas()
> tm.assert_frame_equal(result, expected)
E AssertionError: Attributes of DataFrame.iloc[:, 0] (column
name="datetime") are different
E
E Attribute "dtype" are different
E [left]: datetime64[ns, UTC]
E [right]: datetime64[us, UTC]
opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:734:
AssertionError
```
Let me file an issue and look into this separately.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]