HyukjinKwon commented on PR #48769:
URL: https://github.com/apache/arrow/pull/48769#issuecomment-3850642403

   I believe these test failures are not related to this PR. Most of them fail like this:
   
   ```
   =================================== FAILURES ===================================
   _____________ TestConvertMetadata.test_column_index_names_with_tz ______________
   
   self = <pyarrow.tests.test_pandas.TestConvertMetadata object at 0x7f0615b62360>
   
       def test_column_index_names_with_tz(self):
           # ARROW-13756
           # Bug if index is timezone aware DataTimeIndex
       
           df = pd.DataFrame(
               np.random.randn(5, 3),
               columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz="CET")
           )
   >       _check_pandas_roundtrip(df, preserve_index=True)
   
   
   opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:223:
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ 
   
   opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:124: in _check_pandas_roundtrip
       tm.assert_frame_equal(result, expected, check_dtype=check_dtype,
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ 
   
   left = DatetimeIndex(['2021-01-01 00:00:00+01:00', '2021-02-20 00:00:00+01:00',
                  '2021-04-11 00:00:00+02:00'],
                 dtype='datetime64[ns, CET]', freq=None)
   right = DatetimeIndex(['2021-01-01 00:00:00+01:00', '2021-02-20 00:00:00+01:00',
                  '2021-04-11 00:00:00+02:00'],
                 dtype='datetime64[us, CET]', freq='50D')
   obj = 'DataFrame.columns'
   
       def _check_types(left, right, obj: str = "Index") -> None:
           if not exact:
               return
       
           assert_class_equal(left, right, exact=exact, obj=obj)
           assert_attr_equal("inferred_type", left, right, obj=obj)
       
           # Skip exact dtype checking when `check_categorical` is False
           if isinstance(left.dtype, CategoricalDtype) and isinstance(
               right.dtype, CategoricalDtype
           ):
               if check_categorical:
                   assert_attr_equal("dtype", left, right, obj=obj)
                   assert_index_equal(left.categories, right.categories, exact=exact)
               return
       
   >       assert_attr_equal("dtype", left, right, obj=obj)
   E       AssertionError: DataFrame.columns are different
   E       
   E       Attribute "dtype" are different
   E       [left]:  datetime64[ns, CET]
   E       [right]: datetime64[us, CET]
   
   
   opt/conda/envs/arrow/lib/python3.11/site-packages/pandas/_testing/asserters.py:264: AssertionError
   ______________ TestConvertMetadata.test_mismatch_metadata_schema _______________
   
   self = <pyarrow.tests.test_pandas.TestConvertMetadata object at 0x7f06160d3670>
   
       def test_mismatch_metadata_schema(self):
           # ARROW-10511
           # It is possible that the metadata and actual schema is not fully
           # matching (eg no timezone information for tz-aware column)
           # -> to_pandas() conversion should not fail on that
           df = pd.DataFrame({"datetime": pd.date_range("2020-01-01", periods=3)})
       
           # OPTION 1: casting after conversion
           table = pa.Table.from_pandas(df)
           # cast the "datetime" column to be tz-aware
           new_col = table["datetime"].cast(pa.timestamp('ns', tz="UTC"))
           new_table1 = table.set_column(
               0, pa.field("datetime", new_col.type), new_col
           )
       
           # OPTION 2: specify schema during conversion
           schema = pa.schema([("datetime", pa.timestamp('ns', tz="UTC"))])
           new_table2 = pa.Table.from_pandas(df, schema=schema)
       
           expected = df.copy()
           expected["datetime"] = expected["datetime"].dt.tz_localize("UTC")
       
           for new_table in [new_table1, new_table2]:
               # ensure the new table still has the pandas metadata
               assert new_table.schema.pandas_metadata is not None
               # convert to pandas
               result = new_table.to_pandas()
   >           tm.assert_frame_equal(result, expected)
   E           AssertionError: Attributes of DataFrame.iloc[:, 0] (column name="datetime") are different
   E           
   E           Attribute "dtype" are different
   E           [left]:  datetime64[ns, UTC]
   E           [right]: datetime64[us, UTC]
   
   
   opt/conda/envs/arrow/lib/python3.11/site-packages/pyarrow/tests/test_pandas.py:734: AssertionError
   ```
   
   Let me file an issue and take a separate look.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to