This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ba612971b8 GH-48978: [Python] test failures on pandas 3.0 for
fastparquet and for zoneinfo w/o pytz (#48979)
ba612971b8 is described below
commit ba612971b8d421c4daf9629eadb6d1e425efa294
Author: tadeja <[email protected]>
AuthorDate: Fri Feb 20 13:06:50 2026 +0100
GH-48978: [Python] test failures on pandas 3.0 for fastparquet and for
zoneinfo w/o pytz (#48979)
### Rationale for this change
Closes #48978
### What changes are included in this PR?
Update `test_fastparquet_cross_compatibility` in `parquet/test_basic.py` to
account for fastparquet's string and categorical dtype differences, which caused
the failure `Attribute "dtype" are different`.
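Update `test_timestamp_as_object_non_nanosecond` in `test_pandas.py` to fix the
failure `ValueError: fromutc: dt.tzinfo is not self` by building the expected
value with `astimezone(ZoneInfo(tz))` instead of calling `tzinfo.fromutc(dt)`.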
### Are these changes tested?
Yes. Initially tested locally with pandas upgraded to 3.0 as CI was still
running with pandas 2.3.3 cached.
### Are there any user-facing changes?
No.
* GitHub Issue: #48978
Lead-authored-by: Tadeja Kadunc <[email protected]>
Co-authored-by: tadeja <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Rok Mihevc <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
python/pyarrow/tests/parquet/test_basic.py | 11 +++++++----
python/pyarrow/tests/test_pandas.py | 3 ++-
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/python/pyarrow/tests/parquet/test_basic.py
b/python/pyarrow/tests/parquet/test_basic.py
index 345aee3c4e..03fcf2defe 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -736,6 +736,7 @@ def test_parquet_file_too_small(tempdir):
@pytest.mark.fastparquet
@pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
@pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
+@pytest.mark.filterwarnings("ignore:unclosed file:ResourceWarning")
def test_fastparquet_cross_compatibility(tempdir):
fp = pytest.importorskip('fastparquet')
@@ -759,17 +760,19 @@ def test_fastparquet_cross_compatibility(tempdir):
fp_file = fp.ParquetFile(file_arrow)
df_fp = fp_file.to_pandas()
- tm.assert_frame_equal(df, df_fp)
+ # pandas 3 defaults to StringDtype for strings, fastparquet still returns object
+ # TODO: remove astype casts once fastparquet supports pandas 3 StringDtype
+ tm.assert_frame_equal(df_fp, df.astype({"a": object}))
# Fastparquet -> arrow
file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet")
- fp.write(file_fastparquet, df)
+ # fastparquet doesn't support writing pandas 3 StringDtype yet
+ fp.write(file_fastparquet, df.astype({"a": object}))
table_fp = pq.read_pandas(file_fastparquet)
# for fastparquet written file, categoricals comes back as strings
# (no arrow schema in parquet metadata)
- df['f'] = df['f'].astype(object)
- tm.assert_frame_equal(table_fp.to_pandas(), df)
+ tm.assert_frame_equal(table_fp.to_pandas(), df.astype({"f": object}))
@pytest.mark.parametrize('array_factory', [
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index cecf10f216..5fde980dd8 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -24,6 +24,7 @@ import warnings
from collections import OrderedDict
from datetime import date, datetime, time, timedelta, timezone
+from zoneinfo import ZoneInfo
import hypothesis as h
import hypothesis.strategies as st
@@ -4956,7 +4957,7 @@ def test_timestamp_as_object_non_nanosecond(resolution,
tz, dt):
assert isinstance(result[0], datetime)
if tz:
assert result[0].tzinfo is not None
- expected = result[0].tzinfo.fromutc(dt)
+ expected = dt.replace(tzinfo=timezone.utc).astimezone(ZoneInfo(tz))
else:
assert result[0].tzinfo is None
expected = dt