This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b90a2b82d8 GH-48314: [Python] Compat with pandas 3.0 changed default
datetime unit (#48319)
b90a2b82d8 is described below
commit b90a2b82d85b1479470b7f1bdd941c9a59ecd3d4
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Jan 12 10:02:10 2026 +0100
GH-48314: [Python] Compat with pandas 3.0 changed default datetime unit
(#48319)
### Rationale for this change
pandas 3.0 changes the default datetime/timedelta resolution from
nanoseconds to microseconds. We already had mostly accounted for that in
previous PRs, but pandas made this change in the last few days in a few
additional places (e.g. `pd.date_range`), uncovering some more issues.
### What changes are included in this PR?
- Don't hardcode the nanosecond unit in the metadata and when recreating a
datetime-tz column's Index
- Update a few tests to account for those changes
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* GitHub Issue: #48314
Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
python/pyarrow/pandas_compat.py | 4 ++--
python/pyarrow/tests/test_pandas.py | 26 ++++++++++++++++++++------
2 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index dfed76d371..dfca59cbf5 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -140,7 +140,7 @@ def get_extension_dtype_info(column):
physical_dtype = str(cats.codes.dtype)
elif hasattr(dtype, 'tz'):
metadata = {'timezone': pa.lib.tzinfo_to_string(dtype.tz)}
- physical_dtype = 'datetime64[ns]'
+ physical_dtype = f'datetime64[{dtype.unit}]'
else:
metadata = None
physical_dtype = str(dtype)
@@ -1188,7 +1188,7 @@ def _reconstruct_columns_from_metadata(columns,
column_indexes):
if _pandas_api.is_ge_v3():
# with pandas 3+, to_datetime returns a unit depending on the
string
# data, so we restore it to the original unit from the metadata
- level = level.as_unit(np.datetime_data(dtype)[0])
+ level = level.as_unit(np.datetime_data(numpy_dtype)[0])
# GH-41503: if the column index was decimal, restore to decimal
elif pandas_dtype == "decimal":
level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level])
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index daa9c8314a..481292e1e6 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -212,13 +212,14 @@ class TestConvertMetadata:
df.columns.names = ['a']
_check_pandas_roundtrip(df, preserve_index=True)
- def test_column_index_names_with_tz(self):
+ @pytest.mark.parametrize("tz", [None, "Europe/Brussels"])
+ def test_column_index_names_datetime(self, tz):
# ARROW-13756
# Bug if index is timezone aware DataTimeIndex
df = pd.DataFrame(
np.random.randn(5, 3),
- columns=pd.date_range("2021-01-01", periods=3, freq="50D",
tz="CET")
+ columns=pd.date_range("2021-01-01", periods=3, freq="50D", tz=tz)
)
_check_pandas_roundtrip(df, preserve_index=True)
@@ -447,11 +448,16 @@ class TestConvertMetadata:
assert len(md) == 1
assert md['encoding'] == 'UTF-8'
- def test_datetimetz_column_index(self):
+ @pytest.mark.parametrize('unit', ['us', 'ns'])
+ def test_datetimetz_column_index(self, unit):
+ ext_kwargs = {}
+ if Version(pd.__version__) >= Version("2.0.0"):
+ # unit argument not supported on date_range for pandas < 2.0.0
+ ext_kwargs = {'unit': unit}
df = pd.DataFrame(
[(1, 'a', 2.0), (2, 'b', 3.0), (3, 'c', 4.0)],
columns=pd.date_range(
- start='2017-01-01', periods=3, tz='America/New_York'
+ start='2017-01-01', periods=3, tz='America/New_York',
**ext_kwargs
)
)
t = pa.Table.from_pandas(df, preserve_index=True)
@@ -460,7 +466,10 @@ class TestConvertMetadata:
column_indexes, = js['column_indexes']
assert column_indexes['name'] is None
assert column_indexes['pandas_type'] == 'datetimetz'
- assert column_indexes['numpy_type'] == 'datetime64[ns]'
+ if ext_kwargs:
+ assert column_indexes['numpy_type'] == f'datetime64[{unit}]'
+ else:
+ assert column_indexes['numpy_type'] == 'datetime64[ns]'
md = column_indexes['metadata']
assert md['timezone'] == 'America/New_York'
@@ -709,7 +718,12 @@ class TestConvertMetadata:
# It is possible that the metadata and actual schema is not fully
# matching (eg no timezone information for tz-aware column)
# -> to_pandas() conversion should not fail on that
- df = pd.DataFrame({"datetime": pd.date_range("2020-01-01", periods=3)})
+ ext_kwargs = {}
+ if Version(pd.__version__) >= Version("2.0.0"):
+ # unit argument not supported on date_range for pandas < 2.0.0
+ ext_kwargs = {'unit': 'ns'}
+ df = pd.DataFrame({"datetime": pd.date_range(
+ "2020-01-01", periods=3, **ext_kwargs)})
# OPTION 1: casting after conversion
table = pa.Table.from_pandas(df)