This is an automated email from the ASF dual-hosted git repository.

willayd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 0cd9c8e824 GH-33473 [Python] Fix KeyError on Pandas roundtrip with 
RangeIndex in MultiIndex (#39983)
0cd9c8e824 is described below

commit 0cd9c8e824fb365ce66137648f47f7ed4fb9e764
Author: Eirik <[email protected]>
AuthorDate: Mon Nov 24 16:12:18 2025 +0100

    GH-33473 [Python] Fix KeyError on Pandas roundtrip with RangeIndex in 
MultiIndex (#39983)
    
    ### Rationale for this change
    Fixes a bug when round-tripping to pandas with a MultiIndex that contains 
a RangeIndex level.
    
    ### Are these changes tested?
    Yes
    
    ### Are there any user-facing changes?
    No
    
    I don't know if this counts as a "Critical Fix". Without it, 
`Table.from_pandas()` can return a table which cannot be converted back with 
`table.to_pandas()` due to a column missing from the `"pandas"` field in the 
table metadata.
    * Closes: #33473
    
    Authored-by: Eirik B. Stavestrand <[email protected]>
    Signed-off-by: Will Ayd <[email protected]>
---
 python/pyarrow/pandas_compat.py     | 20 +++++++++++---------
 python/pyarrow/tests/test_pandas.py |  8 ++++++++
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index b0009507e4..dfed76d371 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -221,8 +221,14 @@ def construct_metadata(columns_to_convert, df, 
column_names, index_levels,
         # see https://github.com/apache/arrow/pull/44963#discussion_r1875771953
         column_field_names = [str(name) for name in column_names]
 
-    num_serialized_index_levels = len([descr for descr in index_descriptors
-                                       if not isinstance(descr, dict)])
+    serialized_index_levels = [
+        (level, descriptor)
+        for level, descriptor in zip(index_levels, index_descriptors)
+        if not isinstance(descriptor, dict)
+    ]
+
+    num_serialized_index_levels = len(serialized_index_levels)
+
     # Use ntypes instead of Python shorthand notation [:-len(x)] as [:-0]
     # behaves differently to what we want.
     ntypes = len(types)
@@ -240,13 +246,9 @@ def construct_metadata(columns_to_convert, df, 
column_names, index_levels,
     index_column_metadata = []
     if preserve_index is not False:
         non_str_index_names = []
-        for level, arrow_type, descriptor in zip(index_levels, index_types,
-                                                 index_descriptors):
-            if isinstance(descriptor, dict):
-                # The index is represented in a non-serialized fashion,
-                # e.g. RangeIndex
-                continue
-
+        for (level, descriptor), arrow_type in zip(
+            serialized_index_levels, index_types
+        ):
             if level.name is not None and not isinstance(level.name, str):
                 non_str_index_names.append(level.name)
 
diff --git a/python/pyarrow/tests/test_pandas.py 
b/python/pyarrow/tests/test_pandas.py
index 5a55e90fbb..7f9b04eaab 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -345,6 +345,14 @@ class TestConvertMetadata:
             )
             _check_pandas_roundtrip(df, preserve_index=True)
 
+    def test_multiindex_rangeindex(self):
+        # https://github.com/apache/arrow/issues/33473
+        multiindex = pd.MultiIndex.from_arrays(
+            [pd.RangeIndex(0, 2), pd.Index([1, 2])]
+        )
+        df = pd.DataFrame(pd.Series([1, 2], name="a"), index=multiindex)
+        _check_pandas_roundtrip(df, preserve_index=None)
+
     def test_integer_index_column(self):
         df = pd.DataFrame([(1, 'a'), (2, 'b'), (3, 'c')])
         _check_pandas_roundtrip(df, preserve_index=True)

Reply via email to