This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b679a96d42 GH-34283 [Python] Add types_mapper support to index for to_pandas (#34445)
b679a96d42 is described below

commit b679a96d426f4df1a2d15d452f312c968cdfc8f6
Author: Patrick Hoefler <[email protected]>
AuthorDate: Thu Mar 9 07:21:05 2023 +0000

    GH-34283 [Python] Add types_mapper support to index for to_pandas (#34445)
    
    
    
    ### Rationale for this change
    
    ### What changes are included in this PR?
    
    The only change: `to_pandas` now respects `types_mapper` for index columns as well.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    Technically this is a breaking API change: `to_pandas` now respects the types_mapper for the index as well.
    
    - [x] closes #34283
    
    cc @ jorisvandenbossche
    
    Authored-by: Patrick Hoefler <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/pyarrow/pandas_compat.py     | 10 +++++-----
 python/pyarrow/tests/test_pandas.py | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index d624459ca4..a6de60e87b 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -808,7 +808,7 @@ def table_to_blockmanager(options, table, categories=None,
         index_descriptors = pandas_metadata['index_columns']
         table = _add_any_metadata(table, pandas_metadata)
         table, index = _reconstruct_index(table, index_descriptors,
-                                          all_columns)
+                                          all_columns, types_mapper)
         ext_columns_dtypes = _get_extension_dtypes(
             table, all_columns, types_mapper)
     else:
@@ -940,7 +940,7 @@ def _deserialize_column_index(block_table, all_columns, column_indexes):
     return columns
 
 
-def _reconstruct_index(table, index_descriptors, all_columns):
+def _reconstruct_index(table, index_descriptors, all_columns, types_mapper=None):
     # 0. 'field_name' is the name of the column in the arrow Table
     # 1. 'name' is the user-facing name of the column, that is, it came from
     #    pandas
@@ -959,7 +959,7 @@ def _reconstruct_index(table, index_descriptors, all_columns):
     for descr in index_descriptors:
         if isinstance(descr, str):
             result_table, index_level, index_name = _extract_index_level(
-                table, result_table, descr, field_name_to_metadata)
+                table, result_table, descr, field_name_to_metadata, types_mapper)
             if index_level is None:
                 # ARROW-1883: the serialized index column was not found
                 continue
@@ -995,7 +995,7 @@ def _reconstruct_index(table, index_descriptors, all_columns):
 
 
 def _extract_index_level(table, result_table, field_name,
-                         field_name_to_metadata):
+                         field_name_to_metadata, types_mapper=None):
     logical_name = field_name_to_metadata[field_name]['name']
     index_name = _backwards_compatible_index_name(field_name, logical_name)
     i = table.schema.get_field_index(field_name)
@@ -1007,7 +1007,7 @@ def _extract_index_level(table, result_table, field_name,
     pd = _pandas_api.pd
 
     col = table.column(i)
-    values = col.to_pandas().values
+    values = col.to_pandas(types_mapper=types_mapper).values
 
     if hasattr(values, 'flags') and not values.flags.writeable:
         # ARROW-1054: in pandas 0.19.2, factorize will reject
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 4d0ddf8754..2c3c986565 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4159,6 +4159,24 @@ def test_roundtrip_empty_table_with_extension_dtype_index():
                                         dtype='object')
 
 
+@pytest.mark.parametrize("index", ["a", ["a", "b"]])
+def test_to_pandas_types_mapper_index(index):
+    if Version(pd.__version__) < Version("1.5.0"):
+        pytest.skip("ArrowDtype missing")
+    df = pd.DataFrame(
+        {
+            "a": [1, 2],
+            "b": [3, 4],
+            "c": [5, 6],
+        },
+        dtype=pd.ArrowDtype(pa.int64()),
+    ).set_index(index)
+    expected = df.copy()
+    table = pa.table(df)
+    result = table.to_pandas(types_mapper=pd.ArrowDtype)
+    tm.assert_frame_equal(result, expected)
+
+
 def test_array_to_pandas_types_mapper():
     # https://issues.apache.org/jira/browse/ARROW-9664
     if Version(pd.__version__) < Version("1.2.0"):

Reply via email to