This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2fadab2aa6 GH-38342: [Python] Update to_pandas to use non-deprecated DataFrame constructor (#38374)
2fadab2aa6 is described below

commit 2fadab2aa65425ec4e392e5cf8fd2082f3685212
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri Dec 1 13:11:35 2023 +0100

    GH-38342: [Python] Update to_pandas to use non-deprecated DataFrame constructor (#38374)
    
    ### Rationale for this change
    
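    Avoid a deprecation warning from pandas: on pandas >= 2.1, build the
    DataFrame with `DataFrame._from_mgr` instead of passing a BlockManager
    to the public `DataFrame` constructor.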
    
    * Closes: #38342
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/pyarrow/pandas-shim.pxi  | 11 ++++++++---
 python/pyarrow/pandas_compat.py | 13 ++++++++++---
 python/pyarrow/table.pxi        |  6 +++---
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index a0c0cabf6d..273575b779 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -37,7 +37,7 @@ cdef class _PandasAPIShim(object):
         object _array_like_types, _is_extension_array_dtype
         bint has_sparse
         bint _pd024
-        bint _is_v1
+        bint _is_v1, _is_ge_v21
 
     def __init__(self):
         self._tried_importing_pandas = False
@@ -74,8 +74,9 @@ cdef class _PandasAPIShim(object):
                     "installed. Therefore, pandas-specific integration is not "
                     "used.".format(self._version), stacklevel=2)
                 return
-        elif self._loose_version < Version('2.0.0'):
-            self._is_v1 = True
+
+        self._is_v1 = self._loose_version < Version('2.0.0')
+        self._is_ge_v21 = self._loose_version >= Version('2.1.0')
 
         self._compat_module = pdcompat
         self._data_frame = pd.DataFrame
@@ -158,6 +159,10 @@ cdef class _PandasAPIShim(object):
         self._check_import()
         return self._is_v1
 
+    def is_ge_v21(self):
+        self._check_import()
+        return self._is_ge_v21
+
     @property
     def categorical_type(self):
         self._check_import()
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index be29f68a13..80e313be02 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -744,9 +744,11 @@ def make_datetimetz(unit, tz):
     return _pandas_api.datetimetz_type(unit, tz=tz)
 
 
-def table_to_blockmanager(options, table, categories=None,
-                          ignore_metadata=False, types_mapper=None):
+def table_to_dataframe(
+    options, table, categories=None, ignore_metadata=False, types_mapper=None
+):
     from pandas.core.internals import BlockManager
+    from pandas import DataFrame
 
     all_columns = []
     column_indexes = []
@@ -770,7 +772,12 @@ def table_to_blockmanager(options, table, categories=None,
     blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
 
     axes = [columns, index]
-    return BlockManager(blocks, axes)
+    mgr = BlockManager(blocks, axes)
+    if _pandas_api.is_ge_v21():
+        df = DataFrame._from_mgr(mgr, mgr.axes)
+    else:
+        df = DataFrame(mgr)
+    return df
 
 
 # Set of the string repr of all numpy dtypes that can be stored in a pandas
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index bbed789553..f93f595090 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -4191,12 +4191,12 @@ cdef class Table(_Tabular):
 
     def _to_pandas(self, options, categories=None, ignore_metadata=False,
                    types_mapper=None):
-        from pyarrow.pandas_compat import table_to_blockmanager
-        mgr = table_to_blockmanager(
+        from pyarrow.pandas_compat import table_to_dataframe
+        df = table_to_dataframe(
             options, self, categories,
             ignore_metadata=ignore_metadata,
             types_mapper=types_mapper)
-        return pandas_api.data_frame(mgr)
+        return df
 
     @property
     def schema(self):

Reply via email to