This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2fadab2aa6 GH-38342: [Python] Update to_pandas to use non-deprecated
DataFrame constructor (#38374)
2fadab2aa6 is described below
commit 2fadab2aa65425ec4e392e5cf8fd2082f3685212
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri Dec 1 13:11:35 2023 +0100
GH-38342: [Python] Update to_pandas to use non-deprecated DataFrame
constructor (#38374)
### Rationale for this change
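Avoid the deprecation warning that pandas emits when a DataFrame is constructed by passing a BlockManager to the public `DataFrame(...)` constructor. On pandas >= 2.1, `to_pandas` now builds the result with `DataFrame._from_mgr(mgr, mgr.axes)`; older pandas versions keep using `DataFrame(mgr)`.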
* Closes: #38342
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/pandas-shim.pxi | 11 ++++++++---
python/pyarrow/pandas_compat.py | 13 ++++++++++---
python/pyarrow/table.pxi | 6 +++---
3 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index a0c0cabf6d..273575b779 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -37,7 +37,7 @@ cdef class _PandasAPIShim(object):
object _array_like_types, _is_extension_array_dtype
bint has_sparse
bint _pd024
- bint _is_v1
+ bint _is_v1, _is_ge_v21
def __init__(self):
self._tried_importing_pandas = False
@@ -74,8 +74,9 @@ cdef class _PandasAPIShim(object):
"installed. Therefore, pandas-specific integration is not "
"used.".format(self._version), stacklevel=2)
return
- elif self._loose_version < Version('2.0.0'):
- self._is_v1 = True
+
+ self._is_v1 = self._loose_version < Version('2.0.0')
+ self._is_ge_v21 = self._loose_version >= Version('2.1.0')
self._compat_module = pdcompat
self._data_frame = pd.DataFrame
@@ -158,6 +159,10 @@ cdef class _PandasAPIShim(object):
self._check_import()
return self._is_v1
+ def is_ge_v21(self):
+ self._check_import()
+ return self._is_ge_v21
+
@property
def categorical_type(self):
self._check_import()
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index be29f68a13..80e313be02 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -744,9 +744,11 @@ def make_datetimetz(unit, tz):
return _pandas_api.datetimetz_type(unit, tz=tz)
-def table_to_blockmanager(options, table, categories=None,
- ignore_metadata=False, types_mapper=None):
+def table_to_dataframe(
+ options, table, categories=None, ignore_metadata=False, types_mapper=None
+):
from pandas.core.internals import BlockManager
+ from pandas import DataFrame
all_columns = []
column_indexes = []
@@ -770,7 +772,12 @@ def table_to_blockmanager(options, table, categories=None,
blocks = _table_to_blocks(options, table, categories, ext_columns_dtypes)
axes = [columns, index]
- return BlockManager(blocks, axes)
+ mgr = BlockManager(blocks, axes)
+ if _pandas_api.is_ge_v21():
+ df = DataFrame._from_mgr(mgr, mgr.axes)
+ else:
+ df = DataFrame(mgr)
+ return df
# Set of the string repr of all numpy dtypes that can be stored in a pandas
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index bbed789553..f93f595090 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -4191,12 +4191,12 @@ cdef class Table(_Tabular):
def _to_pandas(self, options, categories=None, ignore_metadata=False,
types_mapper=None):
- from pyarrow.pandas_compat import table_to_blockmanager
- mgr = table_to_blockmanager(
+ from pyarrow.pandas_compat import table_to_dataframe
+ df = table_to_dataframe(
options, self, categories,
ignore_metadata=ignore_metadata,
types_mapper=types_mapper)
- return pandas_api.data_frame(mgr)
+ return df
@property
def schema(self):