This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 87ea379148 GH-29705: [Python] Clean-up no-longer-used pandas dataframe
serialization helpers (#36413)
87ea379148 is described below
commit 87ea379148ea6321ebb1b565f00b27c597167082
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Tue Jul 4 20:12:56 2023 +0200
GH-29705: [Python] Clean-up no-longer-used pandas dataframe serialization
helpers (#36413)
Small follow-up to https://github.com/apache/arrow/pull/34926, which
removed the `pyarrow.serialization` functionality and thereby made the
pandas DataFrame serialization helpers in `pandas_compat.py` obsolete.
* Closes: #29705
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/pandas_compat.py | 53 +----------------------------------------
1 file changed, 1 insertion(+), 52 deletions(-)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 6b9514ea6b..5369677e87 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -673,54 +673,7 @@ def get_datetimetz_type(values, dtype, type_):
return values, type_
# ----------------------------------------------------------------------
-# Converting pandas.DataFrame to a dict containing only NumPy arrays or other
-# objects friendly to pyarrow.serialize
-
-
-def dataframe_to_serialized_dict(frame):
- block_manager = frame._data
-
- blocks = []
- axes = [ax for ax in block_manager.axes]
-
- for block in block_manager.blocks:
- values = block.values
- block_data = {}
-
- if _pandas_api.is_datetimetz(values.dtype):
- block_data['timezone'] = pa.lib.tzinfo_to_string(values.tz)
- if hasattr(values, 'values'):
- values = values.values
- elif _pandas_api.is_categorical(values):
- block_data.update(dictionary=values.categories,
- ordered=values.ordered)
- values = values.codes
- block_data.update(
- placement=block.mgr_locs.as_array,
- block=values
- )
-
- # If we are dealing with an object array, pickle it instead.
- if values.dtype == np.dtype(object):
- block_data['object'] = None
- block_data['block'] = builtin_pickle.dumps(
- values, protocol=builtin_pickle.HIGHEST_PROTOCOL)
-
- blocks.append(block_data)
-
- return {
- 'blocks': blocks,
- 'axes': axes
- }
-
-
-def serialized_dict_to_dataframe(data):
- import pandas.core.internals as _int
- reconstructed_blocks = [_reconstruct_block(block)
- for block in data['blocks']]
-
- block_mgr = _int.BlockManager(reconstructed_blocks, data['axes'])
- return _pandas_api.data_frame(block_mgr)
+# Converting pyarrow.Table efficiently to pandas.DataFrame
def _reconstruct_block(item, columns=None, extension_columns=None):
@@ -790,10 +743,6 @@ def make_datetimetz(tz):
return _pandas_api.datetimetz_type('ns', tz=tz)
-# ----------------------------------------------------------------------
-# Converting pyarrow.Table efficiently to pandas.DataFrame
-
-
def table_to_blockmanager(options, table, categories=None,
ignore_metadata=False, types_mapper=None):
from pandas.core.internals import BlockManager