[ 
https://issues.apache.org/jira/browse/ARROW-1971?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16321367#comment-16321367
 ] 

ASF GitHub Bot commented on ARROW-1971:
---------------------------------------

wesm closed pull request #1462: ARROW-1971: [Python] Add pandas serialization 
to the default
URL: https://github.com/apache/arrow/pull/1462
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request was opened from a fork, GitHub does not show
its diff once it has been merged, so the diff is reproduced below:

diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 689ec15d3..61f2e83f3 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -21,7 +21,6 @@
 
 import numpy as np
 
-from pyarrow import serialize_pandas, deserialize_pandas
 from pyarrow.compat import builtin_pickle
 from pyarrow.lib import _default_serialization_context, frombuffer
 
@@ -61,6 +60,48 @@ def _load_pickle_from_buffer(data):
 _deserialize_numpy_array_pickle = _load_pickle_from_buffer
 
 
+# ----------------------------------------------------------------------
+# pandas-specific serialization matters
+
+def _register_custom_pandas_handlers(context):
+    # ARROW-1784, faster path for pandas-only visibility
+
+    try:
+        import pandas as pd
+    except ImportError:
+        return
+
+    import pyarrow.pandas_compat as pdcompat
+
+    def _serialize_pandas_dataframe(obj):
+        return pdcompat.dataframe_to_serialized_dict(obj)
+
+    def _deserialize_pandas_dataframe(data):
+        return pdcompat.serialized_dict_to_dataframe(data)
+
+    def _serialize_pandas_series(obj):
+        return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
+
+    def _deserialize_pandas_series(data):
+        deserialized = _deserialize_pandas_dataframe(data)
+        return deserialized[deserialized.columns[0]]
+
+    context.register_type(
+        pd.Series, 'pd.Series',
+        custom_serializer=_serialize_pandas_series,
+        custom_deserializer=_deserialize_pandas_series)
+
+    context.register_type(
+        pd.Index, 'pd.Index',
+        custom_serializer=_pickle_to_buffer,
+        custom_deserializer=_load_pickle_from_buffer)
+
+    context.register_type(
+        pd.DataFrame, 'pd.DataFrame',
+        custom_serializer=_serialize_pandas_dataframe,
+        custom_deserializer=_deserialize_pandas_dataframe)
+
+
 def register_default_serialization_handlers(serialization_context):
 
     # ----------------------------------------------------------------------
@@ -136,90 +177,13 @@ def _deserialize_torch_tensor(data):
         # no torch
         pass
 
-
-register_default_serialization_handlers(_default_serialization_context)
+    _register_custom_pandas_handlers(serialization_context)
 
 
-# ----------------------------------------------------------------------
-# pandas-specific serialization matters
-
+register_default_serialization_handlers(_default_serialization_context)
 
 pandas_serialization_context = _default_serialization_context.clone()
 
-
-def _register_pandas_arrow_handlers(context):
-    try:
-        import pandas as pd
-    except ImportError:
-        return
-
-    def _serialize_pandas_series(obj):
-        return serialize_pandas(pd.DataFrame({obj.name: obj}))
-
-    def _deserialize_pandas_series(data):
-        deserialized = deserialize_pandas(data)
-        return deserialized[deserialized.columns[0]]
-
-    def _serialize_pandas_dataframe(obj):
-        return serialize_pandas(obj)
-
-    def _deserialize_pandas_dataframe(data):
-        return deserialize_pandas(data)
-
-    context.register_type(
-        pd.Series, 'pd.Series',
-        custom_serializer=_serialize_pandas_series,
-        custom_deserializer=_deserialize_pandas_series)
-
-    context.register_type(
-        pd.DataFrame, 'pd.DataFrame',
-        custom_serializer=_serialize_pandas_dataframe,
-        custom_deserializer=_deserialize_pandas_dataframe)
-
-
-def _register_custom_pandas_handlers(context):
-    # ARROW-1784, faster path for pandas-only visibility
-
-    try:
-        import pandas as pd
-    except ImportError:
-        return
-
-    import pyarrow.pandas_compat as pdcompat
-
-    def _serialize_pandas_dataframe(obj):
-        return pdcompat.dataframe_to_serialized_dict(obj)
-
-    def _deserialize_pandas_dataframe(data):
-        return pdcompat.serialized_dict_to_dataframe(data)
-
-    def _serialize_pandas_series(obj):
-        return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
-
-    def _deserialize_pandas_series(data):
-        deserialized = _deserialize_pandas_dataframe(data)
-        return deserialized[deserialized.columns[0]]
-
-    context.register_type(
-        pd.Series, 'pd.Series',
-        custom_serializer=_serialize_pandas_series,
-        custom_deserializer=_deserialize_pandas_series)
-
-    context.register_type(
-        pd.Index, 'pd.Index',
-        custom_serializer=_pickle_to_buffer,
-        custom_deserializer=_load_pickle_from_buffer)
-
-    context.register_type(
-        pd.DataFrame, 'pd.DataFrame',
-        custom_serializer=_serialize_pandas_dataframe,
-        custom_deserializer=_deserialize_pandas_dataframe)
-
-
-_register_pandas_arrow_handlers(_default_serialization_context)
-_register_custom_pandas_handlers(pandas_serialization_context)
-
-
 pandas_serialization_context.register_type(
     np.ndarray, 'np.array',
     custom_serializer=_serialize_numpy_array_pickle,


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Add pandas serialization to the default
> ---------------------------------------
>
>                 Key: ARROW-1971
>                 URL: https://issues.apache.org/jira/browse/ARROW-1971
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>    Affects Versions: 0.8.0
>            Reporter: Devin Petersohn
>            Priority: Minor
>              Labels: pull-request-available
>
> {{pyarrow.register_default_serialization_handlers(context)}} does not include 
> pandas serialization. We do not use the default Arrow context, so we are 
> forced to call {{_register_pandas_arrow_handlers}} to register pandas 
> correctly.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to