[ 
https://issues.apache.org/jira/browse/ARROW-1893?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16282058#comment-16282058
 ] 

ASF GitHub Bot commented on ARROW-1893:
---------------------------------------

wesm closed pull request #1398: ARROW-1893: [Python] Convert memoryview to 
bytes when loading from pickle in Python 2.7
URL: https://github.com/apache/arrow/pull/1398
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py
index 866cbdd96..1b19ca0e4 100644
--- a/python/pyarrow/compat.py
+++ b/python/pyarrow/compat.py
@@ -70,7 +70,7 @@ class Categorical(ClassPlaceholder):
 
 
 if PY2:
-    import cPickle
+    import cPickle as builtin_pickle
 
     try:
         from cdecimal import Decimal
@@ -107,6 +107,8 @@ def frombytes(o):
     def unichar(s):
         return unichr(s)
 else:
+    import pickle as builtin_pickle
+
     unicode_type = str
     def lzip(*x):
         return list(zip(*x))
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index b6d2b0258..3059dfc1b 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -16,18 +16,19 @@
 # under the License.
 
 from collections import OrderedDict, defaultdict
+import six
 import sys
-import pickle
 
 import numpy as np
 
 from pyarrow import serialize_pandas, deserialize_pandas
+from pyarrow.compat import builtin_pickle
 from pyarrow.lib import _default_serialization_context, frombuffer
 
 try:
     import cloudpickle
 except ImportError:
-    cloudpickle = pickle
+    cloudpickle = builtin_pickle
 
 
 # ----------------------------------------------------------------------
@@ -44,12 +45,16 @@ def _deserialize_numpy_array_list(data):
 
 
 def _pickle_to_buffer(x):
-    pickled = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL)
+    pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL)
     return frombuffer(pickled)
 
 
 def _load_pickle_from_buffer(data):
-    return pickle.loads(memoryview(data))
+    as_memoryview = memoryview(data)
+    if six.PY2:
+        return builtin_pickle.loads(as_memoryview.tobytes())
+    else:
+        return builtin_pickle.loads(as_memoryview)
 
 
 _serialize_numpy_array_pickle = _pickle_to_buffer
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index d17d89e24..2543e7d17 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -1570,19 +1570,21 @@ def test_backwards_compatible_index_multi_level_some_named():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize('precision', range(1, 39))
-def test_decimal_roundtrip(tmpdir, precision):
+def test_decimal_roundtrip(tmpdir):
     num_values = 10
 
     columns = {}
 
-    for scale in range(0, precision + 1):
-        with util.random_seed(0):
-            random_decimal_values = [
-                util.randdecimal(precision, scale) for _ in range(num_values)
-            ]
-        column_name = 'dec_precision_{:d}_scale_{:d}'.format(precision, scale)
-        columns[column_name] = random_decimal_values
+    for precision in range(1, 39):
+        for scale in range(0, precision + 1):
+            with util.random_seed(0):
+                random_decimal_values = [
+                    util.randdecimal(precision, scale)
+                    for _ in range(num_values)
+                ]
+            column_name = ('dec_precision_{:d}_scale_{:d}'
+                           .format(precision, scale))
+            columns[column_name] = random_decimal_values
 
     expected = pd.DataFrame(columns)
     filename = tmpdir.join('decimals.parquet')


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> [Python] test_primitive_serialization fails on Python 2.7.3
> -----------------------------------------------------------
>
>                 Key: ARROW-1893
>                 URL: https://issues.apache.org/jira/browse/ARROW-1893
>             Project: Apache Arrow
>          Issue Type: Bug
>            Reporter: Uwe L. Korn
>            Assignee: Wes McKinney
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> {{test_primitive_serialization}} fails on Python 2.7.3 with the following
> error:
> {code}
> str = <memory at 0x44dff28>
>  
>      def loads(str):
>  >       file = StringIO(str)
> E       TypeError: expected read buffer, memoryview found
> {code}
> More context:
> {code}
>      def test_primitive_serialization(large_memory_map):
>          with pa.memory_map(large_memory_map, mode="r+") as mmap:
>              for obj in PRIMITIVE_OBJECTS:
>                  serialization_roundtrip(obj, mmap)
> >               serialization_roundtrip(obj, mmap, pa.pandas_serialization_context)
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to