This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new aaa978b ARROW-1893: [Python] Convert memoryview to bytes when loading from pickle in Python 2.7 aaa978b is described below commit aaa978b9c072d3c436838ad2a35b0f6bde1891b6 Author: Wes McKinney <wes.mckin...@twosigma.com> AuthorDate: Thu Dec 7 11:00:56 2017 -0500 ARROW-1893: [Python] Convert memoryview to bytes when loading from pickle in Python 2.7 It seems somewhere in the 2.7.x series, Python 2.7 acquired the ability to load from memoryview. To be on the safe side, we'll always convert memoryview to bytes. Here's a related workaround from IPython: https://github.com/ipython/ipython_genutils/blob/master/ipython_genutils/py3compat.py#L153 Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #1398 from wesm/ARROW-1893 and squashes the following commits: cbe69134 [Wes McKinney] Not all versions of Python 2.7 can load pickles directly from memoryview --- python/pyarrow/compat.py | 4 +++- python/pyarrow/serialization.py | 13 +++++++++---- python/pyarrow/tests/test_parquet.py | 20 +++++++++++--------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index 866cbdd..1b19ca0 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -70,7 +70,7 @@ else: if PY2: - import cPickle + import cPickle as builtin_pickle try: from cdecimal import Decimal @@ -107,6 +107,8 @@ if PY2: def unichar(s): return unichr(s) else: + import pickle as builtin_pickle + unicode_type = str def lzip(*x): return list(zip(*x)) diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py index b6d2b02..3059dfc 100644 --- a/python/pyarrow/serialization.py +++ b/python/pyarrow/serialization.py @@ -16,18 +16,19 @@ # under the License. from collections import OrderedDict, defaultdict +import six import sys -import pickle import numpy as np from pyarrow import serialize_pandas, deserialize_pandas +from pyarrow.compat import builtin_pickle from pyarrow.lib import _default_serialization_context, frombuffer try: import cloudpickle except ImportError: - cloudpickle = pickle + cloudpickle = builtin_pickle # ---------------------------------------------------------------------- @@ -44,12 +45,16 @@ def _deserialize_numpy_array_list(data): def _pickle_to_buffer(x): - pickled = pickle.dumps(x, protocol=pickle.HIGHEST_PROTOCOL) + pickled = builtin_pickle.dumps(x, protocol=builtin_pickle.HIGHEST_PROTOCOL) return frombuffer(pickled) def _load_pickle_from_buffer(data): - return pickle.loads(memoryview(data)) + as_memoryview = memoryview(data) + if six.PY2: + return builtin_pickle.loads(as_memoryview.tobytes()) + else: + return builtin_pickle.loads(as_memoryview) _serialize_numpy_array_pickle = _pickle_to_buffer diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py index d17d89e..2543e7d 100644 --- a/python/pyarrow/tests/test_parquet.py +++ b/python/pyarrow/tests/test_parquet.py @@ -1570,19 +1570,21 @@ carat cut color clarity depth table price x y z tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize('precision', range(1, 39)) -def test_decimal_roundtrip(tmpdir, precision): +def test_decimal_roundtrip(tmpdir): num_values = 10 columns = {} - for scale in range(0, precision + 1): - with util.random_seed(0): - random_decimal_values = [ - util.randdecimal(precision, scale) for _ in range(num_values) - ] - column_name = 'dec_precision_{:d}_scale_{:d}'.format(precision, scale) - columns[column_name] = random_decimal_values + for precision in range(1, 39): + for scale in range(0, precision + 1): + with util.random_seed(0): + random_decimal_values = [ + util.randdecimal(precision, scale) + for _ in range(num_values) + ] + column_name = ('dec_precision_{:d}_scale_{:d}' + .format(precision, scale)) + columns[column_name] = random_decimal_values expected = pd.DataFrame(columns) filename = tmpdir.join('decimals.parquet') -- To stop receiving notification emails like this one, please contact ['"commits@arrow.apache.org" <commits@arrow.apache.org>'].