This is an automated email from the ASF dual-hosted git repository. pcmoritz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 3d41978 ARROW-2611: [Python] Fix Python 2 integer serialization 3d41978 is described below commit 3d419781b662c0dcdf922edaaff7202c0414c7ed Author: Peter Schafhalter <pschafhal...@berkeley.edu> AuthorDate: Thu May 17 17:53:34 2018 -0700 ARROW-2611: [Python] Fix Python 2 integer serialization Fixes an issue where serialization turns integers into longs in Python 2. ```python In [1]: import pyarrow as pa In [2]: value = 1 In [3]: type(value) Out[3]: int In [4]: serialized = pa.serialize(value) In [5]: deserialized = serialized.deserialize() In [6]: type(deserialized) Out[6]: long ``` Author: Peter Schafhalter <pschafhal...@berkeley.edu> Closes #2055 from pschafhalter/fix-python2-int-serialization and squashes the following commits: 7b96b679 <Peter Schafhalter> Fix bug with Python 3 C++ API 5d8ff268 <Peter Schafhalter> Add type checking in assert_equal d5e5e5db <Peter Schafhalter> Fix python2 integer serialization bug --- cpp/src/arrow/python/arrow_to_python.cc | 10 +++++++++- cpp/src/arrow/python/python_to_arrow.cc | 11 ++++++++++- python/pyarrow/tests/test_serialization.py | 4 ++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc index 57884e5..52d2a2d 100644 --- a/cpp/src/arrow/python/arrow_to_python.cc +++ b/cpp/src/arrow/python/arrow_to_python.cc @@ -112,9 +112,17 @@ Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr, case Type::BOOL: *result = PyBool_FromLong(checked_cast<const BooleanArray&>(arr).Value(index)); return Status::OK(); - case Type::INT64: + case Type::INT64: { +#if PY_MAJOR_VERSION < 3 + const std::string& child_name = parent.type()->child(type)->name(); + if (child_name == "py2_int") { + *result = PyInt_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index)); + return Status::OK(); + } +#endif *result = PyLong_FromSsize_t(checked_cast<const Int64Array&>(arr).Value(index)); return Status::OK(); + } case Type::BINARY: { int32_t nchars; const uint8_t* str = checked_cast<const BinaryArray&>(arr).GetValue(index, &nchars); diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index 64cf2b4..23445ee 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -61,6 +61,7 @@ class SequenceBuilder { nones_(pool), bools_(::arrow::boolean(), pool), ints_(::arrow::int64(), pool), + py2_ints_(::arrow::int64(), pool), bytes_(::arrow::binary(), pool), strings_(pool), half_floats_(::arrow::float16(), pool), @@ -103,6 +104,11 @@ class SequenceBuilder { return AppendPrimitive(data, &bool_tag_, &bools_); } + /// Appending a python 2 int64_t to the sequence + Status AppendPy2Int64(const int64_t data) { + return AppendPrimitive(data, &py2_int_tag_, &py2_ints_); + } + /// Appending an int64_t to the sequence Status AppendInt64(const int64_t data) { return AppendPrimitive(data, &int_tag_, &ints_); @@ -250,6 +256,7 @@ class SequenceBuilder { RETURN_NOT_OK(AddElement(bool_tag_, &bools_)); RETURN_NOT_OK(AddElement(int_tag_, &ints_)); + RETURN_NOT_OK(AddElement(py2_int_tag_, &py2_ints_, "py2_int")); RETURN_NOT_OK(AddElement(string_tag_, &strings_)); RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_)); RETURN_NOT_OK(AddElement(half_float_tag_, &half_floats_)); @@ -280,6 +287,7 @@ class SequenceBuilder { NullBuilder nones_; BooleanBuilder bools_; Int64Builder ints_; + Int64Builder py2_ints_; BinaryBuilder bytes_; StringBuilder strings_; HalfFloatBuilder half_floats_; @@ -302,6 +310,7 @@ class SequenceBuilder { // happens in the UPDATE macro in sequence.cc. int8_t bool_tag_ = -1; int8_t int_tag_ = -1; + int8_t py2_int_tag_ = -1; int8_t string_tag_ = -1; int8_t bytes_tag_ = -1; int8_t half_float_tag_ = -1; @@ -481,7 +490,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, } #if PY_MAJOR_VERSION < 3 } else if (PyInt_Check(elem)) { - RETURN_NOT_OK(builder->AppendInt64(static_cast<int64_t>(PyInt_AS_LONG(elem)))); + RETURN_NOT_OK(builder->AppendPy2Int64(static_cast<int64_t>(PyInt_AS_LONG(elem)))); #endif } else if (PyBytes_Check(elem)) { auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem)); diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py index f288611..94986a7 100644 --- a/python/pyarrow/tests/test_serialization.py +++ b/python/pyarrow/tests/test_serialization.py @@ -100,8 +100,8 @@ def assert_equal(obj1, obj2): for i in range(len(obj1)): assert_equal(obj1[i], obj2[i]) else: - assert obj1 == obj2, ("Objects {} and {} are different." - .format(obj1, obj2)) + assert type(obj1) == type(obj2) and obj1 == obj2, \ + "Objects {} and {} are different.".format(obj1, obj2) PRIMITIVE_OBJECTS = [ -- To stop receiving notification emails like this one, please contact pcmor...@apache.org.