This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3d41978  ARROW-2611: [Python] Fix Python 2 integer serialization
3d41978 is described below

commit 3d419781b662c0dcdf922edaaff7202c0414c7ed
Author: Peter Schafhalter <pschafhal...@berkeley.edu>
AuthorDate: Thu May 17 17:53:34 2018 -0700

    ARROW-2611: [Python] Fix Python 2 integer serialization
    
    Fixes an issue where serialization turns integers into longs in Python 2.
    
    ```python
    In [1]: import pyarrow as pa
    
    In [2]: value = 1
    
    In [3]: type(value)
    Out[3]: int
    
    In [4]: serialized = pa.serialize(value)
    
    In [5]: deserialized = serialized.deserialize()
    
    In [6]: type(deserialized)
    Out[6]: long
    ```
    
    Author: Peter Schafhalter <pschafhal...@berkeley.edu>
    
    Closes #2055 from pschafhalter/fix-python2-int-serialization and squashes the following commits:
    
    7b96b679 <Peter Schafhalter> Fix bug with Python 3 C++ API
    5d8ff268 <Peter Schafhalter> Add type checking in assert_equal
    d5e5e5db <Peter Schafhalter> Fix python2 integer serialization bug
---
 cpp/src/arrow/python/arrow_to_python.cc    | 10 +++++++++-
 cpp/src/arrow/python/python_to_arrow.cc    | 11 ++++++++++-
 python/pyarrow/tests/test_serialization.py |  4 ++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/python/arrow_to_python.cc b/cpp/src/arrow/python/arrow_to_python.cc
index 57884e5..52d2a2d 100644
--- a/cpp/src/arrow/python/arrow_to_python.cc
+++ b/cpp/src/arrow/python/arrow_to_python.cc
@@ -112,9 +112,17 @@ Status GetValue(PyObject* context, const UnionArray& parent, const Array& arr,
     case Type::BOOL:
       *result = PyBool_FromLong(checked_cast<const 
BooleanArray&>(arr).Value(index));
       return Status::OK();
-    case Type::INT64:
+    case Type::INT64: {
+#if PY_MAJOR_VERSION < 3
+      const std::string& child_name = parent.type()->child(type)->name();
+      if (child_name == "py2_int") {
+        *result = PyInt_FromSsize_t(checked_cast<const 
Int64Array&>(arr).Value(index));
+        return Status::OK();
+      }
+#endif
       *result = PyLong_FromSsize_t(checked_cast<const 
Int64Array&>(arr).Value(index));
       return Status::OK();
+    }
     case Type::BINARY: {
       int32_t nchars;
       const uint8_t* str = checked_cast<const 
BinaryArray&>(arr).GetValue(index, &nchars);
diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc
index 64cf2b4..23445ee 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -61,6 +61,7 @@ class SequenceBuilder {
         nones_(pool),
         bools_(::arrow::boolean(), pool),
         ints_(::arrow::int64(), pool),
+        py2_ints_(::arrow::int64(), pool),
         bytes_(::arrow::binary(), pool),
         strings_(pool),
         half_floats_(::arrow::float16(), pool),
@@ -103,6 +104,11 @@ class SequenceBuilder {
     return AppendPrimitive(data, &bool_tag_, &bools_);
   }
 
+  /// Appending a python 2 int64_t to the sequence
+  Status AppendPy2Int64(const int64_t data) {
+    return AppendPrimitive(data, &py2_int_tag_, &py2_ints_);
+  }
+
   /// Appending an int64_t to the sequence
   Status AppendInt64(const int64_t data) {
     return AppendPrimitive(data, &int_tag_, &ints_);
@@ -250,6 +256,7 @@ class SequenceBuilder {
 
     RETURN_NOT_OK(AddElement(bool_tag_, &bools_));
     RETURN_NOT_OK(AddElement(int_tag_, &ints_));
+    RETURN_NOT_OK(AddElement(py2_int_tag_, &py2_ints_, "py2_int"));
     RETURN_NOT_OK(AddElement(string_tag_, &strings_));
     RETURN_NOT_OK(AddElement(bytes_tag_, &bytes_));
     RETURN_NOT_OK(AddElement(half_float_tag_, &half_floats_));
@@ -280,6 +287,7 @@ class SequenceBuilder {
   NullBuilder nones_;
   BooleanBuilder bools_;
   Int64Builder ints_;
+  Int64Builder py2_ints_;
   BinaryBuilder bytes_;
   StringBuilder strings_;
   HalfFloatBuilder half_floats_;
@@ -302,6 +310,7 @@ class SequenceBuilder {
   // happens in the UPDATE macro in sequence.cc.
   int8_t bool_tag_ = -1;
   int8_t int_tag_ = -1;
+  int8_t py2_int_tag_ = -1;
   int8_t string_tag_ = -1;
   int8_t bytes_tag_ = -1;
   int8_t half_float_tag_ = -1;
@@ -481,7 +490,7 @@ Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder,
     }
 #if PY_MAJOR_VERSION < 3
   } else if (PyInt_Check(elem)) {
-    
RETURN_NOT_OK(builder->AppendInt64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
+    
RETURN_NOT_OK(builder->AppendPy2Int64(static_cast<int64_t>(PyInt_AS_LONG(elem))));
 #endif
   } else if (PyBytes_Check(elem)) {
     auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index f288611..94986a7 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -100,8 +100,8 @@ def assert_equal(obj1, obj2):
         for i in range(len(obj1)):
             assert_equal(obj1[i], obj2[i])
     else:
-        assert obj1 == obj2, ("Objects {} and {} are different."
-                              .format(obj1, obj2))
+        assert type(obj1) == type(obj2) and obj1 == obj2, \
+                "Objects {} and {} are different.".format(obj1, obj2)
 
 
 PRIMITIVE_OBJECTS = [

-- 
To stop receiving notification emails like this one, please contact
pcmor...@apache.org.

Reply via email to