This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0265fb4  ARROW-2151: [Python] Fix conversion from np.uint64 scalars
0265fb4 is described below

commit 0265fb4012c7ceb25b755b6995428b4d74ef9e66
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 18:50:32 2018 +0100

    ARROW-2151: [Python] Fix conversion from np.uint64 scalars
    
    Also add tests for ARROW-1345, ARROW-2008.
    
    Author: Antoine Pitrou <[email protected]>
    
    Closes #1602 from pitrou/ARROW-2151-convert-np-uint64 and squashes the following commits:
    
    613af18 [Antoine Pitrou] ARROW-2151: [Python] Fix conversion from np.uint64 scalars
---
 cpp/src/arrow/python/builtin_convert.cc      |  4 +-
 cpp/src/arrow/python/helpers.cc              | 17 +++++++++
 cpp/src/arrow/python/helpers.h               |  2 +
 python/pyarrow/tests/test_convert_builtin.py | 57 +++++++++++++++++++++++++---
 python/pyarrow/tests/test_convert_pandas.py  |  9 +++++
 5 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 60f58ea..a286c6b 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -538,8 +538,8 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
-    const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
-    RETURN_IF_PYERROR();
+    uint64_t val;
+    RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val));
     return typed_builder_->Append(val);
   }
 };
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 494f929..df1db99 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) {
 #endif
 }
 
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
+  OwnedRef ref;
+  // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints
+  // (e.g. np.uint64), so do it ourselves
+  if (!PyLong_Check(obj)) {
+    ref.reset(PyNumber_Long(obj));
+    RETURN_IF_PYERROR();
+    obj = ref.obj();
+  }
+  auto result = static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
+  if (result == static_cast<uint64_t>(-1)) {
+    RETURN_IF_PYERROR();
+  }
+  *out = static_cast<uint64_t>(result);
+  return Status::OK();
+}
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c82bdab..c0171aa 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arr
                                 Decimal128* out);
 bool IsPyInteger(PyObject* obj);
 
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 5cd4a52..8423ff0 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -28,6 +28,20 @@ import numpy as np
 import six
 
 
+int_type_pairs = [
+    (np.int8, pa.int8()),
+    (np.int16, pa.int64()),
+    (np.int32, pa.int32()),
+    (np.int64, pa.int64()),
+    (np.uint8, pa.uint8()),
+    (np.uint16, pa.uint64()),
+    (np.uint32, pa.uint32()),
+    (np.uint64, pa.uint64())]
+
+
+np_int_types, _ = zip(*int_type_pairs)
+
+
 class StrangeIterable:
     def __init__(self, lst):
         self.lst = lst
@@ -146,7 +160,20 @@ def test_sequence_all_none():
 
 
 @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-def test_sequence_integer(seq):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
+    expected = [1, None, 3, None,
+                np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
+    arr = pa.array(seq(expected), type=pa_type)
+    assert len(arr) == 6
+    assert arr.null_count == 2
+    assert arr.type == pa_type
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_integer_inferred(seq):
     expected = [1, None, 3, None]
     arr = pa.array(seq(expected))
     assert len(arr) == 4
@@ -156,12 +183,32 @@ def test_sequence_integer(seq):
 
 
 @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
-                                       np.uint32, np.uint64])
-def test_sequence_numpy_integer(seq, np_scalar):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
+    expected = [np_scalar(1), None, np_scalar(3), None,
+                np_scalar(np.iinfo(np_scalar).min),
+                np_scalar(np.iinfo(np_scalar).max)]
+    arr = pa.array(seq(expected), type=pa_type)
+    assert len(arr) == 6
+    assert arr.null_count == 2
+    assert arr.type == pa_type
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
     expected = [np_scalar(1), None, np_scalar(3), None]
+    if np_scalar != np.uint64:
+        expected += [np_scalar(np.iinfo(np_scalar).min),
+                     np_scalar(np.iinfo(np_scalar).max)]
+    else:
+        # max(uint64) is too large for the inferred int64 type
+        expected += [0, np.iinfo(np.int64).max]
     arr = pa.array(seq(expected))
-    assert len(arr) == 4
+    assert len(arr) == 6
     assert arr.null_count == 2
     assert arr.type == pa.int64()
     assert arr.to_pylist() == expected
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 5b6f6bc..95137ff 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1328,6 +1328,15 @@ class TestListTypes(object):
 
         tm.assert_frame_equal(result, df)
 
+    def test_array_from_nested_arrays(self):
+        df, schema = dataframe_with_arrays()
+        for field in schema:
+            arr = df[field.name].values
+            expected = pa.array(list(arr), type=field.type)
+            result = pa.array(arr)
+            assert result.type == field.type  # == list<scalar>
+            assert result.equals(expected)
+
 
 class TestConvertStructTypes(object):
     """

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to