[ 
https://issues.apache.org/jira/browse/ARROW-2151?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16366015#comment-16366015
 ] 

ASF GitHub Bot commented on ARROW-2151:
---------------------------------------

xhochy closed pull request #1602: ARROW-2151: [Python] Fix conversion from 
np.uint64 scalars
URL: https://github.com/apache/arrow/pull/1602
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/builtin_convert.cc 
b/cpp/src/arrow/python/builtin_convert.cc
index 60f58ea5b..a286c6bd5 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -538,8 +538,8 @@ class UInt64Converter : public 
TypedConverterVisitor<UInt64Builder, UInt64Conver
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
-    const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
-    RETURN_IF_PYERROR();
+    uint64_t val;
+    RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val));
     return typed_builder_->Append(val);
   }
 };
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 494f92900..df1db9991 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) {
 #endif
 }
 
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
+  OwnedRef ref;
+  // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints
+  // (e.g. np.uint64), so do it ourselves
+  if (!PyLong_Check(obj)) {
+    ref.reset(PyNumber_Long(obj));
+    RETURN_IF_PYERROR();
+    obj = ref.obj();
+  }
+  auto result = static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
+  if (result == static_cast<uint64_t>(-1)) {
+    RETURN_IF_PYERROR();
+  }
+  *out = static_cast<uint64_t>(result);
+  return Status::OK();
+}
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c82bdabc4..c0171aa2f 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal, 
const DecimalType& arr
                                 Decimal128* out);
 bool IsPyInteger(PyObject* obj);
 
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/python/pyarrow/tests/test_convert_builtin.py 
b/python/pyarrow/tests/test_convert_builtin.py
index 5cd4a52a2..8423ff00b 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -28,6 +28,20 @@
 import six
 
 
+int_type_pairs = [
+    (np.int8, pa.int8()),
+    (np.int16, pa.int16()),
+    (np.int32, pa.int32()),
+    (np.int64, pa.int64()),
+    (np.uint8, pa.uint8()),
+    (np.uint16, pa.uint16()),
+    (np.uint32, pa.uint32()),
+    (np.uint64, pa.uint64())]
+
+
+np_int_types, _ = zip(*int_type_pairs)
+
+
 class StrangeIterable:
     def __init__(self, lst):
         self.lst = lst
@@ -146,7 +160,20 @@ def test_sequence_all_none():
 
 
 @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-def test_sequence_integer(seq):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
+    expected = [1, None, 3, None,
+                np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
+    arr = pa.array(seq(expected), type=pa_type)
+    assert len(arr) == 6
+    assert arr.null_count == 2
+    assert arr.type == pa_type
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_integer_inferred(seq):
     expected = [1, None, 3, None]
     arr = pa.array(seq(expected))
     assert len(arr) == 4
@@ -156,12 +183,32 @@ def test_sequence_integer(seq):
 
 
 @pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
-                                       np.uint32, np.uint64])
-def test_sequence_numpy_integer(seq, np_scalar):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
+    expected = [np_scalar(1), None, np_scalar(3), None,
+                np_scalar(np.iinfo(np_scalar).min),
+                np_scalar(np.iinfo(np_scalar).max)]
+    arr = pa.array(seq(expected), type=pa_type)
+    assert len(arr) == 6
+    assert arr.null_count == 2
+    assert arr.type == pa_type
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
+    np_scalar, pa_type = np_scalar_pa_type
     expected = [np_scalar(1), None, np_scalar(3), None]
+    if np_scalar != np.uint64:
+        expected += [np_scalar(np.iinfo(np_scalar).min),
+                     np_scalar(np.iinfo(np_scalar).max)]
+    else:
+        # max(uint64) is too large for the inferred int64 type
+        expected += [0, np.iinfo(np.int64).max]
     arr = pa.array(seq(expected))
-    assert len(arr) == 4
+    assert len(arr) == 6
     assert arr.null_count == 2
     assert arr.type == pa.int64()
     assert arr.to_pylist() == expected
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index 5b6f6bcdf..95137ffb2 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1328,6 +1328,15 @@ def test_empty_list_roundtrip(self):
 
         tm.assert_frame_equal(result, df)
 
+    def test_array_from_nested_arrays(self):
+        df, schema = dataframe_with_arrays()
+        for field in schema:
+            arr = df[field.name].values
+            expected = pa.array(list(arr), type=field.type)
+            result = pa.array(arr)
+            assert result.type == field.type  # == list<scalar>
+            assert result.equals(expected)
+
 
 class TestConvertStructTypes(object):
     """


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Error when converting from list of uint64 arrays
> ---------------------------------------------------------
>
>                 Key: ARROW-2151
>                 URL: https://issues.apache.org/jira/browse/ARROW-2151
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>    Affects Versions: 0.8.0
>            Reporter: Antoine Pitrou
>            Assignee: Antoine Pitrou
>            Priority: Minor
>              Labels: pull-request-available
>             Fix For: 0.9.0
>
>
> {code:python}
> >>> pa.array(np.uint64([0,1,2]), type=pa.uint64())
> <pyarrow.lib.UInt64Array object at 0x7f0e6c1eab88>
> [
>   0,
>   1,
>   2
> ]
> >>> pa.array([np.uint64([0,1,2])], type=pa.list_(pa.uint64()))
> Traceback (most recent call last):
>   File "<ipython-input-70-1b103d3e4574>", line 1, in <module>
>     pa.array([np.uint64([0,1,2])], type=pa.list_(pa.uint64()))
>   File "array.pxi", line 181, in pyarrow.lib.array
>   File "array.pxi", line 36, in pyarrow.lib._sequence_to_array
>   File "error.pxi", line 98, in pyarrow.lib.check_status
> ArrowException: Unknown error: 
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:979 code: 
> AppendPySequence(seq, size, real_type, builder.get())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code: 
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code: 
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:542 code: 
> CheckPyError()
> an integer is required
> {code}
> More simply, it also fails with a sequence of Numpy uint64 scalars:
> {code:python}
> >>> pa.array([np.uint64(1)], type=pa.uint64())
> Traceback (most recent call last):
>   File "<ipython-input-71-679cb6c34137>", line 1, in <module>
>     pa.array([np.uint64(1)], type=pa.uint64())
>   File "array.pxi", line 181, in pyarrow.lib.array
>   File "array.pxi", line 36, in pyarrow.lib._sequence_to_array
>   File "error.pxi", line 98, in pyarrow.lib.check_status
> ArrowException: Unknown error: 
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:979 code: 
> AppendPySequence(seq, size, real_type, builder.get())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code: 
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:542 code: 
> CheckPyError()
> an integer is required
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to