[
https://issues.apache.org/jira/browse/ARROW-2151?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16366015#comment-16366015
]
ASF GitHub Bot commented on ARROW-2151:
---------------------------------------
xhochy closed pull request #1602: ARROW-2151: [Python] Fix conversion from
np.uint64 scalars
URL: https://github.com/apache/arrow/pull/1602
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/cpp/src/arrow/python/builtin_convert.cc
b/cpp/src/arrow/python/builtin_convert.cc
index 60f58ea5b..a286c6bd5 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -538,8 +538,8 @@ class UInt64Converter : public
TypedConverterVisitor<UInt64Builder, UInt64Conver
public:
// Append a non-missing item
Status AppendItem(PyObject* obj) {
- const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
- RETURN_IF_PYERROR();
+ uint64_t val;
+ RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val));
return typed_builder_->Append(val);
}
};
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 494f92900..df1db9991 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) {
#endif
}
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
+ OwnedRef ref;
+ // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints
+ // (e.g. np.uint64), so do it ourselves
+ if (!PyLong_Check(obj)) {
+ ref.reset(PyNumber_Long(obj));
+ RETURN_IF_PYERROR();
+ obj = ref.obj();
+ }
+ auto result = static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
+ if (result == static_cast<uint64_t>(-1)) {
+ RETURN_IF_PYERROR();
+ }
+ *out = static_cast<uint64_t>(result);
+ return Status::OK();
+}
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c82bdabc4..c0171aa2f 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal,
const DecimalType& arr
Decimal128* out);
bool IsPyInteger(PyObject* obj);
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/python/pyarrow/tests/test_convert_builtin.py
b/python/pyarrow/tests/test_convert_builtin.py
index 5cd4a52a2..8423ff00b 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -28,6 +28,20 @@
import six
+int_type_pairs = [
+ (np.int8, pa.int8()),
+ (np.int16, pa.int64()),
+ (np.int32, pa.int32()),
+ (np.int64, pa.int64()),
+ (np.uint8, pa.uint8()),
+ (np.uint16, pa.uint64()),
+ (np.uint32, pa.uint32()),
+ (np.uint64, pa.uint64())]
+
+
+np_int_types, _ = zip(*int_type_pairs)
+
+
class StrangeIterable:
def __init__(self, lst):
self.lst = lst
@@ -146,7 +160,20 @@ def test_sequence_all_none():
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-def test_sequence_integer(seq):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
+ expected = [1, None, 3, None,
+ np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
+ arr = pa.array(seq(expected), type=pa_type)
+ assert len(arr) == 6
+ assert arr.null_count == 2
+ assert arr.type == pa_type
+ assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_integer_inferred(seq):
expected = [1, None, 3, None]
arr = pa.array(seq(expected))
assert len(arr) == 4
@@ -156,12 +183,32 @@ def test_sequence_integer(seq):
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
- np.uint32, np.uint64])
-def test_sequence_numpy_integer(seq, np_scalar):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
+ expected = [np_scalar(1), None, np_scalar(3), None,
+ np_scalar(np.iinfo(np_scalar).min),
+ np_scalar(np.iinfo(np_scalar).max)]
+ arr = pa.array(seq(expected), type=pa_type)
+ assert len(arr) == 6
+ assert arr.null_count == 2
+ assert arr.type == pa_type
+ assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
expected = [np_scalar(1), None, np_scalar(3), None]
+ if np_scalar != np.uint64:
+ expected += [np_scalar(np.iinfo(np_scalar).min),
+ np_scalar(np.iinfo(np_scalar).max)]
+ else:
+ # max(uint64) is too large for the inferred int64 type
+ expected += [0, np.iinfo(np.int64).max]
arr = pa.array(seq(expected))
- assert len(arr) == 4
+ assert len(arr) == 6
assert arr.null_count == 2
assert arr.type == pa.int64()
assert arr.to_pylist() == expected
diff --git a/python/pyarrow/tests/test_convert_pandas.py
b/python/pyarrow/tests/test_convert_pandas.py
index 5b6f6bcdf..95137ffb2 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1328,6 +1328,15 @@ def test_empty_list_roundtrip(self):
tm.assert_frame_equal(result, df)
+ def test_array_from_nested_arrays(self):
+ df, schema = dataframe_with_arrays()
+ for field in schema:
+ arr = df[field.name].values
+ expected = pa.array(list(arr), type=field.type)
+ result = pa.array(arr)
+ assert result.type == field.type # == list<scalar>
+ assert result.equals(expected)
+
class TestConvertStructTypes(object):
"""
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> [Python] Error when converting from list of uint64 arrays
> ---------------------------------------------------------
>
> Key: ARROW-2151
> URL: https://issues.apache.org/jira/browse/ARROW-2151
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Affects Versions: 0.8.0
> Reporter: Antoine Pitrou
> Assignee: Antoine Pitrou
> Priority: Minor
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> {code:python}
> >>> pa.array(np.uint64([0,1,2]), type=pa.uint64())
> <pyarrow.lib.UInt64Array object at 0x7f0e6c1eab88>
> [
> 0,
> 1,
> 2
> ]
> >>> pa.array([np.uint64([0,1,2])], type=pa.list_(pa.uint64()))
> Traceback (most recent call last):
> File "<ipython-input-70-1b103d3e4574>", line 1, in <module>
> pa.array([np.uint64([0,1,2])], type=pa.list_(pa.uint64()))
> File "array.pxi", line 181, in pyarrow.lib.array
> File "array.pxi", line 36, in pyarrow.lib._sequence_to_array
> File "error.pxi", line 98, in pyarrow.lib.check_status
> ArrowException: Unknown error:
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:979 code:
> AppendPySequence(seq, size, real_type, builder.get())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code:
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code:
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:542 code:
> CheckPyError()
> an integer is required
> {code}
> More simply, it also fails with a sequence of Numpy uint64 scalars:
> {code:python}
> >>> pa.array([np.uint64(1)], type=pa.uint64())
> Traceback (most recent call last):
> File "<ipython-input-71-679cb6c34137>", line 1, in <module>
> pa.array([np.uint64(1)], type=pa.uint64())
> File "array.pxi", line 181, in pyarrow.lib.array
> File "array.pxi", line 36, in pyarrow.lib._sequence_to_array
> File "error.pxi", line 98, in pyarrow.lib.check_status
> ArrowException: Unknown error:
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:979 code:
> AppendPySequence(seq, size, real_type, builder.get())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:402 code:
> static_cast<Derived*>(this)->AppendSingle(ref.obj())
> /home/antoine/arrow/cpp/src/arrow/python/builtin_convert.cc:542 code:
> CheckPyError()
> an integer is required
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)