This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0265fb4 ARROW-2151: [Python] Fix conversion from np.uint64 scalars
0265fb4 is described below
commit 0265fb4012c7ceb25b755b6995428b4d74ef9e66
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Feb 15 18:50:32 2018 +0100
ARROW-2151: [Python] Fix conversion from np.uint64 scalars
Also add tests for ARROW-1345, ARROW-2008.
Author: Antoine Pitrou <[email protected]>
Closes #1602 from pitrou/ARROW-2151-convert-np-uint64 and squashes the
following commits:
613af18 [Antoine Pitrou] ARROW-2151: [Python] Fix conversion from np.uint64
scalars
---
cpp/src/arrow/python/builtin_convert.cc | 4 +-
cpp/src/arrow/python/helpers.cc | 17 +++++++++
cpp/src/arrow/python/helpers.h | 2 +
python/pyarrow/tests/test_convert_builtin.py | 57 +++++++++++++++++++++++++---
python/pyarrow/tests/test_convert_pandas.py | 9 +++++
5 files changed, 82 insertions(+), 7 deletions(-)
diff --git a/cpp/src/arrow/python/builtin_convert.cc
b/cpp/src/arrow/python/builtin_convert.cc
index 60f58ea..a286c6b 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -538,8 +538,8 @@ class UInt64Converter : public
TypedConverterVisitor<UInt64Builder, UInt64Conver
public:
// Append a non-missing item
Status AppendItem(PyObject* obj) {
- const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
- RETURN_IF_PYERROR();
+ uint64_t val;
+ RETURN_NOT_OK(internal::UInt64FromPythonInt(obj, &val));
return typed_builder_->Append(val);
}
};
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index 494f929..df1db99 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -176,6 +176,23 @@ bool IsPyInteger(PyObject* obj) {
#endif
}
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
+ OwnedRef ref;
+ // PyLong_AsUnsignedLongLong() doesn't handle conversion from non-ints
+ // (e.g. np.uint64), so do it ourselves
+ if (!PyLong_Check(obj)) {
+ ref.reset(PyNumber_Long(obj));
+ RETURN_IF_PYERROR();
+ obj = ref.obj();
+ }
+ auto result = static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
+ if (result == static_cast<uint64_t>(-1)) {
+ RETURN_IF_PYERROR();
+ }
+ *out = static_cast<uint64_t>(result);
+ return Status::OK();
+}
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c82bdab..c0171aa 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -57,6 +57,8 @@ Status DecimalFromPythonDecimal(PyObject* python_decimal,
const DecimalType& arr
Decimal128* out);
bool IsPyInteger(PyObject* obj);
+Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
+
} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/python/pyarrow/tests/test_convert_builtin.py
b/python/pyarrow/tests/test_convert_builtin.py
index 5cd4a52..8423ff0 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -28,6 +28,20 @@ import numpy as np
import six
+int_type_pairs = [
+ (np.int8, pa.int8()),
+ (np.int16, pa.int64()),
+ (np.int32, pa.int32()),
+ (np.int64, pa.int64()),
+ (np.uint8, pa.uint8()),
+ (np.uint16, pa.uint64()),
+ (np.uint32, pa.uint32()),
+ (np.uint64, pa.uint64())]
+
+
+np_int_types, _ = zip(*int_type_pairs)
+
+
class StrangeIterable:
def __init__(self, lst):
self.lst = lst
@@ -146,7 +160,20 @@ def test_sequence_all_none():
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-def test_sequence_integer(seq):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
+ expected = [1, None, 3, None,
+ np.iinfo(np_scalar).min, np.iinfo(np_scalar).max]
+ arr = pa.array(seq(expected), type=pa_type)
+ assert len(arr) == 6
+ assert arr.null_count == 2
+ assert arr.type == pa_type
+ assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_integer_inferred(seq):
expected = [1, None, 3, None]
arr = pa.array(seq(expected))
assert len(arr) == 4
@@ -156,12 +183,32 @@ def test_sequence_integer(seq):
@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
-@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
- np.uint32, np.uint64])
-def test_sequence_numpy_integer(seq, np_scalar):
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
+ expected = [np_scalar(1), None, np_scalar(3), None,
+ np_scalar(np.iinfo(np_scalar).min),
+ np_scalar(np.iinfo(np_scalar).max)]
+ arr = pa.array(seq(expected), type=pa_type)
+ assert len(arr) == 6
+ assert arr.null_count == 2
+ assert arr.type == pa_type
+ assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_numpy_integer_inferred(seq, np_scalar_pa_type):
+ np_scalar, pa_type = np_scalar_pa_type
expected = [np_scalar(1), None, np_scalar(3), None]
+ if np_scalar != np.uint64:
+ expected += [np_scalar(np.iinfo(np_scalar).min),
+ np_scalar(np.iinfo(np_scalar).max)]
+ else:
+ # max(uint64) is too large for the inferred int64 type
+ expected += [0, np.iinfo(np.int64).max]
arr = pa.array(seq(expected))
- assert len(arr) == 4
+ assert len(arr) == 6
assert arr.null_count == 2
assert arr.type == pa.int64()
assert arr.to_pylist() == expected
diff --git a/python/pyarrow/tests/test_convert_pandas.py
b/python/pyarrow/tests/test_convert_pandas.py
index 5b6f6bc..95137ff 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1328,6 +1328,15 @@ class TestListTypes(object):
tm.assert_frame_equal(result, df)
+ def test_array_from_nested_arrays(self):
+ df, schema = dataframe_with_arrays()
+ for field in schema:
+ arr = df[field.name].values
+ expected = pa.array(list(arr), type=field.type)
+ result = pa.array(arr)
+ assert result.type == field.type # == list<scalar>
+ assert result.equals(expected)
+
class TestConvertStructTypes(object):
"""
--
To stop receiving notification emails like this one, please contact
[email protected].