Repository: arrow Updated Branches: refs/heads/master f268e927a -> 4c3481ea5
ARROW-535: [Python] Add type mapping for NPY_LONGLONG Based on https://github.com/wesm/feather/pull/107 Author: Uwe L. Korn <uw...@xhochy.com> Closes #323 from xhochy/ARROW-535 and squashes the following commits: 72221fa [Uwe L. Korn] Address review comments 5d3c046 [Uwe L. Korn] ARROW-535: [Python] Add type mapping for NPY_LONGLONG Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4c3481ea Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4c3481ea Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4c3481ea Branch: refs/heads/master Commit: 4c3481ea5438d52878f390b0f562f6113f111a8f Parents: f268e92 Author: Uwe L. Korn <uw...@xhochy.com> Authored: Tue Feb 7 11:13:00 2017 -0500 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Tue Feb 7 11:13:00 2017 -0500 ---------------------------------------------------------------------- python/pyarrow/tests/test_convert_pandas.py | 6 +++-- python/src/pyarrow/adapters/pandas.cc | 29 ++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/pyarrow/tests/test_convert_pandas.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index f04fbe5..960653d 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -127,13 +127,14 @@ class TestPandasConversion(unittest.TestCase): tm.assert_frame_equal(result, ex_frame) def test_integer_no_nulls(self): - data = {} + data = OrderedDict() fields = [] numpy_dtypes = [('i1', A.int8()), ('i2', A.int16()), ('i4', A.int32()), ('i8', A.int64()), ('u1', A.uint8()), ('u2', A.uint16()), - ('u4', A.uint32()), ('u8', A.uint64())] + ('u4', A.uint32()), ('u8', A.uint64()), + ('longlong', A.int64()), ('ulonglong', A.uint64())] num_values = 100 for dtype, arrow_dtype in numpy_dtypes: @@ -148,6 +149,7 @@ class TestPandasConversion(unittest.TestCase): schema = A.Schema.from_fields(fields) self._check_pandas_roundtrip(df, expected_schema=schema) + def test_integer_with_nulls(self): # pandas requires upcast to float dtype http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/src/pyarrow/adapters/pandas.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index 345dc90..b4e0d2f 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -91,11 +91,17 @@ NPY_INT_DECL(INT8, Int8, int8_t); NPY_INT_DECL(INT16, Int16, int16_t); NPY_INT_DECL(INT32, Int32, int32_t); NPY_INT_DECL(INT64, Int64, int64_t); + NPY_INT_DECL(UINT8, UInt8, uint8_t); NPY_INT_DECL(UINT16, UInt16, uint16_t); NPY_INT_DECL(UINT32, UInt32, uint32_t); NPY_INT_DECL(UINT64, UInt64, uint64_t); +#if NPY_INT64 != NPY_LONGLONG +NPY_INT_DECL(LONGLONG, Int64, int64_t); +NPY_INT_DECL(ULONGLONG, UInt64, uint64_t); +#endif + template <> struct npy_traits<NPY_FLOAT32> { typedef float value_type; @@ -1706,16 +1712,35 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, return Status::Invalid("only handle 1-dimensional arrays"); } - switch (PyArray_DESCR(arr)->type_num) { + int type_num = PyArray_DESCR(arr)->type_num; + +#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8) + // Both LONGLONG and INT64 can be observed in the wild, which is buggy. We set + // U/LONGLONG to U/INT64 so things work properly. + if (type_num == NPY_LONGLONG) { + type_num = NPY_INT64; + } + if (type_num == NPY_ULONGLONG) { + type_num = NPY_UINT64; + } +#endif + + switch (type_num) { TO_ARROW_CASE(BOOL); TO_ARROW_CASE(INT8); TO_ARROW_CASE(INT16); TO_ARROW_CASE(INT32); TO_ARROW_CASE(INT64); +#if (NPY_INT64 != NPY_LONGLONG) + TO_ARROW_CASE(LONGLONG); +#endif TO_ARROW_CASE(UINT8); TO_ARROW_CASE(UINT16); TO_ARROW_CASE(UINT32); TO_ARROW_CASE(UINT64); +#if (NPY_UINT64 != NPY_ULONGLONG) + TO_ARROW_CASE(ULONGLONG); +#endif TO_ARROW_CASE(FLOAT32); TO_ARROW_CASE(FLOAT64); TO_ARROW_CASE(DATETIME); @@ -1726,7 +1751,7 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo, } break; default: std::stringstream ss; - ss << "unsupported type " << PyArray_DESCR(arr)->type_num << std::endl; + ss << "Unsupported numpy type " << PyArray_DESCR(arr)->type_num << std::endl; return Status::NotImplemented(ss.str()); } return Status::OK();