Repository: arrow
Updated Branches:
  refs/heads/master 48f9780a8 -> 78288b5fc
ARROW-371: Handle pandas-nullable types correctly

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #205 from xhochy/ARROW-371 and squashes the following commits:

1f73e8b [Uwe L. Korn] ARROW-371: Handle pandas-nullable types correctly


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/78288b5f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/78288b5f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/78288b5f

Branch: refs/heads/master
Commit: 78288b5fca8ff527257e487d45c7e68f7dbd8cd2
Parents: 48f9780
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Wed Nov 16 16:18:50 2016 -0500
Committer: Wes McKinney <wes.mckin...@twosigma.com>
Committed: Wed Nov 16 16:18:50 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/tests/test_convert_pandas.py | 22 +++++++++++-
 python/src/pyarrow/adapters/pandas.cc       | 46 ++++++++++++------------
 2 files changed, 44 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 5530299..b527ca7 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -165,7 +165,7 @@ class TestPandasConversion(unittest.TestCase):
         expected = pd.DataFrame({'strings': values * repeats})
         self._check_pandas_roundtrip(df, expected)

-    def test_timestamps_notimezone(self):
+    def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([
                 '2007-07-13T01:23:34.123',
@@ -184,6 +184,26 @@ class TestPandasConversion(unittest.TestCase):
             })
         self._check_pandas_roundtrip(df, timestamps_to_ms=False)

+    def test_timestamps_notimezone_nulls(self):
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123',
+                None,
+                '2010-08-13T05:46:57.437'],
+                dtype='datetime64[ms]')
+            })
+        df.info()
+        self._check_pandas_roundtrip(df, timestamps_to_ms=True)
+
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123456789',
+                None,
+                '2010-08-13T05:46:57.437699912'],
+                dtype='datetime64[ns]')
+            })
+        self._check_pandas_roundtrip(df, timestamps_to_ms=False)
+
+
     # def test_category(self):
     #     repeats = 1000
     #     values = [b'foo', None, u'bar', 'qux', np.nan]

http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 6a3966b..1f5b700 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -489,20 +489,20 @@ struct arrow_traits<arrow::Type::BOOL> {
   static constexpr int npy_type = NPY_BOOL;
   static constexpr bool supports_nulls = false;
   static constexpr bool is_boolean = true;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = false;
 };

-#define INT_DECL(TYPE)                                      \
-  template <>                                               \
-  struct arrow_traits<arrow::Type::TYPE> {                  \
-    static constexpr int npy_type = NPY_##TYPE;             \
-    static constexpr bool supports_nulls = false;           \
-    static constexpr double na_value = NAN;                 \
-    static constexpr bool is_boolean = false;               \
-    static constexpr bool is_integer = true;                \
-    static constexpr bool is_floating = false;              \
-    typedef typename npy_traits<NPY_##TYPE>::value_type T;  \
+#define INT_DECL(TYPE)                                            \
+  template <>                                                     \
+  struct arrow_traits<arrow::Type::TYPE> {                        \
+    static constexpr int npy_type = NPY_##TYPE;                   \
+    static constexpr bool supports_nulls = false;                 \
+    static constexpr double na_value = NAN;                       \
+    static constexpr bool is_boolean = false;                     \
+    static constexpr bool is_pandas_numeric_not_nullable = true;  \
+    static constexpr bool is_pandas_numeric_nullable = false;     \
+    typedef typename npy_traits<NPY_##TYPE>::value_type T;        \
   };

 INT_DECL(INT8);
@@ -520,8 +520,8 @@ struct arrow_traits<arrow::Type::FLOAT> {
   static constexpr bool supports_nulls = true;
   static constexpr float na_value = NAN;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = true;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT32>::value_type T;
 };

@@ -531,8 +531,8 @@ struct arrow_traits<arrow::Type::DOUBLE> {
   static constexpr bool supports_nulls = true;
   static constexpr double na_value = NAN;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = true;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT64>::value_type T;
 };

@@ -542,8 +542,8 @@ struct arrow_traits<arrow::Type::TIMESTAMP> {
   static constexpr bool supports_nulls = true;
   static constexpr int64_t na_value = std::numeric_limits<int64_t>::min();
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = true;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_DATETIME>::value_type T;
 };

@@ -552,8 +552,8 @@ struct arrow_traits<arrow::Type::STRING> {
   static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = false;
 };


@@ -655,7 +655,7 @@ class ArrowDeserializer {

   template <int T2>
   inline typename std::enable_if<
-    arrow_traits<T2>::is_floating, Status>::type
+    arrow_traits<T2>::is_pandas_numeric_nullable, Status>::type
   ConvertValues(const std::shared_ptr<Array>& arr) {
     typedef typename arrow_traits<T2>::T T;

@@ -668,7 +668,7 @@ class ArrowDeserializer {
       T* out_values = reinterpret_cast<T*>(PyArray_DATA(out_));

       for (int64_t i = 0; i < arr->length(); ++i) {
-        out_values[i] = arr->IsNull(i) ? NAN : in_values[i];
+        out_values[i] = arr->IsNull(i) ? arrow_traits<T2>::na_value : in_values[i];
       }
     } else {
       // Zero-Copy. We can pass the data pointer directly to NumPy.
@@ -683,7 +683,7 @@ class ArrowDeserializer {
   // Integer specialization
   template <int T2>
   inline typename std::enable_if<
-    arrow_traits<T2>::is_integer, Status>::type
+    arrow_traits<T2>::is_pandas_numeric_not_nullable, Status>::type
   ConvertValues(const std::shared_ptr<Array>& arr) {
     typedef typename arrow_traits<T2>::T T;