This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 59030fe ARROW-1730, ARROW-1738: [Python] Fix wrong datetime conversion 59030fe is described below commit 59030fe1d319d36157554fb7880e9b574c60a3d1 Author: Licht-T <lich...@outlook.jp> AuthorDate: Thu Oct 26 22:44:46 2017 -0400 ARROW-1730, ARROW-1738: [Python] Fix wrong datetime conversion This closes [ARROW-1730](https://issues.apache.org/jira/browse/ARROW-1730) and [ARROW-1738](https://issues.apache.org/jira/browse/ARROW-1738). Author: Licht-T <lich...@outlook.jp> Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #1256 from Licht-T/fix-pylong-force-cast-to-date-type and squashes the following commits: b4334641 [Wes McKinney] Code review comments, Add test cases for invalid values 23ad1a52 [Licht-T] Fix C++ lint issues d9cd15b0 [Licht-T] TST: Add tests for datetime converter 560a97e4 [Licht-T] BUG: Implement time unit resolution converter c1063eb2 [Licht-T] BUG: Fix PyLong force casting to PyDateTime/PyDate type --- cpp/src/arrow/python/builtin_convert.cc | 44 ++++++++++++++++-- cpp/src/arrow/python/util/datetime.h | 14 ++++++ python/pyarrow/tests/test_convert_builtin.py | 69 ++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc index f7862d1..d52627e 100644 --- a/cpp/src/arrow/python/builtin_convert.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -522,18 +522,51 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver class DateConverter : public TypedConverterVisitor<Date64Builder, DateConverter> { public: inline Status AppendItem(const OwnedRef& item) { - auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj()); - return typed_builder_->Append(PyDate_to_ms(pydate)); + int64_t t; + if (PyDate_Check(item.obj())) { + auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj()); + t = PyDate_to_ms(pydate); + } else { + t = 
static_cast<int64_t>(PyLong_AsLongLong(item.obj())); + RETURN_IF_PYERROR(); + } + return typed_builder_->Append(t); } }; class TimestampConverter : public TypedConverterVisitor<Date64Builder, TimestampConverter> { public: + explicit TimestampConverter(TimeUnit::type unit) : unit_(unit) {} + inline Status AppendItem(const OwnedRef& item) { - auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj()); - return typed_builder_->Append(PyDateTime_to_us(pydatetime)); + int64_t t; + if (PyDateTime_Check(item.obj())) { + auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj()); + + switch (unit_) { + case TimeUnit::SECOND: + t = PyDateTime_to_s(pydatetime); + break; + case TimeUnit::MILLI: + t = PyDateTime_to_ms(pydatetime); + break; + case TimeUnit::MICRO: + t = PyDateTime_to_us(pydatetime); + break; + case TimeUnit::NANO: + t = PyDateTime_to_ns(pydatetime); + break; + } + } else { + t = static_cast<int64_t>(PyLong_AsLongLong(item.obj())); + RETURN_IF_PYERROR(); + } + return typed_builder_->Append(t); } + + private: + TimeUnit::type unit_; }; class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> { @@ -687,7 +720,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type case Type::DATE64: return std::make_shared<DateConverter>(); case Type::TIMESTAMP: - return std::make_shared<TimestampConverter>(); + return std::make_shared<TimestampConverter>( + static_cast<const TimestampType&>(*type).unit()); case Type::DOUBLE: return std::make_shared<DoubleConverter>(); case Type::BINARY: diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h index 782960f..c110bc6 100644 --- a/cpp/src/arrow/python/util/datetime.h +++ b/cpp/src/arrow/python/util/datetime.h @@ -247,12 +247,26 @@ static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) { return total_seconds * 1000; } +static inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) { + return 
PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL; +} + +static inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) { + int64_t date_ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)); + int ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000; + return date_ms + ms; +} + static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) { int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)); int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime); return ms * 1000 + us; } +static inline int64_t PyDateTime_to_ns(PyDateTime_DateTime* pydatetime) { + return PyDateTime_to_us(pydatetime) * 1000; +} + static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) { return static_cast<int32_t>(PyDate_to_ms(pydate) / 86400000LL); } diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index d18ed95..414266d 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -197,6 +197,75 @@ class TestConvertSequence(unittest.TestCase): assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5, 46, 57, 437699) + def test_timestamp_with_unit(self): + data = [ + datetime.datetime(2007, 7, 13, 1, 23, 34, 123456), + ] + + s = pa.timestamp('s') + ms = pa.timestamp('ms') + us = pa.timestamp('us') + ns = pa.timestamp('ns') + + arr_s = pa.array(data, type=s) + assert len(arr_s) == 1 + assert arr_s.type == s + assert arr_s[0].as_py() == datetime.datetime(2007, 7, 13, 1, + 23, 34, 0) + + arr_ms = pa.array(data, type=ms) + assert len(arr_ms) == 1 + assert arr_ms.type == ms + assert arr_ms[0].as_py() == datetime.datetime(2007, 7, 13, 1, + 23, 34, 123000) + + arr_us = pa.array(data, type=us) + assert len(arr_us) == 1 + assert arr_us.type == us + assert arr_us[0].as_py() == datetime.datetime(2007, 7, 13, 1, + 23, 34, 123456) + + arr_ns = pa.array(data, type=ns) + assert len(arr_ns) == 1 + assert arr_ns.type == ns + 
assert arr_ns[0].as_py() == datetime.datetime(2007, 7, 13, 1, + 23, 34, 123456) + + def test_timestamp_from_int_with_unit(self): + data = [1] + + s = pa.timestamp('s') + ms = pa.timestamp('ms') + us = pa.timestamp('us') + ns = pa.timestamp('ns') + + arr_s = pa.array(data, type=s) + assert len(arr_s) == 1 + assert arr_s.type == s + assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')" + + arr_ms = pa.array(data, type=ms) + assert len(arr_ms) == 1 + assert arr_ms.type == ms + assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')" + + arr_us = pa.array(data, type=us) + assert len(arr_us) == 1 + assert arr_us.type == us + assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')" + + arr_ns = pa.array(data, type=ns) + assert len(arr_ns) == 1 + assert arr_ns.type == ns + assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')" + + with pytest.raises(pa.ArrowException): + class CustomClass(): + pass + pa.array([1, CustomClass()], type=ns) + pa.array([1, CustomClass()], type=pa.date32()) + pa.array([1, CustomClass()], type=pa.date64()) + def test_mixed_nesting_levels(self): pa.array([1, 2, None]) pa.array([[1], [2], None]) -- To stop receiving notification emails like this one, please contact commits@arrow.apache.org.