This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 59030fe  ARROW-1730, ARROW-1738: [Python] Fix wrong datetime conversion
59030fe is described below

commit 59030fe1d319d36157554fb7880e9b574c60a3d1
Author: Licht-T <lich...@outlook.jp>
AuthorDate: Thu Oct 26 22:44:46 2017 -0400

    ARROW-1730, ARROW-1738: [Python] Fix wrong datetime conversion
    
    This closes [ARROW-1730](https://issues.apache.org/jira/browse/ARROW-1730) and [ARROW-1738](https://issues.apache.org/jira/browse/ARROW-1738).
    
    Author: Licht-T <lich...@outlook.jp>
    Author: Wes McKinney <wes.mckin...@twosigma.com>
    
    Closes #1256 from Licht-T/fix-pylong-force-cast-to-date-type and squashes the following commits:
    
    b4334641 [Wes McKinney] Code review comments, Add test cases for invalid values
    23ad1a52 [Licht-T] Fix C++ lint issues
    d9cd15b0 [Licht-T] TST: Add tests for datetime converter
    560a97e4 [Licht-T] BUG: Implement time unit resolution converter
    c1063eb2 [Licht-T] BUG: Fix PyLong force casting to PyDateTime/PyDate type
---
 cpp/src/arrow/python/builtin_convert.cc      | 44 ++++++++++++++++--
 cpp/src/arrow/python/util/datetime.h         | 14 ++++++
 python/pyarrow/tests/test_convert_builtin.py | 69 ++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index f7862d1..d52627e 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -522,18 +522,51 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver
 class DateConverter : public TypedConverterVisitor<Date64Builder, DateConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
-    auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
-    return typed_builder_->Append(PyDate_to_ms(pydate));
+    int64_t t;
+    if (PyDate_Check(item.obj())) {
+      auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+      t = PyDate_to_ms(pydate);
+    } else {
+      t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+      RETURN_IF_PYERROR();
+    }
+    return typed_builder_->Append(t);
   }
 };
 
 class TimestampConverter
     : public TypedConverterVisitor<Date64Builder, TimestampConverter> {
  public:
+  explicit TimestampConverter(TimeUnit::type unit) : unit_(unit) {}
+
   inline Status AppendItem(const OwnedRef& item) {
-    auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
-    return typed_builder_->Append(PyDateTime_to_us(pydatetime));
+    int64_t t;
+    if (PyDateTime_Check(item.obj())) {
+      auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+
+      switch (unit_) {
+        case TimeUnit::SECOND:
+          t = PyDateTime_to_s(pydatetime);
+          break;
+        case TimeUnit::MILLI:
+          t = PyDateTime_to_ms(pydatetime);
+          break;
+        case TimeUnit::MICRO:
+          t = PyDateTime_to_us(pydatetime);
+          break;
+        case TimeUnit::NANO:
+          t = PyDateTime_to_ns(pydatetime);
+          break;
+      }
+    } else {
+      t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+      RETURN_IF_PYERROR();
+    }
+    return typed_builder_->Append(t);
   }
+
+ private:
+  TimeUnit::type unit_;
 };
 
 class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
@@ -687,7 +720,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::DATE64:
       return std::make_shared<DateConverter>();
     case Type::TIMESTAMP:
-      return std::make_shared<TimestampConverter>();
+      return std::make_shared<TimestampConverter>(
+          static_cast<const TimestampType&>(*type).unit());
     case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
     case Type::BINARY:
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 782960f..c110bc6 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -247,12 +247,26 @@ static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   return total_seconds * 1000;
 }
 
+static inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) {
+  return PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL;
+}
+
+static inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) {
+  int64_t date_ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
+  int ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000;
+  return date_ms + ms;
+}
+
 static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
   int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
   int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
   return ms * 1000 + us;
 }
 
+static inline int64_t PyDateTime_to_ns(PyDateTime_DateTime* pydatetime) {
+  return PyDateTime_to_us(pydatetime) * 1000;
+}
+
 static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {
   return static_cast<int32_t>(PyDate_to_ms(pydate) / 86400000LL);
 }
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index d18ed95..414266d 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -197,6 +197,75 @@ class TestConvertSequence(unittest.TestCase):
         assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5,
                                                    46, 57, 437699)
 
+    def test_timestamp_with_unit(self):
+        data = [
+            datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
+        ]
+
+        s = pa.timestamp('s')
+        ms = pa.timestamp('ms')
+        us = pa.timestamp('us')
+        ns = pa.timestamp('ns')
+
+        arr_s = pa.array(data, type=s)
+        assert len(arr_s) == 1
+        assert arr_s.type == s
+        assert arr_s[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                     23, 34, 0)
+
+        arr_ms = pa.array(data, type=ms)
+        assert len(arr_ms) == 1
+        assert arr_ms.type == ms
+        assert arr_ms[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123000)
+
+        arr_us = pa.array(data, type=us)
+        assert len(arr_us) == 1
+        assert arr_us.type == us
+        assert arr_us[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123456)
+
+        arr_ns = pa.array(data, type=ns)
+        assert len(arr_ns) == 1
+        assert arr_ns.type == ns
+        assert arr_ns[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123456)
+
+    def test_timestamp_from_int_with_unit(self):
+        data = [1]
+
+        s = pa.timestamp('s')
+        ms = pa.timestamp('ms')
+        us = pa.timestamp('us')
+        ns = pa.timestamp('ns')
+
+        arr_s = pa.array(data, type=s)
+        assert len(arr_s) == 1
+        assert arr_s.type == s
+        assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')"
+
+        arr_ms = pa.array(data, type=ms)
+        assert len(arr_ms) == 1
+        assert arr_ms.type == ms
+        assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')"
+
+        arr_us = pa.array(data, type=us)
+        assert len(arr_us) == 1
+        assert arr_us.type == us
+        assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')"
+
+        arr_ns = pa.array(data, type=ns)
+        assert len(arr_ns) == 1
+        assert arr_ns.type == ns
+        assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')"
+
+        with pytest.raises(pa.ArrowException):
+            class CustomClass():
+                pass
+            pa.array([1, CustomClass()], type=ns)
+            pa.array([1, CustomClass()], type=pa.date32())
+            pa.array([1, CustomClass()], type=pa.date64())
+
     def test_mixed_nesting_levels(self):
         pa.array([1, 2, None])
         pa.array([[1], [2], None])

-- 
To stop receiving notification emails like this one, please contact
"commits@arrow.apache.org" <commits@arrow.apache.org>.

Reply via email to