[ 
https://issues.apache.org/jira/browse/ARROW-2106?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16360326#comment-16360326
 ] 

ASF GitHub Bot commented on ARROW-2106:
---------------------------------------

wesm closed pull request #1585: ARROW-2106: [Python] Add conversion for a 
series of datetime objects
URL: https://github.com/apache/arrow/pull/1585
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc 
b/cpp/src/arrow/python/numpy_to_arrow.cc
index 1b1673bb8..d487d9d9d 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -475,6 +475,7 @@ class NumPyConverter {
   Status ConvertLists(const std::shared_ptr<DataType>& type, ListBuilder* 
builder,
                       PyObject* list);
   Status ConvertDecimals();
+  Status ConvertDateTimes();
   Status ConvertTimes();
   Status ConvertObjectsInfer();
   Status ConvertObjectsInferAndCast();
@@ -783,6 +784,35 @@ Status NumPyConverter::ConvertDecimals() {
   return PushBuilderResult(&builder);
 }
 
+Status NumPyConverter::ConvertDateTimes() {
+  // Convert array of datetime.datetime objects to Arrow
+  PyAcquireGIL lock;
+  PyDateTime_IMPORT;
+
+  Ndarray1DIndexer<PyObject*> objects(arr_);
+
+  // datetime.datetime stores microsecond resolution
+  TimestampBuilder builder(::arrow::timestamp(TimeUnit::MICRO), pool_);
+  RETURN_NOT_OK(builder.Resize(length_));
+
+  PyObject* obj = NULLPTR;
+  for (int64_t i = 0; i < length_; ++i) {
+    obj = objects[i];
+    if (PyDateTime_Check(obj)) {
+      RETURN_NOT_OK(
+          
builder.Append(PyDateTime_to_us(reinterpret_cast<PyDateTime_DateTime*>(obj))));
+    } else if (PandasObjectIsNull(obj)) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      std::stringstream ss;
+      ss << "Error converting from Python objects to Timestamp: ";
+      RETURN_NOT_OK(InvalidConversion(obj, "datetime.datetime", &ss));
+      return Status::Invalid(ss.str());
+    }
+  }
+  return PushBuilderResult(&builder);
+}
+
 Status NumPyConverter::ConvertTimes() {
   // Convert array of datetime.time objects to Arrow
   PyAcquireGIL lock;
@@ -1005,6 +1035,8 @@ Status NumPyConverter::ConvertObjectsInfer() {
     } else if (PyDate_CheckExact(obj)) {
       // We could choose Date32 or Date64
       return ConvertDates<Date32Type>();
+    } else if (PyDateTime_CheckExact(obj)) {
+      return ConvertDateTimes();
     } else if (PyTime_Check(obj)) {
       return ConvertTimes();
     } else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj())) 
{
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index 7dbf0d7ed..8d14f86ae 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -18,7 +18,7 @@
 
 from collections import OrderedDict
 
-from datetime import date, time
+from datetime import date, datetime, time, timedelta
 import decimal
 import json
 
@@ -649,6 +649,22 @@ def test_timestamps_with_timezone(self):
 
         _check_pandas_roundtrip(df)
 
+    def test_python_datetime(self):
+        # ARROW-2106
+        date_array = [datetime.today() + timedelta(days=x) for x in range(10)]
+        df = pd.DataFrame({
+            'datetime': pd.Series(date_array, dtype=object)
+        })
+
+        table = pa.Table.from_pandas(df)
+        assert isinstance(table[0].data.chunk(0), pa.TimestampArray)
+
+        result = table.to_pandas()
+        expected_df = pd.DataFrame({
+            'datetime': date_array
+        })
+        tm.assert_frame_equal(expected_df, result)
+
     def test_datetime64_to_date32(self):
         # ARROW-1718
         arr = pa.array([date(2017, 10, 23), None])


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [Python] pyarrow.array can't take a pandas Series of python datetime objects.
> -----------------------------------------------------------------------------
>
>                 Key: ARROW-2106
>                 URL: https://issues.apache.org/jira/browse/ARROW-2106
>             Project: Apache Arrow
>          Issue Type: Bug
>    Affects Versions: 0.8.0
>            Reporter: Naveen Michaud-Agrawal
>            Assignee: Uwe L. Korn
>            Priority: Minor
>              Labels: pull-request-available
>             Fix For: 0.9.0
>
>
>  > import pyarrow
>  > from datetime import datetime
>  > import pandas
>  > dt = pandas.Series([datetime(2017, 12, 1), datetime(2017, 12, 3), 
> datetime(2017, 12, 15)], dtype=object)
>  > pyarrow.array(dt, from_pandas=True)
> Raises the following:
> ---------------------------------------------------------------------------
>  ArrowInvalid Traceback (most recent call last)
>  <ipython-input-8-0d49f7fc5c49> in <module>()
>  ----> 1 pyarrow.array(dt, from_pandas=True)
> array.pxi in pyarrow.lib.array()
> array.pxi in pyarrow.lib._ndarray_to_array()
> error.pxi in pyarrow.lib.check_status()
> ArrowInvalid: Error inferring Arrow type for Python object array. Got Python 
> object of type datetime but can only handle these types: string, bool, float, 
> int, date, time, decimal, list, array
> As far as I can tell, the issue seems to be the call to PyDate_CheckExact 
> here (instead of using PyDate_Check):
> [https://github.com/apache/arrow/blob/3098c1411930259070efb571fb350304b18ddc70/cpp/src/arrow/python/numpy_to_arrow.cc#L1005]



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to