Repository: arrow
Updated Branches:
  refs/heads/master 608b89e16 -> a16c1246e


ARROW-1137: Python: Ensure Pandas roundtrip of all-None column

Change-Id: Ib815d3fa42f0a0ae6c0d9850e9b0b435bad1c331

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #764 from xhochy/ARROW-1137 and squashes the following commits:

59c0df89 [Uwe L. Korn] Remove unused variables
1d11513f [Uwe L. Korn] ARROW-1137: Python: Ensure Pandas roundtrip of all-None column


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/a16c1246
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/a16c1246
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/a16c1246

Branch: refs/heads/master
Commit: a16c1246ec25a020cacc1330650ac7d4cfd6d230
Parents: 608b89e
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Thu Jun 22 18:06:12 2017 -0400
Committer: Wes McKinney <wes.mckin...@twosigma.com>
Committed: Thu Jun 22 18:06:12 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/python/pandas_convert.cc      | 63 +++++++++++++++++++++---
 python/pyarrow/tests/test_convert_pandas.py |  5 ++
 2 files changed, 61 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/a16c1246/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index ac61cbc..6b0e342 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -1305,6 +1305,22 @@ inline Status ConvertBinaryLike(const ChunkedArray& data, PyObject** out_values)
   return Status::OK();
 }
 
+// Writes one Py_None per element of data into out_values, taking a new
+// reference for each slot. Every value is treated as null, so no null count
+// or validity bitmap is consulted. out_values must hold all chunks' elements.
+inline Status ConvertNulls(const ChunkedArray& data, PyObject** out_values) {
+  PyAcquireGIL lock;
+  for (int c = 0; c < data.num_chunks(); c++) {
+    const int64_t chunk_length = data.chunk(c)->length();
+    for (int64_t i = 0; i < chunk_length; ++i) {
+      // All values are null
+      Py_INCREF(Py_None);
+      *out_values++ = Py_None;
+    }
+  }
+  return Status::OK();
+}
+
 inline Status ConvertFixedSizeBinary(const ChunkedArray& data, PyObject** out_values) {
   PyAcquireGIL lock;
   for (int c = 0; c < data.num_chunks(); c++) {
@@ -1457,6 +1473,8 @@ class ObjectBlock : public PandasBlock {
       RETURN_NOT_OK(ConvertFixedSizeBinary(data, out_buffer));
     } else if (type == Type::DECIMAL) {
       RETURN_NOT_OK(ConvertDecimals(data, out_buffer));
+    } else if (type == Type::NA) {
+      RETURN_NOT_OK(ConvertNulls(data, out_buffer));
     } else if (type == Type::LIST) {
       auto list_type = std::static_pointer_cast<ListType>(col->type());
       switch (list_type->value_type()->id()) {
@@ -1506,7 +1524,12 @@ class IntBlock : public PandasBlock {
 
     const ChunkedArray& data = *col->data().get();
 
-    if (type != ARROW_TYPE) { return Status::NotImplemented(col->type()->ToString()); }
+    if (type != ARROW_TYPE) {
+      std::stringstream ss;
+      ss << "Cannot write Arrow data of type " << col->type()->ToString();
+      ss << " to a Pandas int" << sizeof(C_TYPE) << " block.";
+      return Status::NotImplemented(ss.str());
+    }
 
     ConvertIntegerNoNullsSameType<C_TYPE>(data, out_buffer);
     placement_data_[rel_placement] = abs_placement;
@@ -1532,7 +1555,12 @@ class Float32Block : public PandasBlock {
       int64_t rel_placement) override {
     Type::type type = col->type()->id();
 
-    if (type != Type::FLOAT) { return Status::NotImplemented(col->type()->ToString()); }
+    if (type != Type::FLOAT) {
+      std::stringstream ss;
+      ss << "Cannot write Arrow data of type " << col->type()->ToString();
+      ss << " to a Pandas float32 block.";
+      return Status::NotImplemented(ss.str());
+    }
 
     float* out_buffer = reinterpret_cast<float*>(block_data_) + rel_placement * num_rows_;
 
@@ -1584,7 +1612,10 @@ class Float64Block : public PandasBlock {
         ConvertNumericNullable<double>(data, NAN, out_buffer);
         break;
       default:
-        return Status::NotImplemented(col->type()->ToString());
+        std::stringstream ss;
+        ss << "Cannot write Arrow data of type " << col->type()->ToString();
+        ss << " to a Pandas float64 block.";
+        return Status::NotImplemented(ss.str());
     }
 
 #undef INTEGER_CASE
@@ -1603,7 +1634,12 @@ class BoolBlock : public PandasBlock {
       int64_t rel_placement) override {
     Type::type type = col->type()->id();
 
-    if (type != Type::BOOL) { return Status::NotImplemented(col->type()->ToString()); }
+    if (type != Type::BOOL) {
+      std::stringstream ss;
+      ss << "Cannot write Arrow data of type " << col->type()->ToString();
+      ss << " to a Pandas boolean block.";
+      return Status::NotImplemented(ss.str());
+    }
 
     uint8_t* out_buffer =
         reinterpret_cast<uint8_t*>(block_data_) + rel_placement * num_rows_;
@@ -1660,7 +1696,10 @@ class DatetimeBlock : public PandasBlock {
         return Status::NotImplemented("Unsupported time unit");
       }
     } else {
-      return Status::NotImplemented(col->type()->ToString());
+      std::stringstream ss;
+      ss << "Cannot write Arrow data of type " << col->type()->ToString();
+      ss << " to a Pandas datetime block.";
+      return Status::NotImplemented(ss.str());
     }
 
     placement_data_[rel_placement] = abs_placement;
@@ -1917,8 +1956,14 @@ class DataFrameBlockCreator {
         case Type::DECIMAL:
           output_type = PandasBlock::DECIMAL;
           break;
+        case Type::NA:
+          output_type = PandasBlock::OBJECT;
+          break;
         default:
-          return Status::NotImplemented(col->type()->ToString());
+          std::stringstream ss;
+          ss << "No known equivalent Pandas block for Arrow data of type ";
+          ss << col->type()->ToString() << " is known.";
+          return Status::NotImplemented(ss.str());
       }
 
       int block_placement = 0;
@@ -2301,7 +2346,11 @@ class ArrowDeserializer {
     return Status::OK();
   }
 
-  Status Visit(const NullType& type) { return Status::NotImplemented("null type"); }
+  Status Visit(const NullType& type) {
+    // Request NPY_OBJECT output, then hand its data pointer to ConvertNulls.
+    RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
+    return ConvertNulls(data_, reinterpret_cast<PyObject**>(PyArray_DATA(arr_)));
+  }
 
   Status Visit(const StructType& type) { return Status::NotImplemented("struct type"); }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/a16c1246/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index ca30455..d17ef3c 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -98,6 +98,11 @@ class TestPandasConversion(unittest.TestCase):
             tm.assert_series_equal(pd.Series(result), expected,
                                    check_names=False)
 
+    def test_all_none_objects(self):
+        df = pd.DataFrame({'a': [None, None, None]})
+        self._check_pandas_roundtrip(df)
+
+
     def test_float_no_nulls(self):
         data = {}
         fields = []

Reply via email to