Repository: arrow
Updated Branches:
  refs/heads/master 5bee596ca -> 74bc4dd48
ARROW-511: Python: Implement List conversions for single arrays Author: Uwe L. Korn <[email protected]> Closes #320 from xhochy/ARROW-511 and squashes the following commits: 2ff63f9 [Uwe L. Korn] Use _check_pandas_roundtrip 6c8fa6d [Uwe L. Korn] Python: Implement List conversions for single arrays Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/74bc4dd4 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/74bc4dd4 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/74bc4dd4 Branch: refs/heads/master Commit: 74bc4dd480d6153cf1fb5d6fb7cdbb22d1e6e5d9 Parents: 5bee596 Author: Uwe L. Korn <[email protected]> Authored: Sun Feb 5 15:29:11 2017 -0500 Committer: Wes McKinney <[email protected]> Committed: Sun Feb 5 15:29:11 2017 -0500 ---------------------------------------------------------------------- python/pyarrow/tests/test_convert_pandas.py | 7 +++++- python/src/pyarrow/adapters/pandas.cc | 31 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/74bc4dd4/python/pyarrow/tests/test_convert_pandas.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index ddbb02a..f04fbe5 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -22,6 +22,7 @@ import datetime import unittest import numpy as np +import numpy.testing as npt import pandas as pd import pandas.util.testing as tm @@ -80,7 +81,7 @@ class TestPandasConversion(unittest.TestCase): arr = A.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms, field=field) result = arr.to_pandas() - tm.assert_series_equal(pd.Series(result), pd.Series(values)) + tm.assert_series_equal(pd.Series(result), pd.Series(values), 
check_names=False) def test_float_no_nulls(self): data = {} @@ -332,6 +333,10 @@ class TestPandasConversion(unittest.TestCase): table = A.Table.from_pandas(df, schema=schema) assert table.schema.equals(schema) + for column in df.columns: + field = schema.field_by_name(column) + self._check_array_roundtrip(df[column], field=field) + def test_threaded_conversion(self): df = _alltypes_example() self._check_pandas_roundtrip(df, nthreads=2, http://git-wip-us.apache.org/repos/asf/arrow/blob/74bc4dd4/python/src/pyarrow/adapters/pandas.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index 920779f..8d05821 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -1817,6 +1817,7 @@ class ArrowDeserializer { CONVERT_CASE(DATE); CONVERT_CASE(TIMESTAMP); CONVERT_CASE(DICTIONARY); + CONVERT_CASE(LIST); default: { std::stringstream ss; ss << "Arrow type reading not implemented for " << col_->type()->ToString(); @@ -1914,6 +1915,36 @@ class ArrowDeserializer { return ConvertBinaryLike<arrow::BinaryArray>(data_, out_values); } +#define CONVERTVALUES_LISTSLIKE_CASE(ArrowType, ArrowEnum) \ + case Type::ArrowEnum: \ + return ConvertListsLike<::arrow::ArrowType>(col_, out_values); + + template <int T2> + inline typename std::enable_if<T2 == Type::LIST, Status>::type ConvertValues() { + RETURN_NOT_OK(AllocateOutput(NPY_OBJECT)); + auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_)); + auto list_type = std::static_pointer_cast<ListType>(col_->type()); + switch (list_type->value_type()->type) { + CONVERTVALUES_LISTSLIKE_CASE(UInt8Type, UINT8) + CONVERTVALUES_LISTSLIKE_CASE(Int8Type, INT8) + CONVERTVALUES_LISTSLIKE_CASE(UInt16Type, UINT16) + CONVERTVALUES_LISTSLIKE_CASE(Int16Type, INT16) + CONVERTVALUES_LISTSLIKE_CASE(UInt32Type, UINT32) + CONVERTVALUES_LISTSLIKE_CASE(Int32Type, INT32) + 
CONVERTVALUES_LISTSLIKE_CASE(UInt64Type, UINT64) + CONVERTVALUES_LISTSLIKE_CASE(Int64Type, INT64) + CONVERTVALUES_LISTSLIKE_CASE(TimestampType, TIMESTAMP) + CONVERTVALUES_LISTSLIKE_CASE(FloatType, FLOAT) + CONVERTVALUES_LISTSLIKE_CASE(DoubleType, DOUBLE) + CONVERTVALUES_LISTSLIKE_CASE(StringType, STRING) + default: { + std::stringstream ss; + ss << "Not implemented type for lists: " << list_type->value_type()->ToString(); + return Status::NotImplemented(ss.str()); + } + } + } + template <int TYPE> inline typename std::enable_if<TYPE == Type::DICTIONARY, Status>::type ConvertValues() { std::shared_ptr<PandasBlock> block;
