This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6a2b98e ARROW-2818: [Python] Better error message when trying to
convert sparse pandas data to arrow Table
6a2b98e is described below
commit 6a2b98ee1a0c2f33452ccbafb0120fc1b9d90ea0
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri Jun 7 13:12:09 2019 -0500
ARROW-2818: [Python] Better error message when trying to convert sparse
pandas data to arrow Table
https://issues.apache.org/jira/browse/ARROW-2818
Author: Joris Van den Bossche <[email protected]>
Closes #4497 from jorisvandenbossche/ARROW-2818-sparse-error-message and
squashes the following commits:
eddac8c51 <Joris Van den Bossche> ARROW-2818: better error message when
trying to convert sparse pandas data to arrow Table
---
python/pyarrow/pandas-shim.pxi | 6 ++++++
python/pyarrow/pandas_compat.py | 4 ++++
python/pyarrow/tests/test_pandas.py | 6 ++++++
3 files changed, 16 insertions(+)
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index bb0fb94..e87c829 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -152,6 +152,12 @@ cdef class _PandasAPIShim(object):
else:
return False
+ cpdef is_sparse(self, obj):
+ if self._have_pandas_internal():
+ return self._types_api.is_sparse(obj)
+ else:
+ return False
+
cpdef is_data_frame(self, obj):
if self._have_pandas_internal():
return isinstance(obj, self._data_frame)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 8db97c0..6eb713e 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -338,6 +338,10 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
col = df[name]
name = _column_name_to_strings(name)
+ if _pandas_api.is_sparse(col):
+ raise TypeError(
+ "Sparse pandas data (column {}) not supported.".format(name))
+
if schema is not None:
field = schema.field_by_name(name)
type = getattr(field, "type", None)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 6d002db..6e2c9f1 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2301,6 +2301,12 @@ class TestConvertMisc(object):
table = pa.Table.from_pandas(df, schema=schema, safe=False)
assert table.column('B').type == pa.int32()
+ def test_error_sparse(self):
+ # ARROW-2818
+ df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
+ with pytest.raises(TypeError, match="Sparse pandas data"):
+ pa.Table.from_pandas(df)
+
def test_safe_cast_from_float_with_nans_to_int():
# TODO(kszucs): write tests for creating Date32 and Date64 arrays, see