This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6a2b98e  ARROW-2818: [Python] Better error message when trying to convert sparse pandas data to arrow Table
6a2b98e is described below

commit 6a2b98ee1a0c2f33452ccbafb0120fc1b9d90ea0
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri Jun 7 13:12:09 2019 -0500

    ARROW-2818: [Python] Better error message when trying to convert sparse pandas data to arrow Table
    
    https://issues.apache.org/jira/browse/ARROW-2818
    
    Author: Joris Van den Bossche <[email protected]>
    
    Closes #4497 from jorisvandenbossche/ARROW-2818-sparse-error-message and squashes the following commits:
    
    eddac8c51 <Joris Van den Bossche> ARROW-2818:  better error message when trying to convert sparse pandas data to arrow Table
---
 python/pyarrow/pandas-shim.pxi      | 6 ++++++
 python/pyarrow/pandas_compat.py     | 4 ++++
 python/pyarrow/tests/test_pandas.py | 6 ++++++
 3 files changed, 16 insertions(+)

diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index bb0fb94..e87c829 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -152,6 +152,12 @@ cdef class _PandasAPIShim(object):
         else:
             return False
 
+    cpdef is_sparse(self, obj):
+        if self._have_pandas_internal():
+            return self._types_api.is_sparse(obj)
+        else:
+            return False
+
     cpdef is_data_frame(self, obj):
         if self._have_pandas_internal():
             return isinstance(obj, self._data_frame)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 8db97c0..6eb713e 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -338,6 +338,10 @@ def _get_columns_to_convert(df, schema, preserve_index, columns):
         col = df[name]
         name = _column_name_to_strings(name)
 
+        if _pandas_api.is_sparse(col):
+            raise TypeError(
+                "Sparse pandas data (column {}) not supported.".format(name))
+
         if schema is not None:
             field = schema.field_by_name(name)
             type = getattr(field, "type", None)
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 6d002db..6e2c9f1 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -2301,6 +2301,12 @@ class TestConvertMisc(object):
         table = pa.Table.from_pandas(df, schema=schema, safe=False)
         assert table.column('B').type == pa.int32()
 
+    def test_error_sparse(self):
+        # ARROW-2818
+        df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 3])})
+        with pytest.raises(TypeError, match="Sparse pandas data"):
+            pa.Table.from_pandas(df)
+
 
 def test_safe_cast_from_float_with_nans_to_int():
     # TODO(kszucs): write tests for creating Date32 and Date64 arrays, see

Reply via email to