This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ad8602  ARROW-2432: [Python] Fix Pandas decimal type conversion with None values
9ad8602 is described below

commit 9ad86024dfa4d1452e647524a2f94dc6a588d377
Author: Bryan Cutler <cutl...@gmail.com>
AuthorDate: Thu Apr 12 13:57:22 2018 +0200

    ARROW-2432: [Python] Fix Pandas decimal type conversion with None values
    
    This fixes conversion of Pandas decimal types to Arrow with None values. Previously, if the type was specified, an error would occur when checking if the object was Decimal. If the type was not specified, a segmentation fault would occur when attempting to find the max precision and scale.
    
    Added new tests which include None values for both the above cases.
    
    Author: Bryan Cutler <cutl...@gmail.com>
    
    Closes #1878 from BryanCutler/python-decimals-None-error-ARROW-2432 and squashes the following commits:
    
    00a1b4d <Bryan Cutler> forgot to specify expected type in test
    e60ec09 <Bryan Cutler> fix flake8 formatting
    7c3977d <Bryan Cutler> fixed up tests to check types
    ca16983 <Bryan Cutler> fix case for infer decimal type
    dabdcda <Bryan Cutler> remove predict macro
    89e53fc <Bryan Cutler> fix for case of explicit type, still seg fault when infer
    916aa8d <Bryan Cutler> added tests
---
 cpp/src/arrow/python/decimal.cc             |  9 ++++----
 cpp/src/arrow/python/numpy_to_arrow.cc      | 25 +++++++++++-----------
 python/pyarrow/tests/test_convert_pandas.py | 32 +++++++++++++++++++++--------
 3 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/python/decimal.cc b/cpp/src/arrow/python/decimal.cc
index 10593c7..051f31f 100644
--- a/cpp/src/arrow/python/decimal.cc
+++ b/cpp/src/arrow/python/decimal.cc
@@ -184,14 +184,15 @@ Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_sc
 }
 
 Status DecimalMetadata::Update(PyObject* object) {
-  DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
+  bool is_decimal = PyDecimal_Check(object);
+  DCHECK(is_decimal) << "Object is not a Python Decimal";
 
-  if (ARROW_PREDICT_FALSE(PyDecimal_ISNAN(object))) {
+  if (ARROW_PREDICT_FALSE(!is_decimal || PyDecimal_ISNAN(object))) {
     return Status::OK();
   }
 
-  int32_t precision;
-  int32_t scale;
+  int32_t precision = 0;
+  int32_t scale = 0;
   RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
   return Update(precision, scale);
 }
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index e37013c..e3fb71b 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -743,7 +743,9 @@ Status NumPyConverter::ConvertDecimals() {
 
   if (type_ == NULLPTR) {
     for (PyObject* object : objects) {
-      RETURN_NOT_OK(max_decimal_metadata.Update(object));
+      if (!internal::PandasObjectIsNull(object)) {
+        RETURN_NOT_OK(max_decimal_metadata.Update(object));
+      }
     }
 
     type_ =
@@ -758,22 +760,19 @@ Status NumPyConverter::ConvertDecimals() {
   for (PyObject* object : objects) {
     const int is_decimal = PyObject_IsInstance(object, decimal_type_.obj());
 
-    if (ARROW_PREDICT_FALSE(is_decimal == 0)) {
+    if (is_decimal == 1) {
+      Decimal128 value;
+      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value));
+      RETURN_NOT_OK(builder.Append(value));
+    } else if (is_decimal == 0 && internal::PandasObjectIsNull(object)) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      // PyObject_IsInstance could error and set an exception
+      RETURN_IF_PYERROR();
       std::stringstream ss;
       ss << "Error converting from Python objects to Decimal: ";
       RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
       return Status::Invalid(ss.str());
-    } else if (ARROW_PREDICT_FALSE(is_decimal == -1)) {
-      DCHECK_NE(PyErr_Occurred(), nullptr);
-      RETURN_IF_PYERROR();
-    }
-
-    if (internal::PandasObjectIsNull(object)) {
-      RETURN_NOT_OK(builder.AppendNull());
-    } else {
-      Decimal128 value;
-      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, &value));
-      RETURN_NOT_OK(builder.Append(value));
     }
   }
   return PushBuilderResult(&builder);
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index de61201..bbb5b2d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -80,9 +80,15 @@ def _check_pandas_roundtrip(df, expected=None, nthreads=1,
                                             else False))
 
 
-def _check_series_roundtrip(s, type_=None):
+def _check_series_roundtrip(s, type_=None, expected_pa_type=None):
     arr = pa.array(s, from_pandas=True, type=type_)
 
+    if type_ is not None and expected_pa_type is None:
+        expected_pa_type = type_
+
+    if expected_pa_type is not None:
+        assert arr.type == expected_pa_type
+
     result = pd.Series(arr.to_pandas(), name=s.name)
     if patypes.is_timestamp(arr.type) and arr.type.tz is not None:
         result = (result.dt.tz_localize('utc')
@@ -1149,19 +1155,15 @@ class TestConvertStringLikeTypes(object):
 
     def test_variable_size_bytes(self):
         s = pd.Series([b'123', b'', b'a', None])
-        arr = pa.Array.from_pandas(s, type=pa.binary())
-        assert arr.type == pa.binary()
         _check_series_roundtrip(s, type_=pa.binary())
 
     def test_binary_from_bytearray(self):
-        s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a')])
+        s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a'),
+                       None])
         # Explicitly set type
-        arr = pa.Array.from_pandas(s, type=pa.binary())
-        assert arr.type == pa.binary()
-        # Infer type from bytearrays
-        arr = pa.Array.from_pandas(s)
-        assert arr.type == pa.binary()
         _check_series_roundtrip(s, type_=pa.binary())
+        # Infer type from bytearrays
+        _check_series_roundtrip(s, expected_pa_type=pa.binary())
 
     def test_table_empty_str(self):
         values = ['', '', '', '', '']
@@ -1326,6 +1328,18 @@ class TestConvertDecimalTypes(object):
         expected = [decimal.Decimal('0.01000'), decimal.Decimal('0.00100')]
         assert array.to_pylist() == expected
 
+    def test_decimal_with_None_explicit_type(self):
+        series = pd.Series([decimal.Decimal('3.14'), None])
+        _check_series_roundtrip(series, type_=pa.decimal128(12, 5))
+
+        # Test that having all None values still produces decimal array
+        series = pd.Series([None] * 2)
+        _check_series_roundtrip(series, type_=pa.decimal128(12, 5))
+
+    def test_decimal_with_None_infer_type(self):
+        series = pd.Series([decimal.Decimal('3.14'), None])
+        _check_series_roundtrip(series, expected_pa_type=pa.decimal128(3, 2))
+
 
 class TestListTypes(object):
     """

-- 
To stop receiving notification emails like this one, please contact
apit...@apache.org.

Reply via email to