[ 
https://issues.apache.org/jira/browse/ARROW-2432?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16435418#comment-16435418
 ] 

ASF GitHub Bot commented on ARROW-2432:
---------------------------------------

pitrou closed pull request #1878: ARROW-2432: [Python] Fix Pandas decimal type 
conversion with None values
URL: https://github.com/apache/arrow/pull/1878
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/decimal.cc b/cpp/src/arrow/python/decimal.cc
index 10593c729e..051f31faac 100644
--- a/cpp/src/arrow/python/decimal.cc
+++ b/cpp/src/arrow/python/decimal.cc
@@ -184,14 +184,15 @@ Status DecimalMetadata::Update(int32_t 
suggested_precision, int32_t suggested_sc
 }
 
 Status DecimalMetadata::Update(PyObject* object) {
-  DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
+  bool is_decimal = PyDecimal_Check(object);
+  DCHECK(is_decimal) << "Object is not a Python Decimal";
 
-  if (ARROW_PREDICT_FALSE(PyDecimal_ISNAN(object))) {
+  if (ARROW_PREDICT_FALSE(!is_decimal || PyDecimal_ISNAN(object))) {
     return Status::OK();
   }
 
-  int32_t precision;
-  int32_t scale;
+  int32_t precision = 0;
+  int32_t scale = 0;
   RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
   return Update(precision, scale);
 }
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc 
b/cpp/src/arrow/python/numpy_to_arrow.cc
index e37013c7e4..e3fb71b7d5 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -743,7 +743,9 @@ Status NumPyConverter::ConvertDecimals() {
 
   if (type_ == NULLPTR) {
     for (PyObject* object : objects) {
-      RETURN_NOT_OK(max_decimal_metadata.Update(object));
+      if (!internal::PandasObjectIsNull(object)) {
+        RETURN_NOT_OK(max_decimal_metadata.Update(object));
+      }
     }
 
     type_ =
@@ -758,22 +760,19 @@ Status NumPyConverter::ConvertDecimals() {
   for (PyObject* object : objects) {
     const int is_decimal = PyObject_IsInstance(object, decimal_type_.obj());
 
-    if (ARROW_PREDICT_FALSE(is_decimal == 0)) {
+    if (is_decimal == 1) {
+      Decimal128 value;
+      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, 
&value));
+      RETURN_NOT_OK(builder.Append(value));
+    } else if (is_decimal == 0 && internal::PandasObjectIsNull(object)) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      // PyObject_IsInstance could error and set an exception
+      RETURN_IF_PYERROR();
       std::stringstream ss;
       ss << "Error converting from Python objects to Decimal: ";
       RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
       return Status::Invalid(ss.str());
-    } else if (ARROW_PREDICT_FALSE(is_decimal == -1)) {
-      DCHECK_NE(PyErr_Occurred(), nullptr);
-      RETURN_IF_PYERROR();
-    }
-
-    if (internal::PandasObjectIsNull(object)) {
-      RETURN_NOT_OK(builder.AppendNull());
-    } else {
-      Decimal128 value;
-      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, 
&value));
-      RETURN_NOT_OK(builder.Append(value));
     }
   }
   return PushBuilderResult(&builder);
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index de6120176f..bbb5b2d044 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -80,9 +80,15 @@ def _check_pandas_roundtrip(df, expected=None, nthreads=1,
                                             else False))
 
 
-def _check_series_roundtrip(s, type_=None):
+def _check_series_roundtrip(s, type_=None, expected_pa_type=None):
     arr = pa.array(s, from_pandas=True, type=type_)
 
+    if type_ is not None and expected_pa_type is None:
+        expected_pa_type = type_
+
+    if expected_pa_type is not None:
+        assert arr.type == expected_pa_type
+
     result = pd.Series(arr.to_pandas(), name=s.name)
     if patypes.is_timestamp(arr.type) and arr.type.tz is not None:
         result = (result.dt.tz_localize('utc')
@@ -1149,19 +1155,15 @@ def 
test_fixed_size_bytes_does_not_accept_varying_lengths(self):
 
     def test_variable_size_bytes(self):
         s = pd.Series([b'123', b'', b'a', None])
-        arr = pa.Array.from_pandas(s, type=pa.binary())
-        assert arr.type == pa.binary()
         _check_series_roundtrip(s, type_=pa.binary())
 
     def test_binary_from_bytearray(self):
-        s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a')])
+        s = pd.Series([bytearray(b'123'), bytearray(b''), bytearray(b'a'),
+                       None])
         # Explicitly set type
-        arr = pa.Array.from_pandas(s, type=pa.binary())
-        assert arr.type == pa.binary()
-        # Infer type from bytearrays
-        arr = pa.Array.from_pandas(s)
-        assert arr.type == pa.binary()
         _check_series_roundtrip(s, type_=pa.binary())
+        # Infer type from bytearrays
+        _check_series_roundtrip(s, expected_pa_type=pa.binary())
 
     def test_table_empty_str(self):
         values = ['', '', '', '', '']
@@ -1326,6 +1328,18 @@ def test_decimal_with_different_precisions(self):
         expected = [decimal.Decimal('0.01000'), decimal.Decimal('0.00100')]
         assert array.to_pylist() == expected
 
+    def test_decimal_with_None_explicit_type(self):
+        series = pd.Series([decimal.Decimal('3.14'), None])
+        _check_series_roundtrip(series, type_=pa.decimal128(12, 5))
+
+        # Test that having all None values still produces decimal array
+        series = pd.Series([None] * 2)
+        _check_series_roundtrip(series, type_=pa.decimal128(12, 5))
+
+    def test_decimal_with_None_infer_type(self):
+        series = pd.Series([decimal.Decimal('3.14'), None])
+        _check_series_roundtrip(series, expected_pa_type=pa.decimal128(3, 2))
+
 
 class TestListTypes(object):
     """


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] from_pandas fails when converting decimals if have None values
> -----------------------------------------------------------------------
>
>                 Key: ARROW-2432
>                 URL: https://issues.apache.org/jira/browse/ARROW-2432
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>    Affects Versions: 0.9.0
>            Reporter: Bryan Cutler
>            Assignee: Bryan Cutler
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> Using from_pandas to convert decimals fails if encounters a value of 
> {{None}}. For example:
> {code:java}
> In [1]: import pyarrow as pa
> ...: import pandas as pd
> ...: from decimal import Decimal
> ...:
> In [2]: s_dec = pd.Series([Decimal('3.14'), None])
> In [3]: pa.Array.from_pandas(s_dec, type=pa.decimal128(3, 2))
> ---------------------------------------------------------------------------
> ArrowInvalid Traceback (most recent call last)
> <ipython-input-3-2da56007a0da> in <module>()
> ----> 1 pa.Array.from_pandas(s_dec, type=pa.decimal128(3, 2))
> array.pxi in pyarrow.lib.Array.from_pandas()
> array.pxi in pyarrow.lib.array()
> error.pxi in pyarrow.lib.check_status()
> error.pxi in pyarrow.lib.check_status()
> ArrowInvalid: Error converting from Python objects to Decimal: Got Python 
> object of type NoneType but can only handle these types: decimal.Decimal
> {code}
> The above error is raised when specifying decimal type. When no type is 
> specified, a seg fault happens.
> This previously worked in 0.8.0.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to