[jira] [Commented] (ARROW-2145) [Python] Decimal conversion not working for NaN values

ASF GitHub Bot (JIRA) Fri, 23 Feb 2018 09:51:30 -0800

    [ 
https://issues.apache.org/jira/browse/ARROW-2145?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16374729#comment-16374729
 ]


ASF GitHub Bot commented on ARROW-2145:
---------------------------------------

cpcloud closed pull request #1610: ARROW-2145/ARROW-2157: [Python] Decimal 
conversion not working for NaN values
URL: https://github.com/apache/arrow/pull/1610
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc 
b/cpp/src/arrow/python/arrow_to_pandas.cc
index 125892afe..6ffc36d20 100644
--- a/cpp/src/arrow/python/arrow_to_pandas.cc
+++ b/cpp/src/arrow/python/arrow_to_pandas.cc
@@ -639,11 +639,11 @@ static Status ConvertTimes(PandasOptions options, const 
ChunkedArray& data,
 static Status ConvertDecimals(PandasOptions options, const ChunkedArray& data,
                               PyObject** out_values) {
   PyAcquireGIL lock;
-  OwnedRef decimal_ref;
-  OwnedRef Decimal_ref;
-  RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_ref));
-  RETURN_NOT_OK(internal::ImportFromModule(decimal_ref, "Decimal", 
&Decimal_ref));
-  PyObject* Decimal = Decimal_ref.obj();
+  OwnedRef decimal;
+  OwnedRef Decimal;
+  RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
+  RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
+  PyObject* decimal_constructor = Decimal.obj();
 
   for (int c = 0; c < data.num_chunks(); c++) {
     const auto& arr = static_cast<const 
arrow::Decimal128Array&>(*data.chunk(c));
@@ -653,7 +653,8 @@ static Status ConvertDecimals(PandasOptions options, const 
ChunkedArray& data,
         Py_INCREF(Py_None);
         *out_values++ = Py_None;
       } else {
-        *out_values++ = internal::DecimalFromString(Decimal, 
arr.FormatValue(i));
+        *out_values++ =
+            internal::DecimalFromString(decimal_constructor, 
arr.FormatValue(i));
         RETURN_IF_PYERROR();
       }
     }
diff --git a/cpp/src/arrow/python/builtin_convert.cc 
b/cpp/src/arrow/python/builtin_convert.cc
index a286c6bd5..891793cc9 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -76,7 +76,18 @@ class ScalarVisitor {
         timestamp_count_(0),
         float_count_(0),
         binary_count_(0),
-        unicode_count_(0) {}
+        unicode_count_(0),
+        decimal_count_(0),
+        max_decimal_metadata_(std::numeric_limits<int32_t>::min(),
+                              std::numeric_limits<int32_t>::min()),
+        decimal_type_() {
+    OwnedRefNoGIL decimal_module;
+    Status status = ::arrow::py::internal::ImportModule("decimal", 
&decimal_module);
+    DCHECK(status.ok()) << "Unable to import decimal module";
+    status = ::arrow::py::internal::ImportFromModule(decimal_module, "Decimal",
+                                                     &decimal_type_);
+    DCHECK(status.ok()) << "Unable to import decimal.Decimal";
+  }
 
   Status Visit(PyObject* obj) {
     ++total_count_;
@@ -111,10 +122,16 @@ class ScalarVisitor {
         ss << type->ToString();
         return Status::Invalid(ss.str());
       }
+    } else if (PyObject_IsInstance(obj, decimal_type_.obj())) {
+      // Don't infer anything if we encounter a Decimal('nan')
+      if (!internal::PyDecimal_ISNAN(obj)) {
+        RETURN_NOT_OK(max_decimal_metadata_.Update(obj));
+      }
+      ++decimal_count_;
     } else {
       // TODO(wesm): accumulate error information somewhere
       static std::string supported_types =
-          "bool, float, integer, date, datetime, bytes, unicode";
+          "bool, float, integer, date, datetime, bytes, unicode, decimal";
       std::stringstream ss;
       ss << "Error inferring Arrow data type for collection of Python objects. 
";
       RETURN_NOT_OK(InvalidConversion(obj, supported_types, &ss));
@@ -125,7 +142,9 @@ class ScalarVisitor {
 
   std::shared_ptr<DataType> GetType() {
     // TODO(wesm): handling mixed-type cases
-    if (float_count_) {
+    if (decimal_count_) {
+      return decimal(max_decimal_metadata_.precision(), 
max_decimal_metadata_.scale());
+    } else if (float_count_) {
       return float64();
     } else if (int_count_) {
       // TODO(wesm): tighter type later
@@ -157,8 +176,13 @@ class ScalarVisitor {
   int64_t float_count_;
   int64_t binary_count_;
   int64_t unicode_count_;
+  int64_t decimal_count_;
+
+  internal::DecimalMetadata max_decimal_metadata_;
+
   // Place to accumulate errors
   // std::vector<Status> errors_;
+  OwnedRefNoGIL decimal_type_;
 };
 
 static constexpr int MAX_NESTING_LEVELS = 32;
@@ -379,17 +403,14 @@ class TypedConverter : public SeqConverter {
   BuilderType* typed_builder_;
 };
 
-// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull()
+// We use the CRTP trick here to devirtualize the AppendItem(), AppendNull(), 
and IsNull()
 // method calls.
 template <typename BuilderType, class Derived>
 class TypedConverterVisitor : public TypedConverter<BuilderType> {
  public:
   Status AppendSingle(PyObject* obj) override {
-    if (obj == Py_None) {
-      return static_cast<Derived*>(this)->AppendNull();
-    } else {
-      return static_cast<Derived*>(this)->AppendItem(obj);
-    }
+    auto self = static_cast<Derived*>(this);
+    return self->IsNull(obj) ? self->AppendNull() : self->AppendItem(obj);
   }
 
   Status AppendMultiple(PyObject* obj, int64_t size) override {
@@ -409,6 +430,7 @@ class TypedConverterVisitor : public 
TypedConverter<BuilderType> {
 
   // Append a missing item (default implementation)
   Status AppendNull() { return this->typed_builder_->AppendNull(); }
+  bool IsNull(PyObject* obj) const { return obj == Py_None; }
 };
 
 class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter> 
{
@@ -830,12 +852,16 @@ class DecimalConverter
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
-    /// TODO(phillipc): Check for nan?
     Decimal128 value;
     const auto& type = static_cast<const 
DecimalType&>(*typed_builder_->type());
     RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value));
     return typed_builder_->Append(value);
   }
+
+  bool IsNull(PyObject* obj) const {
+    return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) 
||
+           (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
+  }
 };
 
 // Dynamic constructor for sequence converters
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index df1db9991..1c83205e8 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -61,6 +61,7 @@ namespace internal {
 Status ImportModule(const std::string& module_name, OwnedRef* ref) {
   PyObject* module = PyImport_ImportModule(module_name.c_str());
   RETURN_IF_PYERROR();
+  DCHECK_NE(module, nullptr) << "unable to import the " << module_name << " 
module";
   ref->reset(module);
   return Status::OK();
 }
@@ -71,6 +72,7 @@ Status ImportFromModule(const OwnedRef& module, const 
std::string& name, OwnedRe
 
   PyObject* attr = PyObject_GetAttrString(module.obj(), name.c_str());
   RETURN_IF_PYERROR();
+  DCHECK_NE(attr, nullptr) << "unable to import the " << name << " object";
   ref->reset(attr);
   return Status::OK();
 }
@@ -93,8 +95,13 @@ Status PythonDecimalToString(PyObject* python_decimal, 
std::string* out) {
   return Status::OK();
 }
 
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* 
precision,
-                                     int32_t* scale) {
+// \brief Infer the precision and scale of a Python decimal.Decimal instance
+// \param python_decimal[in] An instance of decimal.Decimal
+// \param precision[out] The value of the inferred precision
+// \param scale[out] The value of the inferred scale
+// \return The status of the operation
+static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* 
precision,
+                                            int32_t* scale) {
   DCHECK_NE(python_decimal, NULLPTR);
   DCHECK_NE(precision, NULLPTR);
   DCHECK_NE(scale, NULLPTR);
@@ -193,6 +200,53 @@ Status UInt64FromPythonInt(PyObject* obj, uint64_t* out) {
   return Status::OK();
 }
 
+bool PyFloat_isnan(PyObject* obj) {
+  return PyFloat_Check(obj) && std::isnan(PyFloat_AS_DOUBLE(obj));
+}
+
+bool PyDecimal_Check(PyObject* obj) {
+  // TODO(phillipc): Is this expensive?
+  OwnedRef Decimal;
+  OwnedRef decimal;
+  Status status = ImportModule("decimal", &decimal);
+  DCHECK(status.ok()) << "Error during import of the decimal module";
+  status = ImportFromModule(decimal, "Decimal", &Decimal);
+  DCHECK(status.ok())
+      << "Error during import of the Decimal object from the decimal module";
+  const int32_t result = PyObject_IsInstance(obj, Decimal.obj());
+  DCHECK_NE(result, -1) << " error during PyObject_IsInstance check";
+  return result == 1;
+}
+
+bool PyDecimal_ISNAN(PyObject* obj) {
+  DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal";
+  OwnedRef is_nan(PyObject_CallMethod(obj, "is_nan", ""));
+  return PyObject_IsTrue(is_nan.obj()) == 1;
+}
+
+DecimalMetadata::DecimalMetadata()
+    : precision_(std::numeric_limits<int32_t>::min()),
+      scale_(std::numeric_limits<int32_t>::min()) {}
+
+DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale)
+    : precision_(precision), scale_(scale) {}
+
+Status DecimalMetadata::Update(int32_t suggested_precision, int32_t 
suggested_scale) {
+  precision_ = std::max(precision_, suggested_precision);
+  scale_ = std::max(scale_, suggested_scale);
+  return Status::OK();
+}
+
+Status DecimalMetadata::Update(PyObject* object) {
+  DCHECK(PyDecimal_Check(object)) << "Object is not a Python Decimal";
+  DCHECK(!PyDecimal_ISNAN(object))
+      << "Decimal object cannot be NAN when inferring precision and scale";
+  int32_t precision;
+  int32_t scale;
+  RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale));
+  return Update(precision, scale);
+}
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c0171aa2f..d39c62824 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -36,29 +36,89 @@ namespace py {
 
 class OwnedRef;
 
-ARROW_EXPORT
-std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+// \brief Get an arrow DataType instance from Arrow's Type::type enum
+// \param[in] type One of the values of Arrow's Type::type enum
+// \return A shared pointer to DataType
+ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
 
 namespace internal {
 
+// \brief Import a Python module
+// \param[in] module_name The name of the module
+// \param[out] ref The OwnedRef containing the module PyObject*
 Status ImportModule(const std::string& module_name, OwnedRef* ref);
-Status ImportFromModule(const OwnedRef& module, const std::string& module_name,
-                        OwnedRef* ref);
 
+// \brief Import an object from a Python module
+// \param[in] module A Python module
+// \param[in] name The name of the object to import
+// \param[out] ref The OwnedRef containing the \c name attribute of the Python 
module \c
+// module
+Status ImportFromModule(const OwnedRef& module, const std::string& name, 
OwnedRef* ref);
+
+// \brief Convert a Python Decimal object to a C++ string
+// \param[in] python_decimal A Python decimal.Decimal instance
+// \param[out] The string representation of the Python Decimal instance
+// \return The status of the operation
 Status PythonDecimalToString(PyObject* python_decimal, std::string* out);
 
-Status InferDecimalPrecisionAndScale(PyObject* python_decimal,
-                                     int32_t* precision = NULLPTR,
-                                     int32_t* scale = NULLPTR);
-
+// \brief Convert a C++ std::string to a Python Decimal instance
+// \param[in] decimal_constructor The decimal type object
+// \param[in] decimal_string A decimal string
+// \return An instance of decimal.Decimal
 PyObject* DecimalFromString(PyObject* decimal_constructor,
                             const std::string& decimal_string);
+
+// \brief Convert a Python decimal to an Arrow Decimal128 object
+// \param[in] python_decimal A Python decimal.Decimal instance
+// \param[in] arrow_type An instance of arrow::DecimalType
+// \param[out] out A pointer to a Decimal128
+// \return The status of the operation
 Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& 
arrow_type,
                                 Decimal128* out);
+
+// \brief Check whether obj is an integer, independent of Python versions.
 bool IsPyInteger(PyObject* obj);
 
+// \brief Check whether obj is nan
+bool PyFloat_isnan(PyObject* obj);
+
+// \brief Check whether obj is an instance of Decimal
+bool PyDecimal_Check(PyObject* obj);
+
+// \brief Check whether obj is nan. This function will abort the program if 
the argument
+// is not a Decimal instance
+bool PyDecimal_ISNAN(PyObject* obj);
+
+// \brief Convert a Python integer into an unsigned 64-bit integer
+// \param[in] obj A Python integer
+// \param[out] out A pointer to a C uint64_t to hold the result of the 
conversion
+// \return The status of the operation
 Status UInt64FromPythonInt(PyObject* obj, uint64_t* out);
 
+// \brief Helper class to track and update the precision and scale of a decimal
+class DecimalMetadata {
+ public:
+  DecimalMetadata();
+  DecimalMetadata(int32_t precision, int32_t scale);
+
+  // \brief Adjust the precision and scale of a decimal type given a new 
precision and a
+  // new scale \param[in] suggested_precision A candidate precision \param[in]
+  // suggested_scale A candidate scale \return The status of the operation
+  Status Update(int32_t suggested_precision, int32_t suggested_scale);
+
+  // \brief A convenient interface for updating the precision and scale based 
on a Python
+  // Decimal object \param object A Python Decimal object \return The status 
of the
+  // operation
+  Status Update(PyObject* object);
+
+  int32_t precision() const { return precision_; }
+  int32_t scale() const { return scale_; }
+
+ private:
+  int32_t precision_;
+  int32_t scale_;
+};
+
 }  // namespace internal
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/python/numpy-internal.h 
b/cpp/src/arrow/python/numpy-internal.h
index 6c9c871a1..8d4308065 100644
--- a/cpp/src/arrow/python/numpy-internal.h
+++ b/cpp/src/arrow/python/numpy-internal.h
@@ -54,6 +54,9 @@ class Ndarray1DIndexer {
 
   T* data() const { return data_; }
 
+  T* begin() const { return data(); }
+  T* end() const { return begin() + size() * stride_; }
+
   bool is_strided() const { return stride_ == 1; }
 
   T& operator[](size_type index) { return data_[index * stride_]; }
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc 
b/cpp/src/arrow/python/numpy_to_arrow.cc
index 23418ad92..79a911ba4 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -67,17 +67,9 @@ constexpr int64_t kBinaryMemoryLimit = 
std::numeric_limits<int32_t>::max();
 
 namespace {
 
-inline bool PyFloat_isnan(PyObject* obj) {
-  if (PyFloat_Check(obj)) {
-    double val = PyFloat_AS_DOUBLE(obj);
-    return val != val;
-  } else {
-    return false;
-  }
-}
-
 inline bool PandasObjectIsNull(PyObject* obj) {
-  return obj == Py_None || obj == numpy_nan || PyFloat_isnan(obj);
+  return obj == Py_None || obj == numpy_nan || internal::PyFloat_isnan(obj) ||
+         (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj));
 }
 
 inline bool PyObject_is_string(PyObject* obj) {
@@ -88,10 +80,8 @@ inline bool PyObject_is_string(PyObject* obj) {
 #endif
 }
 
-inline bool PyObject_is_float(PyObject* obj) { return PyFloat_Check(obj); }
-
 inline bool PyObject_is_integer(PyObject* obj) {
-  return (!PyBool_Check(obj)) && PyArray_IsIntegerScalar(obj);
+  return !PyBool_Check(obj) && PyArray_IsIntegerScalar(obj);
 }
 
 template <int TYPE>
@@ -743,59 +733,38 @@ Status NumPyConverter::ConvertDates() {
 Status NumPyConverter::ConvertDecimals() {
   PyAcquireGIL lock;
 
-  // Import the decimal module and Decimal class
-  OwnedRef decimal;
-  OwnedRef Decimal;
-  RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
-  RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
-
+  internal::DecimalMetadata max_decimal_metadata;
   Ndarray1DIndexer<PyObject*> objects(arr_);
-  PyObject* object = objects[0];
 
   if (type_ == NULLPTR) {
-    int32_t precision;
-    int32_t desired_scale;
-
-    int32_t tmp_precision;
-    int32_t tmp_scale;
-
-    RETURN_NOT_OK(
-        internal::InferDecimalPrecisionAndScale(objects[0], &precision, 
&desired_scale));
-
-    for (int64_t i = 1; i < length_; ++i) {
-      RETURN_NOT_OK(internal::InferDecimalPrecisionAndScale(objects[i], 
&tmp_precision,
-                                                            &tmp_scale));
-      precision = std::max(precision, tmp_precision);
-
-      if (std::abs(desired_scale) < std::abs(tmp_scale)) {
-        desired_scale = tmp_scale;
-      }
+    for (PyObject* object : objects) {
+      RETURN_NOT_OK(max_decimal_metadata.Update(object));
     }
 
-    type_ = ::arrow::decimal(precision, desired_scale);
+    type_ =
+        ::arrow::decimal(max_decimal_metadata.precision(), 
max_decimal_metadata.scale());
   }
 
   Decimal128Builder builder(type_, pool_);
   RETURN_NOT_OK(builder.Resize(length_));
 
   const auto& decimal_type = static_cast<const DecimalType&>(*type_);
-  PyObject* Decimal_type_object = Decimal.obj();
-
-  for (int64_t i = 0; i < length_; ++i) {
-    object = objects[i];
 
-    if (PyObject_IsInstance(object, Decimal_type_object)) {
-      Decimal128 value;
-      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, 
&value));
-      RETURN_NOT_OK(builder.Append(value));
-    } else if (PandasObjectIsNull(object)) {
-      RETURN_NOT_OK(builder.AppendNull());
-    } else {
+  for (PyObject* object : objects) {
+    if (ARROW_PREDICT_FALSE(!internal::PyDecimal_Check(object))) {
       std::stringstream ss;
       ss << "Error converting from Python objects to Decimal: ";
       RETURN_NOT_OK(InvalidConversion(object, "decimal.Decimal", &ss));
       return Status::Invalid(ss.str());
     }
+
+    if (PandasObjectIsNull(object)) {
+      RETURN_NOT_OK(builder.AppendNull());
+    } else {
+      Decimal128 value;
+      RETURN_NOT_OK(internal::DecimalFromPythonDecimal(object, decimal_type, 
&value));
+      RETURN_NOT_OK(builder.Append(value));
+    }
   }
   return PushBuilderResult(&builder);
 }
@@ -1045,18 +1014,13 @@ Status NumPyConverter::ConvertObjectsInfer() {
   objects.Init(arr_);
   PyDateTime_IMPORT;
 
-  OwnedRef decimal;
-  OwnedRef Decimal;
-  RETURN_NOT_OK(internal::ImportModule("decimal", &decimal));
-  RETURN_NOT_OK(internal::ImportFromModule(decimal, "Decimal", &Decimal));
-
   for (int64_t i = 0; i < length_; ++i) {
     PyObject* obj = objects[i];
     if (PandasObjectIsNull(obj)) {
       continue;
     } else if (PyObject_is_string(obj)) {
       return ConvertObjectStrings();
-    } else if (PyObject_is_float(obj)) {
+    } else if (PyFloat_Check(obj)) {
       return ConvertObjectFloats();
     } else if (PyBool_Check(obj)) {
       return ConvertBooleans();
@@ -1069,7 +1033,7 @@ Status NumPyConverter::ConvertObjectsInfer() {
       return ConvertDateTimes();
     } else if (PyTime_Check(obj)) {
       return ConvertTimes();
-    } else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj())) 
{
+    } else if (internal::PyDecimal_Check(obj)) {
       return ConvertDecimals();
     } else if (PyList_Check(obj)) {
       std::shared_ptr<DataType> inferred_type;
diff --git a/cpp/src/arrow/python/python-test.cc 
b/cpp/src/arrow/python/python-test.cc
index b76caaece..f0bd015f0 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "gtest/gtest.h"
-
 #include <memory>
 
+#include <gtest/gtest.h>
+
 #include "arrow/python/platform.h"
 
 #include "arrow/array.h"
@@ -33,15 +33,6 @@
 namespace arrow {
 namespace py {
 
-TEST(PyBuffer, InvalidInputObject) {
-  std::shared_ptr<Buffer> res;
-  PyObject* input = Py_None;
-  auto old_refcnt = Py_REFCNT(input);
-  ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res));
-  PyErr_Clear();
-  ASSERT_EQ(old_refcnt, Py_REFCNT(input));
-}
-
 TEST(OwnedRef, TestMoves) {
   PyAcquireGIL lock;
   std::vector<OwnedRef> vec;
@@ -78,12 +69,13 @@ TEST(OwnedRefNoGIL, TestMoves) {
 
 class DecimalTest : public ::testing::Test {
  public:
-  DecimalTest() : lock_(), decimal_module_(), decimal_constructor_() {
-    auto s = internal::ImportModule("decimal", &decimal_module_);
+  DecimalTest() : lock_(), decimal_constructor_() {
+    OwnedRef decimal_module;
+    auto s = internal::ImportModule("decimal", &decimal_module);
     DCHECK(s.ok()) << s.message();
-    DCHECK_NE(decimal_module_.obj(), NULLPTR);
+    DCHECK_NE(decimal_module.obj(), NULLPTR);
 
-    s = internal::ImportFromModule(decimal_module_, "Decimal", 
&decimal_constructor_);
+    s = internal::ImportFromModule(decimal_module, "Decimal", 
&decimal_constructor_);
     DCHECK(s.ok()) << s.message();
 
     DCHECK_NE(decimal_constructor_.obj(), NULLPTR);
@@ -94,16 +86,26 @@ class DecimalTest : public ::testing::Test {
     return ref;
   }
 
+  PyObject* decimal_constructor() const { return decimal_constructor_.obj(); }
+
  private:
   PyAcquireGIL lock_;
-  OwnedRef decimal_module_;
   OwnedRef decimal_constructor_;
 };
 
+TEST(PyBuffer, InvalidInputObject) {
+  std::shared_ptr<Buffer> res;
+  PyObject* input = Py_None;
+  auto old_refcnt = Py_REFCNT(input);
+  ASSERT_RAISES(PythonError, PyBuffer::FromPyObject(input, &res));
+  PyErr_Clear();
+  ASSERT_EQ(old_refcnt, Py_REFCNT(input));
+}
+
 TEST_F(DecimalTest, TestPythonDecimalToString) {
   std::string decimal_string("-39402950693754869342983");
 
-  OwnedRef python_object = this->CreatePythonDecimal(decimal_string);
+  OwnedRef python_object(this->CreatePythonDecimal(decimal_string));
   ASSERT_NE(python_object.obj(), nullptr);
 
   std::string string_result;
@@ -114,35 +116,29 @@ TEST_F(DecimalTest, TestInferPrecisionAndScale) {
   std::string decimal_string("-394029506937548693.42983");
   OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
 
-  int32_t precision;
-  int32_t scale;
-
-  ASSERT_OK(
-      internal::InferDecimalPrecisionAndScale(python_decimal.obj(), 
&precision, &scale));
+  internal::DecimalMetadata metadata;
+  ASSERT_OK(metadata.Update(python_decimal.obj()));
 
   const auto expected_precision =
       static_cast<int32_t>(decimal_string.size() - 2);  // 1 for -, 1 for .
   const int32_t expected_scale = 5;
 
-  ASSERT_EQ(expected_precision, precision);
-  ASSERT_EQ(expected_scale, scale);
+  ASSERT_EQ(expected_precision, metadata.precision());
+  ASSERT_EQ(expected_scale, metadata.scale());
 }
 
 TEST_F(DecimalTest, TestInferPrecisionAndNegativeScale) {
   std::string decimal_string("-3.94042983E+10");
   OwnedRef python_decimal(this->CreatePythonDecimal(decimal_string));
 
-  int32_t precision;
-  int32_t scale;
-
-  ASSERT_OK(
-      internal::InferDecimalPrecisionAndScale(python_decimal.obj(), 
&precision, &scale));
+  internal::DecimalMetadata metadata;
+  ASSERT_OK(metadata.Update(python_decimal.obj()));
 
   const auto expected_precision = 9;
   const int32_t expected_scale = -2;
 
-  ASSERT_EQ(expected_precision, precision);
-  ASSERT_EQ(expected_scale, scale);
+  ASSERT_EQ(expected_precision, metadata.precision());
+  ASSERT_EQ(expected_scale, metadata.scale());
 }
 
 TEST(PandasConversionTest, TestObjectBlockWriteFails) {
@@ -226,14 +222,12 @@ TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) 
{
 
 TEST_F(DecimalTest, TestOverflowFails) {
   Decimal128 value;
-  int32_t precision;
-  int32_t scale;
   OwnedRef python_decimal(
       this->CreatePythonDecimal("9999999999999999999999999999999999999.9"));
-  ASSERT_OK(
-      internal::InferDecimalPrecisionAndScale(python_decimal.obj(), 
&precision, &scale));
-  ASSERT_EQ(38, precision);
-  ASSERT_EQ(1, scale);
+  internal::DecimalMetadata metadata;
+  ASSERT_OK(metadata.Update(python_decimal.obj()));
+  ASSERT_EQ(38, metadata.precision());
+  ASSERT_EQ(1, metadata.scale());
 
   auto type = ::arrow::decimal(38, 38);
   const auto& decimal_type = static_cast<const DecimalType&>(*type);
@@ -241,5 +235,63 @@ TEST_F(DecimalTest, TestOverflowFails) {
                                                             decimal_type, 
&value));
 }
 
+TEST_F(DecimalTest, TestNoneAndNaN) {
+  OwnedRef list_ref(PyList_New(4));
+  PyObject* list = list_ref.obj();
+
+  ASSERT_NE(list, nullptr);
+
+  PyObject* constructor = this->decimal_constructor();
+  PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234");
+  ASSERT_NE(decimal_value, nullptr);
+
+  Py_INCREF(Py_None);
+  PyObject* missing_value1 = Py_None;
+  ASSERT_NE(missing_value1, nullptr);
+
+  PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN);
+  ASSERT_NE(missing_value2, nullptr);
+
+  PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan");
+  ASSERT_NE(missing_value3, nullptr);
+
+  // This steals a reference to each object, so we don't need to decref them 
later,
+  // just the list
+  ASSERT_EQ(PyList_SetItem(list, 0, decimal_value), 0);
+  ASSERT_EQ(PyList_SetItem(list, 1, missing_value1), 0);
+  ASSERT_EQ(PyList_SetItem(list, 2, missing_value2), 0);
+  ASSERT_EQ(PyList_SetItem(list, 3, missing_value3), 0);
+
+  MemoryPool* pool = default_memory_pool();
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(ConvertPySequence(list, pool, &arr));
+  ASSERT_TRUE(arr->IsValid(0));
+  ASSERT_TRUE(arr->IsNull(1));
+  ASSERT_TRUE(arr->IsNull(2));
+  ASSERT_TRUE(arr->IsNull(3));
+}
+
+TEST_F(DecimalTest, TestMixedPrecisionAndScale) {
+  PyObject* value2 = internal::DecimalFromString(this->decimal_constructor(), 
"0.001");
+  PyObject* value1 = internal::DecimalFromString(this->decimal_constructor(), 
"1.01E5");
+
+  OwnedRef list_ref(PyList_New(2));
+  PyObject* list = list_ref.obj();
+
+  ASSERT_NE(list, nullptr);
+  ASSERT_EQ(PyList_SetItem(list, 0, value1), 0);
+  ASSERT_EQ(PyList_SetItem(list, 1, value2), 0);
+
+  MemoryPool* pool = default_memory_pool();
+  std::shared_ptr<Array> arr;
+  ASSERT_OK(ConvertPySequence(list, pool, &arr));
+  const auto& type = static_cast<const DecimalType&>(*arr->type());
+
+  int32_t expected_precision = 9;
+  int32_t expected_scale = 3;
+  ASSERT_EQ(expected_precision, type.precision());
+  ASSERT_EQ(expected_scale, type.scale());
+}
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal-test.cc 
b/cpp/src/arrow/util/decimal-test.cc
index e4406747d..f3f348cf6 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -14,7 +14,6 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-//
 
 #include <cstdint>
 #include <string>
diff --git a/python/pyarrow/tests/test_convert_builtin.py 
b/python/pyarrow/tests/test_convert_builtin.py
index 8423ff00b..516431a74 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -639,3 +639,10 @@ def test_structarray_from_arrays_coerce():
         pa.StructArray.from_arrays(arrays)
 
     assert result.equals(expected)
+
+
+def test_decimal_array_with_none_and_nan():
+    values = [decimal.Decimal('1.234'), None, np.nan, decimal.Decimal('nan')]
+    array = pa.array(values)
+    assert array.type == pa.decimal128(4, 3)
+    assert array.to_pylist() == values[:2] + [None, None]


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Decimal conversion not working for NaN values
> ------------------------------------------------------
>
>                 Key: ARROW-2145
>                 URL: https://issues.apache.org/jira/browse/ARROW-2145
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++, Python
>    Affects Versions: 0.8.0
>            Reporter: Antony Mayi
>            Assignee: Phillip Cloud
>            Priority: Major
>              Labels: pull-request-available
>
> {code:python}
> import pyarrow as pa
> import pandas as pd
> import decimal
> pa.Table.from_pandas(pd.DataFrame({'a': [decimal.Decimal('1.1'), 
> decimal.Decimal('NaN')]}))
> {code}
> throws following exception:
> {code}
> Traceback (most recent call last):
>   File "<stdin>", line 1, in <module>
>   File "pyarrow/table.pxi", line 875, in pyarrow.lib.Table.from_pandas 
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:44927)
>   File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 350, in 
> dataframe_to_arrays
>     convert_types)]
>   File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 349, in 
> <listcomp>
>     for c, t in zip(columns_to_convert,
>   File "/lib/python3.6/site-packages/pyarrow/pandas_compat.py", line 345, in 
> convert_column
>     return pa.array(col, from_pandas=True, type=ty)
>   File "pyarrow/array.pxi", line 170, in pyarrow.lib.array 
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:29224)
>   File "pyarrow/array.pxi", line 70, in pyarrow.lib._ndarray_to_array 
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:28465)
>   File "pyarrow/error.pxi", line 98, in pyarrow.lib.check_status 
> (/arrow/python/build/temp.linux-x86_64-3.6/lib.cxx:9068)
> pyarrow.lib.ArrowException: Unknown error: an integer is required (got type 
> str)
> {code}
> Same problem with other special decimal values like {{infinity}}.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Commented] (ARROW-2145) [Python] Decimal conversion not working for NaN values

Reply via email to