C++] Decimal Values with too-high precision are multiplied by 100

wesm Wed, 21 Feb 2018 15:14:00 -0800

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


commit 9fefc23519d7f4dbd796807562f5302cceda44d7
Author: Phillip Cloud <cpcl...@gmail.com>
AuthorDate: Wed Feb 21 10:42:11 2018 -0800

    ARROW-2162: [Python/C++] Decimal Values with too-high precision are 
multiplied by 100
    
    Author: Phillip Cloud <cpcl...@gmail.com>
    
    Closes #1619 from cpcloud/ARROW-2162 and squashes the following commits:
    
    57f95eee [Phillip Cloud] ARROW-2162: [Python/C++] Decimal Values with 
too-high precision are multiplied by 100
---
 cpp/src/arrow/python/python-test.cc         | 40 +++++++++++++++++++++++++++++
 cpp/src/arrow/util/decimal.cc               | 38 ++++++++++++++++++++-------
 python/pyarrow/tests/test_convert_pandas.py | 11 ++++++++
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/python/python-test.cc 
b/cpp/src/arrow/python/python-test.cc
index a2b832b..b76caae 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -201,5 +201,45 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
   ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr));
 }
 
+TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
+  // We fail when truncating values that would lose data if cast to a decimal 
type with
+  // lower scale
+  Decimal128 value;
+  OwnedRef python_decimal(this->CreatePythonDecimal("1.001"));
+  auto type = ::arrow::decimal(10, 2);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_RAISES(Invalid, 
internal::DecimalFromPythonDecimal(python_decimal.obj(),
+                                                            decimal_type, 
&value));
+}
+
+TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) {
+  // We allow truncation of values that do not lose precision when dividing by 
10 * the
+  // difference between the scales, e.g., 1.000 -> 1.00
+  Decimal128 value;
+  OwnedRef python_decimal(this->CreatePythonDecimal("1.000"));
+  auto type = ::arrow::decimal(10, 2);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_OK(
+      internal::DecimalFromPythonDecimal(python_decimal.obj(), decimal_type, 
&value));
+  ASSERT_EQ(100, value.low_bits());
+}
+
+TEST_F(DecimalTest, TestOverflowFails) {
+  Decimal128 value;
+  int32_t precision;
+  int32_t scale;
+  OwnedRef python_decimal(
+      this->CreatePythonDecimal("9999999999999999999999999999999999999.9"));
+  ASSERT_OK(
+      internal::InferDecimalPrecisionAndScale(python_decimal.obj(), 
&precision, &scale));
+  ASSERT_EQ(38, precision);
+  ASSERT_EQ(1, scale);
+
+  auto type = ::arrow::decimal(38, 38);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_RAISES(Invalid, 
internal::DecimalFromPythonDecimal(python_decimal.obj(),
+                                                            decimal_type, 
&value));
+}
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index e999854..a3c8cda 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -854,26 +854,46 @@ static const Decimal128 ScaleMultipliers[] = {
     Decimal128("10000000000000000000000000000000000000"),
     Decimal128("100000000000000000000000000000000000000")};
 
+static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t 
delta_scale,
+                                      int32_t abs_delta_scale, Decimal128* 
result) {
+  Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]);
+
+  if (delta_scale < 0) {
+    DCHECK_NE(multiplier, 0);
+    Decimal128 remainder;
+    Status status = value.Divide(multiplier, result, &remainder);
+    DCHECK(status.ok()) << status.message();
+    return remainder != 0;
+  }
+
+  *result = value * multiplier;
+  return *result < value;
+}
+
 Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
                            Decimal128* out) const {
-  DCHECK_NE(out, NULLPTR);
-  DCHECK_NE(original_scale, new_scale);
-  const int32_t delta_scale = original_scale - new_scale;
+  DCHECK_NE(out, NULLPTR) << "out is nullptr";
+  DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale";
+
+  const int32_t delta_scale = new_scale - original_scale;
   const int32_t abs_delta_scale = std::abs(delta_scale);
+
   DCHECK_GE(abs_delta_scale, 1);
   DCHECK_LE(abs_delta_scale, 38);
 
-  const Decimal128 scale_multiplier = ScaleMultipliers[abs_delta_scale];
-  const Decimal128 result = *this * scale_multiplier;
+  Decimal128 result(*this);
+  const bool rescale_would_cause_data_loss =
+      RescaleWouldCauseDataLoss(result, delta_scale, abs_delta_scale, out);
 
-  if (ARROW_PREDICT_FALSE(result < *this)) {
+  // Fail if we overflow or truncate
+  if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
     std::stringstream buf;
-    buf << "Rescaling decimal value from original scale " << original_scale
-        << " to new scale " << new_scale << " would cause overflow";
+    buf << "Rescaling decimal value " << ToString(original_scale)
+        << " from original scale of " << original_scale << " to new scale of "
+        << new_scale << " would cause data loss";
     return Status::Invalid(buf.str());
   }
 
-  *out = result;
   return Status::OK();
 }
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index 92f6c25..6b62622 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1132,6 +1132,17 @@ class TestConvertDecimalTypes(object):
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)
 
+    def test_decimal_fails_with_truncation(self):
+        data1 = [decimal.Decimal('1.234')]
+        type1 = pa.decimal128(10, 2)
+        with pytest.raises(pa.ArrowException):
+            pa.array(data1, type=type1)
+
+        data2 = [decimal.Decimal('1.2345')]
+        type2 = pa.decimal128(10, 3)
+        with pytest.raises(pa.ArrowException):
+            pa.array(data2, type=type2)
+
 
 class TestListTypes(object):
     """

-- 
To stop receiving notification emails like this one, please contact
w...@apache.org.

[arrow] 08/08: ARROW-2162: [Python/C++] Decimal Values with too-high precision are multiplied by 100

Reply via email to