[jira] [Commented] (ARROW-2162) [Python/C++] Decimal Values with too-high precision are multiplied by 100

ASF GitHub Bot (JIRA) Wed, 21 Feb 2018 10:48:07 -0800

    [ 
https://issues.apache.org/jira/browse/ARROW-2162?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16371838#comment-16371838
 ]


ASF GitHub Bot commented on ARROW-2162:
---------------------------------------

cpcloud closed pull request #1619: ARROW-2162: [Python/C++] Decimal Values with 
too-high precision are multiplied by 100
URL: https://github.com/apache/arrow/pull/1619
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/python/python-test.cc 
b/cpp/src/arrow/python/python-test.cc
index a2b832bdb..b76caaece 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -201,5 +201,45 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
   ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr));
 }
 
+TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
+  // We fail when truncating values that would lose data if cast to a decimal 
type with
+  // lower scale
+  Decimal128 value;
+  OwnedRef python_decimal(this->CreatePythonDecimal("1.001"));
+  auto type = ::arrow::decimal(10, 2);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_RAISES(Invalid, 
internal::DecimalFromPythonDecimal(python_decimal.obj(),
+                                                            decimal_type, 
&value));
+}
+
+TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) {
+  // We allow truncation of values that do not lose precision when dividing by 
10 * the
+  // difference between the scales, e.g., 1.000 -> 1.00
+  Decimal128 value;
+  OwnedRef python_decimal(this->CreatePythonDecimal("1.000"));
+  auto type = ::arrow::decimal(10, 2);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_OK(
+      internal::DecimalFromPythonDecimal(python_decimal.obj(), decimal_type, 
&value));
+  ASSERT_EQ(100, value.low_bits());
+}
+
+TEST_F(DecimalTest, TestOverflowFails) {
+  Decimal128 value;
+  int32_t precision;
+  int32_t scale;
+  OwnedRef python_decimal(
+      this->CreatePythonDecimal("9999999999999999999999999999999999999.9"));
+  ASSERT_OK(
+      internal::InferDecimalPrecisionAndScale(python_decimal.obj(), 
&precision, &scale));
+  ASSERT_EQ(38, precision);
+  ASSERT_EQ(1, scale);
+
+  auto type = ::arrow::decimal(38, 38);
+  const auto& decimal_type = static_cast<const DecimalType&>(*type);
+  ASSERT_RAISES(Invalid, 
internal::DecimalFromPythonDecimal(python_decimal.obj(),
+                                                            decimal_type, 
&value));
+}
+
 }  // namespace py
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index e999854b1..a3c8cda76 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -854,26 +854,46 @@ static const Decimal128 ScaleMultipliers[] = {
     Decimal128("10000000000000000000000000000000000000"),
     Decimal128("100000000000000000000000000000000000000")};
 
+static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t 
delta_scale,
+                                      int32_t abs_delta_scale, Decimal128* 
result) {
+  Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]);
+
+  if (delta_scale < 0) {
+    DCHECK_NE(multiplier, 0);
+    Decimal128 remainder;
+    Status status = value.Divide(multiplier, result, &remainder);
+    DCHECK(status.ok()) << status.message();
+    return remainder != 0;
+  }
+
+  *result = value * multiplier;
+  return *result < value;
+}
+
 Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale,
                            Decimal128* out) const {
-  DCHECK_NE(out, NULLPTR);
-  DCHECK_NE(original_scale, new_scale);
-  const int32_t delta_scale = original_scale - new_scale;
+  DCHECK_NE(out, NULLPTR) << "out is nullptr";
+  DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale";
+
+  const int32_t delta_scale = new_scale - original_scale;
   const int32_t abs_delta_scale = std::abs(delta_scale);
+
   DCHECK_GE(abs_delta_scale, 1);
   DCHECK_LE(abs_delta_scale, 38);
 
-  const Decimal128 scale_multiplier = ScaleMultipliers[abs_delta_scale];
-  const Decimal128 result = *this * scale_multiplier;
+  Decimal128 result(*this);
+  const bool rescale_would_cause_data_loss =
+      RescaleWouldCauseDataLoss(result, delta_scale, abs_delta_scale, out);
 
-  if (ARROW_PREDICT_FALSE(result < *this)) {
+  // Fail if we overflow or truncate
+  if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) {
     std::stringstream buf;
-    buf << "Rescaling decimal value from original scale " << original_scale
-        << " to new scale " << new_scale << " would cause overflow";
+    buf << "Rescaling decimal value " << ToString(original_scale)
+        << " from original scale of " << original_scale << " to new scale of "
+        << new_scale << " would cause data loss";
     return Status::Invalid(buf.str());
   }
 
-  *out = result;
   return Status::OK();
 }
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index f7718f06a..ef5eaaabc 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1132,6 +1132,17 @@ def test_decimal_128_to_pandas(self):
         df = converted.to_pandas()
         tm.assert_frame_equal(df, expected)
 
+    def test_decimal_fails_with_truncation(self):
+        data1 = [decimal.Decimal('1.234')]
+        type1 = pa.decimal128(10, 2)
+        with pytest.raises(pa.ArrowException):
+            pa.array(data1, type=type1)
+
+        data2 = [decimal.Decimal('1.2345')]
+        type2 = pa.decimal128(10, 3)
+        with pytest.raises(pa.ArrowException):
+            pa.array(data2, type=type2)
+
 
 class TestListTypes(object):
     """


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python/C++] Decimal Values with too-high precision are multiplied by 100
> -------------------------------------------------------------------------
>
>                 Key: ARROW-2162
>                 URL: https://issues.apache.org/jira/browse/ARROW-2162
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++, Python
>    Affects Versions: 0.8.0
>            Reporter: Phillip Cloud
>            Assignee: Phillip Cloud
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.9.0
>
>
> From GitHub:
> This works as expected:
> {code}
> >>> pyarrow.array([decimal.Decimal('1.23')], pyarrow.decimal128(10,2))[0]
> Decimal('1.23')
> {code}
> Storing an extra digit of precision multiplies the stored value by a factor 
> of 100:
> {code}
> >>> pyarrow.array([decimal.Decimal('1.234')], pyarrow.decimal128(10,2))[0]
> Decimal('123.40')
> {code}
> Ideally I would get an exception since the value I'm trying to store doesn't 
> fit in the declared type of the array. It would be less good, but still ok, 
> if the stored value were 1.23 (truncating the extra digit). I didn't expect 
> pyarrow to silently store a value that differs from the original value by a 
> factor of 100.
> I originally thought that the code was incorrectly multiplying through by an 
> extra factor of 10**scale, but that doesn't seem to be the case. If I change 
> the scale, it always seems to be a factor of 100:
> {code}
> >>> pyarrow.array([decimal.Decimal('1.2345')], pyarrow.decimal128(10,3))[0]
> Decimal('123.450')
> {code}
> I see the same behavior if I use floating point to initialize the array 
> rather than Python's decimal type.
> I searched GitHub and JIRA for open issues but didn't find anything 
> related to this. I am using pyarrow 0.8.0 on OS X 10.12.6 with Python 2.7.14 
> installed via Homebrew.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

[jira] [Commented] (ARROW-2162) [Python/C++] Decimal Values with too-high precision are multiplied by 100

Reply via email to