This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 9fefc23519d7f4dbd796807562f5302cceda44d7 Author: Phillip Cloud <cpcl...@gmail.com> AuthorDate: Wed Feb 21 10:42:11 2018 -0800 ARROW-2162: [Python/C++] Decimal Values with too-high precision are multiplied by 100 Author: Phillip Cloud <cpcl...@gmail.com> Closes #1619 from cpcloud/ARROW-2162 and squashes the following commits: 57f95eee [Phillip Cloud] ARROW-2162: [Python/C++] Decimal Values with too-high precision are multiplied by 100 --- cpp/src/arrow/python/python-test.cc | 40 +++++++++++++++++++++++++++++ cpp/src/arrow/util/decimal.cc | 38 ++++++++++++++++++++------- python/pyarrow/tests/test_convert_pandas.py | 11 ++++++++ 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc index a2b832b..b76caae 100644 --- a/cpp/src/arrow/python/python-test.cc +++ b/cpp/src/arrow/python/python-test.cc @@ -201,5 +201,45 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) { ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr)); } +TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) { + // We fail when truncating values that would lose data if cast to a decimal type with + // lower scale + Decimal128 value; + OwnedRef python_decimal(this->CreatePythonDecimal("1.001")); + auto type = ::arrow::decimal(10, 2); + const auto& decimal_type = static_cast<const DecimalType&>(*type); + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal.obj(), + decimal_type, &value)); +} + +TEST_F(DecimalTest, FromPythonDecimalRescaleTruncateable) { + // We allow truncation of values that do not lose precision when dividing by 10 * the + // difference between the scales, e.g., 1.000 -> 1.00 + Decimal128 value; + OwnedRef python_decimal(this->CreatePythonDecimal("1.000")); + auto type = ::arrow::decimal(10, 2); + const auto& decimal_type = static_cast<const DecimalType&>(*type); + ASSERT_OK( + internal::DecimalFromPythonDecimal(python_decimal.obj(), decimal_type, &value)); + ASSERT_EQ(100, value.low_bits()); +} + +TEST_F(DecimalTest, TestOverflowFails) { + Decimal128 value; + int32_t precision; + int32_t scale; + OwnedRef python_decimal( + this->CreatePythonDecimal("9999999999999999999999999999999999999.9")); + ASSERT_OK( + internal::InferDecimalPrecisionAndScale(python_decimal.obj(), &precision, &scale)); + ASSERT_EQ(38, precision); + ASSERT_EQ(1, scale); + + auto type = ::arrow::decimal(38, 38); + const auto& decimal_type = static_cast<const DecimalType&>(*type); + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal.obj(), + decimal_type, &value)); +} + } // namespace py } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index e999854..a3c8cda 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -854,26 +854,46 @@ static const Decimal128 ScaleMultipliers[] = { Decimal128("10000000000000000000000000000000000000"), Decimal128("100000000000000000000000000000000000000")}; +static bool RescaleWouldCauseDataLoss(const Decimal128& value, int32_t delta_scale, + int32_t abs_delta_scale, Decimal128* result) { + Decimal128 multiplier(ScaleMultipliers[abs_delta_scale]); + + if (delta_scale < 0) { + DCHECK_NE(multiplier, 0); + Decimal128 remainder; + Status status = value.Divide(multiplier, result, &remainder); + DCHECK(status.ok()) << status.message(); + return remainder != 0; + } + + *result = value * multiplier; + return *result < value; +} + Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const { - DCHECK_NE(out, NULLPTR); - DCHECK_NE(original_scale, new_scale); - const int32_t delta_scale = original_scale - new_scale; + DCHECK_NE(out, NULLPTR) << "out is nullptr"; + DCHECK_NE(original_scale, new_scale) << "original_scale != new_scale"; + + const int32_t delta_scale = new_scale - original_scale; const int32_t abs_delta_scale = std::abs(delta_scale); + DCHECK_GE(abs_delta_scale, 1); DCHECK_LE(abs_delta_scale, 38); - const Decimal128 scale_multiplier = ScaleMultipliers[abs_delta_scale]; - const Decimal128 result = *this * scale_multiplier; + Decimal128 result(*this); + const bool rescale_would_cause_data_loss = + RescaleWouldCauseDataLoss(result, delta_scale, abs_delta_scale, out); - if (ARROW_PREDICT_FALSE(result < *this)) { + // Fail if we overflow or truncate + if (ARROW_PREDICT_FALSE(rescale_would_cause_data_loss)) { std::stringstream buf; - buf << "Rescaling decimal value from original scale " << original_scale - << " to new scale " << new_scale << " would cause overflow"; + buf << "Rescaling decimal value " << ToString(original_scale) + << " from original scale of " << original_scale << " to new scale of " + << new_scale << " would cause data loss"; return Status::Invalid(buf.str()); } - *out = result; return Status::OK(); } diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 92f6c25..6b62622 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -1132,6 +1132,17 @@ class TestConvertDecimalTypes(object): df = converted.to_pandas() tm.assert_frame_equal(df, expected) + def test_decimal_fails_with_truncation(self): + data1 = [decimal.Decimal('1.234')] + type1 = pa.decimal128(10, 2) + with pytest.raises(pa.ArrowException): + pa.array(data1, type=type1) + + data2 = [decimal.Decimal('1.2345')] + type2 = pa.decimal128(10, 3) + with pytest.raises(pa.ArrowException): + pa.array(data2, type=type2) + class TestListTypes(object): """ -- To stop receiving notification emails like this one, please contact w...@apache.org.