[GitHub] [arrow] pitrou commented on a change in pull request #9751: ARROW-10606: [C++] Implement Decimal256 casts

GitBox Tue, 23 Mar 2021 07:33:33 -0700


pitrou commented on a change in pull request #9751:
URL: https://github.com/apache/arrow/pull/9751#discussion_r599592899




##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -606,6 +723,9 @@ std::shared_ptr<CastFunction> GetCastToDecimal128() {
   // We resolve the output type of this kernel from the CastOptions
   DCHECK_OK(
       func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, 
sig_out_ty, exec));
+  exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
+  DCHECK_OK(
+      func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL256)}, 
sig_out_ty, exec));

Review comment:
       Should be `Type::DECIMAL256`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -462,13 +506,80 @@ struct CastFunctor<Decimal128Type, Decimal128Type> {
   }
 };
 
+template <>
+struct CastFunctor<Decimal256Type, Decimal128Type> {

Review comment:
       Isn't the `CastFunctor` signature `<Out, In>`? Here it seems you're 
implementing the converse (Decimal256 to Decimal128).

##########
File path: cpp/src/arrow/util/decimal.cc
##########
@@ -764,6 +805,126 @@ Status Decimal256::ToArrowStatus(DecimalStatus dstatus) 
const {
   return arrow::ToArrowStatus(dstatus, 256);
 }
 
+namespace {
+
+template <typename Real, typename Derived>
+struct Decimal256RealConversion {
+  static Result<Decimal256> FromPositiveReal(Real real, int32_t precision,
+                                             int32_t scale) {
+    auto x = real;
+    if (scale >= -76 && scale <= 76) {
+      x *= Derived::powers_of_ten()[scale + 76];
+    } else {
+      x *= std::pow(static_cast<Real>(10), static_cast<Real>(scale));
+    }
+    x = std::nearbyint(x);
+    const auto max_abs = Derived::powers_of_ten()[precision + 76];
+    if (x <= -max_abs || x >= max_abs) {

Review comment:
       Shouldn't `x` be positive here? Is there a point in testing against 
`-max_abs`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -617,6 +737,19 @@ std::shared_ptr<CastFunction> GetCastToDecimal256() {
   // tracks full implementation.
   AddCommonCasts(Type::DECIMAL256, sig_out_ty, func.get());
 
+  // Cast from floating point
+  DCHECK_OK(func->AddKernel(Type::FLOAT, {float32()}, sig_out_ty,
+                            CastFunctor<Decimal256Type, FloatType>::Exec));
+  DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
+                            CastFunctor<Decimal256Type, DoubleType>::Exec));
+
+  // Cast from other decimal
+  auto exec = CastFunctor<Decimal128Type, Decimal256Type>::Exec;
+  DCHECK_OK(
+      func->AddKernel(Type::DECIMAL256, {InputType(Type::DECIMAL128)}, 
sig_out_ty, exec));

Review comment:
       Should be `Type::DECIMAL128`?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -590,13 +597,62 @@ TEST(TestDecimalFromRealFloat, LargeValues) {
   // Test the entire float range
   for (int32_t scale = -38; scale <= 38; ++scale) {
     float real = std::pow(10.0f, static_cast<float>(scale));
-    CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 1, -scale, "1");
+  }
+  for (int32_t scale = -37; scale <= 36; ++scale) {
+    float real = 123.f * std::pow(10.f, static_cast<float>(scale));
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 2, -scale - 1, "12");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 3, -scale, "123");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 4, -scale + 1, "1230");
+  }
+}
+
+// Same tests, repeated for Decimal256
+class TestDecimal256FromRealFloat : public 
::testing::TestWithParam<FromFloatTestParam> {
+};
+
+TEST_P(TestDecimal256FromRealFloat, SuccessConversion) {
+  const auto param = GetParam();
+  CheckDecimalFromReal<Decimal256>(param.real, param.precision, param.scale,
+                                   param.expected);
+}
+
+// clang-format off
+INSTANTIATE_TEST_SUITE_P(
+    TestDecimal256FromRealFloat, TestDecimal256FromRealFloat,
+    ::testing::Values(

Review comment:
       Since those are the same values as for Decimal128, factor them out in a 
global variable?

##########
File path: cpp/src/arrow/util/decimal.cc
##########
@@ -94,6 +94,47 @@ static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
     1e17,  1e18,  1e19,  1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,
     1e28,  1e29,  1e30,  1e31,  1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  
1e38};
 
+// On the Windows R toolchain, INFINITY is double type instead of float
+static constexpr float kFloatInf = std::numeric_limits<float>::infinity();

Review comment:
       Is this used? You're still using `INFINITY` below.

##########
File path: cpp/src/arrow/util/decimal.h
##########
@@ -250,12 +250,41 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
   /// \return error status if the length is an invalid value
   static Result<Decimal256> FromBigEndian(const uint8_t* data, int32_t length);
 
+  static Result<Decimal256> FromReal(double real, int32_t precision, int32_t 
scale);
+  static Result<Decimal256> FromReal(float real, int32_t precision, int32_t 
scale);
+
+  /// \brief Convert to a floating-point number (scaled)

Review comment:
       Mention that the result can be an infinite in case of overflow?

##########
File path: cpp/src/arrow/util/basic_decimal.h
##########
@@ -237,17 +240,30 @@ class ARROW_EXPORT BasicDecimal256 {
     return little_endian_array_;
   }
 
+  /// \brief Get the lowest bits of the two's complement representation of the 
number.
+  inline constexpr uint64_t low_bits() const { return little_endian_array_[0]; 
}
+
   /// \brief Return the raw bytes of the value in native-endian byte order.
   std::array<uint8_t, 32> ToBytes() const;
   void ToBytes(uint8_t* out) const;
 
   /// \brief Scale multiplier for given scale value.
   static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
 
-  /// \brief Convert BasicDecimal128 from one scale to another
+  /// \brief Convert BasicDecimal256 from one scale to another
   DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
                         BasicDecimal256* out) const;
 
+  /// \brief Scale up.
+  BasicDecimal256 IncreaseScaleBy(int32_t increase_by) const;
+
+  /// \brief Scale down.
+  /// - If 'round' is true, the right-most digits are dropped and the result 
value is
+  ///   rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped 
digits

Review comment:
       Write "positive" and "negative" in full?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -728,72 +858,143 @@ TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
 TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
 
 // Custom test for Decimal128::ToReal<float>
-class TestDecimalToRealFloat : public TestDecimalToReal<float> {};
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<Decimal128, 
float>> {};

Review comment:
       Rename to `TestDecimal128ToRealFloat`?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -605,7 +661,8 @@ class TestDecimalFromRealDouble : public 
::testing::TestWithParam<FromDoubleTest
 

Review comment:
       Rename `TestDecimalFromRealDouble` to `TestDecimal128FromRealDouble`?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -728,72 +858,143 @@ TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
 TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
 
 // Custom test for Decimal128::ToReal<float>
-class TestDecimalToRealFloat : public TestDecimalToReal<float> {};
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<Decimal128, 
float>> {};
 
 TEST_F(TestDecimalToRealFloat, LargeValues) {
   // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
   // Nevertheless, power-of-ten factors are not all exactly representable
   // in binary floating point.
   for (int32_t scale = -38; scale <= 38; scale++) {
-    CheckFloatToRealApprox("1", scale, Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
   }
   for (int32_t scale = -38; scale <= 36; scale++) {
     const Real factor = static_cast<Real>(123);
-    CheckFloatToRealApprox("123", scale, factor * Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
   }
 }
 
 TEST_F(TestDecimalToRealFloat, Precision) {
   // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-  CheckDecimalToReal<float>("9223373136366403584", 0, 9.223373e+18f);
-  CheckDecimalToReal<float>("-9223373136366403584", 0, -9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
+}
+
+// Same tests, repeated for Decimal256
+class TestDecimal256ToRealFloat : public 
TestDecimalToReal<std::pair<Decimal256, float>> {
+};
+
+TEST_F(TestDecimal256ToRealFloat, LargeValues) {
+  // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -76; scale <= 76; scale++) {
+#ifdef _WIN32
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
+#endif
+    CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
+    const Real factor = static_cast<Real>(123);
+    CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
+  }
+}
+
+TEST_F(TestDecimal256ToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
   // 2**64 + 2**41 (exactly representable in a float)
-  CheckDecimalToReal<float>("18446746272732807168", 0, 1.8446746e+19f);
-  CheckDecimalToReal<float>("-18446746272732807168", 0, -1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
 }
 
 // ToReal<double> tests are disabled on MinGW because of precision issues in 
results
 #ifndef __MINGW32__
 
 // Custom test for Decimal128::ToReal<double>
-class TestDecimalToRealDouble : public TestDecimalToReal<double> {};
+class TestDecimalToRealDouble : public TestDecimalToReal<std::pair<Decimal128, 
double>> {

Review comment:
       Rename to `TestDecimal128ToRealDouble`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -573,6 +664,253 @@ TEST(Cast, DecimalToDecimal) {
   }
 }
 
+TEST(Cast, Decimal256ToDecimal256) {

Review comment:
       Rename `DecimalToDecimal` to `Decimal128ToDecimal128`?

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -611,6 +949,44 @@ TEST(Cast, FloatingToDecimal) {
   // Edge cases are tested for Decimal128::FromReal()
 }
 
+TEST(Cast, FloatingToDecimal256) {

Review comment:
       Don't you want to avoid the code duplication here? This looks like 
exactly the same test as above.
   

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -606,6 +723,9 @@ std::shared_ptr<CastFunction> GetCastToDecimal128() {
   // We resolve the output type of this kernel from the CastOptions
   DCHECK_OK(
       func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL128)}, 
sig_out_ty, exec));
+  exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
+  DCHECK_OK(
+      func->AddKernel(Type::DECIMAL128, {InputType(Type::DECIMAL256)}, 
sig_out_ty, exec));

Review comment:
       Note the signature of `CastFunction::AddKernel`:
   ```c++
     Status AddKernel(Type::type in_type_id, std::vector<InputType> in_types,
                      OutputType out_type, ArrayKernelExec exec,
                      NullHandling::type = NullHandling::INTERSECTION,
                      MemAllocation::type = MemAllocation::PREALLOCATE);
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -573,6 +664,253 @@ TEST(Cast, DecimalToDecimal) {
   }
 }
 
+TEST(Cast, Decimal256ToDecimal256) {
+  CastOptions options;
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    auto no_truncation = ArrayFromJSON(decimal256(38, 10), R"([
+          "02.0000000000",
+          "30.0000000000",
+          "22.0000000000",
+        "-121.0000000000",
+        null])");
+    auto expected = ArrayFromJSON(decimal256(28, 0), R"([
+          "02.",
+          "30.",
+          "22.",
+        "-121.",
+        null])");
+
+    CheckCast(no_truncation, expected, options);
+    CheckCast(expected, no_truncation, options);
+  }
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    // Same scale, different precision
+    auto d_5_2 = ArrayFromJSON(decimal256(5, 2), R"([
+          "12.34",
+           "0.56"])");
+    auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
+          "12.34",
+           "0.56"])");
+
+    CheckCast(d_5_2, d_4_2, options);
+    CheckCast(d_4_2, d_5_2, options);
+  }
+
+  auto d_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
+      "-02.1234567890",
+       "30.1234567890",
+      null])");
+
+  auto d_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
+      "-02.",
+       "30.",
+      null])");
+
+  auto d_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([
+      "-02.0000000000",
+       "30.0000000000",
+      null])");
+
+  // Rescale which leads to truncation
+  options.allow_decimal_truncate = true;
+  CheckCast(d_38_10, d_28_0, options);
+  CheckCast(d_28_0, d_38_10_roundtripped, options);
+
+  options.allow_decimal_truncate = false;
+  options.to_type = d_28_0->type();
+  CheckCastFails(d_38_10, options);
+  CheckCast(d_28_0, d_38_10_roundtripped, options);
+
+  // Precision loss without rescale leads to truncation
+  auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
+  for (auto expected : {
+           ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
+           ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
+           ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
+       }) {
+    options.allow_decimal_truncate = true;
+    CheckCast(d_4_2, expected, options);
+
+    options.allow_decimal_truncate = false;
+    options.to_type = expected->type();
+    CheckCastFails(d_4_2, options);
+  }
+}
+
+TEST(Cast, DecimalToDecimal256) {
+  CastOptions options;
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    auto no_truncation = ArrayFromJSON(decimal(38, 10), R"([
+          "02.0000000000",
+          "30.0000000000",
+          "22.0000000000",
+        "-121.0000000000",
+        null])");
+    auto expected = ArrayFromJSON(decimal256(48, 0), R"([
+          "02.",
+          "30.",
+          "22.",
+        "-121.",
+        null])");
+
+    CheckCast(no_truncation, expected, options);
+  }
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    // Same scale, different precision
+    auto d_5_2 = ArrayFromJSON(decimal(5, 2), R"([
+          "12.34",
+           "0.56"])");
+    auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
+          "12.34",
+           "0.56"])");
+    auto d_40_2 = ArrayFromJSON(decimal256(40, 2), R"([
+          "12.34",
+           "0.56"])");
+
+    CheckCast(d_5_2, d_4_2, options);
+    CheckCast(d_5_2, d_40_2, options);
+  }
+
+  auto d128_38_10 = ArrayFromJSON(decimal(38, 10), R"([
+      "-02.1234567890",
+       "30.1234567890",
+      null])");
+
+  auto d128_28_0 = ArrayFromJSON(decimal(28, 0), R"([
+      "-02.",
+       "30.",
+      null])");
+
+  auto d256_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
+      "-02.",
+       "30.",
+      null])");
+
+  auto d256_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([
+      "-02.0000000000",
+       "30.0000000000",
+      null])");
+
+  // Rescale which leads to truncation
+  options.allow_decimal_truncate = true;
+  CheckCast(d128_38_10, d256_28_0, options);
+  CheckCast(d128_28_0, d256_38_10_roundtripped, options);
+
+  options.allow_decimal_truncate = false;
+  options.to_type = d256_28_0->type();
+  CheckCastFails(d128_38_10, options);
+  CheckCast(d128_28_0, d256_38_10_roundtripped, options);
+
+  // Precision loss without rescale leads to truncation
+  auto d128_4_2 = ArrayFromJSON(decimal(4, 2), R"(["12.34"])");
+  for (auto expected : {
+           ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
+           ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
+           ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
+       }) {
+    options.allow_decimal_truncate = true;
+    CheckCast(d128_4_2, expected, options);
+
+    options.allow_decimal_truncate = false;
+    options.to_type = expected->type();
+    CheckCastFails(d128_4_2, options);
+  }
+}
+
+TEST(Cast, Decimal256ToDecimal) {

Review comment:
       `Decimal256ToDecimal128`

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -393,35 +391,81 @@ struct CastFunctor<O, Decimal128Type, 
enable_if_t<is_integer_type<O>::value>> {
 // Decimal to decimal
 
 struct UnsafeUpscaleDecimal {
-  template <typename... Unused>
-  Decimal128 Call(KernelContext* ctx, Decimal128 val) const {
-    return val.IncreaseScaleBy(by_);
+  template <typename OutValue, typename Arg0Value>
+  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+    // For OutValue=Decimal256, convert then scale
+    return OutValue(val).IncreaseScaleBy(by_);
   }
   int32_t by_;
 };
 
 struct UnsafeDownscaleDecimal {
-  template <typename... Unused>
-  Decimal128 Call(KernelContext* ctx, Decimal128 val) const {
-    return val.ReduceScaleBy(by_, false);
+  template <typename OutValue, typename Arg0Value>
+  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+    return OutValue(val).ReduceScaleBy(by_, false);
   }
   int32_t by_;
 };
 
 struct SafeRescaleDecimal {
+  template <typename OutValue, typename Arg0Value>
+  OutValue Call(KernelContext* ctx, Arg0Value val) const {
+    auto maybe_rescaled = OutValue(val).Rescale(in_scale_, out_scale_);
+    if (ARROW_PREDICT_FALSE(!maybe_rescaled.ok())) {
+      ctx->SetStatus(maybe_rescaled.status());
+      return {};  // Zero
+    }
+
+    if (ARROW_PREDICT_TRUE(maybe_rescaled->FitsInPrecision(out_precision_))) {
+      return maybe_rescaled.MoveValueUnsafe();
+    }
+
+    ctx->SetStatus(
+        Status::Invalid("Decimal value does not fit in precision ", 
out_precision_));
+    return {};  // Zero
+  }
+
+  int32_t out_scale_, out_precision_, in_scale_;
+};
+
+// Same as above, but this time, scale then convert
+struct UnsafeUpscaleDecimalDowncast {
+  template <typename... Unused>
+  Decimal128 Call(KernelContext* ctx, Decimal256 val) const {
+    auto decimal256 = val.IncreaseScaleBy(by_);
+    return Decimal128(decimal256.little_endian_array()[1],
+                      decimal256.little_endian_array()[0]);

Review comment:
       Hmm, do we want to avoid repeating those kernels using a helper 
function? e.g.
   ```c++
   template <typename OutDecimal, typename InDecimal>
   OutDecimal ConvertDecimalType(InDecimal&& val, KernelContext*);
   
   template <>
   Decimal128 ConvertDecimalType<Decimal256, Decimal128>(Decimal256&& val, 
KernelContext*) {
     return Decimal128(val.little_endian_array()[1],
                       val.little_endian_array()[0]);
   }
   
   template <typename OutDecimal>
   OutDecimal ConvertDecimalType<OutDecimal, OutDecimal>(OutDecimal&& val, 
KernelContext*) {
     return val;
   }
   ```
   

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -462,13 +506,80 @@ struct CastFunctor<Decimal128Type, Decimal128Type> {
   }
 };
 
+template <>
+struct CastFunctor<Decimal256Type, Decimal128Type> {
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& options = checked_cast<const 
CastState*>(ctx->state())->options;
+
+    const auto& in_type = checked_cast<const 
Decimal256Type&>(*batch[0].type());
+    const auto& out_type = checked_cast<const Decimal128Type&>(*out->type());
+    const auto in_scale = in_type.scale();
+    const auto out_scale = out_type.scale();
+
+    if (options.allow_decimal_truncate) {
+      if (in_scale < out_scale) {
+        // Unsafe upscale
+        applicator::ScalarUnaryNotNullStateful<Decimal128Type, Decimal256Type,
+                                               UnsafeUpscaleDecimalDowncast>
+            kernel(UnsafeUpscaleDecimalDowncast{out_scale - in_scale});
+        return kernel.Exec(ctx, batch, out);
+      } else {
+        // Unsafe downscale
+        applicator::ScalarUnaryNotNullStateful<Decimal128Type, Decimal256Type,
+                                               UnsafeDownscaleDecimalDowncast>
+            kernel(UnsafeDownscaleDecimalDowncast{in_scale - out_scale});
+        return kernel.Exec(ctx, batch, out);
+      }
+    }
+
+    // Safe rescale
+    applicator::ScalarUnaryNotNullStateful<Decimal128Type, Decimal256Type,
+                                           SafeRescaleDecimalDowncast>
+        kernel(SafeRescaleDecimalDowncast{out_scale, out_type.precision(), 
in_scale});
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+template <typename Arg0Type>
+struct CastFunctor<Arg0Type, Decimal256Type, enable_if_decimal<Arg0Type>> {

Review comment:
       Same comment here: should be `CastFunctor<Decimal256Type, Arg0Type>`?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -590,13 +597,62 @@ TEST(TestDecimalFromRealFloat, LargeValues) {
   // Test the entire float range
   for (int32_t scale = -38; scale <= 38; ++scale) {
     float real = std::pow(10.0f, static_cast<float>(scale));
-    CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 1, -scale, "1");
+  }
+  for (int32_t scale = -37; scale <= 36; ++scale) {
+    float real = 123.f * std::pow(10.f, static_cast<float>(scale));
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 2, -scale - 1, "12");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 3, -scale, "123");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 4, -scale + 1, "1230");
+  }
+}
+
+// Same tests, repeated for Decimal256
+class TestDecimal256FromRealFloat : public 
::testing::TestWithParam<FromFloatTestParam> {
+};
+
+TEST_P(TestDecimal256FromRealFloat, SuccessConversion) {
+  const auto param = GetParam();
+  CheckDecimalFromReal<Decimal256>(param.real, param.precision, param.scale,
+                                   param.expected);
+}
+
+// clang-format off
+INSTANTIATE_TEST_SUITE_P(
+    TestDecimal256FromRealFloat, TestDecimal256FromRealFloat,
+    ::testing::Values(
+        // 2**63 + 2**40 (exactly representable in a float's 24 bits of 
precision)
+        FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"},
+        FromFloatTestParam{-9.223373e+18f, 19, 0, "-9223373136366403584"},
+        FromFloatTestParam{9.223373e+14f, 19, 4, "922337313636640.3584"},
+        FromFloatTestParam{-9.223373e+14f, 19, 4, "-922337313636640.3584"},
+        // 2**64 - 2**40 (exactly representable in a float)
+        FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
+        FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"},
+        // 2**64 + 2**41 (exactly representable in a float)
+        FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
+        FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"},
+        FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"},
+        FromFloatTestParam{-1.8446746e+15f, 20, 4, "-1844674627273280.7168"},
+        // Almost 10**38 (minus 2**103)
+        FromFloatTestParam{9.999999e+37f, 38, 0,
+                           "99999986661652122824821048795547566080"},
+        FromFloatTestParam{-9.999999e+37f, 38, 0,
+                           "-99999986661652122824821048795547566080"}
+));
+// clang-format on
+
+TEST(TestDecimal256FromRealFloat, LargeValues) {

Review comment:
       Perhaps fold this test into `TestDecimalFromReal`, to avoid repeating it 
for both decimal types?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -641,13 +698,79 @@ TEST(TestDecimalFromRealDouble, LargeValues) {
   // Test the entire double range
   for (int32_t scale = -308; scale <= 308; ++scale) {
     double real = std::pow(10.0, static_cast<double>(scale));
-    CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 1, -scale, "1");
+  }
+  for (int32_t scale = -307; scale <= 306; ++scale) {
+    double real = 123. * std::pow(10.0, static_cast<double>(scale));
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 2, -scale - 1, "12");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 3, -scale, "123");
+    CheckDecimalFromRealIntegerString<Decimal128>(real, 4, -scale + 1, "1230");
+  }
+}
+
+// Same tests for Decimal256
+class TestDecimal256FromRealDouble
+    : public ::testing::TestWithParam<FromDoubleTestParam> {};
+
+TEST_P(TestDecimal256FromRealDouble, SuccessConversion) {
+  const auto param = GetParam();
+  CheckDecimalFromReal<Decimal256>(param.real, param.precision, param.scale,
+                                   param.expected);
+}
+
+// clang-format off
+INSTANTIATE_TEST_SUITE_P(
+    TestDecimal256FromRealDouble, TestDecimal256FromRealDouble,
+    ::testing::Values(
+        // 2**63 + 2**11 (exactly representable in a double's 53 bits of 
precision)
+        FromDoubleTestParam{9.223372036854778e+18, 19, 0, 
"9223372036854777856"},
+        FromDoubleTestParam{-9.223372036854778e+18, 19, 0, 
"-9223372036854777856"},
+        FromDoubleTestParam{9.223372036854778e+10, 19, 8, 
"92233720368.54777856"},
+        FromDoubleTestParam{-9.223372036854778e+10, 19, 8, 
"-92233720368.54777856"},
+        // 2**64 - 2**11 (exactly representable in a double)
+        FromDoubleTestParam{1.844674407370955e+19, 20, 0, 
"18446744073709549568"},
+        FromDoubleTestParam{-1.844674407370955e+19, 20, 0, 
"-18446744073709549568"},
+        // 2**64 + 2**11 (exactly representable in a double)
+        FromDoubleTestParam{1.8446744073709556e+19, 20, 0, 
"18446744073709555712"},
+        FromDoubleTestParam{-1.8446744073709556e+19, 20, 0, 
"-18446744073709555712"},
+        FromDoubleTestParam{1.8446744073709556e+15, 20, 4, 
"1844674407370955.5712"},
+        FromDoubleTestParam{-1.8446744073709556e+15, 20, 4, 
"-1844674407370955.5712"},
+        // Almost 10**38 (minus 2**73)
+        FromDoubleTestParam{9.999999999999998e+37, 38, 0,
+                            "99999999999999978859343891977453174784"},
+        FromDoubleTestParam{-9.999999999999998e+37, 38, 0,
+                            "-99999999999999978859343891977453174784"},
+        FromDoubleTestParam{9.999999999999998e+27, 38, 10,
+                            "9999999999999997885934389197.7453174784"},
+        FromDoubleTestParam{-9.999999999999998e+27, 38, 10,
+                            "-9999999999999997885934389197.7453174784"},
+        // Almost 10**76
+        FromDoubleTestParam{9.999999999999999e+75, 76, 0,
+                            
"999999999999999886366330070006442034959750906670402"
+                            "8242075715752105414230016"},
+        FromDoubleTestParam{-9.999999999999999e+75, 76, 0,
+                            
"-999999999999999886366330070006442034959750906670402"
+                            "8242075715752105414230016"},
+        FromDoubleTestParam{9.999999999999999e+65, 76, 10,
+                            
"999999999999999886366330070006442034959750906670402"
+                            "824207571575210.5414230016"},
+        FromDoubleTestParam{-9.999999999999999e+65, 76, 10,
+                            
"-999999999999999886366330070006442034959750906670402"
+                            "824207571575210.5414230016"}

Review comment:
       This is the same test vector as above + 4 additional values. Perhaps 
factor out the common values?

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -728,72 +858,143 @@ TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
 TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
 
 // Custom test for Decimal128::ToReal<float>
-class TestDecimalToRealFloat : public TestDecimalToReal<float> {};
+class TestDecimalToRealFloat : public TestDecimalToReal<std::pair<Decimal128, 
float>> {};
 
 TEST_F(TestDecimalToRealFloat, LargeValues) {
   // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
   // Nevertheless, power-of-ten factors are not all exactly representable
   // in binary floating point.
   for (int32_t scale = -38; scale <= 38; scale++) {
-    CheckFloatToRealApprox("1", scale, Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
   }
   for (int32_t scale = -38; scale <= 36; scale++) {
     const Real factor = static_cast<Real>(123);
-    CheckFloatToRealApprox("123", scale, factor * Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
   }
 }
 
 TEST_F(TestDecimalToRealFloat, Precision) {
   // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-  CheckDecimalToReal<float>("9223373136366403584", 0, 9.223373e+18f);
-  CheckDecimalToReal<float>("-9223373136366403584", 0, -9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
+}
+
+// Same tests, repeated for Decimal256
+class TestDecimal256ToRealFloat : public 
TestDecimalToReal<std::pair<Decimal256, float>> {
+};
+
+TEST_F(TestDecimal256ToRealFloat, LargeValues) {
+  // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -76; scale <= 76; scale++) {
+#ifdef _WIN32
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
+#endif
+    CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
+    const Real factor = static_cast<Real>(123);
+    CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
+  }
+}
+
+TEST_F(TestDecimal256ToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<Decimal, Real>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<Decimal, Real>("-9223373136366403584", 0, -9.223373e+18f);
   // 2**64 + 2**41 (exactly representable in a float)
-  CheckDecimalToReal<float>("18446746272732807168", 0, 1.8446746e+19f);
-  CheckDecimalToReal<float>("-18446746272732807168", 0, -1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
 }
 
 // ToReal<double> tests are disabled on MinGW because of precision issues in 
results
 #ifndef __MINGW32__
 
 // Custom test for Decimal128::ToReal<double>
-class TestDecimalToRealDouble : public TestDecimalToReal<double> {};
+class TestDecimalToRealDouble : public TestDecimalToReal<std::pair<Decimal128, 
double>> {
+};
 
 TEST_F(TestDecimalToRealDouble, LargeValues) {
   // Note that exact comparisons would succeed on some platforms (Linux, 
macOS).
   // Nevertheless, power-of-ten factors are not all exactly representable
   // in binary floating point.
   for (int32_t scale = -308; scale <= 308; scale++) {
-    CheckDoubleToRealApprox("1", scale, Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("1", scale, Pow10(-scale));
   }
   for (int32_t scale = -308; scale <= 306; scale++) {
     const Real factor = static_cast<Real>(123);
-    CheckDoubleToRealApprox("123", scale, factor * Pow10(-scale));
+    CheckDecimalToRealApprox<Decimal>("123", scale, factor * Pow10(-scale));
   }
 }
 
 TEST_F(TestDecimalToRealDouble, Precision) {
   // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
-  CheckDecimalToReal<double>("9223372036854777856", 0, 9.223372036854778e+18);
-  CheckDecimalToReal<double>("-9223372036854777856", 0, 
-9.223372036854778e+18);
+  CheckDecimalToReal<Decimal, Real>("9223372036854777856", 0, 
9.223372036854778e+18);
+  CheckDecimalToReal<Decimal, Real>("-9223372036854777856", 0, 
-9.223372036854778e+18);
   // 2**64 - 2**11 (exactly representable in a double)
-  CheckDecimalToReal<double>("18446744073709549568", 0, 1.844674407370955e+19);
-  CheckDecimalToReal<double>("-18446744073709549568", 0, 
-1.844674407370955e+19);
+  CheckDecimalToReal<Decimal, Real>("18446744073709549568", 0, 
1.844674407370955e+19);
+  CheckDecimalToReal<Decimal, Real>("-18446744073709549568", 0, 
-1.844674407370955e+19);
   // 2**64 + 2**11 (exactly representable in a double)
-  CheckDecimalToReal<double>("18446744073709555712", 0, 
1.8446744073709556e+19);
-  CheckDecimalToReal<double>("-18446744073709555712", 0, 
-1.8446744073709556e+19);
+  CheckDecimalToReal<Decimal, Real>("18446744073709555712", 0, 
1.8446744073709556e+19);
+  CheckDecimalToReal<Decimal, Real>("-18446744073709555712", 0, 
-1.8446744073709556e+19);
   // Almost 10**38 (minus 2**73)
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 0,
-                             9.999999999999998e+37);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 0,
-                             -9.999999999999998e+37);
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 10,
-                             9.999999999999998e+27);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 10,
-                             -9.999999999999998e+27);
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", -10,
-                             9.999999999999998e+47);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", -10,
-                             -9.999999999999998e+47);
+  CheckDecimalToReal<Decimal, Real>("99999999999999978859343891977453174784", 
0,
+                                    9.999999999999998e+37);
+  CheckDecimalToReal<Decimal, Real>("-99999999999999978859343891977453174784", 
0,
+                                    -9.999999999999998e+37);
+  CheckDecimalToReal<Decimal, Real>("99999999999999978859343891977453174784", 
10,
+                                    9.999999999999998e+27);
+  CheckDecimalToReal<Decimal, Real>("-99999999999999978859343891977453174784", 
10,
+                                    -9.999999999999998e+27);
+  CheckDecimalToReal<Decimal, Real>("99999999999999978859343891977453174784", 
-10,
+                                    9.999999999999998e+47);
+  CheckDecimalToReal<Decimal, Real>("-99999999999999978859343891977453174784", 
-10,
+                                    -9.999999999999998e+47);
+}
+
+// Same tests, but for Decimal256
+class TestDecimal256ToRealDouble
+    : public TestDecimalToReal<std::pair<Decimal128, double>> {};
+
+TEST_F(TestDecimal256ToRealDouble, LargeValues) {

Review comment:
       Instead of copy-pasting these test functions, why not factor them out?
   (for example as `TestDecimalToReal` methods?)

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -573,6 +664,253 @@ TEST(Cast, DecimalToDecimal) {
   }
 }
 
+TEST(Cast, Decimal256ToDecimal256) {
+  CastOptions options;
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    auto no_truncation = ArrayFromJSON(decimal256(38, 10), R"([
+          "02.0000000000",
+          "30.0000000000",
+          "22.0000000000",
+        "-121.0000000000",
+        null])");
+    auto expected = ArrayFromJSON(decimal256(28, 0), R"([
+          "02.",
+          "30.",
+          "22.",
+        "-121.",
+        null])");
+
+    CheckCast(no_truncation, expected, options);
+    CheckCast(expected, no_truncation, options);
+  }
+
+  for (bool allow_decimal_truncate : {false, true}) {
+    options.allow_decimal_truncate = allow_decimal_truncate;
+
+    // Same scale, different precision
+    auto d_5_2 = ArrayFromJSON(decimal256(5, 2), R"([
+          "12.34",
+           "0.56"])");
+    auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"([
+          "12.34",
+           "0.56"])");
+
+    CheckCast(d_5_2, d_4_2, options);
+    CheckCast(d_4_2, d_5_2, options);
+  }
+
+  auto d_38_10 = ArrayFromJSON(decimal256(38, 10), R"([
+      "-02.1234567890",
+       "30.1234567890",
+      null])");
+
+  auto d_28_0 = ArrayFromJSON(decimal256(28, 0), R"([
+      "-02.",
+       "30.",
+      null])");
+
+  auto d_38_10_roundtripped = ArrayFromJSON(decimal256(38, 10), R"([
+      "-02.0000000000",
+       "30.0000000000",
+      null])");
+
+  // Rescale which leads to truncation
+  options.allow_decimal_truncate = true;
+  CheckCast(d_38_10, d_28_0, options);
+  CheckCast(d_28_0, d_38_10_roundtripped, options);
+
+  options.allow_decimal_truncate = false;
+  options.to_type = d_28_0->type();
+  CheckCastFails(d_38_10, options);
+  CheckCast(d_28_0, d_38_10_roundtripped, options);
+
+  // Precision loss without rescale leads to truncation
+  auto d_4_2 = ArrayFromJSON(decimal256(4, 2), R"(["12.34"])");
+  for (auto expected : {
+           ArrayFromJSON(decimal256(3, 2), R"(["12.34"])"),
+           ArrayFromJSON(decimal256(4, 3), R"(["12.340"])"),
+           ArrayFromJSON(decimal256(2, 1), R"(["12.3"])"),
+       }) {
+    options.allow_decimal_truncate = true;
+    CheckCast(d_4_2, expected, options);
+
+    options.allow_decimal_truncate = false;
+    options.to_type = expected->type();
+    CheckCastFails(d_4_2, options);
+  }
+}
+
+TEST(Cast, DecimalToDecimal256) {

Review comment:
       `Decimal128ToDecimal256`

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc
##########
@@ -494,6 +494,97 @@ TEST(Cast, DecimalToInt) {
   CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000, 
-120000]"), options);
 }
 
+TEST(Cast, Decimal256ToInt) {

Review comment:
       Nit, but rename `DecimalToInt` to `Decimal128ToInt` above?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] pitrou commented on a change in pull request #9751: ARROW-10606: [C++] Implement Decimal256 casts

Reply via email to