[arrow] branch master updated: ARROW-7010: [C++] Implement decimal-to-float casts

wesm Fri, 03 Jul 2020 05:59:00 -0700

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new 556811c  ARROW-7010: [C++] Implement decimal-to-float casts
556811c is described below

commit 556811c300d5b567889b37c613fea3dc81200cab
Author: Antoine Pitrou <[email protected]>
AuthorDate: Fri Jul 3 07:58:16 2020 -0500

    ARROW-7010: [C++] Implement decimal-to-float casts
    
    Closes #7618 from pitrou/ARROW-7010-cast-decimal-to-float
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 .../arrow/compute/kernels/scalar_cast_numeric.cc   |  34 ++++-
 cpp/src/arrow/compute/kernels/scalar_cast_test.cc  |  22 +++
 cpp/src/arrow/util/decimal.cc                      |  45 ++++++-
 cpp/src/arrow/util/decimal.h                       |  26 ++++
 cpp/src/arrow/util/decimal_test.cc                 | 150 ++++++++++++++++++++-
 5 files changed, 270 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index 3b5aac4..ea06eb5 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -503,6 +503,34 @@ struct CastFunctor<Decimal128Type, I, enable_if_t<is_floating_type<I>::value>> {
   }
 };
 
+// ----------------------------------------------------------------------
+// Decimal to real
+
+struct DecimalToReal {
+  template <typename RealType, typename ARG0>
+  RealType Call(KernelContext* ctx, const Decimal128& val) const {
+    return val.ToReal<RealType>(in_scale_);
+  }
+
+  int32_t in_scale_;
+};
+
+template <typename O>
+struct CastFunctor<O, Decimal128Type, enable_if_t<is_floating_type<O>::value>> {
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const auto& in_type_inst =
+        checked_cast<const Decimal128Type&>(*batch[0].array()->type);
+    const auto in_scale = in_type_inst.scale();
+
+    applicator::ScalarUnaryNotNullStateful<O, Decimal128Type, DecimalToReal> kernel(
+        DecimalToReal{in_scale});
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+// ----------------------------------------------------------------------
+// Top-level kernel instantiation
+
 namespace {
 
 template <typename OutType>
@@ -558,8 +586,12 @@ std::shared_ptr<CastFunction> GetCastToFloating(std::string name) {
     DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, out_ty, CastFloatingToFloating));
   }
 
-  // From other numbers to integer
+  // From other numbers to floating point
   AddCommonNumberCasts<OutType>(out_ty, func.get());
+
+  // From decimal to floating point
+  DCHECK_OK(func->AddKernel(Type::DECIMAL, {InputType::Array(Type::DECIMAL)}, out_ty,
+                            CastFunctor<OutType, Decimal128Type>::Exec));
   return func;
 }
 
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 252e50e..7384a67 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -399,6 +399,14 @@ class TestCast : public TestBase {
                   R"(["0.00", null, "0.00", "123.45", "999.99"])", /*check_scalar=*/true,
                   options);
   }
+
+  void TestCastDecimalToFloating(const std::shared_ptr<DataType>& out_type) {
+    auto in_type = decimal(5, 2);
+
+    CheckCaseJSON(in_type, out_type, R"(["0.00", null, "123.45", "999.99"])",
+                  "[0.0, null, 123.45, 999.99]");
+    // Edge cases are tested in Decimal128::ToReal()
+  }
 };
 
 TEST_F(TestCast, SameTypeZeroCopy) {
@@ -943,6 +951,8 @@ TEST_F(TestCast, FloatToDecimal) {
   out_type = decimal(20, 4);
   CheckCaseJSON(in_type, out_type, "[1.8446746e+15, -1.8446746e+15]",
                 R"(["1844674627273280.7168", "-1844674627273280.7168"])");
+
+  // More edge cases tested in Decimal128::FromReal
 }
 
 TEST_F(TestCast, DoubleToDecimal) {
@@ -957,6 +967,18 @@ TEST_F(TestCast, DoubleToDecimal) {
   out_type = decimal(20, 4);
   CheckCaseJSON(in_type, out_type, "[1.8446744073709556e+15, -1.8446744073709556e+15]",
                 R"(["1844674407370955.5712", "-1844674407370955.5712"])");
+
+  // More edge cases tested in Decimal128::FromReal
+}
+
+TEST_F(TestCast, DecimalToFloat) {
+  auto out_type = float32();
+  TestCastDecimalToFloating(out_type);
+}
+
+TEST_F(TestCast, DecimalToDouble) {
+  auto out_type = float64();
+  TestCastDecimalToFloating(out_type);
 }
 
 TEST_F(TestCast, TimestampToTimestamp) {
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 5620803..20331ba 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -98,7 +98,7 @@ static constexpr double kDoublePowersOfTen[2 * 38 + 1] = {
 namespace {
 
 template <typename Real, typename Derived>
-struct Decimal128FromReal {
+struct DecimalRealConversion {
   static Result<Decimal128> FromPositiveReal(Real real, int32_t precision,
                                              int32_t scale) {
     auto x = real;
@@ -140,24 +140,59 @@ struct Decimal128FromReal {
       return FromPositiveReal(x, precision, scale);
     }
   }
+
+  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+    Real x = static_cast<Real>(decimal.high_bits()) * Derived::two_to_64();
+    x += static_cast<Real>(decimal.low_bits());
+    if (scale >= -38 && scale <= 38) {
+      x *= Derived::powers_of_ten()[-scale + 38];
+    } else {
+      x *= std::pow(static_cast<Real>(10), static_cast<Real>(-scale));
+    }
+    return x;
+  }
+
+  static Real ToReal(Decimal128 decimal, int32_t scale) {
+    if (decimal.high_bits() < 0) {
+      // Convert the absolute value to avoid precision loss
+      decimal.Negate();
+      return -ToRealPositive(decimal, scale);
+    } else {
+      return ToRealPositive(decimal, scale);
+    }
+  }
 };
 
-struct Decimal128FromFloat : public Decimal128FromReal<float, Decimal128FromFloat> {
+struct DecimalFloatConversion
+    : public DecimalRealConversion<float, DecimalFloatConversion> {
   static constexpr const float* powers_of_ten() { return kFloatPowersOfTen; }
+
+  static constexpr float two_to_64() { return 1.8446744e+19f; }
 };
 
-struct Decimal128FromDouble : public Decimal128FromReal<double, Decimal128FromDouble> {
+struct DecimalDoubleConversion
+    : public DecimalRealConversion<double, DecimalDoubleConversion> {
   static constexpr const double* powers_of_ten() { return kDoublePowersOfTen; }
+
+  static constexpr double two_to_64() { return 1.8446744073709552e+19; }
 };
 
 }  // namespace
 
 Result<Decimal128> Decimal128::FromReal(float x, int32_t precision, int32_t scale) {
-  return Decimal128FromFloat::FromReal(x, precision, scale);
+  return DecimalFloatConversion::FromReal(x, precision, scale);
 }
 
 Result<Decimal128> Decimal128::FromReal(double x, int32_t precision, int32_t scale) {
-  return Decimal128FromDouble::FromReal(x, precision, scale);
+  return DecimalDoubleConversion::FromReal(x, precision, scale);
+}
+
+float Decimal128::ToFloat(int32_t scale) const {
+  return DecimalFloatConversion::ToReal(*this, scale);
+}
+
+double Decimal128::ToDouble(int32_t scale) const {
+  return DecimalDoubleConversion::ToReal(*this, scale);
 }
 
 std::string Decimal128::ToIntegerString() const {
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index d20a246..1f72705 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -138,12 +138,38 @@ class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
     return ToInteger<T>().Value(out);
   }
 
+  /// \brief Convert to a floating-point number (scaled)
+  float ToFloat(int32_t scale) const;
+  /// \brief Convert to a floating-point number (scaled)
+  double ToDouble(int32_t scale) const;
+
+  /// \brief Convert to a floating-point number (scaled)
+  template <typename T>
+  T ToReal(int32_t scale) const {
+    return ToRealConversion<T>::ToReal(*this, scale);
+  }
+
   friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
                                                const Decimal128& decimal);
 
  private:
   /// Converts internal error code to Status
   Status ToArrowStatus(DecimalStatus dstatus) const;
+
+  template <typename T>
+  struct ToRealConversion {};
+};
+
+template <>
+struct Decimal128::ToRealConversion<float> {
+  static float ToReal(const Decimal128& dec, int32_t scale) { return dec.ToFloat(scale); }
+};
+
+template <>
+struct Decimal128::ToRealConversion<double> {
+  static double ToReal(const Decimal128& dec, int32_t scale) {
+    return dec.ToDouble(scale);
+  }
 };
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal_test.cc b/cpp/src/arrow/util/decimal_test.cc
index e80eaab..3419526 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -19,6 +19,7 @@
 #include <array>
 #include <cmath>
 #include <cstdint>
+#include <sstream>
 #include <string>
 #include <tuple>
 #include <vector>
@@ -462,7 +463,7 @@ INSTANTIATE_TEST_SUITE_P(
         // 2**64 - 2**40 (exactly representable in a float)
         FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
         FromFloatTestParam{-1.8446743e+19f, 20, 0, "-18446742974197923840"},
-        // 2**64 + 2**41 (exactly representable in a double)
+        // 2**64 + 2**41 (exactly representable in a float)
         FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
         FromFloatTestParam{-1.8446746e+19f, 20, 0, "-18446746272732807168"},
         FromFloatTestParam{1.8446746e+15f, 20, 4, "1844674627273280.7168"},
@@ -540,6 +541,153 @@ TEST(TestDecimalFromRealDouble, LargeValues) {
   }
 }
 
+template <typename Real>
+struct ToRealTestParam {
+  std::string decimal_value;
+  int32_t scale;
+  Real expected;
+};
+
+using ToFloatTestParam = ToRealTestParam<float>;
+using ToDoubleTestParam = ToRealTestParam<double>;
+
+template <typename Real>
+void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real expected) {
+  Decimal128 dec(decimal_value);
+  ASSERT_EQ(dec.ToReal<Real>(scale), expected);
+}
+
+void CheckFloatToRealApprox(const std::string& decimal_value, int32_t scale,
+                            float expected) {
+  Decimal128 dec(decimal_value);
+  ASSERT_FLOAT_EQ(dec.ToReal<float>(scale), expected);
+}
+
+void CheckDoubleToRealApprox(const std::string& decimal_value, int32_t scale,
+                             double expected) {
+  Decimal128 dec(decimal_value);
+  ASSERT_DOUBLE_EQ(dec.ToReal<double>(scale), expected);
+}
+
+// Common tests for Decimal128::ToReal<T>
+template <typename T>
+class TestDecimalToReal : public ::testing::Test {
+ public:
+  using Real = T;
+  using ParamType = ToRealTestParam<T>;
+
+  Real Pow2(int exp) { return std::pow(static_cast<Real>(2), static_cast<Real>(exp)); }
+
+  Real Pow10(int exp) { return std::pow(static_cast<Real>(10), static_cast<Real>(exp)); }
+
+  void TestSuccess() {
+    const std::vector<ParamType> params{
+        // clang-format off
+        {"0", 0, 0.0f},
+        {"0", 10, 0.0f},
+        {"0", -10, 0.0f},
+        {"1", 0, 1.0f},
+        {"12345", 0, 12345.f},
+#ifndef __MINGW32__  // MinGW has precision issues
+        {"12345", 1, 1234.5f},
+#endif
+        {"12345", -3, 12345000.f},
+        // 2**62
+        {"4611686018427387904", 0, Pow2(62)},
+        // 2**63 + 2**62
+        {"13835058055282163712", 0, Pow2(63) + Pow2(62)},
+        // 2**64 + 2**62
+        {"23058430092136939520", 0, Pow2(64) + Pow2(62)},
+        // 10**38 - 2**103
+#ifndef __MINGW32__  // MinGW has precision issues
+        {"99999989858795198174164788026374356992", 0, Pow10(38) - Pow2(103)},
+#endif
+        // clang-format on
+    };
+    for (const ParamType& param : params) {
+      CheckDecimalToReal<Real>(param.decimal_value, param.scale, param.expected);
+      if (param.decimal_value != "0") {
+        CheckDecimalToReal<Real>("-" + param.decimal_value, param.scale, -param.expected);
+      }
+    }
+  }
+};
+
+TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
+
+TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+
+// Custom test for Decimal128::ToReal<float>
+class TestDecimalToRealFloat : public TestDecimalToReal<float> {};
+
+TEST_F(TestDecimalToRealFloat, LargeValues) {
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -38; scale <= 38; scale++) {
+    CheckFloatToRealApprox("1", scale, Pow10(-scale));
+  }
+  for (int32_t scale = -38; scale <= 36; scale++) {
+    const Real factor = static_cast<Real>(123);
+    CheckFloatToRealApprox("123", scale, factor * Pow10(-scale));
+  }
+}
+
+TEST_F(TestDecimalToRealFloat, Precision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<float>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<float>("-9223373136366403584", 0, -9.223373e+18f);
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<float>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<float>("-18446746272732807168", 0, -1.8446746e+19f);
+}
+
+// ToReal<double> tests are disabled on MinGW because of precision issues in results
+#ifndef __MINGW32__
+
+// Custom test for Decimal128::ToReal<double>
+class TestDecimalToRealDouble : public TestDecimalToReal<double> {};
+
+TEST_F(TestDecimalToRealDouble, LargeValues) {
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -308; scale <= 308; scale++) {
+    CheckDoubleToRealApprox("1", scale, Pow10(-scale));
+  }
+  for (int32_t scale = -308; scale <= 306; scale++) {
+    const Real factor = static_cast<Real>(123);
+    CheckDoubleToRealApprox("123", scale, factor * Pow10(-scale));
+  }
+}
+
+TEST_F(TestDecimalToRealDouble, Precision) {
+  // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
+  CheckDecimalToReal<double>("9223372036854777856", 0, 9.223372036854778e+18);
+  CheckDecimalToReal<double>("-9223372036854777856", 0, -9.223372036854778e+18);
+  // 2**64 - 2**11 (exactly representable in a double)
+  CheckDecimalToReal<double>("18446744073709549568", 0, 1.844674407370955e+19);
+  CheckDecimalToReal<double>("-18446744073709549568", 0, -1.844674407370955e+19);
+  // 2**64 + 2**11 (exactly representable in a double)
+  CheckDecimalToReal<double>("18446744073709555712", 0, 1.8446744073709556e+19);
+  CheckDecimalToReal<double>("-18446744073709555712", 0, -1.8446744073709556e+19);
+  // Almost 10**38 (minus 2**73)
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 0,
+                             9.999999999999998e+37);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 0,
+                             -9.999999999999998e+37);
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 10,
+                             9.999999999999998e+27);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 10,
+                             -9.999999999999998e+27);
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", -10,
+                             9.999999999999998e+47);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", -10,
+                             -9.999999999999998e+47);
+}
+
+#endif  // __MINGW32__
+
 TEST(Decimal128Test, TestSmallNumberFormat) {
   Decimal128 value("0.2");
   std::string expected("0.2");

[arrow] branch master updated: ARROW-7010: [C++] Implement decimal-to-float casts

Reply via email to