[GitHub] [arrow] pitrou commented on a diff in pull request #36667: GH-35942: [C++] Improve Decimal ToReal accuracy

via GitHub Mon, 17 Jul 2023 06:58:23 -0700


pitrou commented on code in PR #36667:
URL: https://github.com/apache/arrow/pull/36667#discussion_r1265367191



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -305,12 +305,39 @@ struct Decimal128RealConversion
   }
 
   template <typename Real>
-  static Real ToRealPositive(const Decimal128& decimal, int32_t scale) {
+  static Real ToRealPositiveNoSplit(const Decimal128& decimal, int32_t scale) {
     Real x = 
RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high_bits()));
     x += static_cast<Real>(decimal.low_bits());
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal128 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.

Review Comment:
   Even if the integer has more than 52 significant bits??



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -978,6 +1005,33 @@ struct Decimal256RealConversion
     x *= LargePowerOfTen<Real>(-scale);
     return x;
   }
+
+  /// An appoximate conversion from Decimal256 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= 
RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the 
exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 
2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal256& decimal, int32_t scale) {
+    const auto parts_le = 
bit_util::little_endian::Make(decimal.native_endian_array());

Review Comment:
   ```suggestion
       const auto parts_le = decimal.little_endian_array();
   ```



##########
cpp/src/arrow/util/decimal_test.cc:
##########
@@ -1119,6 +1137,35 @@ class TestDecimalToReal : public ::testing::Test {
     // 2**64 + 2**41 (exactly representable in a float)
     CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 
1.8446746e+19f);
     CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
+
+    // Integers are always exact
+    auto scale = Decimal::kMaxScale - 1;
+    std::string seven = "7.";
+    for (int32_t i = 0; i < scale; ++i) {
+      seven += "0";
+    }
+    CheckDecimalToReal<Decimal, Real>(seven, scale, 7.0f);
+    CheckDecimalToReal<Decimal, Real>("-" + seven, scale, -7.0f);
+
+    CheckDecimalToReal<Decimal, Real>("99999999999999999999.0000000000000000", 
16,
+                                      99999999999999999999.0f);
+    CheckDecimalToReal<Decimal, 
Real>("-99999999999999999999.0000000000000000", 16,
+                                      -99999999999999999999.0f);
+
+    // Small fractions are within one ULP
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("9999999.9", 1, 9999999.9f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("-9999999.9", 1, 
-9999999.9f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("9999999.999999", 6, 
9999999.999999f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("-9999999.999999", 6, 
-9999999.999999f);
+
+    // Large fractions are within 2^-23
+    constexpr Real epsilon = 1.1920928955078125e-07f;  // 2^-23

Review Comment:
   Shouldn't you use a better epsilon for `double`?



##########
cpp/src/arrow/util/decimal_test.cc:
##########
@@ -1119,6 +1137,35 @@ class TestDecimalToReal : public ::testing::Test {
     // 2**64 + 2**41 (exactly representable in a float)
     CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 
1.8446746e+19f);
     CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
+
+    // Integers are always exact
+    auto scale = Decimal::kMaxScale - 1;
+    std::string seven = "7.";
+    for (int32_t i = 0; i < scale; ++i) {
+      seven += "0";
+    }

Review Comment:
   Can probably write this more tersely:
   ```suggestion
       seven.append(scale, '0');  // pad with trailing zeros
   ```



##########
cpp/src/arrow/util/decimal_internal.h:
##########
@@ -451,6 +451,8 @@ struct RealTraits<float> {
   static constexpr int kMantissaBits = 24;
   // ceil(log10(2 ^ kMantissaBits))
   static constexpr int kMantissaDigits = 8;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = 1ULL << kMantissaBits;

Review Comment:
   Need to subtract one, no?
   ```suggestion
     static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
   ```



##########
cpp/src/arrow/util/decimal_test.cc:
##########
@@ -1119,6 +1137,35 @@ class TestDecimalToReal : public ::testing::Test {
     // 2**64 + 2**41 (exactly representable in a float)
     CheckDecimalToReal<Decimal, Real>("18446746272732807168", 0, 
1.8446746e+19f);
     CheckDecimalToReal<Decimal, Real>("-18446746272732807168", 0, 
-1.8446746e+19f);
+
+    // Integers are always exact
+    auto scale = Decimal::kMaxScale - 1;
+    std::string seven = "7.";
+    for (int32_t i = 0; i < scale; ++i) {
+      seven += "0";
+    }
+    CheckDecimalToReal<Decimal, Real>(seven, scale, 7.0f);
+    CheckDecimalToReal<Decimal, Real>("-" + seven, scale, -7.0f);
+
+    CheckDecimalToReal<Decimal, Real>("99999999999999999999.0000000000000000", 
16,
+                                      99999999999999999999.0f);
+    CheckDecimalToReal<Decimal, 
Real>("-99999999999999999999.0000000000000000", 16,
+                                      -99999999999999999999.0f);
+
+    // Small fractions are within one ULP
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("9999999.9", 1, 9999999.9f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("-9999999.9", 1, 
-9999999.9f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("9999999.999999", 6, 
9999999.999999f);
+    CheckDecimalToRealWithinOneULP<Decimal, Real>("-9999999.999999", 6, 
-9999999.999999f);
+
+    // Large fractions are within 2^-23
+    constexpr Real epsilon = 1.1920928955078125e-07f;  // 2^-23
+    CheckDecimalToRealWithinEpsilon<Decimal, Real>(
+        "112334829348925.99070703983306884765625", 23, epsilon,
+        112334829348925.99070703983306884765625f);

Review Comment:
   Passing this as a `float` constant is weird. If you want to differentiate 
these tests between `float` and `double`, you may use `if constexpr`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] pitrou commented on a diff in pull request #36667: GH-35942: [C++] Improve Decimal ToReal accuracy

Reply via email to