Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

via GitHub Mon, 23 Sep 2024 08:56:49 -0700


pitrou commented on code in PR #43957:
URL: https://github.com/apache/arrow/pull/43957#discussion_r1771691664



##########
cpp/src/arrow/util/decimal_test.cc:
##########
@@ -1336,43 +1406,42 @@ TEST(Decimal128Test, TestFromBigEndian) {
       // convert to big endian
       std::reverse(native_endian.begin(), native_endian.end());
 #endif
-      ASSERT_OK_AND_EQ(complement, 
Decimal128::FromBigEndian(native_endian.data(), 16));
+      ASSERT_OK_AND_EQ(complement, 
TypeParam::FromBigEndian(native_endian.data(),
+                                                            
TypeParam::kByteWidth));
 
-      value <<= 8;
-      value += Decimal128(start);
+      value <<= 2;
+      value += TypeParam(start);
     }
   }
 }
 
-TEST(Decimal128Test, TestFromBigEndianBadLength) {
-  ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, -1));
-  ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, 17));
+TYPED_TEST(TestBasicDecimalFunctionality, TestFromBigEndianBadLength) {
+  ASSERT_RAISES(Invalid, TypeParam::FromBigEndian(0, -1));
+  ASSERT_RAISES(Invalid, TypeParam::FromBigEndian(0, TypeParam::kByteWidth + 
1));
 }
 
-TEST(Decimal128Test, TestToInteger) {
-  Decimal128 value1("1234");
-  int32_t out1;
+TYPED_TEST(TestBasicDecimalFunctionality, TestToInteger) {
+  if constexpr (std::is_same_v<TypeParam, Decimal256>) {

Review Comment:
    `Decimal256` isn't in `BasicFunctionalityDecimalTypes`, so is this 
condition useful? Or were you planning to add back `Decimal256` into the type 
list?



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -697,8 +943,150 @@ Status DecimalFromString(const char* type_name, 
std::string_view s, Decimal* out
   return Status::OK();
 }
 
+template <typename DecimalClass>
+Status SimpleDecimalFromString(const char* type_name, std::string_view s,
+                               DecimalClass* out, int32_t* precision, int32_t* 
scale) {
+  if (s.empty()) {
+    return Status::Invalid("Empty string cannot be converted to ", type_name);
+  }
+
+  DecimalComponents dec;
+  if (!ParseDecimalComponents(s.data(), s.size(), &dec)) {
+    return Status::Invalid("The string '", s, "' is not a valid ", type_name, 
" number");
+  }
+
+  // count number of significant digits (without leading zeros)
+  size_t first_non_zero = dec.whole_digits.find_first_not_of('0');
+  size_t significant_digits = dec.fractional_digits.size();
+  if (first_non_zero != std::string::npos) {
+    significant_digits += dec.whole_digits.size() - first_non_zero;
+  }
+  int32_t parsed_precision = static_cast<int32_t>(significant_digits);
+
+  int32_t parsed_scale = 0;
+  if (dec.has_exponent) {
+    auto adjusted_exponent = dec.exponent;
+    parsed_scale =
+        -adjusted_exponent + 
static_cast<int32_t>(dec.fractional_digits.size());
+  } else {
+    parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
+  }
+
+  if (out != nullptr) {
+    if constexpr (std::is_same_v<Decimal32, DecimalClass>) {
+      uint32_t value{0};
+      ShiftAndAdd(dec.whole_digits, &value);
+      ShiftAndAdd(dec.fractional_digits, &value);
+      if (value > static_cast<uint32_t>(
+                      std::numeric_limits<typename 
DecimalClass::ValueType>::max())) {
+        return Status::Invalid("The string '", s, "' cannot be represented as 
",
+                               type_name);
+      }
+
+      *out = DecimalClass(value);
+      if (dec.sign == '-') {
+        out->Negate();
+      }
+    } else {

Review Comment:
   The two codepaths are almost identical except for `uint32_t` vs. `uint64_t`, 
you can probably reconcile them (perhaps something like 
`make_unsigned_t<typename DecimalClass::DigitType>`?)



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -92,6 +93,12 @@ struct DecimalRealConversion : public 
BaseDecimalRealConversion {
     constexpr int kMantissaBits = RealTraits<Real>::kMantissaBits;
     constexpr int kMantissaDigits = RealTraits<Real>::kMantissaDigits;
 
+    // to avoid precision and rounding issues, we'll unconditionally
+    // throw Decimal32 to the approx algorithm instead.

Review Comment:
   Can we open a GH issue to improve this and reference it here?
   (for example, a very simple solution would be to convert to Decimal64 and 
convert down to Decimal32 afterwards)



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -109,6 +116,10 @@ struct DecimalRealConversion : public 
BaseDecimalRealConversion {
       return OverflowError(real, precision, scale);
     }
 
+    if constexpr (kMaxPrecision <= kMantissaDigits) {

Review Comment:
   ```suggestion
       // The algorithm below requires the destination decimal type
       // to be strictly more precise than the source float type
       // (see `kSafeMulByTenTo` calculation).
       if constexpr (kMaxPrecision <= kMantissaDigits) {
   ```



##########
cpp/src/arrow/util/decimal.cc:
##########
@@ -521,6 +750,23 @@ std::string Decimal128::ToString(int32_t scale) const {
   return str;
 }
 
+static inline void ShiftAndAdd(std::string_view input, uint32_t* out) {
+  const uint32_t len =
+      static_cast<uint32_t>(std::min(kInt32DecimalDigits + 1, input.size()));
+  if (len == 0) {
+    return;
+  }
+
+  uint32_t value = 0;
+  ARROW_CHECK(internal::ParseValue<UInt32Type>(input.data(), len, &value));
+
+  uint64_t tmp = *out;
+  tmp *= kUInt32PowersOfTen[len];
+  tmp += value;
+
+  *out = static_cast<uint32_t>(tmp & 0xFFFFFFFFU);

Review Comment:
   So we're just losing bits if there's an overflow here? I'll note that 
`SimpleDecimalFromString` checks for overflows on other ops.
   
   How about:
   1. remove the `uint32_t`-output variant of this function
   2. for Decimal32, call the `uint64_t` variant and reuse the existing limits 
check
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

Reply via email to