Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

via GitHub Mon, 16 Sep 2024 05:51:06 -0700


pitrou commented on code in PR #43957:
URL: https://github.com/apache/arrow/pull/43957#discussion_r1761042655



##########
cpp/src/arrow/compute/kernels/aggregate_internal.h:
##########
@@ -52,6 +52,16 @@ struct FindAccumulatorType<I, enable_if_floating_point<I>> {
   using Type = DoubleType;
 };
 
+template <typename I>
+struct FindAccumulatorType<I, enable_if_decimal32<I>> {
+  using Type = Decimal64Type;

Review Comment:
   Why not `Decimal32Type`?



##########
cpp/src/arrow/compute/kernels/aggregate_internal.h:
##########
@@ -132,7 +142,13 @@ struct GetSumType<T, enable_if_integer<T>> {
 };
 
 template <typename T>
-struct GetSumType<T, enable_if_decimal<T>> {
+struct GetSumType<T, enable_if_decimal32<T>> {
+  using SumType = arrow::Decimal64;

Review Comment:
   Why? This makes things slightly irregular and more difficult to predict for 
the user.



##########
cpp/src/arrow/compute/kernels/aggregate_basic.cc:
##########
@@ -336,8 +336,8 @@ struct ProductImpl : public ScalarAggregator {
       internal::VisitArrayValuesInline<ArrowType>(
           data,
           [&](typename TypeTraits<ArrowType>::CType value) {
-            this->product =
-                MultiplyTraits<AccType>::Multiply(*out_type, this->product, 
value);
+            this->product = MultiplyTraits<AccType>::Multiply(
+                *out_type, this->product, static_cast<ProductType>(value));

Review Comment:
   FTR, why was it necessary to add these casts?



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -343,8 +343,119 @@ struct DecimalGenerator {
   }
 };
 
+template <>
+struct DecimalGenerator<Decimal32Type> {
+  using DecimalBuilderType = typename TypeTraits<Decimal32Type>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static int32_t MaxDecimalInteger(int32_t digits) {
+    return static_cast<int32_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability,
+                                         int64_t alignment, MemoryPool* 
memory_pool) {
+    static constexpr int32_t kMaxDigitsInInteger = 9;
+    static constexpr int kNumIntegers = Decimal32Type::kByteWidth / 4;
+    static_assert(
+        kNumIntegers == (Decimal32Type::kMaxPrecision + kMaxDigitsInInteger - 
1) /
+                            (kMaxDigitsInInteger + 1),
+        "inconsistent decimal metadata: kMaxPrecision doesn't match 
kByteWidth");

Review Comment:
   This can be massively simplified to:
   ```suggestion
       static_assert(kMaxDigitsInInteger >= Decimal32Type::kMaxPrecision);
   ```



##########
cpp/src/arrow/compute/kernels/codegen_internal.h:
##########
@@ -224,7 +258,9 @@ using enable_if_not_floating_value = 
enable_if_t<!std::is_floating_point<T>::val
 
 template <typename T, typename R = T>
 using enable_if_decimal_value =
-    enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, 
T>::value,
+    enable_if_t<std::is_same<Decimal32, T>::value || std::is_same<Decimal64, 
T>::value ||
+                    std::is_same<Decimal128, T>::value ||
+                    std::is_same<Decimal256, T>::value,

Review Comment:
   This is an internal header, no user code should ever use anything from here.



##########
cpp/src/arrow/util/decimal.h:
##########
@@ -31,6 +31,257 @@
 
 namespace arrow {
 
+class Decimal64;
+
+/// Represents a signed 32-bit decimal value in two's complement.
+/// Calulations wrap around and overflow is ignored.
+/// The max decimal precision that can be safely represented is
+/// 9 significant digits.
+///
+/// The implementation is split into two parts :
+///
+/// 1. BasicDecimal32
+///    - can be safely compiled to IR without references to libstdc++
+/// 2. Decimal32
+///    - has additional functionality on top of BasicDecimal32 to deal with
+///      strings and streams
+class ARROW_EXPORT Decimal32 : public BasicDecimal32 {
+ public:
+  /// \cond FALSE
+  // (need to avoid a duplicate definition in sphinx)
+  using BasicDecimal32::BasicDecimal32;
+  /// \endcond
+
+  /// \brief constructor creates a Decimal32 from a BasicDecimal32
+  constexpr Decimal32(const BasicDecimal32& value) noexcept  // NOLINT 
runtime/explicit
+      : BasicDecimal32(value) {}
+
+  /// \brief Parse the number from a base 10 string representation
+  explicit Decimal32(const std::string& value);
+
+  /// \brief Empty constructor creates a Decimal32 with a value of 0
+  /// this is required for some older compilers
+  constexpr Decimal32() noexcept : BasicDecimal32() {}
+
+  /// \brief Divide this number by right and return the result.
+  ///
+  /// This operation is not destructive.
+  /// The answer rounds to zero. Signs work like:
+  ///   21 /  5 ->  4,  1
+  ///  -21 /  5 -> -4, -1
+  ///   21 / -5 -> -4,  1
+  ///  -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \return the pair of the quotient and the remainder
+  Result<std::pair<Decimal32, Decimal32>> Divide(const Decimal32& divisor) 
const {
+    std::pair<Decimal32, Decimal32> result;
+    auto dstatus = BasicDecimal32::Divide(divisor, &result.first, 
&result.second);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return result;
+  }
+
+  /// \brief Convert the Decimal32 value to a base 10 decimal string with the 
given scale
+  std::string ToString(int32_t scale) const;
+
+  /// \brief Convert the value to an integer string
+  std::string ToIntegerString() const;
+
+  /// \brief Cast this value to an int64_t
+  explicit operator int64_t() const;
+
+  explicit operator Decimal64() const;
+
+  /// \brief Convert a decimal string to a Decimal value, optionally including
+  /// precision and scale if they're passed in and not null.
+  static Status FromString(std::string_view s, Decimal32* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const std::string& s, Decimal32* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const char* s, Decimal32* out, int32_t* precision,
+                           int32_t* scale = NULLPTR);
+  static Result<Decimal32> FromString(std::string_view s);
+  static Result<Decimal32> FromString(const std::string& s);
+  static Result<Decimal32> FromString(const char* s);
+
+  static Result<Decimal32> FromReal(double real, int32_t precision, int32_t 
scale);
+  static Result<Decimal32> FromReal(float real, int32_t precision, int32_t 
scale);
+
+  /// \brief Convert from a big-endian byte representation. The length must be
+  ///        between 1 and 4
+  /// \return error statis if the length is an invalid value
+  static Result<Decimal32> FromBigEndian(const uint8_t* data, int32_t length);
+
+  /// \brief Convert Decimal32 from one scale to another
+  Result<Decimal32> Rescale(int32_t original_scale, int32_t new_scale) const {
+    Decimal32 out;
+    auto dstatus = BasicDecimal32::Rescale(original_scale, new_scale, &out);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return out;
+  }
+
+  /// \brief Convert to a signed integer
+  template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, 
int64_t>>
+  Result<T> ToInteger() const {
+    constexpr auto min_value = std::numeric_limits<T>::min();
+    constexpr auto max_value = std::numeric_limits<T>::max();
+    const auto& self = *this;
+    if (self < min_value || self > max_value) {
+      return Status::Invalid("Invalid cast from Decimal32 value to ", 
sizeof(T),
+                             "byte integer");
+    }

Review Comment:
   Can this even happen?



##########
cpp/src/arrow/type_test.cc:
##########
@@ -1241,15 +1245,15 @@ TEST_F(TestUnifySchemas, Decimal) {
   CheckPromoteTo(decimal256(3, -2), decimal256(5, -2), decimal256(5, -2), 
options);
 
   // int32() is essentially decimal128(10, 0)
-  CheckPromoteTo(int32(), decimal128(3, 2), decimal128(12, 2), options);
-  CheckPromoteTo(int32(), decimal128(3, -2), decimal128(10, 0), options);
-  CheckPromoteTo(int64(), decimal128(38, 37), decimal256(56, 37), options);
+  CheckPromoteTo(int32(), decimal128(3, 2), decimal128(11, 2), options);
+  CheckPromoteTo(int32(), decimal128(3, -2), decimal128(9, 0), options);
+  CheckPromoteTo(int64(), decimal128(38, 37), decimal256(55, 37), options);
 
   CheckUnifyFailsTypeError(decimal256(1, 0), decimal128(1, 0), options);
 
   options.promote_numeric_width = true;
   CheckPromoteTo(decimal128(3, 2), decimal256(5, 2), decimal256(5, 2), 
options);
-  CheckPromoteTo(int32(), decimal128(38, 37), decimal256(47, 37), options);
+  CheckPromoteTo(int32(), decimal128(38, 37), decimal256(46, 37), options);
   CheckUnifyFailsInvalid(decimal128(38, 10), decimal256(76, 5), options);
 

Review Comment:
   Can you add a couple tests that `smallest_decimal` returns the right type 
for a given precision?



##########
cpp/src/arrow/util/basic_decimal.h:
##########
@@ -166,6 +167,317 @@ class ARROW_EXPORT GenericBasicDecimal {
   }
 };
 
+template <typename BaseType>
+class ARROW_EXPORT BasicDecimal {
+ public:
+  static_assert(
+      std::is_same_v<BaseType, int32_t> || std::is_same_v<BaseType, int64_t>,
+      "for bitwidths larger than 64 bits use BasicDecimal128 and 
BasicDecimal256");
+
+  static constexpr int kMaxPrecision = std::numeric_limits<BaseType>::digits10;
+  static constexpr int kMaxScale = kMaxPrecision;
+  static constexpr int kBitWidth = sizeof(BaseType) * CHAR_BIT;
+  static constexpr int kByteWidth = sizeof(BaseType);
+
+  /// \brief Empty constructor creates a decimal with a value of 0.
+  constexpr BasicDecimal() noexcept : value_(0) {}
+
+  /// \brief Create a decimal from any integer not wider than 64 bits.
+  template <typename T,
+            typename = typename std::enable_if<
+                std::is_integral<T>::value && (sizeof(T) <= sizeof(int64_t)), 
T>::type>
+  constexpr BasicDecimal(T value) noexcept  // NOLINT(runtime/explicit)
+      : value_(static_cast<BaseType>(value)) {}
+
+  /// \brief Create a decimal from an array of bytes.
+  ///
+  /// Bytes are assumed to be in native-endian byte order.
+  explicit BasicDecimal(const uint8_t* bytes) { memcpy(&value_, bytes, 
sizeof(value_)); }
+
+  const uint8_t* native_endian_bytes() const {
+    return reinterpret_cast<const uint8_t*>(&value_);
+  }
+
+  uint8_t* mutable_native_endian_bytes() { return 
reinterpret_cast<uint8_t*>(&value_); }
+
+  /// \brief Return the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const {
+    std::array<uint8_t, kByteWidth> out{{0}};
+    memcpy(out.data(), &value_, kByteWidth);
+    return out;
+  }
+
+  /// \brief Copy the raw bytes of the value in native-endian byte order
+  void ToBytes(uint8_t* out) const { memcpy(out, &value_, kByteWidth); }
+
+  /// \brief Return 1 if positive or 0, -1 if strictly negative
+  int64_t Sign() const { return 1 | (value_ >> (sizeof(kBitWidth) - 1)); }
+
+  bool IsNegative() const { return value_ < 0; }
+
+  explicit operator bool() const { return value_ != 0; }
+
+  friend bool operator==(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ == right.value_;
+  }
+
+  friend bool operator!=(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ != right.value_;
+  }
+
+  BaseType value() const { return value_; }
+
+  /// \brief count hte number of leading binary zeroes.
+  int32_t CountLeadingBinaryZeros() const;
+
+  constexpr uint64_t low_bits() const { return static_cast<uint64_t>(value_); }
+
+ protected:
+  BaseType value_;
+};
+
+class BasicDecimal64;
+
+class ARROW_EXPORT BasicDecimal32 : public BasicDecimal<int32_t> {
+ public:
+  using BasicDecimal<int32_t>::BasicDecimal;
+  using ValueType = int32_t;
+
+  /// \brief Negate the current value (in-place)
+  BasicDecimal32& Negate();
+  /// \brief Absolute value (in-place)
+  BasicDecimal32& Abs();
+  /// \brief Absolute value
+  static BasicDecimal32 Abs(const BasicDecimal32& left);
+  /// \brief Add a number to this one. The result is truncated to 32 bits.
+  BasicDecimal32& operator+=(const BasicDecimal32& right);
+  /// \brief Subtract a number from this one. The result is truncated to 32 
bits.
+  BasicDecimal32& operator-=(const BasicDecimal32& right);
+  /// \brief Multiply this number by another. The result is truncated to 32 
bits.
+  BasicDecimal32& operator*=(const BasicDecimal32& right);

Review Comment:
   1. why not define those in the base generic class?
   2. since the implementation of each method is very short, perhaps inline it 
in the header here?



##########
cpp/src/arrow/type.cc:
##########
@@ -1459,20 +1478,51 @@ int32_t DecimalType::DecimalSize(int32_t precision) {
   return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) + 
1));
 }
 
+template <typename D>
+Status ValidateDecimalPrecision(int32_t precision) {
+  if (precision < D::kMinPrecision || precision > D::kMaxPrecision) {
+    return Status::Invalid("Decimal precision out of range [", 
int32_t(D::kMinPrecision),
+                           ", ", int32_t(D::kMaxPrecision), "]: ", precision);
+  }
+  return Status::OK();
+}
+

Review Comment:
   Make this `static` or anonymous?



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -343,8 +343,119 @@ struct DecimalGenerator {
   }
 };
 
+template <>
+struct DecimalGenerator<Decimal32Type> {
+  using DecimalBuilderType = typename TypeTraits<Decimal32Type>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static int32_t MaxDecimalInteger(int32_t digits) {
+    return static_cast<int32_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability,
+                                         int64_t alignment, MemoryPool* 
memory_pool) {
+    static constexpr int32_t kMaxDigitsInInteger = 9;
+    static constexpr int kNumIntegers = Decimal32Type::kByteWidth / 4;
+    static_assert(
+        kNumIntegers == (Decimal32Type::kMaxPrecision + kMaxDigitsInInteger - 
1) /
+                            (kMaxDigitsInInteger + 1),
+        "inconsistent decimal metadata: kMaxPrecision doesn't match 
kByteWidth");
+
+    // First generate separate random values for individual components:
+    // boolean sign (including null-ness), and uint64 "digits" in big endian 
order.

Review Comment:
   This comment is outdated, remove it?



##########
docs/source/status.rst:
##########
@@ -44,6 +44,10 @@ Data Types
 
+-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+
 | Float32/64        | ✓     | ✓     | ✓     | ✓  |  ✓    |  ✓    | ✓     | ✓   
  | ✓         |
 
+-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+
+| Decimal32         | ✓     |       | ✓     |    |       |       |       |     
  |           |
++-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+
+| Decimal64         | ✓     |       | ✓     |    |       |       |       |     
  |           |
++-------------------+-------+-------+-------+----+-------+-------+-------+-------+-----------+

Review Comment:
   Go isn't part of this PR, is it?



##########
cpp/src/arrow/type.cc:
##########
@@ -480,7 +494,7 @@ Result<std::shared_ptr<DataType>> MaybeMergeNumericTypes(
     ARROW_ASSIGN_OR_RAISE(const int32_t precision,
                           MaxDecimalDigitsForInteger(other_type->id()));
     ARROW_ASSIGN_OR_RAISE(const auto promoted_decimal,
-                          DecimalType::Make(promoted_type->id(), precision, 
0));
+                          DecimalType::Make(promoted_type->id(), precision - 
1, 0));

Review Comment:
   Why this change?



##########
cpp/src/arrow/util/basic_decimal.h:
##########
@@ -166,6 +167,317 @@ class ARROW_EXPORT GenericBasicDecimal {
   }
 };
 
+template <typename BaseType>

Review Comment:
   `BaseType` sounds like a base class, perhaps call it `DigitType` or simply 
`Digit`.



##########
cpp/src/arrow/integration/json_internal.cc:
##########
@@ -969,12 +988,18 @@ Result<std::shared_ptr<DataType>> GetDecimal(const 
RjObject& json_type) {
     bit_width = maybe_bit_width.ValueOrDie();
   }
 
-  if (bit_width == 128) {
-    return decimal128(precision, scale);
-  } else if (bit_width == 256) {
-    return decimal256(precision, scale);
+  switch (bit_width) {
+    case 32:
+      return decimal32(precision, scale);
+    case 64:
+      return decimal64(precision, scale);
+    case 128:
+      return decimal128(precision, scale);
+    case 256:
+      return decimal256(precision, scale);
   }
-  return Status::Invalid("Only 128 bit and 256 Decimals are supported. 
Received",
+
+  return Status::Invalid("Only 32/64/128/256-bit Decimals are supported. 
Received",

Review Comment:
   Let's add the required space at the end
   ```suggestion
     return Status::Invalid("Only 32/64/128/256-bit Decimals are supported. 
Received ",
   ```



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -343,8 +343,119 @@ struct DecimalGenerator {
   }
 };
 
+template <>
+struct DecimalGenerator<Decimal32Type> {
+  using DecimalBuilderType = typename TypeTraits<Decimal32Type>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static int32_t MaxDecimalInteger(int32_t digits) {
+    return static_cast<int32_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability,
+                                         int64_t alignment, MemoryPool* 
memory_pool) {
+    static constexpr int32_t kMaxDigitsInInteger = 9;
+    static constexpr int kNumIntegers = Decimal32Type::kByteWidth / 4;
+    static_assert(
+        kNumIntegers == (Decimal32Type::kMaxPrecision + kMaxDigitsInInteger - 
1) /
+                            (kMaxDigitsInInteger + 1),
+        "inconsistent decimal metadata: kMaxPrecision doesn't match 
kByteWidth");
+
+    // First generate separate random values for individual components:
+    // boolean sign (including null-ness), and uint64 "digits" in big endian 
order.
+    const auto& decimal_type = checked_cast<const DecimalType&>(*type_);
+
+    auto remaining_digits = decimal_type.precision();
+    auto values = checked_pointer_cast<Int32Array>(rng_->Int32(
+        size, -1 * MaxDecimalInteger(remaining_digits),
+        MaxDecimalInteger(remaining_digits), null_probability, alignment, 
memory_pool));
+
+    DecimalBuilderType builder(type_, memory_pool, alignment);
+    ABORT_NOT_OK(builder.Reserve(size));
+
+    for (int64_t i = 0; i < size; ++i) {
+      if (values->IsValid(i)) {
+        builder.UnsafeAppend(DecimalValue{values->Value(i)});
+      } else {
+        builder.UnsafeAppendNull();
+      }
+    }
+    std::shared_ptr<Array> array;
+    ABORT_NOT_OK(builder.Finish(&array));
+    return array;
+  }
+};
+
+template <>
+struct DecimalGenerator<Decimal64Type> {

Review Comment:
   Hmm, can you perhaps refactor those two specialisations for `Decimal32Type` 
and `Decimal64Type` into a single one (perhaps `SmallDecimalGenerator<T>`)?



##########
cpp/src/arrow/testing/random.cc:
##########
@@ -343,8 +343,119 @@ struct DecimalGenerator {
   }
 };
 
+template <>
+struct DecimalGenerator<Decimal32Type> {
+  using DecimalBuilderType = typename TypeTraits<Decimal32Type>::BuilderType;
+  using DecimalValue = typename DecimalBuilderType::ValueType;
+
+  std::shared_ptr<DataType> type_;
+  RandomArrayGenerator* rng_;
+
+  static int32_t MaxDecimalInteger(int32_t digits) {
+    return static_cast<int32_t>(std::ceil(std::pow(10.0, digits))) - 1;
+  }
+
+  std::shared_ptr<Array> MakeRandomArray(int64_t size, double null_probability,
+                                         int64_t alignment, MemoryPool* 
memory_pool) {
+    static constexpr int32_t kMaxDigitsInInteger = 9;
+    static constexpr int kNumIntegers = Decimal32Type::kByteWidth / 4;
+    static_assert(
+        kNumIntegers == (Decimal32Type::kMaxPrecision + kMaxDigitsInInteger - 
1) /
+                            (kMaxDigitsInInteger + 1),
+        "inconsistent decimal metadata: kMaxPrecision doesn't match 
kByteWidth");
+
+    // First generate separate random values for individual components:
+    // boolean sign (including null-ness), and uint64 "digits" in big endian 
order.
+    const auto& decimal_type = checked_cast<const DecimalType&>(*type_);
+
+    auto remaining_digits = decimal_type.precision();

Review Comment:
   Why "remaining"?



##########
cpp/src/arrow/util/basic_decimal.h:
##########
@@ -166,6 +167,317 @@ class ARROW_EXPORT GenericBasicDecimal {
   }
 };
 
+template <typename BaseType>
+class ARROW_EXPORT BasicDecimal {

Review Comment:
   There's already a ton of "basic" types here, can you name it perhaps 
`SmallBasicDecimal`?



##########
cpp/src/arrow/testing/gtest_util.h:
##########
@@ -171,7 +171,8 @@ using PrimitiveArrowTypes =
 using TemporalArrowTypes =
     ::testing::Types<Date32Type, Date64Type, TimestampType, Time32Type, 
Time64Type>;
 
-using DecimalArrowTypes = ::testing::Types<Decimal128Type, Decimal256Type>;
+using DecimalArrowTypes =
+    ::testing::Types</*Decimal32Type, Decimal64Type,*/ Decimal128Type, 
Decimal256Type>;

Review Comment:
   But it's going to be a separate PR, right?



##########
cpp/src/arrow/util/basic_decimal.h:
##########
@@ -166,6 +167,317 @@ class ARROW_EXPORT GenericBasicDecimal {
   }
 };
 
+template <typename BaseType>
+class ARROW_EXPORT BasicDecimal {
+ public:
+  static_assert(
+      std::is_same_v<BaseType, int32_t> || std::is_same_v<BaseType, int64_t>,
+      "for bitwidths larger than 64 bits use BasicDecimal128 and 
BasicDecimal256");
+
+  static constexpr int kMaxPrecision = std::numeric_limits<BaseType>::digits10;
+  static constexpr int kMaxScale = kMaxPrecision;
+  static constexpr int kBitWidth = sizeof(BaseType) * CHAR_BIT;
+  static constexpr int kByteWidth = sizeof(BaseType);
+
+  /// \brief Empty constructor creates a decimal with a value of 0.
+  constexpr BasicDecimal() noexcept : value_(0) {}
+
+  /// \brief Create a decimal from any integer not wider than 64 bits.
+  template <typename T,
+            typename = typename std::enable_if<
+                std::is_integral<T>::value && (sizeof(T) <= sizeof(int64_t)), 
T>::type>
+  constexpr BasicDecimal(T value) noexcept  // NOLINT(runtime/explicit)
+      : value_(static_cast<BaseType>(value)) {}
+
+  /// \brief Create a decimal from an array of bytes.
+  ///
+  /// Bytes are assumed to be in native-endian byte order.
+  explicit BasicDecimal(const uint8_t* bytes) { memcpy(&value_, bytes, 
sizeof(value_)); }
+
+  const uint8_t* native_endian_bytes() const {
+    return reinterpret_cast<const uint8_t*>(&value_);
+  }
+
+  uint8_t* mutable_native_endian_bytes() { return 
reinterpret_cast<uint8_t*>(&value_); }
+
+  /// \brief Return the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const {
+    std::array<uint8_t, kByteWidth> out{{0}};
+    memcpy(out.data(), &value_, kByteWidth);
+    return out;
+  }
+
+  /// \brief Copy the raw bytes of the value in native-endian byte order
+  void ToBytes(uint8_t* out) const { memcpy(out, &value_, kByteWidth); }
+
+  /// \brief Return 1 if positive or 0, -1 if strictly negative
+  int64_t Sign() const { return 1 | (value_ >> (sizeof(kBitWidth) - 1)); }
+
+  bool IsNegative() const { return value_ < 0; }
+
+  explicit operator bool() const { return value_ != 0; }
+
+  friend bool operator==(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ == right.value_;
+  }
+
+  friend bool operator!=(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ != right.value_;
+  }
+
+  BaseType value() const { return value_; }
+
+  /// \brief count hte number of leading binary zeroes.
+  int32_t CountLeadingBinaryZeros() const;
+
+  constexpr uint64_t low_bits() const { return static_cast<uint64_t>(value_); }
+
+ protected:
+  BaseType value_;
+};
+
+class BasicDecimal64;
+
+class ARROW_EXPORT BasicDecimal32 : public BasicDecimal<int32_t> {
+ public:
+  using BasicDecimal<int32_t>::BasicDecimal;
+  using ValueType = int32_t;
+
+  /// \brief Negate the current value (in-place)
+  BasicDecimal32& Negate();
+  /// \brief Absolute value (in-place)
+  BasicDecimal32& Abs();
+  /// \brief Absolute value
+  static BasicDecimal32 Abs(const BasicDecimal32& left);
+  /// \brief Add a number to this one. The result is truncated to 32 bits.
+  BasicDecimal32& operator+=(const BasicDecimal32& right);
+  /// \brief Subtract a number from this one. The result is truncated to 32 
bits.
+  BasicDecimal32& operator-=(const BasicDecimal32& right);
+  /// \brief Multiply this number by another. The result is truncated to 32 
bits.
+  BasicDecimal32& operator*=(const BasicDecimal32& right);
+
+  /// \brief Divide this number by the divisor and return the result.
+  ///
+  /// This operation is not destructive.
+  /// The answer rounds to zero. Signs work like:
+  ///   21 /  5 ->  4,  1
+  ///  -21 /  5 -> -4, -1
+  ///   21 / -5 -> -4,  1
+  ///  -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \param[out] result the quotient
+  /// \param[out] remainder the remainder after the division
+  DecimalStatus Divide(const BasicDecimal32& divisor, BasicDecimal32* result,
+                       BasicDecimal32* remainder) const;
+
+  /// \brief In-place division
+  BasicDecimal32& operator/=(const BasicDecimal32& right);
+  /// \brief Bitwise "or" between two BasicDecimal32s
+  BasicDecimal32& operator|=(const BasicDecimal32& right);
+  /// \brief Bitwise "and" between two BasicDecimal32s
+  BasicDecimal32& operator&=(const BasicDecimal32& right);
+  /// \brief Shift left by the given number of bits.
+  BasicDecimal32& operator<<=(uint32_t bits);
+
+  BasicDecimal32 operator<<(uint32_t bits) const {
+    auto res = *this;
+    res <<= bits;
+    return res;
+  }
+
+  /// \brief Shift right by the given number of bits.
+  ///
+  /// Negative values will sign-extend
+  BasicDecimal32& operator>>=(uint32_t bits);
+
+  BasicDecimal32 operator>>(uint32_t bits) const {
+    auto res = *this;
+    res >>= bits;
+    return res;
+  }
+
+  /// \brief Convert BasicDecimal32 from one scale to another
+  DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
+                        BasicDecimal32* out) const;
+
+  void GetWholeAndFraction(int scale, BasicDecimal32* whole,
+                           BasicDecimal32* fraction) const;
+
+  /// \brief Scale up.
+  BasicDecimal32 IncreaseScaleBy(int32_t increase_by) const;
+
+  /// \brief Scale down.
+  ///
+  /// - If 'round' is true, the right-most digits are dropped and the result 
value is
+  ///   rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped 
digits
+  ///   (>= 10^reduce_by / 2).
+  /// - If 'round' is false, the right-most digits are simply dropped.
+  BasicDecimal32 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
+
+  /// \brief Whether this number fits in the given precision
+  ///
+  /// Return true if the number of significant digits is less or equal to 
'precision'.
+  bool FitsInPrecision(int32_t precision) const;
+
+  /// \brief Get the maximum valid unscaled decimal value.
+  static const BasicDecimal32& GetMaxValue();
+  /// \brief Get the maximum valid unscaled decimal value for the given 
precision.
+  static BasicDecimal32 GetMaxValue(int32_t precision);
+
+  /// \brief Get the maximum decimal value (is not a valid value).
+  static constexpr BasicDecimal32 GetMaxSentinel() {
+    return BasicDecimal32(std::numeric_limits<int32_t>::max());
+  }
+
+  /// \brief Get the minimum decimal value (is not a valid value).
+  static constexpr BasicDecimal32 GetMinSentinel() {
+    return BasicDecimal32(std::numeric_limits<int32_t>::min());
+  }
+
+  /// \brief Scale multiplier for a given scale value.
+  static const BasicDecimal32& GetScaleMultiplier(int32_t scale);
+  /// \brief Half-scale multiplier for a given scale value.
+  static const BasicDecimal32& GetHalfScaleMultiplier(int32_t scale);
+
+  explicit operator BasicDecimal64() const;
+};
+
+ARROW_EXPORT bool operator<(const BasicDecimal32& left, const BasicDecimal32& 
right);
+ARROW_EXPORT bool operator<=(const BasicDecimal32& left, const BasicDecimal32& 
right);
+ARROW_EXPORT bool operator>(const BasicDecimal32& left, const BasicDecimal32& 
right);
+ARROW_EXPORT bool operator>=(const BasicDecimal32& left, const BasicDecimal32& 
right);
+
+ARROW_EXPORT BasicDecimal32 operator-(const BasicDecimal32& self);
+ARROW_EXPORT BasicDecimal32 operator+(const BasicDecimal32& left,
+                                      const BasicDecimal32& right);
+ARROW_EXPORT BasicDecimal32 operator-(const BasicDecimal32& left,
+                                      const BasicDecimal32& right);
+ARROW_EXPORT BasicDecimal32 operator*(const BasicDecimal32& left,
+                                      const BasicDecimal32& right);

Review Comment:
   Similarly, these ones can probably be 1) made generic 2) be inlined in this 
header



##########
cpp/src/arrow/util/decimal.h:
##########
@@ -31,6 +31,257 @@
 
 namespace arrow {
 
+class Decimal64;
+
+/// Represents a signed 32-bit decimal value in two's complement.
+/// Calulations wrap around and overflow is ignored.
+/// The max decimal precision that can be safely represented is
+/// 9 significant digits.
+///
+/// The implementation is split into two parts :
+///
+/// 1. BasicDecimal32
+///    - can be safely compiled to IR without references to libstdc++
+/// 2. Decimal32
+///    - has additional functionality on top of BasicDecimal32 to deal with
+///      strings and streams
+class ARROW_EXPORT Decimal32 : public BasicDecimal32 {
+ public:
+  /// \cond FALSE
+  // (need to avoid a duplicate definition in sphinx)
+  using BasicDecimal32::BasicDecimal32;
+  /// \endcond
+
+  /// \brief constructor creates a Decimal32 from a BasicDecimal32
+  constexpr Decimal32(const BasicDecimal32& value) noexcept  // NOLINT 
runtime/explicit
+      : BasicDecimal32(value) {}
+
+  /// \brief Parse the number from a base 10 string representation
+  explicit Decimal32(const std::string& value);
+
+  /// \brief Empty constructor creates a Decimal32 with a value of 0
+  /// this is required for some older compilers
+  constexpr Decimal32() noexcept : BasicDecimal32() {}
+
+  /// \brief Divide this number by right and return the result.
+  ///
+  /// This operation is not destructive.
+  /// The answer rounds to zero. Signs work like:
+  ///   21 /  5 ->  4,  1
+  ///  -21 /  5 -> -4, -1
+  ///   21 / -5 -> -4,  1
+  ///  -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \return the pair of the quotient and the remainder
+  Result<std::pair<Decimal32, Decimal32>> Divide(const Decimal32& divisor) 
const {
+    std::pair<Decimal32, Decimal32> result;
+    auto dstatus = BasicDecimal32::Divide(divisor, &result.first, 
&result.second);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return result;
+  }
+
+  /// \brief Convert the Decimal32 value to a base 10 decimal string with the 
given scale
+  std::string ToString(int32_t scale) const;
+
+  /// \brief Convert the value to an integer string
+  std::string ToIntegerString() const;
+
+  /// \brief Cast this value to an int64_t
+  explicit operator int64_t() const;
+
+  explicit operator Decimal64() const;
+
+  /// \brief Convert a decimal string to a Decimal value, optionally including
+  /// precision and scale if they're passed in and not null.
+  static Status FromString(std::string_view s, Decimal32* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const std::string& s, Decimal32* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const char* s, Decimal32* out, int32_t* precision,
+                           int32_t* scale = NULLPTR);
+  static Result<Decimal32> FromString(std::string_view s);
+  static Result<Decimal32> FromString(const std::string& s);
+  static Result<Decimal32> FromString(const char* s);
+
+  static Result<Decimal32> FromReal(double real, int32_t precision, int32_t 
scale);
+  static Result<Decimal32> FromReal(float real, int32_t precision, int32_t 
scale);
+
+  /// \brief Convert from a big-endian byte representation. The length must be
+  ///        between 1 and 4
+  /// \return error statis if the length is an invalid value
+  static Result<Decimal32> FromBigEndian(const uint8_t* data, int32_t length);
+
+  /// \brief Convert Decimal32 from one scale to another
+  Result<Decimal32> Rescale(int32_t original_scale, int32_t new_scale) const {
+    Decimal32 out;
+    auto dstatus = BasicDecimal32::Rescale(original_scale, new_scale, &out);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return out;
+  }
+
+  /// \brief Convert to a signed integer
+  template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, 
int64_t>>
+  Result<T> ToInteger() const {
+    constexpr auto min_value = std::numeric_limits<T>::min();
+    constexpr auto max_value = std::numeric_limits<T>::max();
+    const auto& self = *this;
+    if (self < min_value || self > max_value) {
+      return Status::Invalid("Invalid cast from Decimal32 value to ", 
sizeof(T),
+                             "byte integer");
+    }
+    return static_cast<T>(value_);
+  }
+
+  /// \brief Convert to a signed integer
+  template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, 
int64_t>>
+  Status ToInteger(T* out) const {
+    return ToInteger<T>().Value(out);
+  }
+
+  /// \brief Convert to a floating-point number (scaled)
+  float ToFloat(int32_t scale) const;
+  /// \brief Convert to a floating-point number (scaled)
+  double ToDouble(int32_t scale) const;
+
+  /// \brief Convert to a floating-point number (scaled)
+  template <typename T, typename = 
std::enable_if_t<std::is_floating_point_v<T>>>
+  T ToReal(int32_t scale) const {
+    static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
+                  "Unexpected floating-point type");
+    if constexpr (std::is_same_v<T, float>) {
+      return ToFloat(scale);
+    } else {
+      return ToDouble(scale);
+    }
+  }
+
+  ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os,
+                                                      const Decimal32& 
decimal);
+
+ private:
+  /// Converts internal error code to Status
+  Status ToArrowStatus(DecimalStatus dstatus) const;
+};
+
+class ARROW_EXPORT Decimal64 : public BasicDecimal64 {
+ public:
+  /// \cond FALSE
+  // (need to avoid a duplicate definition in sphinx)
+  using BasicDecimal64::BasicDecimal64;
+  /// \endcond
+
+  /// \brief constructor creates a Decimal64 from a BasicDecimal64
+  constexpr Decimal64(const BasicDecimal64& value) noexcept  // NOLINT 
runtime/explicit
+      : BasicDecimal64(value) {}
+
+  explicit Decimal64(const BasicDecimal32& value) noexcept
+      : BasicDecimal64(static_cast<int64_t>(value.value())) {}
+
+  /// \brief Parse the number from a base 10 string representation
+  explicit Decimal64(const std::string& value);
+
+  /// \brief Empty constructor creates a Decimal64 with a value of 0
+  /// this is required for some older compilers
+  constexpr Decimal64() noexcept : BasicDecimal64() {}
+
+  /// \brief Divide this number by right and return the result.
+  ///
+  /// This operation is not destructive.
+  /// The answer rounds to zero. Signs work like:
+  ///   21 /  5 ->  4,  1
+  ///  -21 /  5 -> -4, -1
+  ///   21 / -5 -> -4,  1
+  ///  -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \return the pair of the quotient and the remainder
+  Result<std::pair<Decimal64, Decimal64>> Divide(const Decimal64& divisor) 
const {
+    std::pair<Decimal64, Decimal64> result;
+    auto dstatus = BasicDecimal64::Divide(divisor, &result.first, 
&result.second);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return result;
+  }
+
+  /// \brief Convert the Decimal64 value to a base 10 decimal string with the 
given scale
+  std::string ToString(int32_t scale) const;
+
+  /// \brief Convert the value to an integer string
+  std::string ToIntegerString() const;
+
+  /// \brief Cast this value to an int64_t
+  explicit operator int64_t() const;
+
+  /// \brief Convert a decimal string to a Decimal value, optionally including
+  /// precision and scale if they're passed in and not null.
+  static Status FromString(std::string_view s, Decimal64* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const std::string& s, Decimal64* out, int32_t* 
precision,
+                           int32_t* scale = NULLPTR);
+  static Status FromString(const char* s, Decimal64* out, int32_t* precision,
+                           int32_t* scale = NULLPTR);
+  static Result<Decimal64> FromString(std::string_view s);
+  static Result<Decimal64> FromString(const std::string& s);
+  static Result<Decimal64> FromString(const char* s);
+
+  static Result<Decimal64> FromReal(double real, int32_t precision, int32_t 
scale);
+  static Result<Decimal64> FromReal(float real, int32_t precision, int32_t 
scale);
+
+  /// \brief Convert from a big-endian byte representation. The length must be
+  ///        between 1 and 4
+  /// \return error statis if the length is an invalid value
+  static Result<Decimal64> FromBigEndian(const uint8_t* data, int32_t length);
+
+  /// \brief Convert Decimal64 from one scale to another
+  Result<Decimal64> Rescale(int32_t original_scale, int32_t new_scale) const {
+    Decimal64 out;
+    auto dstatus = BasicDecimal64::Rescale(original_scale, new_scale, &out);
+    ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
+    return out;
+  }
+
+  /// \brief Convert to a signed integer
+  template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, 
int64_t>>
+  Result<T> ToInteger() const {
+    constexpr auto min_value = std::numeric_limits<T>::min();
+    constexpr auto max_value = std::numeric_limits<T>::max();
+    const auto& self = *this;
+    if (self < min_value || self > max_value) {

Review Comment:
   This will issue pointless comparisons if T is `int64_t`.



##########
cpp/src/arrow/util/basic_decimal.h:
##########
@@ -166,6 +167,317 @@ class ARROW_EXPORT GenericBasicDecimal {
   }
 };
 
+template <typename BaseType>
+class ARROW_EXPORT BasicDecimal {
+ public:
+  static_assert(
+      std::is_same_v<BaseType, int32_t> || std::is_same_v<BaseType, int64_t>,
+      "for bitwidths larger than 64 bits use BasicDecimal128 and 
BasicDecimal256");
+
+  static constexpr int kMaxPrecision = std::numeric_limits<BaseType>::digits10;
+  static constexpr int kMaxScale = kMaxPrecision;
+  static constexpr int kBitWidth = sizeof(BaseType) * CHAR_BIT;
+  static constexpr int kByteWidth = sizeof(BaseType);
+
+  /// \brief Empty constructor creates a decimal with a value of 0.
+  constexpr BasicDecimal() noexcept : value_(0) {}
+
+  /// \brief Create a decimal from any integer not wider than 64 bits.
+  template <typename T,
+            typename = typename std::enable_if<
+                std::is_integral<T>::value && (sizeof(T) <= sizeof(int64_t)), 
T>::type>
+  constexpr BasicDecimal(T value) noexcept  // NOLINT(runtime/explicit)
+      : value_(static_cast<BaseType>(value)) {}
+
+  /// \brief Create a decimal from an array of bytes.
+  ///
+  /// Bytes are assumed to be in native-endian byte order.
+  explicit BasicDecimal(const uint8_t* bytes) { memcpy(&value_, bytes, 
sizeof(value_)); }
+
+  const uint8_t* native_endian_bytes() const {
+    return reinterpret_cast<const uint8_t*>(&value_);
+  }
+
+  uint8_t* mutable_native_endian_bytes() { return 
reinterpret_cast<uint8_t*>(&value_); }
+
+  /// \brief Return the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const {
+    std::array<uint8_t, kByteWidth> out{{0}};
+    memcpy(out.data(), &value_, kByteWidth);
+    return out;
+  }
+
+  /// \brief Copy the raw bytes of the value in native-endian byte order
+  void ToBytes(uint8_t* out) const { memcpy(out, &value_, kByteWidth); }
+
+  /// \brief Return 1 if positive or 0, -1 if strictly negative
+  int64_t Sign() const { return 1 | (value_ >> (sizeof(kBitWidth) - 1)); }
+
+  bool IsNegative() const { return value_ < 0; }
+
+  explicit operator bool() const { return value_ != 0; }
+
+  friend bool operator==(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ == right.value_;
+  }
+
+  friend bool operator!=(const BasicDecimal& left, const BasicDecimal& right) {
+    return left.value_ != right.value_;
+  }
+
+  BaseType value() const { return value_; }
+
+  /// \brief count hte number of leading binary zeroes.

Review Comment:
   ```suggestion
     /// \brief count the number of leading binary zeroes.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

Reply via email to