Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

via GitHub Tue, 10 Sep 2024 07:37:00 -0700


bkietz commented on code in PR #43957:
URL: https://github.com/apache/arrow/pull/43957#discussion_r1752054170



##########
cpp/src/arrow/engine/substrait/expression_internal.cc:
##########
@@ -951,6 +951,38 @@ struct ScalarToProtoImpl {
   Status Visit(const MonthIntervalScalar& s) { return NotImplemented(s); }
   Status Visit(const DayTimeIntervalScalar& s) { return NotImplemented(s); }
 
+  Status Visit(const Decimal32Scalar& s) {

Review Comment:
   This is also a place where we'd benefit from a generic



##########
cpp/src/arrow/array/util.cc:
##########
@@ -152,6 +152,42 @@ class ArrayDataEndianSwapper {
     return Status::OK();
   }
 
+  Status Visit(const Decimal32Type& type) {
+    auto data = reinterpret_cast<const uint32_t*>(data_->buffers[1]->data());
+    ARROW_ASSIGN_OR_RAISE(auto new_buffer,
+                          AllocateBuffer(data_->buffers[1]->size(), pool_));
+    auto new_data = reinterpret_cast<uint32_t*>(new_buffer->mutable_data());
+    // NOTE: data_->length not trusted (see warning above)
+    const int64_t length = data_->buffers[1]->size() / 
Decimal32Type::kByteWidth;
+    for (int64_t i = 0; i < length; i++) {
+#if ARROW_LITTLE_ENDIAN
+      new_data[i] = bit_util::FromBigEndian(data[i]);
+#else
+      new_data[i] = bit_util::FromLittleEndian(data[i]);
+#endif

Review Comment:
   If it's not too annoying: I think the copy-pasta here has reached the 
critical mass where it'd be worthwhile to make Visit generic for decimal types, 
probably assisted by extracting 
`{Decimal32,Decimal64,Decimal128,Decimal256}::SwapEndian()`



##########
cpp/src/arrow/type.cc:
##########
@@ -1459,6 +1478,42 @@ int32_t DecimalType::DecimalSize(int32_t precision) {
   return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) + 
1));
 }
 
+// ----------------------------------------------------------------------
+// Decimal32 type
+
+Decimal32Type::Decimal32Type(int32_t precision, int32_t scale)
+    : DecimalType(type_id, 4, precision, scale) {
+  ARROW_CHECK_GE(precision, kMinPrecision);
+  ARROW_CHECK_LE(precision, kMaxPrecision);

Review Comment:
   Would it be worthwhile to extract
   
   ```
   template <typename D>
   Status ValidateDecimalPrecision(int32_t precision) {
     if (precision < D::kMinPrecision || precision > D::kMaxPrecision) {
       return Status::Invalid("Decimal precision out of range [", 
int32_t(D::kMinPrecision),
                              ", ", int32_t(D::kMaxPrecision), "]: ", 
precision);
     }
     return Status::Ok();
   }
   ```
   ?
   
   Then we could write
   ```suggestion
     ARROW_CHECK_OK(ValidatePrecisionScale<Decimal32Type>());
   ```
   
   And in the `::Make` functions below
   ```
   Result<std::shared_ptr<DataType>> Decimal32Type::Make(int32_t precision, 
int32_t scale) {
     RETURN_NOT_OK(ValidatePrecisionScale<Decimal32Type>(precision));
     return std::make_shared<Decimal32Type>(precision, scale);
   }
   ```



##########
cpp/src/arrow/compute/kernels/codegen_internal.h:
##########
@@ -224,7 +258,9 @@ using enable_if_not_floating_value = 
enable_if_t<!std::is_floating_point<T>::val
 
 template <typename T, typename R = T>
 using enable_if_decimal_value =
-    enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256, 
T>::value,
+    enable_if_t<std::is_same<Decimal32, T>::value || std::is_same<Decimal64, 
T>::value ||
+                    std::is_same<Decimal128, T>::value ||
+                    std::is_same<Decimal256, T>::value,

Review Comment:
   It's a potentially breaking change for user code like:
   
   ```
   template <typename T>
   enable_if_decimal_value<T> SetToMagic(T* ptr) {
     constexpr char MAGIC[256] = ...;
     if constexpr (std::is_same_v<T, Decimal128>) {
       memcpy(ptr, &MAGIC, sizeof(Decimal128));
     } else {
       static_assert(std::is_same_v<T, Decimal256>); // error for Decimal32
       memcpy(ptr, &MAGIC, sizeof(MAGIC));
     }
   }
   ```
   
   I think that risk is outweighed by the greater weirdness of not including 
the new types however



##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -667,8 +667,10 @@ static ScalarVector GetScalars() {
       std::make_shared<BinaryViewScalar>(hello),
       std::make_shared<FixedSizeBinaryScalar>(
           hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
-      std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
-      std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
+      std::make_shared<Decimal32Scalar>(Decimal32(10), smallest_decimal(7, 4)),
+      std::make_shared<Decimal64Scalar>(Decimal64(10), smallest_decimal(12, 
4)),
+      std::make_shared<Decimal128Scalar>(Decimal128(10), smallest_decimal(20, 
4)),
+      std::make_shared<Decimal256Scalar>(Decimal256(10), smallest_decimal(76, 
38)),

Review Comment:
   "narrowest_decimal"?



##########
cpp/src/arrow/compute/kernels/codegen_internal.h:
##########
@@ -140,6 +140,30 @@ struct GetViewType<Type, 
enable_if_t<is_base_binary_type<Type>::value ||
   static T LogicalValue(PhysicalType value) { return value; }
 };
 
+template <>
+struct GetViewType<Decimal32Type> {
+  using T = Decimal32;
+  using PhysicalType = std::string_view;

Review Comment:
   I wonder if `std::array<char, 4>` or `span<char, 4>` would be a more doable 
replacement



##########
cpp/src/arrow/type_fwd.h:
##########
@@ -512,9 +528,28 @@ std::shared_ptr<DataType> fixed_size_binary(int32_t 
byte_width);
 ///
 /// If the precision is greater than 38, a Decimal256Type is returned,
 /// otherwise a Decimal128Type.
+///
+/// Deprecated: prefer `smallest_decimal` instead.

Review Comment:
   We have an attribute macro which should provide compiler warnings when the 
deprecated function is used
   
   ```suggestion
   ///
   /// Deprecated: prefer `smallest_decimal` instead.
   ARROW_DEPRECATED("Deprecated in 18.0. Use `smallest_decimal` instead")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] GH-43956: [C++][Format] Add initial Decimal32/Decimal64 implementations [arrow]

Reply via email to