bkietz commented on code in PR #43957:
URL: https://github.com/apache/arrow/pull/43957#discussion_r1752054170
##########
cpp/src/arrow/engine/substrait/expression_internal.cc:
##########
@@ -951,6 +951,38 @@ struct ScalarToProtoImpl {
Status Visit(const MonthIntervalScalar& s) { return NotImplemented(s); }
Status Visit(const DayTimeIntervalScalar& s) { return NotImplemented(s); }
+ Status Visit(const Decimal32Scalar& s) {
Review Comment:
This is also a place where we'd benefit from a generic
##########
cpp/src/arrow/array/util.cc:
##########
@@ -152,6 +152,42 @@ class ArrayDataEndianSwapper {
return Status::OK();
}
+ Status Visit(const Decimal32Type& type) {
+ auto data = reinterpret_cast<const uint32_t*>(data_->buffers[1]->data());
+ ARROW_ASSIGN_OR_RAISE(auto new_buffer,
+ AllocateBuffer(data_->buffers[1]->size(), pool_));
+ auto new_data = reinterpret_cast<uint32_t*>(new_buffer->mutable_data());
+ // NOTE: data_->length not trusted (see warning above)
+ const int64_t length = data_->buffers[1]->size() /
Decimal32Type::kByteWidth;
+ for (int64_t i = 0; i < length; i++) {
+#if ARROW_LITTLE_ENDIAN
+ new_data[i] = bit_util::FromBigEndian(data[i]);
+#else
+ new_data[i] = bit_util::FromLittleEndian(data[i]);
+#endif
Review Comment:
If it's not too annoying: I think the copy-pasta here has reached the
critical mass where it'd be worthwhile to make Visit generic for decimal types,
probably assisted by extracting
`{Decimal32,Decimal64,Decimal128,Decimal256}::SwapEndian()`
##########
cpp/src/arrow/type.cc:
##########
@@ -1459,6 +1478,42 @@ int32_t DecimalType::DecimalSize(int32_t precision) {
return static_cast<int32_t>(std::ceil((precision / 8.0) * std::log2(10) +
1));
}
+// ----------------------------------------------------------------------
+// Decimal32 type
+
+Decimal32Type::Decimal32Type(int32_t precision, int32_t scale)
+ : DecimalType(type_id, 4, precision, scale) {
+ ARROW_CHECK_GE(precision, kMinPrecision);
+ ARROW_CHECK_LE(precision, kMaxPrecision);
Review Comment:
Would it be worthwhile to extract
```
template <typename D>
Status ValidateDecimalPrecision(int32_t precision) {
if (precision < D::kMinPrecision || precision > D::kMaxPrecision) {
return Status::Invalid("Decimal precision out of range [",
int32_t(D::kMinPrecision),
", ", int32_t(D::kMaxPrecision), "]: ",
precision);
}
return Status::Ok();
}
```
?
Then we could write
```suggestion
ARROW_CHECK_OK(ValidatePrecisionScale<Decimal32Type>());
```
And in the `::Make` functions below
```
Result<std::shared_ptr<DataType>> Decimal32Type::Make(int32_t precision,
int32_t scale) {
RETURN_NOT_OK(ValidatePrecisionScale<Decimal32Type>(precision));
return std::make_shared<Decimal32Type>(precision, scale);
}
```
##########
cpp/src/arrow/compute/kernels/codegen_internal.h:
##########
@@ -224,7 +258,9 @@ using enable_if_not_floating_value =
enable_if_t<!std::is_floating_point<T>::val
template <typename T, typename R = T>
using enable_if_decimal_value =
- enable_if_t<std::is_same<Decimal128, T>::value || std::is_same<Decimal256,
T>::value,
+ enable_if_t<std::is_same<Decimal32, T>::value || std::is_same<Decimal64,
T>::value ||
+ std::is_same<Decimal128, T>::value ||
+ std::is_same<Decimal256, T>::value,
Review Comment:
It's a potentially breaking change for user code like:
```
template <typename T>
enable_if_decimal_value<T> SetToMagic(T* ptr) {
constexpr char MAGIC[256] = ...;
if constexpr (std::is_same_v<T, Decimal128>) {
memcpy(ptr, &MAGIC, sizeof(Decimal128));
} else {
static_assert(std::is_same_v<T, Decimal256>); // error for Decimal32
memcpy(ptr, &MAGIC, sizeof(MAGIC));
}
}
```
I think that risk is outweighed by the greater weirdness of not including
the new types however
##########
cpp/src/arrow/array/array_test.cc:
##########
@@ -667,8 +667,10 @@ static ScalarVector GetScalars() {
std::make_shared<BinaryViewScalar>(hello),
std::make_shared<FixedSizeBinaryScalar>(
hello, fixed_size_binary(static_cast<int32_t>(hello->size()))),
- std::make_shared<Decimal128Scalar>(Decimal128(10), decimal(16, 4)),
- std::make_shared<Decimal256Scalar>(Decimal256(10), decimal(76, 38)),
+ std::make_shared<Decimal32Scalar>(Decimal32(10), smallest_decimal(7, 4)),
+ std::make_shared<Decimal64Scalar>(Decimal64(10), smallest_decimal(12,
4)),
+ std::make_shared<Decimal128Scalar>(Decimal128(10), smallest_decimal(20,
4)),
+ std::make_shared<Decimal256Scalar>(Decimal256(10), smallest_decimal(76,
38)),
Review Comment:
"narrowest_decimal"?
##########
cpp/src/arrow/compute/kernels/codegen_internal.h:
##########
@@ -140,6 +140,30 @@ struct GetViewType<Type,
enable_if_t<is_base_binary_type<Type>::value ||
static T LogicalValue(PhysicalType value) { return value; }
};
+template <>
+struct GetViewType<Decimal32Type> {
+ using T = Decimal32;
+ using PhysicalType = std::string_view;
Review Comment:
I wonder if `std::array<char, 4>` or `span<char, 4>` would be a more doable
replacement
##########
cpp/src/arrow/type_fwd.h:
##########
@@ -512,9 +528,28 @@ std::shared_ptr<DataType> fixed_size_binary(int32_t
byte_width);
///
/// If the precision is greater than 38, a Decimal256Type is returned,
/// otherwise a Decimal128Type.
+///
+/// Deprecated: prefer `smallest_decimal` instead.
Review Comment:
We have an attribute macro which should provide compiler warnings when the
deprecated function is used
```suggestion
///
/// Deprecated: prefer `smallest_decimal` instead.
ARROW_DEPRECATED("Deprecated in 18.0. Use `smallest_decimal` instead")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]