This is an automated email from the ASF dual-hosted git repository.
yibocai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bbecb6a ARROW-13067: [C++][Compute] Implement integer to decimal cast
bbecb6a is described below
commit bbecb6ae5242cf97061d5daa73ef127d49b105b6
Author: Yibo Cai <[email protected]>
AuthorDate: Thu Sep 2 10:04:39 2021 +0000
ARROW-13067: [C++][Compute] Implement integer to decimal cast
Closes #11045 from cyb70289/13067-int2dec
Authored-by: Yibo Cai <[email protected]>
Signed-off-by: Yibo Cai <[email protected]>
---
.../compute/kernels/scalar_arithmetic_test.cc | 14 ++++-
.../arrow/compute/kernels/scalar_cast_numeric.cc | 59 ++++++++++++++++++++++
cpp/src/arrow/compute/kernels/scalar_cast_test.cc | 34 ++++++++++++-
3 files changed, 104 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 2939e47..ce3588f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -1365,6 +1365,18 @@ TEST(TestBinaryDecimalArithmetic, DispatchBest) {
}
}
+ // decimal, integer
+ for (std::string name : {"add", "subtract", "multiply", "divide"}) {
+ for (std::string suffix : {"", "_checked"}) {
+ name += suffix;
+
+ CheckDispatchBest(name, {int64(), decimal128(1, 0)},
+ {decimal128(1, 0), decimal128(1, 0)});
+ CheckDispatchBest(name, {decimal128(1, 0), int64()},
+ {decimal128(1, 0), decimal128(1, 0)});
+ }
+ }
+
// decimal, decimal
for (std::string name : {"add", "subtract"}) {
for (std::string suffix : {"", "_checked"}) {
@@ -1410,8 +1422,6 @@ TEST(TestBinaryDecimalArithmetic, DispatchBest) {
{decimal256(6, 4), decimal256(6, 4)});
}
}
-
- // TODO(ARROW-13067): add 'integer, decimal' tests
}
// reference result from bc (precsion=100, scale=40)
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
index cd89a57..e9cf928 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
@@ -392,6 +392,53 @@ struct CastFunctor<O, I,
};
// ----------------------------------------------------------------------
+// Integer to decimal
+
+struct IntegerToDecimal {
+ template <typename OutValue, typename IntegerType>
+ OutValue Call(KernelContext*, IntegerType val, Status* st) const {
+ auto maybe_decimal = OutValue(val).Rescale(0, out_scale_);
+ if (ARROW_PREDICT_TRUE(maybe_decimal.ok())) {
+ return maybe_decimal.MoveValueUnsafe();
+ }
+ *st = maybe_decimal.status();
+ return OutValue{};
+ }
+
+ int32_t out_scale_;
+};
+
+template <typename O, typename I>
+struct CastFunctor<O, I,
+ enable_if_t<is_decimal_type<O>::value &&
is_integer_type<I>::value>> {
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const auto& out_type = checked_cast<const O&>(*out->type());
+ const auto out_scale = out_type.scale();
+ const auto out_precision = out_type.precision();
+
+ // verify precision and scale
+ if (out_scale < 0) {
+ return Status::Invalid("Scale must be non-negative");
+ }
+ // maximal number of decimal digits for int8/16/32/64
+ constexpr std::array<int, 4> decimal_digits{3, 5, 10, 19};
+ using ctype = typename I::c_type;
+ static_assert(sizeof(ctype) <= 8, "");
+ const int precision = decimal_digits[BitUtil::Log2(sizeof(ctype))] +
out_scale;
+ if (out_precision < precision) {
+ return Status::Invalid(
+ "Precision is not great enough for the result. "
+ "It should be at least ",
+ precision);
+ }
+
+ applicator::ScalarUnaryNotNullStateful<O, I, IntegerToDecimal> kernel(
+ IntegerToDecimal{out_scale});
+ return kernel.Exec(ctx, batch, out);
+ }
+};
+
+// ----------------------------------------------------------------------
// Decimal to decimal
// Helper that converts the input and output decimals
@@ -641,6 +688,12 @@ std::shared_ptr<CastFunction> GetCastToDecimal128() {
DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
CastFunctor<Decimal128Type, DoubleType>::Exec));
+ // Cast from integer
+ for (const std::shared_ptr<DataType>& in_ty : IntTypes()) {
+ auto exec = GenerateInteger<CastFunctor, Decimal128Type>(in_ty->id());
+ DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty,
std::move(exec)));
+ }
+
// Cast from other decimal
auto exec = CastFunctor<Decimal128Type, Decimal128Type>::Exec;
// We resolve the output type of this kernel from the CastOptions
@@ -664,6 +717,12 @@ std::shared_ptr<CastFunction> GetCastToDecimal256() {
DCHECK_OK(func->AddKernel(Type::DOUBLE, {float64()}, sig_out_ty,
CastFunctor<Decimal256Type, DoubleType>::Exec));
+ // Cast from integer
+ for (const std::shared_ptr<DataType>& in_ty : IntTypes()) {
+ auto exec = GenerateInteger<CastFunctor, Decimal256Type>(in_ty->id());
+ DCHECK_OK(func->AddKernel(in_ty->id(), {in_ty}, sig_out_ty,
std::move(exec)));
+ }
+
// Cast from other decimal
auto exec = CastFunctor<Decimal256Type, Decimal128Type>::Exec;
DCHECK_OK(
diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
index 90d4189..fc7e42a 100644
--- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc
@@ -71,9 +71,11 @@ static std::vector<std::shared_ptr<DataType>> kNumericTypes
= {
uint8(), int8(), uint16(), int16(), uint32(),
int32(), uint64(), int64(), float32(), float64()};
-static std::vector<std::shared_ptr<DataType>> kDictionaryIndexTypes = {
+static std::vector<std::shared_ptr<DataType>> kIntegerTypes = {
int8(), uint8(), int16(), uint16(), int32(), uint32(), int64(), uint64()};
+static std::vector<std::shared_ptr<DataType>> kDictionaryIndexTypes =
kIntegerTypes;
+
static std::vector<std::shared_ptr<DataType>> kBaseBinaryTypes = {
binary(), utf8(), large_binary(), large_utf8()};
@@ -587,6 +589,36 @@ TEST(Cast, Decimal256ToInt) {
CheckCast(negative_scale, ArrayFromJSON(int64(), "[1234567890000,
-120000]"), options);
}
+TEST(Cast, IntegerToDecimal) {
+ for (auto decimal_type : {decimal128(21, 2), decimal256(21, 2)}) {
+ for (auto integer_type : kIntegerTypes) {
+ CheckCast(
+ ArrayFromJSON(integer_type, "[0, 7, null, 100, 99]"),
+ ArrayFromJSON(decimal_type, R"(["0.00", "7.00", null, "100.00",
"99.00"])"));
+ }
+ }
+
+ // extreme value
+ for (auto decimal_type : {decimal128(19, 0), decimal256(19, 0)}) {
+ CheckCast(ArrayFromJSON(int64(), "[-9223372036854775808,
9223372036854775807]"),
+ ArrayFromJSON(decimal_type,
+ R"(["-9223372036854775808",
"9223372036854775807"])"));
+ CheckCast(ArrayFromJSON(uint64(), "[0, 18446744073709551615]"),
+ ArrayFromJSON(decimal_type, R"(["0", "18446744073709551615"])"));
+ }
+
+ // insufficient output precision
+ {
+ CastOptions options;
+
+ options.to_type = decimal128(5, 3);
+ CheckCastFails(ArrayFromJSON(int8(), "[0]"), options);
+
+ options.to_type = decimal256(76, 67);
+ CheckCastFails(ArrayFromJSON(int32(), "[0]"), options);
+ }
+}
+
TEST(Cast, Decimal128ToDecimal128) {
CastOptions options;