(arrow) branch main updated: GH-40308: [C++][Gandiva] Add support for compute module's decimal promotion rules (#40434)

kou Sun, 24 Mar 2024 22:02:35 -0700

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new f100eff39f GH-40308: [C++][Gandiva] Add support for compute module's 
decimal promotion rules (#40434)
f100eff39f is described below

commit f100eff39fd37538c5ab4572083029622fc0f5aa
Author: ZhangHuiGui <[email protected]>
AuthorDate: Mon Mar 25 13:02:24 2024 +0800

    GH-40308: [C++][Gandiva] Add support for compute module's decimal promotion 
rules (#40434)
    
    ### Rationale for this change
    
    Gandiva decimal divide rules are different with our compute module's rules. 
Some systems such as Redshift use the same rules as our compute module's rules. 
So it's useful that Gandiva support our compute module's rules too.
    
    ### What changes are included in this PR?
    Support an option argument in GetResultType for compatibilty with  
**compute module's decimal promotion rules**.
    
    ### Are these changes tested?
    Yes
    
    ### Are there any user-facing changes?
    No
    
    * GitHub Issue: #40308
    
    Authored-by: ZhangHuiGui <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/decimal_type_util.cc         | 19 ++++++++++---
 cpp/src/gandiva/decimal_type_util.h          | 14 +++++++++-
 cpp/src/gandiva/tests/decimal_single_test.cc | 40 +++++++++++++++++++++++-----
 3 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/cpp/src/gandiva/decimal_type_util.cc 
b/cpp/src/gandiva/decimal_type_util.cc
index 2abc5a21ea..cce4292f3b 100644
--- a/cpp/src/gandiva/decimal_type_util.cc
+++ b/cpp/src/gandiva/decimal_type_util.cc
@@ -30,7 +30,8 @@ constexpr int32_t DecimalTypeUtil::kMinAdjustedScale;
 
 // Implementation of decimal rules.
 Status DecimalTypeUtil::GetResultType(Op op, const Decimal128TypeVector& 
in_types,
-                                      Decimal128TypePtr* out_type) {
+                                      Decimal128TypePtr* out_type,
+                                      bool use_compute_rules) {
   DCHECK_EQ(in_types.size(), 2);
 
   *out_type = nullptr;
@@ -59,7 +60,9 @@ Status DecimalTypeUtil::GetResultType(Op op, const 
Decimal128TypeVector& in_type
       break;
 
     case kOpDivide:
-      result_scale = std::max(kMinAdjustedScale, s1 + p2 + 1);
+      result_scale = use_compute_rules
+                         ? std::max(kMinComputeAdjustedScale, s1 + p2 - s2 + 1)
+                         : std::max(kMinAdjustedScale, s1 + p2 + 1);
       result_precision = p1 - s1 + s2 + result_scale;
       break;
 
@@ -68,7 +71,17 @@ Status DecimalTypeUtil::GetResultType(Op op, const 
Decimal128TypeVector& in_type
       result_precision = std::min(p1 - s1, p2 - s2) + result_scale;
       break;
   }
-  *out_type = MakeAdjustedType(result_precision, result_scale);
+
+  if (use_compute_rules) {
+    if (result_precision < kMinPrecision || result_precision > kMaxPrecision) {
+      return Status::Invalid("Decimal precision out of range [", 
int32_t(kMinPrecision),
+                             ", ", int32_t(kMaxPrecision), "]: ", 
result_precision);
+    }
+    *out_type = MakeType(result_precision, result_scale);
+  } else {
+    *out_type = MakeAdjustedType(result_precision, result_scale);
+  }
+
   return Status::OK();
 }
 
diff --git a/cpp/src/gandiva/decimal_type_util.h 
b/cpp/src/gandiva/decimal_type_util.h
index 2b496f6cbf..16ce544717 100644
--- a/cpp/src/gandiva/decimal_type_util.h
+++ b/cpp/src/gandiva/decimal_type_util.h
@@ -45,6 +45,9 @@ class GANDIVA_EXPORT DecimalTypeUtil {
   /// The maximum precision representable by a 8-byte decimal
   static constexpr int32_t kMaxDecimal64Precision = 18;
 
+  /// The minimum precision representable by a 16-byte decimal
+  static constexpr int32_t kMinPrecision = 1;
+
   /// The maximum precision representable by a 16-byte decimal
   static constexpr int32_t kMaxPrecision = 38;
 
@@ -57,10 +60,19 @@ class GANDIVA_EXPORT DecimalTypeUtil {
   // * There is no strong reason for 6, but both SQLServer and Impala use 6 
too.
   static constexpr int32_t kMinAdjustedScale = 6;
 
+  // The same function with kMinAdjustedScale, just for compatibility with
+  // compute module's decimal promotion rules.
+  static constexpr int32_t kMinComputeAdjustedScale = 4;
+
   // For specified operation and input scale/precision, determine the output
   // scale/precision.
+  //
+  // The 'use_compute_rules' is for compatibility with compute module's
+  // decimal promotion rules:
+  // https://arrow.apache.org/docs/cpp/compute.html#arithmetic-functions
   static Status GetResultType(Op op, const Decimal128TypeVector& in_types,
-                              Decimal128TypePtr* out_type);
+                              Decimal128TypePtr* out_type,
+                              bool use_compute_rules = false);
 
   static Decimal128TypePtr MakeType(int32_t precision, int32_t scale) {
     return std::dynamic_pointer_cast<arrow::Decimal128Type>(
diff --git a/cpp/src/gandiva/tests/decimal_single_test.cc 
b/cpp/src/gandiva/tests/decimal_single_test.cc
index 666ee4a68d..57c281a455 100644
--- a/cpp/src/gandiva/tests/decimal_single_test.cc
+++ b/cpp/src/gandiva/tests/decimal_single_test.cc
@@ -49,7 +49,8 @@ class TestDecimalOps : public ::testing::Test {
   ArrayPtr MakeDecimalVector(const DecimalScalar128& in);
 
   void Verify(DecimalTypeUtil::Op, const std::string& function, const 
DecimalScalar128& x,
-              const DecimalScalar128& y, const DecimalScalar128& expected);
+              const DecimalScalar128& y, const DecimalScalar128& expected,
+              bool use_compute_rules = false, bool verify_failed = false);
 
   void AddAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
                     const DecimalScalar128& expected) {
@@ -67,8 +68,10 @@ class TestDecimalOps : public ::testing::Test {
   }
 
   void DivideAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
-                       const DecimalScalar128& expected) {
-    Verify(DecimalTypeUtil::kOpDivide, "divide", x, y, expected);
+                       const DecimalScalar128& expected, bool 
use_compute_rules = false,
+                       bool verify_failed = false) {
+    Verify(DecimalTypeUtil::kOpDivide, "divide", x, y, expected, 
use_compute_rules,
+           verify_failed);
   }
 
   void ModAndVerify(const DecimalScalar128& x, const DecimalScalar128& y,
@@ -91,7 +94,8 @@ ArrayPtr TestDecimalOps::MakeDecimalVector(const 
DecimalScalar128& in) {
 
 void TestDecimalOps::Verify(DecimalTypeUtil::Op op, const std::string& 
function,
                             const DecimalScalar128& x, const DecimalScalar128& 
y,
-                            const DecimalScalar128& expected) {
+                            const DecimalScalar128& expected, bool 
use_compute_rules,
+                            bool verify_failed) {
   auto x_type = std::make_shared<arrow::Decimal128Type>(x.precision(), 
x.scale());
   auto y_type = std::make_shared<arrow::Decimal128Type>(y.precision(), 
y.scale());
   auto field_x = field("x", x_type);
@@ -99,8 +103,14 @@ void TestDecimalOps::Verify(DecimalTypeUtil::Op op, const 
std::string& function,
   auto schema = arrow::schema({field_x, field_y});
 
   Decimal128TypePtr output_type;
-  auto status = DecimalTypeUtil::GetResultType(op, {x_type, y_type}, 
&output_type);
-  ARROW_EXPECT_OK(status);
+  auto status = DecimalTypeUtil::GetResultType(op, {x_type, y_type}, 
&output_type,
+                                               use_compute_rules);
+  if (verify_failed) {
+    ASSERT_NOT_OK(status);
+    return;
+  } else {
+    ARROW_EXPECT_OK(status);
+  }
 
   // output fields
   auto res = field("res", output_type);
@@ -283,13 +293,31 @@ TEST_F(TestDecimalOps, TestMultiply) {
 }
 
 TEST_F(TestDecimalOps, TestDivide) {
+  // fast-path
+  //
+  // origin Gandiva's rules
   DivideAndVerify(decimal_literal("201", 10, 3),              // x
                   decimal_literal("301", 10, 2),              // y
                   decimal_literal("6677740863787", 23, 14));  // expected
 
+  // compute module's rules
+  DivideAndVerify(decimal_literal("201", 10, 3),           // x
+                  decimal_literal("301", 10, 2),           // y
+                  decimal_literal("66777408638", 21, 12),  // expected
+                  /*use_compute_rules=*/true);
+
+  // max precision beyond 38
+  //
+  // normally under origin Gandiva rules
   DivideAndVerify(DecimalScalar128(std::string(38, '9'), 38, 20),  // x
                   DecimalScalar128(std::string(35, '9'), 38, 20),  // x
                   DecimalScalar128("1000000000", 38, 6));
+
+  // invalid under compute module's rules
+  DivideAndVerify(DecimalScalar128(std::string(38, '9'), 38, 20),  // x
+                  DecimalScalar128(std::string(35, '9'), 38, 20),  // x
+                  DecimalScalar128(std::string(35, '9'), 0, 0),    // useless 
expected
+                  /*use_compute_rules=*/true, /*verify_failed=*/true);
 }
 
 TEST_F(TestDecimalOps, TestMod) {

(arrow) branch main updated: GH-40308: [C++][Gandiva] Add support for compute module's decimal promotion rules (#40434)

Reply via email to