[GitHub] [arrow] pitrou commented on a change in pull request #8542: ARROW-10407: [C++] Add BasicDecimal256 Division Support

GitBox Mon, 09 Nov 2020 07:07:48 -0800


pitrou commented on a change in pull request #8542:
URL: https://github.com/apache/arrow/pull/8542#discussion_r519873678




##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -395,33 +395,49 @@ BasicDecimal128& BasicDecimal128::operator*=(const 
BasicDecimal128& right) {
   return *this;
 }
 
-/// Expands the given value into an array of ints so that we can work on
-/// it. The array will be converted to an absolute value and the wasNegative
+/// Expands the given little endian array of uint64_t into a big endian array 
of
+/// uint32_t. The value of input array is expected to be non-negative. The 
result_array
+/// will remove leading zeros from the input array.
+/// \param value_array a little endian array to represent the value
+/// \param result_array a big endian array of length N*2 to set with the value
+/// \result the output length of the array
+template <size_t N>
+static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
+                           uint32_t* result_array) {
+  int64_t next_index = 0;
+  for (int64_t i = N - 1; i >= 0; i--) {
+    if (value_array[i] != 0) {
+      if (value_array[i] <= std::numeric_limits<uint32_t>::max()) {
+        result_array[next_index++] = static_cast<uint32_t>(value_array[i]);
+        i--;
+      }
+      for (int64_t j = i; j >= 0; j--) {
+        result_array[next_index++] = static_cast<uint32_t>(value_array[j] >> 
32);
+        result_array[next_index++] = static_cast<uint32_t>(value_array[j]);
+      }
+      break;
+    }
+  }
+  return next_index;
+}
+
+/// Expands the given value into a big endian array of ints so that we can 
work on
+/// it. The array will be converted to an absolute value and the was_negative
 /// flag will be set appropriately. The array will remove leading zeros from
 /// the value.
-/// \param array an array of length 4 to set with the value
+/// \param array a big endian array of length 4 to set with the value
 /// \param was_negative a flag for whether the value was original negative
 /// \result the output length of the array
 static int64_t FillInArray(const BasicDecimal128& value, uint32_t* array,
                            bool& was_negative) {
-  uint64_t high;
-  uint64_t low;
-  const int64_t highbits = value.high_bits();
-  const uint64_t lowbits = value.low_bits();
-
-  if (highbits < 0) {
-    low = ~lowbits + 1;
-    high = static_cast<uint64_t>(~highbits);
-    if (low == 0) {
-      ++high;
-    }
-    was_negative = true;
-  } else {
-    low = lowbits;
-    high = static_cast<uint64_t>(highbits);
-    was_negative = false;
-  }
-
+  BasicDecimal128 abs_value = BasicDecimal128::Abs(value);
+  was_negative = value.high_bits() < 0;
+  uint64_t high = static_cast<uint64_t>(abs_value.high_bits());
+  uint64_t low = abs_value.low_bits();
+
+  // FillInArray(std::array<uint64_t, N>& value_array, uint32_t* result_array) 
is not
+  // called here as the following code has better performance, to avoid 
regression on
+  // BasicDecimal128 Division.
   if (high != 0) {
     if (high > std::numeric_limits<uint32_t>::max()) {

Review comment:
       By the way, why do we have strict inequality here, but a non-strict one 
below for `low`?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -451,6 +467,25 @@ static int64_t FillInArray(const BasicDecimal128& value, 
uint32_t* array,
   return 1;
 }
 
+/// Expands the given value into a big endian array of ints so that we can 
work on
+/// it. The array will be converted to an absolute value and the was_negative
+/// flag will be set appropriately. The array will remove leading zeros from
+/// the value.
+/// \param array a big endian array of length 8 to set with the value
+/// \param was_negative a flag for whether the value was original negative
+/// \result the output length of the array
+static int64_t FillInArray(const BasicDecimal256& value, uint32_t* array,
+                           bool& was_negative) {
+  BasicDecimal256 positive_value = value;
+  was_negative = false;
+  int64_t highest_bit = positive_value.little_endian_array()[3];
+  if (highest_bit < 0) {

Review comment:
       Doesn't/shouldn't `BasicDecimal256` have a `sign` or `is_negative` 
method to factor out this pattern?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -490,49 +527,60 @@ static void FixDivisionSigns(BasicDecimal128* result, 
BasicDecimal128* remainder
   }
 }
 
-/// \brief Build a BasicDecimal128 from a list of ints.
+/// \brief Build a little endian array of uint64_t from a big endian array of 
uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+                                    uint32_t* array, int64_t length) {
+  for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
+    if (array[i] != 0) {
+      return DecimalStatus::kOverflow;
+    }
+  }
+  int64_t next_index = length - 1;
+  for (size_t i = 0; i < N; i++) {
+    uint64_t lower_bits = (next_index < 0) ? 0 : array[next_index--];
+    (*result_array)[i] =
+        (next_index < 0)
+            ? lower_bits
+            : ((static_cast<uint64_t>(array[next_index--]) << 32) + 
lower_bits);
+  }
+  return DecimalStatus::kSuccess;
+}
+
+/// \brief Build a BasicDecimal128 from a big endian array of uint32_t.
 static DecimalStatus BuildFromArray(BasicDecimal128* value, uint32_t* array,

Review comment:
       `const uint32_t*`?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -490,49 +527,60 @@ static void FixDivisionSigns(BasicDecimal128* result, 
BasicDecimal128* remainder
   }
 }
 
-/// \brief Build a BasicDecimal128 from a list of ints.
+/// \brief Build a little endian array of uint64_t from a big endian array of 
uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+                                    uint32_t* array, int64_t length) {

Review comment:
       `const uint32_t* array`?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -395,33 +395,49 @@ BasicDecimal128& BasicDecimal128::operator*=(const 
BasicDecimal128& right) {
   return *this;
 }
 
-/// Expands the given value into an array of ints so that we can work on
-/// it. The array will be converted to an absolute value and the wasNegative
+/// Expands the given little endian array of uint64_t into a big endian array 
of
+/// uint32_t. The value of input array is expected to be non-negative. The 
result_array
+/// will remove leading zeros from the input array.
+/// \param value_array a little endian array to represent the value
+/// \param result_array a big endian array of length N*2 to set with the value
+/// \result the output length of the array
+template <size_t N>
+static int64_t FillInArray(const std::array<uint64_t, N>& value_array,
+                           uint32_t* result_array) {
+  int64_t next_index = 0;
+  for (int64_t i = N - 1; i >= 0; i--) {

Review comment:
       Nit, but the nested loop is a bit confusing. Two separate loops would 
make the intent more obvious (and might also help the compiler?).

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -490,49 +527,60 @@ static void FixDivisionSigns(BasicDecimal128* result, 
BasicDecimal128* remainder
   }
 }
 
-/// \brief Build a BasicDecimal128 from a list of ints.
+/// \brief Build a little endian array of uint64_t from a big endian array of 
uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+                                    uint32_t* array, int64_t length) {
+  for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
+    if (array[i] != 0) {
+      return DecimalStatus::kOverflow;
+    }
+  }
+  int64_t next_index = length - 1;
+  for (size_t i = 0; i < N; i++) {
+    uint64_t lower_bits = (next_index < 0) ? 0 : array[next_index--];
+    (*result_array)[i] =
+        (next_index < 0)

Review comment:
       The code would look clearer if you had one loop for the non-zero words, 
and one other loop for the excess (zeroed) words, IMHO.

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -1296,6 +1296,56 @@ TEST(Decimal256Test, Multiply) {
   }
 }
 
+TEST(Decimal256Test, Divide) {
+  ASSERT_EQ(Decimal256(33), Decimal256(100) / Decimal256(3));
+  ASSERT_EQ(Decimal256(66), Decimal256(200) / Decimal256(3));
+  ASSERT_EQ(Decimal256(66), Decimal256(20100) / Decimal256(301));
+  ASSERT_EQ(Decimal256(-66), Decimal256(-20100) / Decimal256(301));
+  ASSERT_EQ(Decimal256(-66), Decimal256(20100) / Decimal256(-301));
+  ASSERT_EQ(Decimal256(66), Decimal256(-20100) / Decimal256(-301));
+  ASSERT_EQ(Decimal256("-5192296858534827628530496329343552"),
+            
Decimal256("-269599466671506397946670150910580797473777870509761363"
+                       "24636208709184") /
+                Decimal256("5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("5192296858534827628530496329343552"),
+            
Decimal256("-269599466671506397946670150910580797473777870509761363"
+                       "24636208709184") /
+                Decimal256("-5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("5192296858534827628530496329343552"),
+            
Decimal256("2695994666715063979466701509105807974737778705097613632"
+                       "4636208709184") /
+                Decimal256("5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("-5192296858534827628530496329343552"),
+            
Decimal256("2695994666715063979466701509105807974737778705097613632"
+                       "4636208709184") /
+                Decimal256("-5192296858534827628530496329874417"));
+
+  // Test some random numbers.
+  for (auto x : GetRandomNumbers<Int32Type>(16)) {
+    for (auto y : GetRandomNumbers<Int32Type>(16)) {
+      if (y == 0) {
+        continue;
+      }
+
+      Decimal256 result = Decimal256(x) / Decimal256(y);
+      ASSERT_EQ(Decimal256(static_cast<int64_t>(x) / y), result)
+          << " x: " << x << " y: " << y;
+    }
+  }
+
+  // Test some edge cases
+  for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, 
INT64_MAX}) {
+    for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, 
INT32_MAX}) {

Review comment:
       Also, what about `-INT64_MAX - 1` and `-INT32_MAX - 1`? Are they 
expected to work as well? If so, test them.

##########
File path: cpp/src/arrow/util/decimal_test.cc
##########
@@ -1296,6 +1296,56 @@ TEST(Decimal256Test, Multiply) {
   }
 }
 
+TEST(Decimal256Test, Divide) {
+  ASSERT_EQ(Decimal256(33), Decimal256(100) / Decimal256(3));
+  ASSERT_EQ(Decimal256(66), Decimal256(200) / Decimal256(3));
+  ASSERT_EQ(Decimal256(66), Decimal256(20100) / Decimal256(301));
+  ASSERT_EQ(Decimal256(-66), Decimal256(-20100) / Decimal256(301));
+  ASSERT_EQ(Decimal256(-66), Decimal256(20100) / Decimal256(-301));
+  ASSERT_EQ(Decimal256(66), Decimal256(-20100) / Decimal256(-301));
+  ASSERT_EQ(Decimal256("-5192296858534827628530496329343552"),
+            
Decimal256("-269599466671506397946670150910580797473777870509761363"
+                       "24636208709184") /
+                Decimal256("5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("5192296858534827628530496329343552"),
+            
Decimal256("-269599466671506397946670150910580797473777870509761363"
+                       "24636208709184") /
+                Decimal256("-5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("5192296858534827628530496329343552"),
+            
Decimal256("2695994666715063979466701509105807974737778705097613632"
+                       "4636208709184") /
+                Decimal256("5192296858534827628530496329874417"));
+  ASSERT_EQ(Decimal256("-5192296858534827628530496329343552"),
+            
Decimal256("2695994666715063979466701509105807974737778705097613632"
+                       "4636208709184") /
+                Decimal256("-5192296858534827628530496329874417"));
+
+  // Test some random numbers.
+  for (auto x : GetRandomNumbers<Int32Type>(16)) {
+    for (auto y : GetRandomNumbers<Int32Type>(16)) {
+      if (y == 0) {
+        continue;
+      }
+
+      Decimal256 result = Decimal256(x) / Decimal256(y);
+      ASSERT_EQ(Decimal256(static_cast<int64_t>(x) / y), result)
+          << " x: " << x << " y: " << y;
+    }
+  }
+
+  // Test some edge cases
+  for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, 
INT64_MAX}) {
+    for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, 
INT32_MAX}) {

Review comment:
       Can we define a `INT128_MAX` constant and test it as well?

##########
File path: cpp/src/arrow/util/basic_decimal.cc
##########
@@ -490,49 +527,60 @@ static void FixDivisionSigns(BasicDecimal128* result, 
BasicDecimal128* remainder
   }
 }
 
-/// \brief Build a BasicDecimal128 from a list of ints.
+/// \brief Build a little endian array of uint64_t from a big endian array of 
uint32_t.
+template <size_t N>
+static DecimalStatus BuildFromArray(std::array<uint64_t, N>* result_array,
+                                    uint32_t* array, int64_t length) {
+  for (int64_t i = length - 2 * N - 1; i >= 0; i--) {
+    if (array[i] != 0) {
+      return DecimalStatus::kOverflow;
+    }
+  }
+  int64_t next_index = length - 1;
+  for (size_t i = 0; i < N; i++) {
+    uint64_t lower_bits = (next_index < 0) ? 0 : array[next_index--];
+    (*result_array)[i] =
+        (next_index < 0)
+            ? lower_bits
+            : ((static_cast<uint64_t>(array[next_index--]) << 32) + 
lower_bits);
+  }
+  return DecimalStatus::kSuccess;
+}
+
+/// \brief Build a BasicDecimal128 from a big endian array of uint32_t.
 static DecimalStatus BuildFromArray(BasicDecimal128* value, uint32_t* array,
                                     int64_t length) {
-  switch (length) {
-    case 0:
-      *value = {static_cast<int64_t>(0)};
-      break;
-    case 1:
-      *value = {static_cast<int64_t>(array[0])};
-      break;
-    case 2:
-      *value = {static_cast<int64_t>(0),
-                (static_cast<uint64_t>(array[0]) << 32) + array[1]};
-      break;
-    case 3:
-      *value = {static_cast<int64_t>(array[0]),
-                (static_cast<uint64_t>(array[1]) << 32) + array[2]};
-      break;
-    case 4:
-      *value = {(static_cast<int64_t>(array[0]) << 32) + array[1],
-                (static_cast<uint64_t>(array[2]) << 32) + array[3]};
-      break;
-    case 5:
-      if (array[0] != 0) {
-        return DecimalStatus::kOverflow;
-      }
-      *value = {(static_cast<int64_t>(array[1]) << 32) + array[2],
-                (static_cast<uint64_t>(array[3]) << 32) + array[4]};
-      break;
-    default:
-      return DecimalStatus::kOverflow;
+  std::array<uint64_t, 2> result_array;
+  auto status = BuildFromArray(&result_array, array, length);
+  if (status != DecimalStatus::kSuccess) {
+    return status;
   }
+  *value = {static_cast<int64_t>(result_array[1]), result_array[0]};
+  return DecimalStatus::kSuccess;
+}
 
+/// \brief Build a BasicDecimal256 from a big endian array of uint32_t.
+static DecimalStatus BuildFromArray(BasicDecimal256* value, uint32_t* array,

Review comment:
       `const uint32_t* array`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] pitrou commented on a change in pull request #8542: ARROW-10407: [C++] Add BasicDecimal256 Division Support

Reply via email to