This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch decimal256
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/decimal256 by this push:
new ccd88e2 Add BasicDecimal256 Multiplication Support (PR for decimal256
branch, not master) (#8344)
ccd88e2 is described below
commit ccd88e2edeb0d40e5c0ae1261fe9f8f53f4a2a47
Author: Ezra <[email protected]>
AuthorDate: Mon Oct 12 14:44:28 2020 -0700
Add BasicDecimal256 Multiplication Support (PR for decimal256 branch, not
master) (#8344)
---
cpp/src/arrow/util/basic_decimal.cc | 167 ++++++++++++++++++++++++--------
cpp/src/arrow/util/basic_decimal.h | 36 +++++--
cpp/src/arrow/util/decimal.cc | 5 +
cpp/src/arrow/util/decimal.h | 3 +
cpp/src/arrow/util/decimal_benchmark.cc | 16 +++
cpp/src/arrow/util/decimal_test.cc | 63 ++++++++++++
6 files changed, 245 insertions(+), 45 deletions(-)
diff --git a/cpp/src/arrow/util/basic_decimal.cc
b/cpp/src/arrow/util/basic_decimal.cc
index ac85bd0..ea247b7 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -123,7 +123,7 @@ static const BasicDecimal128 ScaleMultipliersHalf[] = {
#ifdef ARROW_USE_NATIVE_INT128
static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
#else
-static constexpr uint64_t kIntMask = 0xFFFFFFFF;
+static constexpr uint64_t kInt32Mask = 0xFFFFFFFF;
#endif
// same as ScaleMultipliers[38] - 1
@@ -254,67 +254,125 @@ BasicDecimal128& BasicDecimal128::operator>>=(uint32_t
bits) {
namespace {
-// TODO: Remove this guard once it's used by BasicDecimal256
-#ifndef ARROW_USE_NATIVE_INT128
-// This method losslessly multiplies x and y into a 128 bit unsigned integer
-// whose high bits will be stored in hi and low bits in lo.
-void ExtendAndMultiplyUint64(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
+// Convenience wrapper type over 128 bit unsigned integers. We opt not to
+// replace the uint128_t type in int128_internal.h because it would require
+// significantly more implementation work to be done. This class merely
+// provides the minimum necessary set of functions to perform 128+ bit
+// multiplication operations when there may or may not be native support.
#ifdef ARROW_USE_NATIVE_INT128
- const __uint128_t r = static_cast<__uint128_t>(x) * y;
- *lo = r & kInt64Mask;
- *hi = r >> 64;
+struct uint128_t {
+ uint128_t() {}
+ uint128_t(uint64_t hi, uint64_t lo) : val_((static_cast<__uint128_t>(hi) <<
64) | lo) {}
+ uint128_t(const BasicDecimal128& decimal) {
+ val_ = (static_cast<__uint128_t>(decimal.high_bits()) << 64) |
decimal.low_bits();
+ }
+
+ uint64_t hi() { return val_ >> 64; }
+ uint64_t lo() { return val_ & kInt64Mask; }
+
+ uint128_t& operator+=(const uint128_t& other) {
+ val_ += other.val_;
+ return *this;
+ }
+
+ uint128_t& operator*=(const uint128_t& other) {
+ val_ *= other.val_;
+ return *this;
+ }
+
+ __uint128_t val_;
+};
+
#else
- // If we can't use a native fallback, perform multiplication
+// Multiply two 64 bit word components into a 128 bit result, with high bits
+// stored in hi and low bits in lo.
+inline void ExtendAndMultiply(uint64_t x, uint64_t y, uint64_t* hi, uint64_t*
lo) {
+ // Perform multiplication on two 64 bit words x and y into a 128 bit result
// by splitting up x and y into 32 bit high/low bit components,
// allowing us to represent the multiplication as
// x * y = x_lo * y_lo + x_hi * y_lo * 2^32 + y_hi * x_lo * 2^32
- // + x_hi * y_hi * 2^64.
+ // + x_hi * y_hi * 2^64
//
- // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi.
+ // Now, consider the final output as lo_lo || lo_hi || hi_lo || hi_hi
// Therefore,
// lo_lo is (x_lo * y_lo)_lo,
// lo_hi is ((x_lo * y_lo)_hi + (x_hi * y_lo)_lo + (x_lo * y_hi)_lo)_lo,
// hi_lo is ((x_hi * y_hi)_lo + (x_hi * y_lo)_hi + (x_lo * y_hi)_hi)_hi,
// hi_hi is (x_hi * y_hi)_hi
- const uint64_t x_lo = x & kIntMask;
- const uint64_t y_lo = y & kIntMask;
+ const uint64_t x_lo = x & kInt32Mask;
+ const uint64_t y_lo = y & kInt32Mask;
const uint64_t x_hi = x >> 32;
const uint64_t y_hi = y >> 32;
const uint64_t t = x_lo * y_lo;
- const uint64_t t_lo = t & kIntMask;
+ const uint64_t t_lo = t & kInt32Mask;
const uint64_t t_hi = t >> 32;
const uint64_t u = x_hi * y_lo + t_hi;
- const uint64_t u_lo = u & kIntMask;
+ const uint64_t u_lo = u & kInt32Mask;
const uint64_t u_hi = u >> 32;
const uint64_t v = x_lo * y_hi + u_lo;
const uint64_t v_hi = v >> 32;
*hi = x_hi * y_hi + u_hi + v_hi;
- *lo = (v << 32) | t_lo;
-#endif
+ *lo = (v << 32) + t_lo;
}
-#endif
-void MultiplyUint128(uint64_t x_hi, uint64_t x_lo, uint64_t y_hi, uint64_t
y_lo,
- uint64_t* hi, uint64_t* lo) {
-#ifdef ARROW_USE_NATIVE_INT128
- const __uint128_t x = (static_cast<__uint128_t>(x_hi) << 64) | x_lo;
- const __uint128_t y = (static_cast<__uint128_t>(y_hi) << 64) | y_lo;
- const __uint128_t r = x * y;
- *lo = r & kInt64Mask;
- *hi = r >> 64;
-#else
- // To perform 128 bit multiplication without a native fallback
- // we first perform lossless 64 bit multiplication of the low
- // bits, and then add x_hi * y_lo and x_lo * y_hi to the high
- // bits. Note that we can skip adding x_hi * y_hi because it
- // always will be over 128 bits.
- ExtendAndMultiplyUint64(x_lo, y_lo, hi, lo);
- *hi += (x_hi * y_lo) + (x_lo * y_hi);
+struct uint128_t {
+ uint128_t() {}
+ uint128_t(uint64_t hi, uint64_t lo) : hi_(hi), lo_(lo) {}
+ uint128_t(const BasicDecimal128& decimal) {
+ hi_ = decimal.high_bits();
+ lo_ = decimal.low_bits();
+ }
+
+ uint64_t hi() const { return hi_; }
+ uint64_t lo() const { return lo_; }
+
+ uint128_t& operator+=(const uint128_t& other) {
+ // To deduce the carry bit, we perform "65 bit" addition on the low bits
and
+ // seeing if the resulting high bit is 1. This is accomplished by shifting
the
+ // low bits to the right by 1 (chopping off the lowest bit), then adding 1
if the
+ // result of adding the two chopped bits would have produced a carry.
+ uint64_t carry = (((lo_ & other.lo_) & 1) + (lo_ >> 1) + (other.lo_ >> 1))
>> 63;
+ hi_ += other.hi_ + carry;
+ lo_ += other.lo_;
+ return *this;
+ }
+
+ uint128_t& operator*=(const uint128_t& other) {
+ uint128_t r;
+ ExtendAndMultiply(lo_, other.lo_, &r.hi_, &r.lo_);
+ r.hi_ += (hi_ * other.lo_) + (lo_ * other.hi_);
+ *this = r;
+ return *this;
+ }
+
+ uint64_t hi_;
+ uint64_t lo_;
+};
#endif
+
+// Multiplies two N * 64 bit unsigned integer types, represented by a uint64_t
+// array into a same sized output. Elements in the array should be in
+// little endian order, and output will be the same. Overflow in multiplication
+// will result in the lower N * 64 bits of the result being set.
+template <int N>
+inline void MultiplyUnsignedArray(const std::array<uint64_t, N>& lh,
+ const std::array<uint64_t, N>& rh,
+ std::array<uint64_t, N>* result) {
+ for (int j = 0; j < N; ++j) {
+ uint64_t carry = 0;
+ for (int i = 0; i < N - j; ++i) {
+ uint128_t tmp(lh[i]);
+ tmp *= uint128_t(rh[j]);
+ tmp += uint128_t((*result)[i + j]);
+ tmp += uint128_t(carry);
+ (*result)[i + j] = tmp.lo();
+ carry = tmp.hi();
+ }
+ }
}
} // namespace
@@ -325,10 +383,10 @@ BasicDecimal128& BasicDecimal128::operator*=(const
BasicDecimal128& right) {
const bool negate = Sign() != right.Sign();
BasicDecimal128 x = BasicDecimal128::Abs(*this);
BasicDecimal128 y = BasicDecimal128::Abs(right);
- uint64_t hi;
- MultiplyUint128(x.high_bits(), x.low_bits(), y.high_bits(), y.low_bits(),
&hi,
- &low_bits_);
- high_bits_ = hi;
+ uint128_t r(x);
+ r *= y;
+ high_bits_ = r.hi();
+ low_bits_ = r.lo();
if (negate) {
Negate();
}
@@ -800,6 +858,13 @@ BasicDecimal256& BasicDecimal256::Negate() {
return *this;
}
+BasicDecimal256& BasicDecimal256::Abs() { return *this < 0 ? Negate() : *this;
}
+
+BasicDecimal256 BasicDecimal256::Abs(const BasicDecimal256& in) {
+ BasicDecimal256 result(in);
+ return result.Abs();
+}
+
std::array<uint8_t, 32> BasicDecimal256::ToBytes() const {
std::array<uint8_t, 32> out{{0}};
ToBytes(out.data());
@@ -821,12 +886,36 @@ void BasicDecimal256::ToBytes(uint8_t* out) const {
#endif
}
+BasicDecimal256& BasicDecimal256::operator*=(const BasicDecimal256& right) {
+ // Since the max value of BasicDecimal256 is supposed to be 1e76 - 1 and the
+ // min the negation taking the absolute values here should always be safe.
+ const bool negate = Sign() != right.Sign();
+ BasicDecimal256 x = BasicDecimal256::Abs(*this);
+ BasicDecimal256 y = BasicDecimal256::Abs(right);
+
+ uint128_t r_hi;
+ uint128_t r_lo;
+ std::array<uint64_t, 4> res({0, 0, 0, 0});
+ MultiplyUnsignedArray<4>(x.little_endian_array_, y.little_endian_array_,
&res);
+ little_endian_array_ = res;
+ if (negate) {
+ Negate();
+ }
+ return *this;
+}
+
DecimalStatus BasicDecimal256::Rescale(int32_t original_scale, int32_t
new_scale,
BasicDecimal256* out) const {
// TODO: implement.
return DecimalStatus::kSuccess;
}
+BasicDecimal256 operator*(const BasicDecimal256& left, const BasicDecimal256&
right) {
+ BasicDecimal256 result = left;
+ result *= right;
+ return result;
+}
+
bool operator==(const BasicDecimal256& left, const BasicDecimal256& right) {
return left.little_endian_array() == right.little_endian_array();
}
diff --git a/cpp/src/arrow/util/basic_decimal.h
b/cpp/src/arrow/util/basic_decimal.h
index 54c8808..b114126 100644
--- a/cpp/src/arrow/util/basic_decimal.h
+++ b/cpp/src/arrow/util/basic_decimal.h
@@ -109,10 +109,10 @@ class ARROW_EXPORT BasicDecimal128 {
BasicDecimal128& operator>>=(uint32_t bits);
/// \brief Get the high bits of the two's complement representation of the
number.
- inline int64_t high_bits() const { return high_bits_; }
+ inline constexpr int64_t high_bits() const { return high_bits_; }
/// \brief Get the low bits of the two's complement representation of the
number.
- inline uint64_t low_bits() const { return low_bits_; }
+ inline constexpr uint64_t low_bits() const { return low_bits_; }
/// \brief Return the raw bytes of the value in native-endian byte order.
std::array<uint8_t, 16> ToBytes() const;
@@ -179,6 +179,14 @@ ARROW_EXPORT BasicDecimal128 operator%(const
BasicDecimal128& left,
const BasicDecimal128& right);
class ARROW_EXPORT BasicDecimal256 {
+ private:
+ // Due to a bug in clang, we have to declare the extend method prior to its
+ // usage.
+ template <typename T>
+ inline static constexpr uint64_t extend(T low_bits) noexcept {
+ return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
+ }
+
public:
/// \brief Create a BasicDecimal256 from the two's complement representation.
constexpr BasicDecimal256(const std::array<uint64_t, 4>&
little_endian_array) noexcept
@@ -195,6 +203,10 @@ class ARROW_EXPORT BasicDecimal256 {
: little_endian_array_({static_cast<uint64_t>(value), extend(value),
extend(value),
extend(value)}) {}
+ constexpr BasicDecimal256(const BasicDecimal128& value) noexcept
+ : little_endian_array_({value.low_bits(),
static_cast<uint64_t>(value.high_bits()),
+ extend(value.high_bits()),
extend(value.high_bits())}) {}
+
/// \brief Create a BasicDecimal256 from an array of bytes. Bytes are
assumed to be in
/// native-endian byte order.
explicit BasicDecimal256(const uint8_t* bytes);
@@ -202,6 +214,12 @@ class ARROW_EXPORT BasicDecimal256 {
/// \brief Negate the current value (in-place)
BasicDecimal256& Negate();
+ /// \brief Absolute value (in-place)
+ BasicDecimal256& Abs();
+
+ /// \brief Absolute value
+ static BasicDecimal256 Abs(const BasicDecimal256& left);
+
/// \brief Get the bits of the two's complement representation of the
number. The 4
/// elements are in little endian order. The bits within each uint64_t
element are in
/// native endian order. For example,
@@ -220,11 +238,14 @@ class ARROW_EXPORT BasicDecimal256 {
DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal256* out) const;
- private:
- template <typename T>
- inline static constexpr uint64_t extend(T low_bits) noexcept {
- return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
+ inline int64_t Sign() const {
+ return 1 | (static_cast<int64_t>(little_endian_array_[3]) >> 63);
}
+
+ /// \brief Multiply this number by another number. The result is truncated
to 256 bits.
+ BasicDecimal256& operator*=(const BasicDecimal256& right);
+
+ private:
std::array<uint64_t, 4> little_endian_array_;
};
@@ -234,4 +255,7 @@ ARROW_EXPORT bool operator<(const BasicDecimal256& left,
const BasicDecimal256&
ARROW_EXPORT bool operator<=(const BasicDecimal256& left, const
BasicDecimal256& right);
ARROW_EXPORT bool operator>(const BasicDecimal256& left, const
BasicDecimal256& right);
ARROW_EXPORT bool operator>=(const BasicDecimal256& left, const
BasicDecimal256& right);
+
+ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
+ const BasicDecimal256& right);
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index e2f3a07..c38e66c 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -721,4 +721,9 @@ Result<Decimal256> Decimal256::FromString(const char* s) {
Status Decimal256::ToArrowStatus(DecimalStatus dstatus) const {
return arrow::ToArrowStatus(dstatus, 256);
}
+
+std::ostream& operator<<(std::ostream& os, const Decimal256& decimal) {
+ os << decimal.ToIntegerString();
+ return os;
+}
} // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index a883797..3b159bc 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -227,6 +227,9 @@ class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
return std::move(out);
}
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream& os,
+ const Decimal256& decimal);
+
private:
/// Converts internal error code to Status
Status ToArrowStatus(DecimalStatus dstatus) const;
diff --git a/cpp/src/arrow/util/decimal_benchmark.cc
b/cpp/src/arrow/util/decimal_benchmark.cc
index c1acefc..abdcb21 100644
--- a/cpp/src/arrow/util/decimal_benchmark.cc
+++ b/cpp/src/arrow/util/decimal_benchmark.cc
@@ -148,6 +148,21 @@ static void BinaryMathOp(benchmark::State& state) { //
NOLINT non-const referen
state.SetItemsProcessed(state.iterations() * kValueSize);
}
+static void BinaryMathOp256(benchmark::State& state) { // NOLINT non-const
reference
+ std::vector<BasicDecimal256> v1, v2;
+ for (uint64_t x = 0; x < kValueSize; x++) {
+ v1.push_back(BasicDecimal256({100 + x, 100 + x, 100 + x, 100 + x}));
+ v2.push_back(BasicDecimal256({200 + x, 200 + x, 200 + x, 200 + x}));
+ }
+
+ for (auto _ : state) {
+ for (int x = 0; x < kValueSize; x += 5) {
+ benchmark::DoNotOptimize(v1[x + 2] * v2[x + 2]);
+ }
+ }
+ state.SetItemsProcessed(state.iterations() * kValueSize);
+}
+
static void UnaryOp(benchmark::State& state) { // NOLINT non-const reference
std::vector<BasicDecimal128> v;
for (int x = 0; x < kValueSize; x++) {
@@ -191,6 +206,7 @@ static void BinaryBitOp(benchmark::State& state) { //
NOLINT non-const referenc
BENCHMARK(FromString);
BENCHMARK(ToString);
BENCHMARK(BinaryMathOp);
+BENCHMARK(BinaryMathOp256);
BENCHMARK(BinaryMathOpAggregate);
BENCHMARK(BinaryCompareOp);
BENCHMARK(BinaryCompareOpConstant);
diff --git a/cpp/src/arrow/util/decimal_test.cc
b/cpp/src/arrow/util/decimal_test.cc
index 372dbee..3c2f533 100644
--- a/cpp/src/arrow/util/decimal_test.cc
+++ b/cpp/src/arrow/util/decimal_test.cc
@@ -26,6 +26,7 @@
#include <vector>
#include <gtest/gtest.h>
+#include <boost/multiprecision/cpp_int.hpp>
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
@@ -37,6 +38,7 @@
namespace arrow {
using internal::int128_t;
+using internal::uint128_t;
class DecimalTestFixture : public ::testing::Test {
public:
@@ -1233,6 +1235,67 @@ TEST(Decimal256Test, ConstructibleFromBool) {
EXPECT_EQ(Decimal256(1), Decimal256(true));
}
+Decimal256 Decimal256FromInt128(int128_t value) {
+ return Decimal256(Decimal128(static_cast<int64_t>(value >> 64),
+ static_cast<uint64_t>(value &
0xFFFFFFFFFFFFFFFFULL)));
+}
+
+TEST(Decimal256Test, Multiply) {
+ using boost::multiprecision::int256_t;
+ using boost::multiprecision::uint256_t;
+
+ ASSERT_EQ(Decimal256(60501), Decimal256(301) * Decimal256(201));
+
+ ASSERT_EQ(Decimal256(-60501), Decimal256(-301) * Decimal256(201));
+
+ ASSERT_EQ(Decimal256(-60501), Decimal256(301) * Decimal256(-201));
+
+ ASSERT_EQ(Decimal256(60501), Decimal256(-301) * Decimal256(-201));
+
+ // Test some random numbers.
+ std::vector<int128_t> left;
+ std::vector<int128_t> right;
+ for (auto x : GetRandomNumbers<Int32Type>(16)) {
+ for (auto y : GetRandomNumbers<Int32Type>(16)) {
+ for (auto z : GetRandomNumbers<Int32Type>(16)) {
+ for (auto w : GetRandomNumbers<Int32Type>(16)) {
+ // Test two 128 bit numbers which have a large amount of bits set.
+ int128_t l = static_cast<uint128_t>(x) << 96 |
static_cast<uint128_t>(y) << 64 |
+ static_cast<uint128_t>(z) << 32 |
static_cast<uint128_t>(w);
+ int128_t r = static_cast<uint128_t>(w) << 96 |
static_cast<uint128_t>(z) << 64 |
+ static_cast<uint128_t>(y) << 32 |
static_cast<uint128_t>(x);
+ int256_t expected = int256_t(l) * r;
+ Decimal256 actual = Decimal256FromInt128(l) *
Decimal256FromInt128(r);
+ ASSERT_EQ(expected.str(), actual.ToIntegerString())
+ << " " << int256_t(l).str() << " * " << int256_t(r).str();
+ // Test a 96 bit number against a 160 bit number.
+ int128_t s = l >> 32;
+ uint256_t b = uint256_t(r) << 32;
+ Decimal256 b_dec =
+ Decimal256FromInt128(r) * Decimal256(static_cast<uint64_t>(1) <<
32);
+ ASSERT_EQ(b.str(), b_dec.ToIntegerString()) << int256_t(r).str();
+ expected = int256_t(s) * b;
+ actual = Decimal256FromInt128(s) * b_dec;
+ ASSERT_EQ(expected.str(), actual.ToIntegerString())
+ << " " << int256_t(s).str() << " * " << int256_t(b).str();
+ }
+ }
+ }
+ }
+
+ // Test some edge cases
+ for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX,
INT64_MAX}) {
+ for (auto y :
+ std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32,
INT32_MAX}) {
+ Decimal256 decimal_x = Decimal256FromInt128(x);
+ Decimal256 decimal_y = Decimal256FromInt128(y);
+ Decimal256 result = decimal_x * decimal_y;
+ EXPECT_EQ(Decimal256FromInt128(x * y), result)
+ << " x: " << decimal_x << " y: " << decimal_y;
+ }
+ }
+}
+
class Decimal256ToStringTest : public
::testing::TestWithParam<ToStringTestParam> {};
TEST_P(Decimal256ToStringTest, ToString) {