This is an automated email from the ASF dual-hosted git repository. apitrou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new fcc13f5 ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc fcc13f5 is described below commit fcc13f5088323989d925270e7850b817bde19d48 Author: Joshua Storck <joshua.sto...@twosigma.com> AuthorDate: Tue May 22 14:24:35 2018 +0200 ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc Author: Joshua Storck <joshua.sto...@twosigma.com> Closes #2036 from joshuastorck/decimal_from_big_endian and squashes the following commits: e970c87 <Joshua Storck> Fixing lint errors 4cb4d89 <Joshua Storck> Adding Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc --- cpp/src/arrow/util/decimal-test.cc | 56 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/util/decimal.cc | 62 ++++++++++++++++++++++++++++++++++++++ cpp/src/arrow/util/decimal.h | 5 +++ 3 files changed, 123 insertions(+) diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc index 2829e4a..0877617 100644 --- a/cpp/src/arrow/util/decimal-test.cc +++ b/cpp/src/arrow/util/decimal-test.cc @@ -380,4 +380,60 @@ TEST(Decimal128Test, TestNoDecimalPointExponential) { ASSERT_EQ(0, scale); } +TEST(Decimal128Test, TestFromBigEndian) { + // We test out a variety of scenarios: + // + // * Positive values that are left shifted + // and filled in with the same bit pattern + // * Negated of the positive values + // * Complement of the positive values + // + // For the positive values, we can call FromBigEndian + // with a length that is less than 16, whereas we must + // pass all 16 bytes for the negative and complement. + // + // We use a number of bit patterns to increase the coverage + // of scenarios + for (int32_t start : {1, 15, /* 00001111 */ + 85, /* 01010101 */ + 127 /* 01111111 */}) { + Decimal128 value(start); + for (int ii = 0; ii < 16; ++ii) { + auto little_endian = value.ToBytes(); + std::reverse(little_endian.begin(), little_endian.end()); + Decimal128 out; + // Limit the number of bytes we are passing to make + // sure that it works correctly. That's why all of the + // 'start' values don't have a 1 in the most significant + // bit place + ASSERT_OK(Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 1, &out)); + ASSERT_EQ(value, out); + + // Negate it and convert to big endian + auto negated = -value; + little_endian = negated.ToBytes(); + std::reverse(little_endian.begin(), little_endian.end()); + // Convert all of the bytes since we have to include the sign bit + ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out)); + ASSERT_EQ(negated, out); + + // Take the complement and convert to big endian + auto complement = ~value; + little_endian = complement.ToBytes(); + std::reverse(little_endian.begin(), little_endian.end()); + ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out)); + ASSERT_EQ(complement, out); + + value <<= 8; + value += Decimal128(start); + } + } +} + +TEST(Decimal128Test, TestFromBigEndianBadLength) { + Decimal128 out; + ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, -1, &out)); + ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, 17, &out)); +} + } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 668da6f..5d83653 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -17,6 +17,7 @@ #include <algorithm> #include <cctype> +#include <climits> #include <cmath> #include <cstring> #include <iomanip> @@ -873,4 +874,65 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t new_scale, return Status::OK(); } +// Helper function used by Decimal128::FromBigEndian +static inline uint64_t FromBigEndian(const uint8_t* bytes, int32_t length) { + // We don't bounds check the length here because this is called by + // FromBigEndian that has a Decimal128 as its out parameters and + // that function is already checking the length of the bytes and only + // passes lengths between zero and eight. + uint64_t result = 0; + // Using memcpy instead of special casing for length + // and doing the conversion in 16, 32 parts, which could + // possibly create unaligned memory access on certain platforms + memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length); + return ::arrow::BitUtil::FromBigEndian(result); +} + +Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, Decimal128* out) { + static constexpr int32_t kMinDecimalBytes = 1; + static constexpr int32_t kMaxDecimalBytes = 16; + + int64_t high; + uint64_t low; + + if (length < kMinDecimalBytes || length > kMaxDecimalBytes) { + std::ostringstream stream; + stream << "Length of byte array passed to Decimal128::FromBigEndian "; + stream << "was " << length << ", but must be between "; + stream << kMinDecimalBytes << " and " << kMaxDecimalBytes; + return Status::Invalid(stream.str()); + } + + /// Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the + /// sign bit. + const bool is_negative = static_cast<int8_t>(bytes[0]) < 0; + + /// Sign extend the low bits if necessary + low = UINT64_MAX * (is_negative && length < 8); + high = -1 * (is_negative && length < kMaxDecimalBytes); + + /// Stop byte of the high bytes + const int32_t high_bits_offset = std::max(0, length - 8); + + /// Shift left enough bits to make room for the incoming int64_t + high <<= high_bits_offset * CHAR_BIT; + + /// Preserve the upper bits by inplace OR-ing the int64_t + uint64_t value = arrow::FromBigEndian(bytes, high_bits_offset); + high |= value; + + /// Stop byte of the low bytes + const int32_t low_bits_offset = std::min(length, 8); + + /// Shift left enough bits to make room for the incoming uint64_t + low <<= low_bits_offset * CHAR_BIT; + + /// Preserve the upper bits by inplace OR-ing the uint64_t + value = arrow::FromBigEndian(bytes + high_bits_offset, length - high_bits_offset); + low |= value; + + *out = Decimal128(high, low); + return Status::OK(); +} + } // namespace arrow diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h index 79a99ba..b3180cb 100644 --- a/cpp/src/arrow/util/decimal.h +++ b/cpp/src/arrow/util/decimal.h @@ -126,6 +126,11 @@ class ARROW_EXPORT Decimal128 { static Status FromString(const std::string& s, Decimal128* out, int32_t* precision = NULLPTR, int32_t* scale = NULLPTR); + /// \brief Convert from a big endian byte representation. The length must be + /// between 1 and 16 + /// \return error status if the length is an invalid value + static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* out); + /// \brief Convert Decimal128 from one scale to another Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) const; -- To stop receiving notification emails like this one, please contact apit...@apache.org.