reader.cc

apitrou Tue, 22 May 2018 05:24:57 -0700

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new fcc13f5  ARROW-2585: [C++] Add Decimal::FromBigEndian, which was 
formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc
fcc13f5 is described below

commit fcc13f5088323989d925270e7850b817bde19d48
Author: Joshua Storck <joshua.sto...@twosigma.com>
AuthorDate: Tue May 22 14:24:35 2018 +0200

    ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static 
method in parquet-cpp/src/parquet/arrow/reader.cc
    
    Author: Joshua Storck <joshua.sto...@twosigma.com>
    
    Closes #2036 from joshuastorck/decimal_from_big_endian and squashes the 
following commits:
    
    e970c87 <Joshua Storck> Fixing lint errors
    4cb4d89 <Joshua Storck> Adding Decimal::FromBigEndian, which was formerly a 
static method in parquet-cpp/src/parquet/arrow/reader.cc
---
 cpp/src/arrow/util/decimal-test.cc | 56 ++++++++++++++++++++++++++++++++++
 cpp/src/arrow/util/decimal.cc      | 62 ++++++++++++++++++++++++++++++++++++++
 cpp/src/arrow/util/decimal.h       |  5 +++
 3 files changed, 123 insertions(+)

diff --git a/cpp/src/arrow/util/decimal-test.cc 
b/cpp/src/arrow/util/decimal-test.cc
index 2829e4a..0877617 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -380,4 +380,60 @@ TEST(Decimal128Test, TestNoDecimalPointExponential) {
   ASSERT_EQ(0, scale);
 }
 
+TEST(Decimal128Test, TestFromBigEndian) {
+  // We test out a variety of scenarios:
+  //
+  // * Positive values that are left shifted
+  //   and filled in with the same bit pattern
+  // * Negated of the positive values
+  // * Complement of the positive values
+  //
+  // For the positive values, we can call FromBigEndian
+  // with a length that is less than 16, whereas we must
+  // pass all 16 bytes for the negative and complement.
+  //
+  // We use a number of bit patterns to increase the coverage
+  // of scenarios
+  for (int32_t start : {1, 15, /* 00001111 */
+                        85,    /* 01010101 */
+                        127 /* 01111111 */}) {
+    Decimal128 value(start);
+    for (int ii = 0; ii < 16; ++ii) {
+      auto little_endian = value.ToBytes();
+      std::reverse(little_endian.begin(), little_endian.end());
+      Decimal128 out;
+      // Limit the number of bytes we are passing to make
+      // sure that it works correctly. That's why all of the
+      // 'start' values don't have a 1 in the most significant
+      // bit place
+      ASSERT_OK(Decimal128::FromBigEndian(little_endian.data() + 15 - ii, ii + 
1, &out));
+      ASSERT_EQ(value, out);
+
+      // Negate it and convert to big endian
+      auto negated = -value;
+      little_endian = negated.ToBytes();
+      std::reverse(little_endian.begin(), little_endian.end());
+      // Convert all of the bytes since we have to include the sign bit
+      ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out));
+      ASSERT_EQ(negated, out);
+
+      // Take the complement and convert to big endian
+      auto complement = ~value;
+      little_endian = complement.ToBytes();
+      std::reverse(little_endian.begin(), little_endian.end());
+      ASSERT_OK(Decimal128::FromBigEndian(little_endian.data(), 16, &out));
+      ASSERT_EQ(complement, out);
+
+      value <<= 8;
+      value += Decimal128(start);
+    }
+  }
+}
+
+TEST(Decimal128Test, TestFromBigEndianBadLength) {
+  Decimal128 out;
+  ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, -1, &out));
+  ASSERT_RAISES(Invalid, Decimal128::FromBigEndian(0, 17, &out));
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 668da6f..5d83653 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -17,6 +17,7 @@
 
 #include <algorithm>
 #include <cctype>
+#include <climits>
 #include <cmath>
 #include <cstring>
 #include <iomanip>
@@ -873,4 +874,65 @@ Status Decimal128::Rescale(int32_t original_scale, int32_t 
new_scale,
   return Status::OK();
 }
 
+// Helper function used by Decimal128::FromBigEndian
+static inline uint64_t FromBigEndian(const uint8_t* bytes, int32_t length) {
+  // We don't bounds check the length here because this is called by
+  // FromBigEndian that has a Decimal128 as its out parameters and
+  // that function is already checking the length of the bytes and only
+  // passes lengths between zero and eight.
+  uint64_t result = 0;
+  // Using memcpy instead of special casing for length
+  // and doing the conversion in 16, 32 parts, which could
+  // possibly create unaligned memory access on certain platforms
+  memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
+  return ::arrow::BitUtil::FromBigEndian(result);
+}
+
+Status Decimal128::FromBigEndian(const uint8_t* bytes, int32_t length, 
Decimal128* out) {
+  static constexpr int32_t kMinDecimalBytes = 1;
+  static constexpr int32_t kMaxDecimalBytes = 16;
+
+  int64_t high;
+  uint64_t low;
+
+  if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
+    std::ostringstream stream;
+    stream << "Length of byte array passed to Decimal128::FromBigEndian ";
+    stream << "was " << length << ", but must be between ";
+    stream << kMinDecimalBytes << " and " << kMaxDecimalBytes;
+    return Status::Invalid(stream.str());
+  }
+
+  /// Bytes are coming in big-endian, so the first byte is the MSB and 
therefore holds the
+  /// sign bit.
+  const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
+
+  /// Sign extend the low bits if necessary
+  low = UINT64_MAX * (is_negative && length < 8);
+  high = -1 * (is_negative && length < kMaxDecimalBytes);
+
+  /// Stop byte of the high bytes
+  const int32_t high_bits_offset = std::max(0, length - 8);
+
+  /// Shift left enough bits to make room for the incoming int64_t
+  high <<= high_bits_offset * CHAR_BIT;
+
+  /// Preserve the upper bits by inplace OR-ing the int64_t
+  uint64_t value = arrow::FromBigEndian(bytes, high_bits_offset);
+  high |= value;
+
+  /// Stop byte of the low bytes
+  const int32_t low_bits_offset = std::min(length, 8);
+
+  /// Shift left enough bits to make room for the incoming uint64_t
+  low <<= low_bits_offset * CHAR_BIT;
+
+  /// Preserve the upper bits by inplace OR-ing the uint64_t
+  value = arrow::FromBigEndian(bytes + high_bits_offset, length - 
high_bits_offset);
+  low |= value;
+
+  *out = Decimal128(high, low);
+  return Status::OK();
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
index 79a99ba..b3180cb 100644
--- a/cpp/src/arrow/util/decimal.h
+++ b/cpp/src/arrow/util/decimal.h
@@ -126,6 +126,11 @@ class ARROW_EXPORT Decimal128 {
   static Status FromString(const std::string& s, Decimal128* out,
                            int32_t* precision = NULLPTR, int32_t* scale = 
NULLPTR);
 
+  /// \brief Convert from a big endian byte representation. The length must be
+  ///        between 1 and 16
+  /// \return error status if the length is an invalid value
+  static Status FromBigEndian(const uint8_t* data, int32_t length, Decimal128* 
out);
+
   /// \brief Convert Decimal128 from one scale to another
   Status Rescale(int32_t original_scale, int32_t new_scale, Decimal128* out) 
const;
 

-- 
To stop receiving notification emails like this one, please contact
apit...@apache.org.

[arrow] branch master updated: ARROW-2585: [C++] Add Decimal::FromBigEndian, which was formerly a static method in parquet-cpp/src/parquet/arrow/reader.cc

Reply via email to