AntoinePrv commented on code in PR #47294:
URL: https://github.com/apache/arrow/pull/47294#discussion_r2336690910


##########
cpp/src/arrow/util/bit_util.h:
##########
@@ -365,5 +365,118 @@ void PackBits(const uint32_t* values, uint8_t* out) {
   }
 }
 
+constexpr int64_t MaxLEB128ByteLen(int64_t n_bits) { return CeilDiv(n_bits, 
7); }
+
+template <typename Int>
+constexpr int64_t MaxLEB128ByteLenFor = MaxLEB128ByteLen(sizeof(Int) * 8);
+
+/// Write a integer as LEB128
+///
+/// Write the input value as LEB128 into the outptu buffer and return the 
number of bytes
+/// written.
+/// If the output buffer size is insufficient, return 0 but the output may 
have been
+/// written to.
+///
+/// \see https://en.wikipedia.org/wiki/LEB128
+/// \see MaxLEB128ByteLenFor
+template <typename Int>
+constexpr int32_t WriteLEB128(Int value, uint8_t* out, int32_t max_out_size) {
+  constexpr Int kLow7Mask = Int(0x7F);
+  constexpr Int kHigh7Mask = ~kLow7Mask;
+  constexpr uint8_t kContinuationBit = 0x80;
+
+  auto const out_first = out;
+
+  // Write as many bytes as the could be for the given input
+  while ((value & kHigh7Mask) != Int(0)) {
+    // We do not have enough room to write the LEB128
+    if (out - out_first >= max_out_size) {
+      return 0;
+    }
+
+    // Write the encoded byte with continuation bit
+    *out = static_cast<uint8_t>(value & kLow7Mask) | kContinuationBit;
+    ++out;
+    // Shift remaining data
+    value >>= 7;
+  }
+
+  // We do not have enough room to write the LEB128
+  if (out - out_first >= max_out_size) {
+    return 0;
+  }
+
+  // Write last non-continuing byte
+  *out = static_cast<uint8_t>(value & kLow7Mask);
+  ++out;
+
+  return static_cast<int32_t>(out - out_first);
+}
+
+/// Parse a leading LEB128
+///
+/// Take as input a data pointer and the maximum number of bytes that can be 
read from it
+/// (typically the array size).
+/// When a valid LEB128 is found at the start of the data, the function writes 
it to the
+/// out pointer and return the number of bytes read.
+/// Otherwise, the out pointer is unmodified and zero is returned.
+///
+/// \see https://en.wikipedia.org/wiki/LEB128
+/// \see MaxLEB128ByteLenFor
+template <typename Int>
+constexpr int32_t ParseLeadingLEB128(uint8_t const* data, int32_t 
max_data_size,
+                                     Int* out) {
+  constexpr auto kMaxBytes = static_cast<int32_t>(MaxLEB128ByteLenFor<Int>);
+  static_assert(kMaxBytes >= 1);
+  constexpr uint8_t kLow7Mask = 0x7F;
+  constexpr uint8_t kContinuationBit = 0x80;
+  constexpr int32_t kSignBitCount = std::is_signed_v<Int> ? 1 : 0;

Review Comment:
   This decoding function allows reading into a signed integer type, but the 
encoded value can only represent non-negatives anyways (the encoding function 
refuses negative inputs).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to