AntoinePrv commented on code in PR #47896:
URL: https://github.com/apache/arrow/pull/47896#discussion_r2454222190


##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int 
batch_size) {
       out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
     }
   }
-  return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit 
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit 
integers
+/// will be split on the first byte boundary (hence having a spread of two 
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary 
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
+  int max = static_cast<int>(bit_util::BytesForBits(width));
+  int start = bit_offset;
+  do {
+    const int byte_start = start / 8;
+    const int byte_end = (start + width - 1) / 8;  // inclusive end bit
+    const int spread = byte_end - byte_start + 1;
+    max = spread > max ? spread : max;
+    start += width;
+  } while (start % 8 != bit_offset);
+  return max;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover across 
all bit
+/// offsets.
+constexpr int PackedMaxSpreadBytes(int width) {
+  int max = 0;
+  for (int offset = 0; offset < 8; ++offset) {
+    const int spread = PackedMaxSpreadBytes(width, offset);
+    max = spread > max ? spread : max;
+  }
+  return max;
+}
+
+// Integer type that tries to contain as much as the spread as possible.
+template <int kSpreadBytes>
+using SpreadBufferUint = std::conditional_t<
+    (kSpreadBytes <= sizeof(uint8_t)), uint_fast8_t,
+    std::conditional_t<(kSpreadBytes <= sizeof(uint16_t)), uint_fast16_t,
+                       std::conditional_t<(kSpreadBytes <= sizeof(uint32_t)),
+                                          uint_fast32_t, uint_fast64_t>>>;
+
+/// Unpack integers.
+/// This function works for all input batch sizes but is not the fastest.
+/// In prolog mode, instead of unpacking all required element, the function 
will
+/// stop if it finds a byte aligned value start.
+template <int kPackedBitWidth, bool kIsProlog, typename Uint>
+int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) 
{
+  // For the epilog we adapt the max spread since better alignment give 
shorter spreads
+  ARROW_DCHECK(kIsProlog || bit_offset == 0);
+  constexpr int kMaxSpreadBytes = kIsProlog ? 
PackedMaxSpreadBytes(kPackedBitWidth)
+                                            : 
PackedMaxSpreadBytes(kPackedBitWidth, 0);
+  using buffer_uint = SpreadBufferUint<kMaxSpreadBytes>;
+  constexpr int kBufferSize = sizeof(buffer_uint);
+  // Due to misalignment, on large bit width, the spread can be larger than 
the maximum
+  // size integer. For instance a 63 bit width misaligned packed integer can 
spread over 9
+  // aligned bytes.
+  constexpr bool kOversized = kBufferSize < kMaxSpreadBytes;

Review Comment:
   Probably not, gonna benchmark that.
   
   Another use for `kMaxSpreadBytes` is to deduce the bit widths where the 
spread might be greater than what `uint64_t` can contain and enable the second 
check:
   ```cpp
       // Handle the oversized bytes
       if constexpr (kOversized) {
         // The oversized bytes do not happen at all iterations
         if (spread_bytes > kBufferSize) {
   ```
   This can also be done with ` if (ARROW_PREDICT_FALSE(spread_bytes > 
kBufferSize)) {` tbh.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to