pitrou commented on code in PR #47896:
URL: https://github.com/apache/arrow/pull/47896#discussion_r2452356455
##########
cpp/src/arrow/util/bit_util.h:
##########
@@ -118,6 +118,16 @@ constexpr uint64_t LeastSignificantBitMask(int64_t
bit_index) {
return (static_cast<uint64_t>(1) << bit_index) - 1;
}
+// Returns a mask for the bit_index lower order bits.
+// Only valid for bit_index in the range [0, sizeof(Uint)].
+template <typename Uint>
+constexpr auto LeastSignificantBitMaskInc(Uint bit_index) {
Review Comment:
What does `Inc` mean here?
And I wonder if we cannot just reconcile the two functions, for example:
```c++
template <typename Uint, bool kAllowUpperBound = false>
constexpr auto LeastSignificantBitMask(Uint bit_index) {
if constexpr (kAllowUpperBound) {
if (bit_index == 8 * sizeof(Uint)) {
return ~Uint{0};
}
}
return (Uint{1} << bit_index) - Uint{1};
}
```
##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int
batch_size) {
out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
}
}
- return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit
integers
+/// will be split on the first byte boundary (hence having a spread of two
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
Review Comment:
This function looks complicated, but if I recode it in Python it looks like
it can be reduced to a much simpler form that's more amenable to compile-time
execution:
```python
>>> def spread(width, bit_offset):
...: max_spread = (width >> 3) + ((width & 7) != 0);
...: start = bit_offset
...: while True:
...: byte_start = start // 8
...: byte_end = (start + width - 1) // 8
...: spread = byte_end - byte_start + 1
...: max_spread = max(spread, max_spread)
...: start += width
...: if start % 8 == bit_offset:
...: return max_spread
...:
>>> [spread(8, b) for b in range(8)]
[1, 2, 2, 2, 2, 2, 2, 2]
>>> [spread(16, b) for b in range(8)]
[2, 3, 3, 3, 3, 3, 3, 3]
>>> [spread(32, b) for b in range(8)]
[4, 5, 5, 5, 5, 5, 5, 5]
```
##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int
batch_size) {
out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
}
}
- return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit
integers
+/// will be split on the first byte boundary (hence having a spread of two
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
+ int max = static_cast<int>(bit_util::BytesForBits(width));
+ int start = bit_offset;
+ do {
+ const int byte_start = start / 8;
+ const int byte_end = (start + width - 1) / 8; // inclusive end bit
+ const int spread = byte_end - byte_start + 1;
+ max = spread > max ? spread : max;
+ start += width;
+ } while (start % 8 != bit_offset);
+ return max;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover across
all bit
+/// offsets.
+constexpr int PackedMaxSpreadBytes(int width) {
Review Comment:
This is constexpr, but will the compiler actually reduce it to a
compile-time constant? It does not seem obvious to me.
##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int
batch_size) {
out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
}
}
- return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit
integers
+/// will be split on the first byte boundary (hence having a spread of two
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
+ int max = static_cast<int>(bit_util::BytesForBits(width));
+ int start = bit_offset;
+ do {
+ const int byte_start = start / 8;
+ const int byte_end = (start + width - 1) / 8; // inclusive end bit
+ const int spread = byte_end - byte_start + 1;
+ max = spread > max ? spread : max;
+ start += width;
+ } while (start % 8 != bit_offset);
+ return max;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover across
all bit
+/// offsets.
+constexpr int PackedMaxSpreadBytes(int width) {
+ int max = 0;
+ for (int offset = 0; offset < 8; ++offset) {
+ const int spread = PackedMaxSpreadBytes(width, offset);
+ max = spread > max ? spread : max;
+ }
+ return max;
+}
+
+// Integer type that tries to contain as much as the spread as possible.
+template <int kSpreadBytes>
+using SpreadBufferUint = std::conditional_t<
+ (kSpreadBytes <= sizeof(uint8_t)), uint_fast8_t,
+ std::conditional_t<(kSpreadBytes <= sizeof(uint16_t)), uint_fast16_t,
+ std::conditional_t<(kSpreadBytes <= sizeof(uint32_t)),
+ uint_fast32_t, uint_fast64_t>>>;
+
+/// Unpack integers.
+/// This function works for all input batch sizes but is not the fastest.
+/// In prolog mode, instead of unpacking all required element, the function
will
+/// stop if it finds a byte aligned value start.
+template <int kPackedBitWidth, bool kIsProlog, typename Uint>
+int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset)
{
+ // For the epilog we adapt the max spread since better alignment give
shorter spreads
+ ARROW_DCHECK(kIsProlog || bit_offset == 0);
Review Comment:
Also, IIUC:
```suggestion
ARROW_DCHECK(kIsProlog || bit_offset == 0);
ARROW_DCHECK(bit_offset >= 0 && bit_offset < 8);
```
##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int
batch_size) {
out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
}
}
- return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit
integers
+/// will be split on the first byte boundary (hence having a spread of two
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
+ int max = static_cast<int>(bit_util::BytesForBits(width));
+ int start = bit_offset;
+ do {
+ const int byte_start = start / 8;
+ const int byte_end = (start + width - 1) / 8; // inclusive end bit
+ const int spread = byte_end - byte_start + 1;
+ max = spread > max ? spread : max;
+ start += width;
+ } while (start % 8 != bit_offset);
Review Comment:
Note that this will be an infinite loop if `bit_offset >= 8` (hence the
DCHECK suggestion below)
##########
cpp/src/arrow/util/bpacking_dispatch_internal.h:
##########
@@ -47,7 +49,113 @@ int unpack_full(const uint8_t* in, Uint* out, int
batch_size) {
out[k] = FromLittleEndian(SafeLoadAs<Uint>(in + (k * sizeof(Uint))));
}
}
- return batch_size;
+}
+
+/// Compute the maximum spread in bytes that a packed integer can cover.
+///
+/// This is assuming contiguous packed integer starting with the given bit
offset away
+/// from a byte boundary.
+/// This function is non-monotonic, for instance with zero offset, three bit
integers
+/// will be split on the first byte boundary (hence having a spread of two
bytes) while
+/// four bit integer will be well behaved and never spread over byte boundary
(hence
+/// having a spread of one).
+constexpr int PackedMaxSpreadBytes(int width, int bit_offset) {
Review Comment:
And, as noted below, calling with `bit_offset` equal to 8 or larger results
in an infinite loop...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]