alamb commented on code in PR #8619:
URL: https://github.com/apache/arrow-rs/pull/8619#discussion_r2495555344
##########
arrow-buffer/src/util/bit_util.rs:
##########
@@ -94,11 +96,722 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
value.div_ceil(divisor)
}
+/// Read up to 8 bits from a byte slice starting at a given bit offset.
+///
+/// # Arguments
+///
+/// * `slice` - The byte slice to read from
+/// * `number_of_bits_to_read` - Number of bits to read (must be < 8)
+/// * `bit_offset` - Starting bit offset within the first byte (must be < 8)
+///
+/// # Returns
+///
+/// A `u8` containing the requested bits in the least significant positions
+///
+/// # Panics
+/// - Panics if `number_of_bits_to_read` is 0 or >= 8
+/// - Panics if `bit_offset` is >= 8
+/// - Panics if `slice` is empty or too small to read the requested bits
+///
+#[inline]
+pub(crate) fn read_up_to_byte_from_offset(
+ slice: &[u8],
+ number_of_bits_to_read: usize,
+ bit_offset: usize,
+) -> u8 {
+ assert!(number_of_bits_to_read < 8, "can read up to 8 bits only");
+ assert!(bit_offset < 8, "bit offset must be less than 8");
+ assert_ne!(
+ number_of_bits_to_read, 0,
+ "number of bits to read must be greater than 0"
+ );
+ assert_ne!(slice.len(), 0, "slice must not be empty");
+
+ let number_of_bytes_to_read = ceil(number_of_bits_to_read + bit_offset, 8);
+
+ // number of bytes to read
+ assert!(slice.len() >= number_of_bytes_to_read, "slice is too small");
+
+ let mut bits = slice[0] >> bit_offset;
+ for (i, &byte) in slice
+ .iter()
+ .take(number_of_bytes_to_read)
+ .enumerate()
+ .skip(1)
+ {
+ bits |= byte << (i * 8 - bit_offset);
+ }
+
+ bits & ((1 << number_of_bits_to_read) - 1)
+}
+
+/// Applies a bitwise operation relative to another bit-packed byte slice
+/// (right) in place
+///
+/// Note: applies the operation 64-bits (u64) at a time.
+///
+/// # Arguments
+///
+/// * `left` - The mutable buffer to be modified in-place
+/// * `offset_in_bits` - Starting bit offset in Self buffer
+/// * `right` - slice of bit-packed bytes in LSB order
+/// * `right_offset_in_bits` - Starting bit offset in the right buffer
+/// * `len_in_bits` - Number of bits to process
+/// * `op` - Binary operation to apply (e.g., `|a, b| a & b`). Applied a word at a time
+///
+/// # Example: Modify entire buffer
+/// ```
+/// # use arrow_buffer::MutableBuffer;
+/// # use arrow_buffer::bit_util::bitwise_binary_op;
+/// let mut left = MutableBuffer::new(2);
+/// left.extend_from_slice(&[0b11110000u8, 0b00110011u8]);
+/// let right = &[0b10101010u8, 0b10101010u8];
+/// // apply bitwise AND between left and right buffers, updating left in place
+/// bitwise_binary_op(left.as_slice_mut(), 0, right, 0, 16, |a, b| a & b);
+/// assert_eq!(left.as_slice(), &[0b10100000u8, 0b00100010u8]);
+/// ```
+///
+/// # Example: Modify buffer with offsets
+/// ```
+/// # use arrow_buffer::MutableBuffer;
Review Comment:
BTW, this example shows how these methods can still be used to modify a
`MutableBuffer` in place.
However, the functions are general and can work on any `&mut [u8]`.
This also has the nice property that Rust prevents any mutation of the
length or capacity, so we don't need to assert anymore.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]