Dandandan commented on code in PR #9297:
URL: https://github.com/apache/arrow-rs/pull/9297#discussion_r2779486204
##########
arrow-buffer/src/buffer/boolean.rs:
##########
@@ -191,67 +189,48 @@ impl BooleanBuffer {
where
F: FnMut(u64) -> u64,
{
- // try fast path for aligned input
- if offset_in_bits & 0x7 == 0 {
- // align to byte boundary
- let aligned = &src.as_ref()[offset_in_bits / 8..];
- if let Some(result) =
- Self::try_from_aligned_bitwise_unary_op(aligned, len_in_bits,
&mut op)
- {
- return result;
+ let end = offset_in_bits + len_in_bits;
+ // Align start and end to 64 bit (8 byte) boundaries if possible to
allow using the
+ // optimized code path as much as possible.
+ let aligned_offset = offset_in_bits & !63;
+ let aligned_end_bytes = bit_util::ceil(end, 64) * 8;
+ let src_len = src.as_ref().len();
+ let slice_end = aligned_end_bytes.min(src_len);
+
+ let aligned_start = &src.as_ref()[aligned_offset / 8..slice_end];
+
+ let (prefix, aligned_u64s, suffix) = unsafe {
aligned_start.as_ref().align_to::<u64>() };
+ match (prefix, suffix) {
+ ([], []) => {
+ // the buffer is word (64 bit) aligned, so use optimized Vec
code.
+ let result_u64s: Vec<u64> = aligned_u64s.iter().map(|l|
op(*l)).collect();
+ return BooleanBuffer::new(result_u64s.into(), offset_in_bits %
64, len_in_bits);
}
- }
-
- let chunks = BitChunks::new(src.as_ref(), offset_in_bits, len_in_bits);
- let mut result = MutableBuffer::with_capacity(chunks.num_u64s() * 8);
- for chunk in chunks.iter() {
- // SAFETY: reserved enough capacity above, (exactly num_u64s()
- // items) and we assume `BitChunks` correctly reports upper bound
- unsafe {
- result.push_unchecked(op(chunk));
+ ([], suffix) => {
+ let suffix = read_u64(suffix);
+ let result_u64s: Vec<u64> = aligned_u64s
+ .iter()
+ .cloned()
+ .chain(std::iter::once(suffix))
+ .map(&mut op)
+ .collect();
+ return BooleanBuffer::new(result_u64s.into(), offset_in_bits %
64, len_in_bits);
}
- }
- if chunks.remainder_len() > 0 {
- debug_assert!(result.capacity() >= result.len() + 8); // should
not reallocate
- // SAFETY: reserved enough capacity above, (exactly num_u64s()
- // items) and we assume `BitChunks` correctly reports upper bound
- unsafe {
- result.push_unchecked(op(chunks.remainder_bits()));
- }
- // Just pushed one u64, which may have trailing zeros
- result.truncate(chunks.num_bytes());
+ _ => {}
}
- BooleanBuffer {
- buffer: Buffer::from(result),
- bit_offset: 0,
- bit_len: len_in_bits,
- }
- }
+ // align to byte boundaries
+ // Use unaligned code path, handle remainder bytes
+ let chunks = aligned_start.chunks_exact(8);
+ let remainder = chunks.remainder();
+ let iter = chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
+ let vec_u64s: Vec<u64> = if remainder.is_empty() {
+ iter.map(&mut op).collect()
Review Comment:
Hm I think the buffer itself (the address at offset 0) could still be not
aligned to 64 bytes and has a `prefix` in the path above. It could still be the
entire buffer from beginning to end is a multiple of 64 bits and the remainder
is empty.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]