This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new d53df60565 feat: Optimize from_bitwise_binary_op with 64-bit alignment
(#9441)
d53df60565 is described below
commit d53df605656d8012eca42e8ddffe165362a1a4cb
Author: Kunal <[email protected]>
AuthorDate: Fri Mar 20 01:44:01 2026 +0530
feat: Optimize from_bitwise_binary_op with 64-bit alignment (#9441)
# Which issue does this PR close?
- Closes #9378
# Rationale for this change
the optimizations as listed in the issue description
- Align to 8 bytes
- Don't try to return a buffer with bit_offset 0 but round it to a
multiple of 64
- Use chunk_exact for the fallback path
# What changes are included in this PR?
When both inputs share the same sub-64-bit alignment (left_offset % 64
== right_offset % 64), the optimized path is used. This covers the
common cases (both offset 0, both sliced equally, etc.). The BitChunks
fallback is retained only when the two offsets have different sub-64-bit
alignment.
# Are these changes tested?
Yes the tests are changed and they are included
# Are there any user-facing changes?
Yes, this is a minor breaking change to from_bitwise_binary_op:
- The returned BooleanBuffer may now have a non-zero offset (previously
always 0)
- The returned BooleanBuffer may have padding bits set outside the
logical range in values()
---------
Signed-off-by: Kunal Singh Dadhwal <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
arrow-buffer/src/buffer/boolean.rs | 230 +++++++++++++++++++++++++++++++------
arrow-buffer/src/buffer/ops.rs | 102 ++++++++++++++--
2 files changed, 288 insertions(+), 44 deletions(-)
diff --git a/arrow-buffer/src/buffer/boolean.rs
b/arrow-buffer/src/buffer/boolean.rs
index bae083b3b2..420bbf59f3 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -290,7 +290,8 @@ impl BooleanBuffer {
/// on the relevant bits; the input `u64` values may contain irrelevant
bits
/// and may be processed differently on different endian architectures.
/// * `op` may be called with input bits outside the requested range.
- /// * The returned `BooleanBuffer` always has zero offset.
+ /// * Returned `BooleanBuffer` may have non zero offset
+ /// * Returned `BooleanBuffer` may have bits set outside the requested
range
///
/// # See Also
/// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a
single input buffer.
@@ -305,19 +306,28 @@ impl BooleanBuffer {
/// let result = BooleanBuffer::from_bitwise_binary_op(
/// &left, 0, &right, 0, 12, |a, b| a & b
/// );
- /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+ /// assert_eq!(result.len(), 12);
+ /// for i in 0..12 {
+ /// assert_eq!(result.value(i), left.as_slice()[i / 8] >> (i % 8) & 1
== 1
+ /// && right.as_slice()[i / 8] >> (i % 8) & 1 == 1);
+ /// }
/// ```
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte
slices
/// ```
- /// # use arrow_buffer::BooleanBuffer;
+ /// # use arrow_buffer::{BooleanBuffer, bit_util};
/// let left = [0b11001100u8, 0b10111010u8];
/// let right = [0b10101010u8, 0b11011100u8];
/// // OR of bits 4..16 from left and bits 0..12 from right
/// let result = BooleanBuffer::from_bitwise_binary_op(
/// &left, 4, &right, 0, 12, |a, b| a | b
/// );
- /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+ /// assert_eq!(result.len(), 12);
+ /// for i in 0..12 {
+ /// let l = bit_util::get_bit(&left, 4 + i);
+ /// let r = bit_util::get_bit(&right, i);
+ /// assert_eq!(result.value(i), l | r);
+ /// }
/// ```
pub fn from_bitwise_binary_op<F>(
left: impl AsRef<[u8]>,
@@ -332,39 +342,74 @@ impl BooleanBuffer {
{
let left = left.as_ref();
let right = right.as_ref();
- // try fast path for aligned input
- // If the underlying buffers are aligned to u64 we can apply the
operation directly on the u64 slices
- // to improve performance.
- if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
- // align to byte boundary
- let left = &left[left_offset_in_bits / 8..];
- let right = &right[right_offset_in_bits / 8..];
-
- unsafe {
- let (left_prefix, left_u64s, left_suffix) =
left.align_to::<u64>();
- let (right_prefix, right_u64s, right_suffix) =
right.align_to::<u64>();
- // if there is no prefix or suffix, both buffers are aligned
and
- // we can do the operation directly on u64s.
- // TODO: consider `slice::as_chunks` and `u64::from_le_bytes`
when MSRV reaches 1.88.
- //
https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
- if left_prefix.is_empty()
- && right_prefix.is_empty()
- && left_suffix.is_empty()
- && right_suffix.is_empty()
- {
- let result_u64s = left_u64s
+
+ // When both offsets share the same sub-64-bit alignment, we can
+ // align both to 64-bit boundaries and zip u64s directly,
+ // avoiding BitChunks bit-shifting entirely.
+ if left_offset_in_bits % 64 == right_offset_in_bits % 64 {
+ let bit_offset = left_offset_in_bits % 64;
+ let left_end = left_offset_in_bits + len_in_bits;
+ let right_end = right_offset_in_bits + len_in_bits;
+
+ let left_aligned = left_offset_in_bits & !63;
+ let right_aligned = right_offset_in_bits & !63;
+
+ let left_end_bytes = (bit_util::ceil(left_end, 64) *
8).min(left.len());
+ let right_end_bytes = (bit_util::ceil(right_end, 64) *
8).min(right.len());
+
+ let left_slice = &left[left_aligned / 8..left_end_bytes];
+ let right_slice = &right[right_aligned / 8..right_end_bytes];
+
+ let (lp, left_u64s, ls) = unsafe { left_slice.align_to::<u64>() };
+ let (rp, right_u64s, rs) = unsafe { right_slice.align_to::<u64>()
};
+
+ match (lp, ls, rp, rs) {
+ ([], [], [], []) => {
+ let result_u64s: Vec<u64> = left_u64s
.iter()
.zip(right_u64s.iter())
.map(|(l, r)| op(*l, *r))
- .collect::<Vec<u64>>();
- return BooleanBuffer {
- buffer: Buffer::from(result_u64s),
- bit_offset: 0,
- bit_len: len_in_bits,
- };
+ .collect();
+ return BooleanBuffer::new(result_u64s.into(), bit_offset,
len_in_bits);
}
+ ([], left_suf, [], right_suf) => {
+ let left_iter = left_u64s
+ .iter()
+ .cloned()
+ .chain((!left_suf.is_empty()).then(||
read_u64(left_suf)));
+ let right_iter = right_u64s
+ .iter()
+ .cloned()
+ .chain((!right_suf.is_empty()).then(||
read_u64(right_suf)));
+ let result_u64s: Vec<u64> =
+ left_iter.zip(right_iter).map(|(l, r)| op(l,
r)).collect();
+ return BooleanBuffer::new(result_u64s.into(), bit_offset,
len_in_bits);
+ }
+ _ => {}
}
+
+ // Memory not u64-aligned, use chunks_exact fallback
+ let left_chunks = left_slice.chunks_exact(8);
+ let left_rem = left_chunks.remainder();
+ let right_chunks = right_slice.chunks_exact(8);
+ let right_rem = right_chunks.remainder();
+
+ let left_iter = left_chunks.map(|c|
u64::from_le_bytes(c.try_into().unwrap()));
+ let right_iter = right_chunks.map(|c|
u64::from_le_bytes(c.try_into().unwrap()));
+
+ let result_u64s: Vec<u64> = if left_rem.is_empty() &&
right_rem.is_empty() {
+ left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect()
+ } else {
+ left_iter
+ .chain(Some(read_u64(left_rem)))
+ .zip(right_iter.chain(Some(read_u64(right_rem))))
+ .map(|(l, r)| op(l, r))
+ .collect()
+ };
+ return BooleanBuffer::new(result_u64s.into(), bit_offset,
len_in_bits);
}
+
+ // Different sub-64-bit alignments: bit-shifting unavoidable
let left_chunks = BitChunks::new(left, left_offset_in_bits,
len_in_bits);
let right_chunks = BitChunks::new(right, right_offset_in_bits,
len_in_bits);
@@ -479,7 +524,7 @@ impl BooleanBuffer {
}
}
- /// Returns a [`Buffer`] containing the sliced contents of this
[`BooleanBuffer`]
+ /// Returns a new [`Buffer`] containing the sliced contents of this
[`BooleanBuffer`]
///
/// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
pub fn sliced(&self) -> Buffer {
@@ -994,6 +1039,127 @@ mod tests {
}
}
+ #[test]
+ fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback() {
+ // Exercise the shared-alignment fast path when both inputs are
misaligned in memory,
+ // forcing the chunks_exact fallback instead of align_to::<u64>().
+ let left_bytes = [
+ 0, // dropped so `&left_bytes[1..]` is not u64-aligned
in memory
+ 0b1101_0010, // logical left bits start at bit 3 of this byte
+ 0b0110_1101,
+ 0b1010_0111,
+ 0b0001_1110,
+ 0b1110_0001,
+ 0b0101_1010,
+ 0b1001_0110,
+ 0b0011_1100,
+ 0b1011_0001,
+ 0b0100_1110,
+ 0b1100_0011,
+ 0b0111_1000,
+ ];
+ let right_bytes = [
+ 0, // dropped so `&right_bytes[1..]` is not u64-aligned
in memory
+ 0b1010_1100, // logical right bits start at bit 67 == bit 3 of the
second 64-bit block
+ 0b0101_0011,
+ 0b1111_0000,
+ 0b0011_1010,
+ 0b1000_1111,
+ 0b0110_0101,
+ 0b1101_1000,
+ 0b0001_0111,
+ 0b1110_0100,
+ 0b0010_1101,
+ 0b1001_1010,
+ 0b0111_0001,
+ ];
+
+ let left = &left_bytes[1..];
+ let right = &right_bytes[1..];
+
+ let left_offset = 3;
+ let right_offset = 67; // same mod 64 as left_offset, so this takes
the shared-alignment path
+ let len = 24; // leaves a partial trailing chunk, so this covers the
non-empty remainder branch
+
+ let result = BooleanBuffer::from_bitwise_binary_op(
+ left,
+ left_offset,
+ right,
+ right_offset,
+ len,
+ |a, b| a & b,
+ );
+ let expected = (0..len)
+ .map(|i| {
+ bit_util::get_bit(left, left_offset + i)
+ & bit_util::get_bit(right, right_offset + i)
+ })
+ .collect::<BooleanBuffer>();
+
+ assert_eq!(result, expected);
+ assert_eq!(result.offset(), left_offset % 64);
+ }
+
+ #[test]
+ fn
test_from_bitwise_binary_op_same_mod_64_unaligned_fallback_no_remainder() {
+ // Force the chunks_exact fallback with an exact 8-byte chunk so both
remainders are empty.
+ let left_bytes = [
+ 0, // dropped so `&left_bytes[1..]` is not u64-aligned
in memory
+ 0b1010_1100, // logical left bits start at bit 3 of this byte
+ 0b0110_1001,
+ 0b1101_0011,
+ 0b0001_1110,
+ 0b1110_0101,
+ 0b0101_1000,
+ 0b1001_0111,
+ 0b0011_1101,
+ ];
+ let right_bytes = [
+ 0, // dropped so `&right_bytes[1..]` is not u64-aligned
in memory
+ 0b0111_0010, // logical right bits start at bit 67 == bit 3 of the
second 64-bit block
+ 0b1010_1001,
+ 0b0101_1110,
+ 0b1100_0011,
+ 0b0011_1011,
+ 0b1000_1110,
+ 0b1111_0001,
+ 0b0100_1101,
+ 0b1011_0110,
+ 0b0001_1011,
+ 0b1101_0100,
+ 0b0110_0011,
+ 0b1001_1110,
+ 0b0010_1001,
+ 0b1110_0110,
+ 0b0101_0001,
+ ];
+
+ let left = &left_bytes[1..];
+ let right = &right_bytes[1..];
+
+ let left_offset = 3;
+ let right_offset = 67; // same mod 64 as left_offset, so this takes
the shared-alignment path
+ let len = 61; // 3 + 61 = 64, so the aligned slices are exactly one
8-byte chunk with empty remainders
+
+ let result = BooleanBuffer::from_bitwise_binary_op(
+ left,
+ left_offset,
+ right,
+ right_offset,
+ len,
+ |a, b| a | b,
+ );
+ let expected = (0..len)
+ .map(|i| {
+ bit_util::get_bit(left, left_offset + i)
+ | bit_util::get_bit(right, right_offset + i)
+ })
+ .collect::<BooleanBuffer>();
+
+ assert_eq!(result, expected);
+ assert_eq!(result.offset(), left_offset % 64);
+ }
+
#[test]
fn test_extend_trusted_len_sets_byte_len() {
// Ensures extend_trusted_len keeps the underlying byte length in sync
with bit length.
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index 36efe87643..793bbaf6c2 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -143,6 +143,9 @@ where
/// Apply a bitwise and to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s
directly
pub fn buffer_bin_and(
left: &Buffer,
left_offset_in_bits: usize,
@@ -150,19 +153,27 @@ pub fn buffer_bin_and(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- BooleanBuffer::from_bitwise_binary_op(
+ let result = BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a & b,
- )
- .into_inner()
+ );
+ // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+ if result.offset() == 0 {
+ result.into_inner()
+ } else {
+ result.sliced()
+ }
}
/// Apply a bitwise or to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s
directly
pub fn buffer_bin_or(
left: &Buffer,
left_offset_in_bits: usize,
@@ -170,19 +181,27 @@ pub fn buffer_bin_or(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- BooleanBuffer::from_bitwise_binary_op(
+ let result = BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a | b,
- )
- .into_inner()
+ );
+ // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+ if result.offset() == 0 {
+ result.into_inner()
+ } else {
+ result.sliced()
+ }
}
/// Apply a bitwise xor to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s
directly
pub fn buffer_bin_xor(
left: &Buffer,
left_offset_in_bits: usize,
@@ -190,19 +209,27 @@ pub fn buffer_bin_xor(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- BooleanBuffer::from_bitwise_binary_op(
+ let result = BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a ^ b,
- )
- .into_inner()
+ );
+ // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+ if result.offset() == 0 {
+ result.into_inner()
+ } else {
+ result.sliced()
+ }
}
/// Apply a bitwise and_not to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s
directly
pub fn buffer_bin_and_not(
left: &Buffer,
left_offset_in_bits: usize,
@@ -210,19 +237,70 @@ pub fn buffer_bin_and_not(
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
- BooleanBuffer::from_bitwise_binary_op(
+ let result = BooleanBuffer::from_bitwise_binary_op(
left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a & !b,
- )
- .into_inner()
+ );
+ // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+ if result.offset() == 0 {
+ result.into_inner()
+ } else {
+ result.sliced()
+ }
}
/// Apply a bitwise not to one input and return the result as a Buffer.
/// The input is treated as a bitmap, meaning that offset and length are
specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_unary_op`] for creating `BooleanBuffer`s
directly
pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits:
usize) -> Buffer {
BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits,
|a| !a).into_inner()
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_buffer_bin_ops_return_zero_offset_buffers() {
+ let left = Buffer::from(vec![0b1010_1100, 0b0110_1001]);
+ let right = Buffer::from(vec![0, 0, 0, 0, 0, 0, 0, 0, 0b1110_0101,
0b0101_1000]);
+
+ let left_offset = 1;
+ let right_offset = 65; // same mod 64 as left_offset, so
from_bitwise_binary_op returns non-zero offset
+ let len = 7;
+
+ // Reuse the same offset scenario for all four binary wrappers:
+ // each wrapper should return the logically equivalent offset-0 Buffer,
+ // even though the underlying BooleanBuffer result has offset 1.
+ for (op, wrapper) in [
+ (
+ (|a, b| a & b) as fn(u64, u64) -> u64,
+ buffer_bin_and as fn(&Buffer, usize, &Buffer, usize, usize) ->
Buffer,
+ ),
+ (((|a, b| a | b) as fn(u64, u64) -> u64), buffer_bin_or),
+ (((|a, b| a ^ b) as fn(u64, u64) -> u64), buffer_bin_xor),
+ (((|a, b| a & !b) as fn(u64, u64) -> u64), buffer_bin_and_not),
+ ] {
+ let unsliced = BooleanBuffer::from_bitwise_binary_op(
+ &left,
+ left_offset,
+ &right,
+ right_offset,
+ len,
+ op,
+ );
+ assert_eq!(unsliced.offset(), 1);
+
+ let result = wrapper(&left, left_offset, &right, right_offset,
len);
+
+ assert_eq!(result, unsliced.sliced());
+ assert_eq!(result.len(), 1);
+ }
+ }
+}