This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 96637fc8b9 Speed up binary kernels (30% faster `and` and `or`), add 
`BooleanBuffer::from_bitwise_binary_op` (#9090)
96637fc8b9 is described below

commit 96637fc8b928a94de53bbec3501337c0ecfbf936
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri Jan 9 05:51:52 2026 -0500

    Speed up binary kernels (30% faster `and` and `or`), add 
`BooleanBuffer::from_bitwise_binary_op` (#9090)
    
    # Which issue does this PR close?
    
    - Part of https://github.com/apache/arrow-rs/issues/8806
    - Closes https://github.com/apache/arrow-rs/pull/8854
    - Closes https://github.com/apache/arrow-rs/pull/8807
    
    
    This is the next step after
    -  https://github.com/apache/arrow-rs/pull/8996
    
    # Rationale for this change
    
    - we can help rust / LLVM generate more optimal code by processing u64
    words at a time when the buffer is already u64 aligned (see #8807)
    
    Also, it is hard to find the code to create new Buffers by applying
    bitwise unary operations.
    
    # What changes are included in this PR?
    
    - Introduce optimized `BooleanBuffer::from_bitwise_binary`
    - Migrate several kernels that use `bitwise_bin_op_helper` to use the
    new BooleanBuffer
    
    
    # Are these changes tested?
    
    Yes new tests are added
    
    Performance results show 30% performance improvement for the `and` and
    `or` kernels for aligned buffers (common case)
    
    # Are there any user-facing changes?
    
    A new API
---
 arrow-arith/src/boolean.rs         |  24 +++---
 arrow-buffer/src/buffer/boolean.rs | 150 ++++++++++++++++++++++++++++++++++++-
 arrow-buffer/src/buffer/ops.rs     |  13 ++--
 arrow-select/src/nullif.rs         |   2 +-
 4 files changed, 169 insertions(+), 20 deletions(-)

diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs
index d94df49de2..6bf438e646 100644
--- a/arrow-arith/src/boolean.rs
+++ b/arrow-arith/src/boolean.rs
@@ -23,7 +23,7 @@
 //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
 
 use arrow_array::*;
-use arrow_buffer::buffer::{bitwise_bin_op_helper, 
bitwise_quaternary_op_helper};
+use arrow_buffer::buffer::bitwise_quaternary_op_helper;
 use arrow_buffer::{BooleanBuffer, NullBuffer, buffer_bin_and_not};
 use arrow_schema::ArrowError;
 
@@ -74,7 +74,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) 
-> Result<BooleanAr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is false (because null AND false = false).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_null_buffer.buffer(),
                 left_null_buffer.offset(),
                 right_values.inner(),
@@ -85,7 +85,7 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) 
-> Result<BooleanAr
         }
         (None, Some(right_null_buffer)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_null_buffer.buffer(),
                 right_null_buffer.offset(),
                 left_values.inner(),
@@ -100,7 +100,7 @@ pub fn and_kleene(left: &BooleanArray, right: 
&BooleanArray) -> Result<BooleanAr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & !d)) & (c | (a & !b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_null_buffer.buffer(),
                     left_values.inner(),
@@ -115,10 +115,11 @@ pub fn and_kleene(left: &BooleanArray, right: 
&BooleanArray) -> Result<BooleanAr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & !d)) & (c | (a & !b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, 
left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values & right_values, nulls))
 }
 
@@ -169,7 +170,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) 
-> Result<BooleanArr
             // The final null bit is set only if:
             // 1. left null bit is set, or
             // 2. right data bit is true (because null OR true = true).
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 left_nulls.buffer(),
                 left_nulls.offset(),
                 right_values.inner(),
@@ -180,7 +181,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) 
-> Result<BooleanArr
         }
         (None, Some(right_nulls)) => {
             // Same as above
-            Some(bitwise_bin_op_helper(
+            Some(BooleanBuffer::from_bitwise_binary_op(
                 right_nulls.buffer(),
                 right_nulls.offset(),
                 left_values.inner(),
@@ -195,7 +196,7 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) 
-> Result<BooleanArr
             // d is right data bits.
             // The final null bits are:
             // (a | (c & d)) & (c | (a & b))
-            Some(bitwise_quaternary_op_helper(
+            let buffer = bitwise_quaternary_op_helper(
                 [
                     left_nulls.buffer(),
                     left_values.inner(),
@@ -210,11 +211,12 @@ pub fn or_kleene(left: &BooleanArray, right: 
&BooleanArray) -> Result<BooleanArr
                 ],
                 left.len(),
                 |a, b, c, d| (a | (c & d)) & (c | (a & b)),
-            ))
+            );
+            Some(BooleanBuffer::new(buffer, 0, left.len()))
         }
     };
 
-    let nulls = buffer.map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, 
left.len())));
+    let nulls = buffer.map(NullBuffer::new);
     Ok(BooleanArray::new(left_values | right_values, nulls))
 }
 
diff --git a/arrow-buffer/src/buffer/boolean.rs 
b/arrow-buffer/src/buffer/boolean.rs
index ecd7de38c0..f7f1b025ed 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -169,9 +169,10 @@ impl BooleanBuffer {
     /// * The output always has zero offset
     ///
     /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_binary_op`] to create a new buffer 
from a binary operation
     /// - [`apply_bitwise_unary_op`](bit_util::apply_bitwise_unary_op) for 
in-place unary bitwise operations
     ///
-    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of an input 
[`Buffer`]
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `NOT` of a byte 
slice
     /// ```
     /// # use arrow_buffer::BooleanBuffer;
     /// let input = [0b11001100u8, 0b10111010u8]; // 2 bytes = 16 bits
@@ -221,9 +222,8 @@ impl BooleanBuffer {
             result.truncate(chunks.num_bytes());
         }
 
-        let buffer = Buffer::from(result);
         BooleanBuffer {
-            buffer,
+            buffer: Buffer::from(result),
             bit_offset: 0,
             bit_len: len_in_bits,
         }
@@ -254,6 +254,112 @@ impl BooleanBuffer {
         Some(BooleanBuffer::new(buffer, 0, len_in_bits))
     }
 
+    /// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` 
to
+    /// the relevant bits from two input buffers.
+    ///
+    /// This function is faster than applying the operation bit by bit as
+    /// it processes input buffers in chunks of 64 bits (8 bytes) at a time
+    ///
+    /// # Notes:
+    /// See notes on [Self::from_bitwise_unary_op]
+    ///
+    /// # See Also
+    /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a 
single input buffer.
+    /// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for 
in-place binary bitwise operations
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two 
[`Buffer`]s
+    /// ```
+    /// # use arrow_buffer::{Buffer, BooleanBuffer};
+    /// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes 
= 16 bits
+    /// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 
0b11110000u8]); // 3 bytes = 24 bits
+    /// // AND of the first 12 bits
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///   &left, 0, &right, 0, 12, |a, b| a & b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
+    /// ```
+    ///
+    /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte 
slices
+    /// ```
+    /// # use arrow_buffer::BooleanBuffer;
+    /// let left = [0b11001100u8, 0b10111010u8];
+    /// let right = [0b10101010u8, 0b11011100u8];
+    /// // OR of bits 4..16 from left and bits 0..12 from right
+    /// let result = BooleanBuffer::from_bitwise_binary_op(
+    ///  &left, 4, &right, 0, 12, |a, b| a | b
+    /// );
+    /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
+    /// ```
+    pub fn from_bitwise_binary_op<F>(
+        left: impl AsRef<[u8]>,
+        left_offset_in_bits: usize,
+        right: impl AsRef<[u8]>,
+        right_offset_in_bits: usize,
+        len_in_bits: usize,
+        mut op: F,
+    ) -> Self
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        let left = left.as_ref();
+        let right = right.as_ref();
+        // try fast path for aligned input
+        // If the underlying buffers are aligned to u64 we can apply the 
operation directly on the u64 slices
+        // to improve performance.
+        if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 {
+            // align to byte boundary
+            let left = &left[left_offset_in_bits / 8..];
+            let right = &right[right_offset_in_bits / 8..];
+
+            unsafe {
+                let (left_prefix, left_u64s, left_suffix) = 
left.align_to::<u64>();
+                let (right_prefix, right_u64s, right_suffix) = 
right.align_to::<u64>();
+                // if there is no prefix or suffix, both buffers are aligned 
and
+                // we can do the operation directly on u64s.
+                // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` 
when MSRV reaches 1.88.
+                // 
https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
+                if left_prefix.is_empty()
+                    && right_prefix.is_empty()
+                    && left_suffix.is_empty()
+                    && right_suffix.is_empty()
+                {
+                    let result_u64s = left_u64s
+                        .iter()
+                        .zip(right_u64s.iter())
+                        .map(|(l, r)| op(*l, *r))
+                        .collect::<Vec<u64>>();
+                    return BooleanBuffer {
+                        buffer: Buffer::from(result_u64s),
+                        bit_offset: 0,
+                        bit_len: len_in_bits,
+                    };
+                }
+            }
+        }
+        let left_chunks = BitChunks::new(left, left_offset_in_bits, 
len_in_bits);
+        let right_chunks = BitChunks::new(right, right_offset_in_bits, 
len_in_bits);
+
+        let chunks = left_chunks
+            .iter()
+            .zip(right_chunks.iter())
+            .map(|(left, right)| op(left, right));
+        // Soundness: `BitChunks` is a `BitChunks` trusted length iterator 
which
+        // correctly reports its upper bound
+        let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) 
};
+
+        let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
+        let rem = op(left_chunks.remainder_bits(), 
right_chunks.remainder_bits());
+        // we are counting its starting from the least significant bit, to 
to_le_bytes should be correct
+        let rem = &rem.to_le_bytes()[0..remainder_bytes];
+        buffer.extend_from_slice(rem);
+
+        BooleanBuffer {
+            buffer: Buffer::from(buffer),
+            bit_offset: 0,
+            bit_len: len_in_bits,
+        }
+    }
+
     /// Returns the number of set bits in this buffer
     pub fn count_set_bits(&self) -> usize {
         self.buffer
@@ -656,4 +762,42 @@ mod tests {
             assert_eq!(result, expected);
         }
     }
+
+    #[test]
+    fn test_from_bitwise_binary_op() {
+        // pick random boolean inputs
+        let input_bools_left = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_bools_right = (0..1024)
+            .map(|_| rand::random::<bool>())
+            .collect::<Vec<bool>>();
+        let input_buffer_left = BooleanBuffer::from(&input_bools_left[..]);
+        let input_buffer_right = BooleanBuffer::from(&input_bools_right[..]);
+
+        for left_offset in 0..200 {
+            for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
+                for len_offset in [0, 1, 44, 100, 256, 300, 512] {
+                    let len = 1024 - len_offset - 
left_offset.max(right_offset); // ensure we don't go out of bounds
+                    // compute with AND
+                    let result = BooleanBuffer::from_bitwise_binary_op(
+                        input_buffer_left.values(),
+                        left_offset,
+                        input_buffer_right.values(),
+                        right_offset,
+                        len,
+                        |a, b| a & b,
+                    );
+                    // compute directly from bools
+                    let expected = input_bools_left[left_offset..]
+                        .iter()
+                        .zip(&input_bools_right[right_offset..])
+                        .take(len)
+                        .map(|(a, b)| *a & *b)
+                        .collect::<BooleanBuffer>();
+                    assert_eq!(result, expected);
+                }
+            }
+        }
+    }
 }
diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
index cb0925bb2c..36efe87643 100644
--- a/arrow-buffer/src/buffer/ops.rs
+++ b/arrow-buffer/src/buffer/ops.rs
@@ -150,7 +150,7 @@ pub fn buffer_bin_and(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -158,6 +158,7 @@ pub fn buffer_bin_and(
         len_in_bits,
         |a, b| a & b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise or to two inputs and return the result as a Buffer.
@@ -169,7 +170,7 @@ pub fn buffer_bin_or(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -177,6 +178,7 @@ pub fn buffer_bin_or(
         len_in_bits,
         |a, b| a | b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise xor to two inputs and return the result as a Buffer.
@@ -188,7 +190,7 @@ pub fn buffer_bin_xor(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -196,6 +198,7 @@ pub fn buffer_bin_xor(
         len_in_bits,
         |a, b| a ^ b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise and_not to two inputs and return the result as a Buffer.
@@ -207,7 +210,7 @@ pub fn buffer_bin_and_not(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    bitwise_bin_op_helper(
+    BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
@@ -215,11 +218,11 @@ pub fn buffer_bin_and_not(
         len_in_bits,
         |a, b| a & !b,
     )
+    .into_inner()
 }
 
 /// Apply a bitwise not to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are 
specified in number of bits.
 pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: 
usize) -> Buffer {
-    // TODO: should we deprecate this function in favor of the Buffer ! impl ?
     BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, 
|a| !a).into_inner()
 }
diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs
index e51016f9ba..fa875c20e3 100644
--- a/arrow-select/src/nullif.rs
+++ b/arrow-select/src/nullif.rs
@@ -23,7 +23,7 @@ use arrow_buffer::{BooleanBuffer, NullBuffer, 
bitwise_unary_op_helper};
 use arrow_schema::{ArrowError, DataType};
 
 /// Returns a new array with the same values and the validity bit to false 
where
-/// the corresponding element of`right` is true.
+/// the corresponding element of `right` is true.
 ///
 /// This can be used to implement SQL `NULLIF`
 ///

Reply via email to