This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a42f4c27 Add iterators to BooleanBuffer and NullBuffer (#3901)
1a42f4c27 is described below

commit 1a42f4c279ef13c2970933f5de79eb3ed32ae3f9
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 12:25:05 2023 +0000

    Add iterators to BooleanBuffer and NullBuffer (#3901)
    
    * Add iterators to BooleanBuffer and NullBuffer
    
    * Clippy
    
    * Review feedback
---
 arrow-arith/src/aggregate.rs       |  4 +---
 arrow-buffer/src/buffer/boolean.rs | 25 +++++++++++++++++++++++++
 arrow-buffer/src/buffer/null.rs    | 37 +++++++++++++++++++++++++++++++++++--
 arrow-select/src/filter.rs         |  3 +--
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 8e760da21..54f2240db 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -22,7 +22,6 @@ use arrow_array::iterator::ArrayIter;
 use arrow_array::*;
 use arrow_buffer::ArrowNativeType;
 use arrow_data::bit_iterator::try_for_each_valid_idx;
-use arrow_data::bit_iterator::BitIndexIterator;
 use arrow_schema::ArrowError;
 use arrow_schema::*;
 
@@ -118,9 +117,8 @@ where
             .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
     } else {
         let nulls = array.nulls().unwrap();
-        let iter = BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len());
         unsafe {
-            let idx = iter.reduce(|acc_idx, idx| {
+            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
                 let acc = array.value_unchecked(acc_idx);
                 let item = array.value_unchecked(idx);
                 if cmp(&acc, &item) {
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index 9d5953594..fea04cc79 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -16,6 +16,7 @@
 // under the License.
 
 use crate::bit_chunk_iterator::BitChunks;
+use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
 use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
 use std::ops::{BitAnd, BitOr, Not};
 
@@ -164,6 +165,21 @@ impl BooleanBuffer {
     pub fn into_inner(self) -> Buffer {
         self.buffer
     }
+
+    /// Returns an iterator over the bits in this [`BooleanBuffer`]
+    pub fn iter(&self) -> BitIterator<'_> {
+        self.into_iter()
+    }
+
+    /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
+    pub fn set_indices(&self) -> BitIndexIterator<'_> {
+        BitIndexIterator::new(self.values(), self.offset, self.len)
+    }
+
+    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
+    pub fn set_slices(&self) -> BitSliceIterator<'_> {
+        BitSliceIterator::new(self.values(), self.offset, self.len)
+    }
 }
 
 impl Not for &BooleanBuffer {
@@ -215,3 +231,12 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
         }
     }
 }
+
+impl<'a> IntoIterator for &'a BooleanBuffer {
+    type Item = bool;
+    type IntoIter = BitIterator<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        BitIterator::new(self.values(), self.offset, self.len)
+    }
+}
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index cbadb7f42..f088e7fa6 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::bit_iterator::BitIndexIterator;
+use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
 use crate::buffer::BooleanBuffer;
 use crate::{Buffer, MutableBuffer};
 
@@ -114,6 +114,30 @@ impl NullBuffer {
         Self::new(self.buffer.slice(offset, len))
     }
 
+    /// Returns an iterator over the bits in this [`NullBuffer`]
+    ///
+    /// * `true` indicates that the corresponding value is not NULL
+    /// * `false` indicates that the corresponding value is NULL
+    ///
+    /// Note: [`Self::valid_indices`] will be significantly faster for most use-cases
+    pub fn iter(&self) -> BitIterator<'_> {
+        self.buffer.iter()
+    }
+
+    /// Returns a [`BitIndexIterator`] over the valid indices in this [`NullBuffer`]
+    ///
+    /// Valid indices indicate the corresponding value is not NULL
+    pub fn valid_indices(&self) -> BitIndexIterator<'_> {
+        self.buffer.set_indices()
+    }
+
+    /// Returns a [`BitSliceIterator`] yielding contiguous ranges of valid indices
+    ///
+    /// Valid indices indicate the corresponding value is not NULL
+    pub fn valid_slices(&self) -> BitSliceIterator<'_> {
+        self.buffer.set_slices()
+    }
+
     /// Calls the provided closure for each index in this null mask that is set
     #[inline]
     pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
@@ -123,7 +147,7 @@ impl NullBuffer {
         if self.null_count == self.len() {
             return Ok(());
         }
-        BitIndexIterator::new(self.validity(), self.offset(), self.len()).try_for_each(f)
+        self.valid_indices().try_for_each(f)
     }
 
     /// Returns the inner [`BooleanBuffer`]
@@ -145,6 +169,15 @@ impl NullBuffer {
     }
 }
 
+impl<'a> IntoIterator for &'a NullBuffer {
+    type Item = bool;
+    type IntoIter = BitIterator<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.buffer.iter()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 35c11970c..784bfa020 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -81,8 +81,7 @@ struct IndexIterator<'a> {
 impl<'a> IndexIterator<'a> {
     fn new(filter: &'a BooleanArray, remaining: usize) -> Self {
         assert_eq!(filter.null_count(), 0);
-        let data = filter.data();
-        let iter = BitIndexIterator::new(data.buffers()[0], data.offset(), data.len());
+        let iter = filter.values().set_indices();
         Self { remaining, iter }
     }
 }

Reply via email to