This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1a42f4c27 Add iterators to BooleanBuffer and NullBuffer (#3901)
1a42f4c27 is described below
commit 1a42f4c279ef13c2970933f5de79eb3ed32ae3f9
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Thu Mar 23 12:25:05 2023 +0000
Add iterators to BooleanBuffer and NullBuffer (#3901)
* Add iterators to BooleanBuffer and NullBuffer
* Clippy
* Review feedback
---
arrow-arith/src/aggregate.rs | 4 +---
arrow-buffer/src/buffer/boolean.rs | 25 +++++++++++++++++++++++++
arrow-buffer/src/buffer/null.rs | 37 +++++++++++++++++++++++++++++++++++--
arrow-select/src/filter.rs | 3 +--
4 files changed, 62 insertions(+), 7 deletions(-)
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index 8e760da21..54f2240db 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -22,7 +22,6 @@ use arrow_array::iterator::ArrayIter;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_data::bit_iterator::try_for_each_valid_idx;
-use arrow_data::bit_iterator::BitIndexIterator;
use arrow_schema::ArrowError;
use arrow_schema::*;
@@ -118,9 +117,8 @@ where
.reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
} else {
let nulls = array.nulls().unwrap();
- let iter = BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len());
unsafe {
- let idx = iter.reduce(|acc_idx, idx| {
+ let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
let acc = array.value_unchecked(acc_idx);
let item = array.value_unchecked(idx);
if cmp(&acc, &item) {
diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index 9d5953594..fea04cc79 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -16,6 +16,7 @@
// under the License.
use crate::bit_chunk_iterator::BitChunks;
+use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::{bit_util, buffer_bin_and, buffer_bin_or, buffer_unary_not, Buffer};
use std::ops::{BitAnd, BitOr, Not};
@@ -164,6 +165,21 @@ impl BooleanBuffer {
pub fn into_inner(self) -> Buffer {
self.buffer
}
+
+ /// Returns an iterator over the bits in this [`BooleanBuffer`]
+ pub fn iter(&self) -> BitIterator<'_> {
+ self.into_iter()
+ }
+
+ /// Returns an iterator over the set bit positions in this [`BooleanBuffer`]
+ pub fn set_indices(&self) -> BitIndexIterator<'_> {
+ BitIndexIterator::new(self.values(), self.offset, self.len)
+ }
+
+ /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
+ pub fn set_slices(&self) -> BitSliceIterator<'_> {
+ BitSliceIterator::new(self.values(), self.offset, self.len)
+ }
}
impl Not for &BooleanBuffer {
@@ -215,3 +231,12 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer {
}
}
}
+
+impl<'a> IntoIterator for &'a BooleanBuffer {
+ type Item = bool;
+ type IntoIter = BitIterator<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ BitIterator::new(self.values(), self.offset, self.len)
+ }
+}
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index cbadb7f42..f088e7fa6 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-use crate::bit_iterator::BitIndexIterator;
+use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::buffer::BooleanBuffer;
use crate::{Buffer, MutableBuffer};
@@ -114,6 +114,30 @@ impl NullBuffer {
Self::new(self.buffer.slice(offset, len))
}
+ /// Returns an iterator over the bits in this [`NullBuffer`]
+ ///
+ /// * `true` indicates that the corresponding value is not NULL
+ /// * `false` indicates that the corresponding value is NULL
+ ///
+ /// Note: [`Self::valid_indices`] will be significantly faster for most use-cases
+ pub fn iter(&self) -> BitIterator<'_> {
+ self.buffer.iter()
+ }
+
+ /// Returns a [`BitIndexIterator`] over the valid indices in this [`NullBuffer`]
+ ///
+ /// Valid indices indicate the corresponding value is not NULL
+ pub fn valid_indices(&self) -> BitIndexIterator<'_> {
+ self.buffer.set_indices()
+ }
+
+ /// Returns a [`BitSliceIterator`] yielding contiguous ranges of valid indices
+ ///
+ /// Valid indices indicate the corresponding value is not NULL
+ pub fn valid_slices(&self) -> BitSliceIterator<'_> {
+ self.buffer.set_slices()
+ }
+
/// Calls the provided closure for each index in this null mask that is set
#[inline]
pub fn try_for_each_valid_idx<E, F: FnMut(usize) -> Result<(), E>>(
@@ -123,7 +147,7 @@ impl NullBuffer {
if self.null_count == self.len() {
return Ok(());
}
- BitIndexIterator::new(self.validity(), self.offset(), self.len()).try_for_each(f)
+ self.valid_indices().try_for_each(f)
}
/// Returns the inner [`BooleanBuffer`]
@@ -145,6 +169,15 @@ impl NullBuffer {
}
}
+impl<'a> IntoIterator for &'a NullBuffer {
+ type Item = bool;
+ type IntoIter = BitIterator<'a>;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.buffer.iter()
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index 35c11970c..784bfa020 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -81,8 +81,7 @@ struct IndexIterator<'a> {
impl<'a> IndexIterator<'a> {
fn new(filter: &'a BooleanArray, remaining: usize) -> Self {
assert_eq!(filter.null_count(), 0);
- let data = filter.data();
- let iter = BitIndexIterator::new(data.buffers()[0], data.offset(), data.len());
+ let iter = filter.values().set_indices();
Self { remaining, iter }
}
}