This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new efa0664923 perf: override `ArrayIter` default impl for `nth`,
`nth_back`, `last` and `count` (#8785)
efa0664923 is described below
commit efa06649232e4cd20086fd535e5d45a90140be91
Author: Raz Luvaton <[email protected]>
AuthorDate: Wed Nov 12 00:19:08 2025 +0200
perf: override `ArrayIter` default impl for `nth`, `nth_back`, `last` and
`count` (#8785)
# Which issue does this PR close?
N/A
# Rationale for this change
The default implementations step through the iterator one element at a time
to reach the value, whereas `ArrayIter` can compute it in constant time.
# What changes are included in this PR?
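Override `nth`, `nth_back`, `last` and `count` for `ArrayIter`, and keep `logical_nulls` in `ArrayIter::new` only when its null count is greater than zero.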
# Are these changes tested?
Yes, by the existing tests in this file that I added in a previous PR.
# Are there any user-facing changes?
Nope
-----
Extracted from the following PR, which I will probably close because it is
not faster locally in some cases:
- #8697
---
arrow-array/src/iterator.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs
index e72b259ef0..c281231a2e 100644
--- a/arrow-array/src/iterator.rs
+++ b/arrow-array/src/iterator.rs
@@ -56,7 +56,7 @@ impl<T: ArrayAccessor> ArrayIter<T> {
/// create a new iterator
pub fn new(array: T) -> Self {
let len = array.len();
- let logical_nulls = array.logical_nulls();
+ let logical_nulls = array.logical_nulls().filter(|x| x.null_count() >
0);
ArrayIter {
array,
logical_nulls,
@@ -102,6 +102,38 @@ impl<T: ArrayAccessor> Iterator for ArrayIter<T> {
Some(self.current_end - self.current),
)
}
+
+ #[inline]
+ fn nth(&mut self, n: usize) -> Option<Self::Item> {
+ // Check if we can advance to the desired offset
+ match self.current.checked_add(n) {
+ // Yes, and still within bounds
+ Some(new_current) if new_current < self.current_end => {
+ self.current = new_current;
+ }
+
+ // Either overflow or would exceed current_end
+ _ => {
+ self.current = self.current_end;
+ return None;
+ }
+ }
+
+ self.next()
+ }
+
+ #[inline]
+ fn last(mut self) -> Option<Self::Item> {
+ self.next_back()
+ }
+
+ #[inline]
+ fn count(self) -> usize
+ where
+ Self: Sized,
+ {
+ self.len()
+ }
}
impl<T: ArrayAccessor> DoubleEndedIterator for ArrayIter<T> {
@@ -122,6 +154,25 @@ impl<T: ArrayAccessor> DoubleEndedIterator for
ArrayIter<T> {
})
}
}
+
+ #[inline]
+ fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+ // Check if we can move the end of the remaining range back by `n`
+ match self.current_end.checked_sub(n) {
+ // Yes, and still within bounds
+ Some(new_offset) if self.current < new_offset => {
+ self.current_end = new_offset;
+ }
+
+ // Either underflow or would exceed current
+ _ => {
+ self.current = self.current_end;
+ return None;
+ }
+ }
+
+ self.next_back()
+ }
}
/// all arrays have known size.