Dandandan commented on code in PR #9284:
URL: https://github.com/apache/arrow-rs/pull/9284#discussion_r2737766556
##########
arrow-buffer/src/util/bit_chunk_iterator.rs:
##########
@@ -158,23 +158,190 @@ impl<'a> UnalignedBitChunk<'a> {
/// Returns an iterator over the chunks
pub fn iter(&self) -> UnalignedBitChunkIterator<'a> {
- self.prefix
- .into_iter()
- .chain(self.chunks.iter().cloned())
- .chain(self.suffix)
+ UnalignedBitChunkIterator {
+ prefix: self.prefix,
+ chunks: self.chunks,
+ suffix: self.suffix,
+ }
+ }
+
+ /// Returns a zipped iterator over two [`UnalignedBitChunk`]
+ #[inline]
+    pub fn zip(&self, other: &UnalignedBitChunk<'a>) -> UnalignedBitChunkZipIterator<'a> {
+ UnalignedBitChunkZipIterator {
+ left: self.iter(),
+ right: other.iter(),
+ }
}
/// Counts the number of ones
pub fn count_ones(&self) -> usize {
- self.iter().map(|x| x.count_ones() as usize).sum()
+        let prefix_count = self.prefix.map(|x| x.count_ones() as usize).unwrap_or(0);
+        let chunks_count: usize = self.chunks.iter().map(|&x| x.count_ones() as usize).sum();
+        let suffix_count = self.suffix.map(|x| x.count_ones() as usize).unwrap_or(0);
+ prefix_count + chunks_count + suffix_count
+ }
+}
+
+/// An iterator over the chunks of an [`UnalignedBitChunk`]
+#[derive(Debug, Clone)]
+pub struct UnalignedBitChunkIterator<'a> {
+ prefix: Option<u64>,
+ chunks: &'a [u64],
+ suffix: Option<u64>,
+}
+
+impl<'a> Iterator for UnalignedBitChunkIterator<'a> {
+ type Item = u64;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(prefix) = self.prefix.take() {
+ return Some(prefix);
+ }
+ if let Some((&first, rest)) = self.chunks.split_first() {
+ self.chunks = rest;
+ return Some(first);
+ }
+ self.suffix.take()
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = self.len();
+ (len, Some(len))
+ }
+
+ #[inline]
+ fn fold<B, F>(mut self, init: B, mut f: F) -> B
+ where
+ F: FnMut(B, Self::Item) -> B,
+ {
+ let mut acc = init;
+ if let Some(prefix) = self.prefix.take() {
+ acc = f(acc, prefix);
+ }
+ for &chunk in self.chunks {
+ acc = f(acc, chunk);
+ }
+ self.chunks = &[];
+ if let Some(suffix) = self.suffix.take() {
+ acc = f(acc, suffix);
+ }
+ acc
+ }
+}
+
+impl<'a> UnalignedBitChunkIterator<'a> {
+ /// Returns a zipped iterator over two [`UnalignedBitChunkIterator`]
+ #[inline]
+    pub fn zip(self, other: UnalignedBitChunkIterator<'a>) -> UnalignedBitChunkZipIterator<'a> {
+ UnalignedBitChunkZipIterator {
+ left: self,
+ right: other,
+ }
+ }
+}
+
+impl ExactSizeIterator for UnalignedBitChunkIterator<'_> {
+ #[inline]
+ fn len(&self) -> usize {
+        self.prefix.is_some() as usize + self.chunks.len() + self.suffix.is_some() as usize
+ }
+}
+
+impl std::iter::FusedIterator for UnalignedBitChunkIterator<'_> {}
+
+impl<'a> DoubleEndedIterator for UnalignedBitChunkIterator<'a> {
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ if let Some(suffix) = self.suffix.take() {
+ return Some(suffix);
+ }
+ if let Some((&last, rest)) = self.chunks.split_last() {
+ self.chunks = rest;
+ return Some(last);
+ }
+ self.prefix.take()
+ }
+}
+
+/// An iterator over zipped [`UnalignedBitChunk`]
+#[derive(Debug)]
+pub struct UnalignedBitChunkZipIterator<'a> {
+ left: UnalignedBitChunkIterator<'a>,
+ right: UnalignedBitChunkIterator<'a>,
+}
+
+impl<'a> Iterator for UnalignedBitChunkZipIterator<'a> {
+ type Item = (u64, u64);
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ Some((self.left.next()?, self.right.next()?))
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.left.size_hint()
+ }
+
+ #[inline]
+ fn fold<B, F>(mut self, init: B, mut f: F) -> B
Review Comment:
The idea here is to implement an improved `fold` implementation so
`from_trusted_len_iter` is fast (I used AI assistance to come up with the
implementation, but it seems to look normal)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]