This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new c70804a728 Add find_nth_set_bit_position (#9151)
c70804a728 is described below

commit c70804a728287f38fc51b04c8dbb1e7b72bceb9b
Author: DaniĆ«l Heres <[email protected]>
AuthorDate: Thu Jan 15 12:28:41 2026 +0100

    Add find_nth_set_bit_position (#9151)
    
    # Which issue does this PR close?
    
    Part of #9136
    
    # Rationale for this change
    
    We want to reduce copies when filtering.
    
    # What changes are included in this PR?
    
    
    # Are these changes tested?
    
    Yes, new tests
    
    
    # Are there any user-facing changes?
---
 arrow-buffer/src/buffer/boolean.rs | 77 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/arrow-buffer/src/buffer/boolean.rs 
b/arrow-buffer/src/buffer/boolean.rs
index ff836bf287..6e3d7686a1 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -366,6 +366,20 @@ impl BooleanBuffer {
             .count_set_bits_offset(self.bit_offset, self.bit_len)
     }
 
+    /// Finds the position of the n-th set bit (1-based) starting from `start` 
index.
+    /// If fewer than `n` set bits are found, returns the length of the buffer.
+    pub fn find_nth_set_bit_position(&self, start: usize, n: usize) -> usize {
+        if n == 0 {
+            return start;
+        }
+
+        self.slice(start, self.bit_len - start)
+            .set_indices()
+            .nth(n - 1)
+            .map(|idx| start + idx + 1)
+            .unwrap_or(self.bit_len)
+    }
+
     /// Returns a [`BitChunks`] instance which can be used to iterate over
     /// this buffer's bits in `u64` chunks
     #[inline]
@@ -823,4 +837,67 @@ mod tests {
             assert_eq!(finished.value(i), v, "at index {}", i);
         }
     }
+
+    #[test]
+    fn test_find_nth_set_bit_position() {
+        let bools = vec![true, false, true, true, false, true];
+        let buffer = BooleanBuffer::from(bools);
+
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 1);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 3);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 4);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 6);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 5), 6);
+
+        assert_eq!(buffer.clone().find_nth_set_bit_position(1, 1), 3);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 1), 4);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(3, 2), 6);
+    }
+
+    #[test]
+    fn test_find_nth_set_bit_position_large() {
+        let mut bools = vec![false; 1000];
+        bools[100] = true;
+        bools[500] = true;
+        bools[999] = true;
+        let buffer = BooleanBuffer::from(bools);
+
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 101);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 2), 501);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 3), 1000);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 4), 1000);
+
+        assert_eq!(buffer.clone().find_nth_set_bit_position(101, 1), 501);
+    }
+
+    #[test]
+    fn test_find_nth_set_bit_position_sliced() {
+        let bools = vec![false, true, false, true, true, false, true]; // [F, 
T, F, T, T, F, T]
+        let buffer = BooleanBuffer::from(bools);
+        let slice = buffer.slice(1, 6); // [T, F, T, T, F, T]
+
+        assert_eq!(slice.len(), 6);
+        // Logical indices: 0, 1, 2, 3, 4, 5
+        // Logical values: T, F, T, T, F, T
+
+        assert_eq!(slice.clone().find_nth_set_bit_position(0, 1), 1);
+        assert_eq!(slice.clone().find_nth_set_bit_position(0, 2), 3);
+        assert_eq!(slice.clone().find_nth_set_bit_position(0, 3), 4);
+        assert_eq!(slice.clone().find_nth_set_bit_position(0, 4), 6);
+    }
+
+    #[test]
+    fn test_find_nth_set_bit_position_all_set() {
+        let buffer = BooleanBuffer::new_set(100);
+        for i in 1..=100 {
+            assert_eq!(buffer.clone().find_nth_set_bit_position(0, i), i);
+        }
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 101), 100);
+    }
+
+    #[test]
+    fn test_find_nth_set_bit_position_none_set() {
+        let buffer = BooleanBuffer::new_unset(100);
+        assert_eq!(buffer.clone().find_nth_set_bit_position(0, 1), 100);
+    }
 }

Reply via email to