This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 780ed4092f Consolidate `filter_null_mask` into 
`FilterPredicate::filter_nulls` (#10049)
780ed4092f is described below

commit 780ed4092fa04883e268fc8b8b598fcd233761b4
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Jun 22 14:01:14 2026 -0400

    Consolidate `filter_null_mask` into `FilterPredicate::filter_nulls` (#10049)
    
    # Which issue does this PR close?
    
    - Follow-up to a duplication I noticed during review of #9986
    
    # Rationale for this change
    
    `arrow-select` had two near-identical ways to compute a filtered null
    mask:
    
    - a private free function `filter_null_mask`
    - the public method `FilterPredicate::filter_nulls(nulls)` (added in
    #9986), which just wraps `filter_null_mask`
    
    This PR collapses them into a single, more discoverable method.
    
    # What changes are included in this PR?
    
    - Fold the body of the free `filter_null_mask` into
    `FilterPredicate::filter_nulls`
    
    Note: `filter_null_mask` was a private (module-local) function, never
    part of the public API, so no deprecation is required — see discussion
    below.
    
    # Are these changes tested?
    
    Covered by existing `arrow-select` filter tests (all 60 `filter` lib
    tests pass, including `test_filter_struct`, `test_null_mask`, and the
    filter fuzz tests). No behavior change.
    
    # Are there any user-facing changes?
    
    No. `filter_null_mask` was private; the public
    `FilterPredicate::filter_nulls` keeps the same signature and semantics.
    
    Co-authored-by: Claude Opus 4.8 (1M context) <[email protected]>
---
 arrow-select/src/filter.rs | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index b1f3a21a3e..634316b16c 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -494,12 +494,24 @@ impl FilterPredicate {
     /// filtered result has no nulls. Otherwise returns the filtered
     /// [`NullBuffer`] with its precomputed null count.
     pub fn filter_nulls(&self, nulls: Option<&NullBuffer>) -> 
Option<NullBuffer> {
-        let (null_count, nulls) = filter_null_mask(nulls, self)?;
-        let buffer = BooleanBuffer::new(nulls, 0, self.count);
+        let nulls = nulls?;
+        if nulls.null_count() == 0 {
+            return None;
+        }
+
+        let nulls = filter_bits(nulls.inner(), self);
+        // The filtered `nulls` has a length of `self.count` bits and therefore
+        // the null count is this minus the number of valid bits
+        let null_count = self.count - nulls.count_set_bits_offset(0, 
self.count);
 
+        if null_count == 0 {
+            return None;
+        }
+
+        let buffer = BooleanBuffer::new(nulls, 0, self.count);
         debug_assert_eq!(null_count, buffer.len() - buffer.count_set_bits());
-        // SAFETY: `filter_null_mask` derived `null_count` from `buffer`, so it
-        // matches the number of unset bits as required by `new_unchecked`.
+        // SAFETY: `null_count` was derived from `buffer` above, so it matches
+        // the number of unset bits as required by `new_unchecked`.
         Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
     }
 }
@@ -1020,13 +1032,7 @@ fn filter_struct(
         .map(|column| filter_array(column, predicate))
         .collect::<Result<_, _>>()?;
 
-    let nulls = if let Some((null_count, nulls)) = 
filter_null_mask(array.nulls(), predicate) {
-        let buffer = BooleanBuffer::new(nulls, 0, predicate.count);
-
-        Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
-    } else {
-        None
-    };
+    let nulls = predicate.filter_nulls(array.nulls());
 
     Ok(unsafe {
         StructArray::new_unchecked_with_length(

Reply via email to