This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 780ed4092f Consolidate `filter_null_mask` into
`FilterPredicate::filter_nulls` (#10049)
780ed4092f is described below
commit 780ed4092fa04883e268fc8b8b598fcd233761b4
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Jun 22 14:01:14 2026 -0400
Consolidate `filter_null_mask` into `FilterPredicate::filter_nulls` (#10049)
# Which issue does this PR close?
- Follow-up that I noticed during review of #9986
# Rationale for this change
`arrow-select` had two near-identical ways to compute a filtered null
mask:
- a private free function `filter_null_mask`
- the public method `FilterPredicate::filter_nulls(nulls)` (added in
#9986), which just wraps `filter_null_mask`
This PR collapses them into a single, more discoverable method.
# What changes are included in this PR?
- Fold the body of the free `filter_null_mask` into
`FilterPredicate::filter_nulls`
Note: `filter_null_mask` was a private (module-local) function, never
part of the public API, so no deprecation is required — see discussion
below.
# Are these changes tested?
Covered by existing `arrow-select` filter tests (all 60 `filter` lib
tests pass, including `test_filter_struct`, `test_null_mask`, and the
filter fuzz tests). No behavior change.
# Are there any user-facing changes?
No. `filter_null_mask` was private; the public
`FilterPredicate::filter_nulls` keeps the same signature and semantics.
Co-authored-by: Claude Opus 4.8 (1M context) <[email protected]>
---
arrow-select/src/filter.rs | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs
index b1f3a21a3e..634316b16c 100644
--- a/arrow-select/src/filter.rs
+++ b/arrow-select/src/filter.rs
@@ -494,12 +494,24 @@ impl FilterPredicate {
/// filtered result has no nulls. Otherwise returns the filtered
/// [`NullBuffer`] with its precomputed null count.
pub fn filter_nulls(&self, nulls: Option<&NullBuffer>) ->
Option<NullBuffer> {
- let (null_count, nulls) = filter_null_mask(nulls, self)?;
- let buffer = BooleanBuffer::new(nulls, 0, self.count);
+ let nulls = nulls?;
+ if nulls.null_count() == 0 {
+ return None;
+ }
+
+ let nulls = filter_bits(nulls.inner(), self);
+ // The filtered `nulls` has a length of `self.count` bits and therefore
+ // the null count is this minus the number of valid bits
+ let null_count = self.count - nulls.count_set_bits_offset(0,
self.count);
+ if null_count == 0 {
+ return None;
+ }
+
+ let buffer = BooleanBuffer::new(nulls, 0, self.count);
debug_assert_eq!(null_count, buffer.len() - buffer.count_set_bits());
- // SAFETY: `filter_null_mask` derived `null_count` from `buffer`, so it
- // matches the number of unset bits as required by `new_unchecked`.
+ // SAFETY: `null_count` was derived from `buffer` above, so it matches
+ // the number of unset bits as required by `new_unchecked`.
Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
}
}
@@ -1020,13 +1032,7 @@ fn filter_struct(
.map(|column| filter_array(column, predicate))
.collect::<Result<_, _>>()?;
- let nulls = if let Some((null_count, nulls)) =
filter_null_mask(array.nulls(), predicate) {
- let buffer = BooleanBuffer::new(nulls, 0, predicate.count);
-
- Some(unsafe { NullBuffer::new_unchecked(buffer, null_count) })
- } else {
- None
- };
+ let nulls = predicate.filter_nulls(array.nulls());
Ok(unsafe {
StructArray::new_unchecked_with_length(