ritchie46 commented on a change in pull request #341:
URL: https://github.com/apache/arrow-rs/pull/341#discussion_r639434538
##########
File path: arrow/src/compute/kernels/filter.rs
##########
@@ -225,38 +248,49 @@ pub fn filter(array: &Array, filter: &BooleanArray) -> Result<ArrayRef> {
if filter.null_count() > 0 {
// this greatly simplifies subsequent filtering code
// now we only have a boolean mask to deal with
- let array_data = filter.data_ref();
- let null_bitmap = array_data.null_buffer().unwrap();
- let mask = filter.values();
- let offset = filter.offset();
-
- let new_mask = buffer_bin_and(mask, offset, null_bitmap, offset, filter.len());
-
- let array_data = ArrayData::builder(DataType::Boolean)
- .len(filter.len())
- .add_buffer(new_mask)
- .build();
- let filter = BooleanArray::from(array_data);
+ let filter = prep_null_mask_filter(filter);
// fully qualified syntax, because we have an argument with the same name
return crate::compute::kernels::filter::filter(array, &filter);
}
let iter = SlicesIterator::new(filter);
-
- let mut mutable =
- MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count);
- iter.for_each(|(start, end)| mutable.extend(0, start, end));
- let data = mutable.freeze();
- Ok(make_array(data))
+ match iter.filter_count {
+ 0 => {
+ // return empty
+ Ok(new_empty_array(array.data_type()))
+ }
+ len if len == array.len() => {
+ // return all
+ let data = array.data().clone();
+ Ok(make_array(data))
+ }
+ _ => {
+ // actually filter
+ let mut mutable =
+ MutableArrayData::new(vec![array.data_ref()], false, iter.filter_count);
+ iter.for_each(|(start, end)| mutable.extend(0, start, end));
+ let data = mutable.freeze();
+ Ok(make_array(data))
+ }
+ }
}
/// Returns a new [RecordBatch] with arrays containing only values matching the filter.
-/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered.
-/// Therefore, it is considered undefined behavior to pass `filter` with null values.
pub fn filter_record_batch(
record_batch: &RecordBatch,
filter: &BooleanArray,
Review comment:
Yes, it seems to me like a better name as well. :+1:
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org