jhorstmann commented on code in PR #7937:
URL: https://github.com/apache/arrow-rs/pull/7937#discussion_r2214287898
##########
arrow-ord/src/sort.rs:
##########
@@ -180,13 +180,41 @@ where
// partition indices into valid and null indices
fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
- match array.null_count() {
- // faster path
- 0 => ((0..(array.len() as u32)).collect(), vec![]),
- _ => {
- let indices = 0..(array.len() as u32);
- indices.partition(|index| array.is_valid(*index as usize))
+ let len = array.len();
+ let null_count = array.null_count();
+ match array.nulls() {
+ Some(nulls) if null_count > 0 => {
+ let mut valid_indices = Vec::with_capacity(len - null_count);
+ let mut null_indices = Vec::with_capacity(null_count);
+
+ let valid_slice = valid_indices.spare_capacity_mut();
+ let null_slice = null_indices.spare_capacity_mut();
+ let mut valid_idx = 0;
+ let mut null_idx = 0;
+
+ nulls.into_iter().enumerate().for_each(|(i, v)| {
+ if v {
+ valid_slice[valid_idx].write(i as u32);
+ valid_idx += 1;
+ } else {
+ null_slice[null_idx].write(i as u32);
+ null_idx += 1;
+ }
+ });
+
+ assert_eq!(null_idx, null_count);
Review Comment:
I'm pretty sure these asserts can never fail, but they also add no
meaningful overhead: each runs only once, whereas the loop above already
performs about 2 comparisons *per array element*.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]