jorgecarleitao commented on a change in pull request #9602: URL: https://github.com/apache/arrow/pull/9602#discussion_r588970908
########## File path: rust/arrow/src/compute/kernels/sort.rs ########## @@ -37,10 +37,33 @@ use TimeUnit::*; /// Returns an `ArrowError::ComputeError(String)` if the array type is either unsupported by `sort_to_indices` or `take`. /// pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> { - let indices = sort_to_indices(values, options)?; + let indices = sort_to_indices(values, options, None)?; take(values.as_ref(), &indices, None) } +/// Sort the `ArrayRef` partially. +/// Return an sorted `ArrayRef`, discarding the data after limit. +pub fn partial_sort( + values: &ArrayRef, + options: Option<SortOptions>, + limit: Option<usize>, +) -> Result<ArrayRef> { + let indices = sort_to_indices(values, options, limit)?; + take(values.as_ref(), &indices, None) +} + +#[inline] +fn sort_by<T, F>(array: &mut [T], limit: usize, cmp: F) +where + F: FnMut(&T, &T) -> Ordering, Review comment: Why does it need to be `FnMut`? ########## File path: rust/arrow/src/compute/kernels/sort.rs ########## @@ -76,118 +99,175 @@ where // partition indices into valid and null indices fn partition_validity(array: &ArrayRef) -> (Vec<u32>, Vec<u32>) { - let indices = 0..(array.len().to_u32().unwrap()); - indices.partition(|index| array.is_valid(*index as usize)) + match array.null_count() { + // faster path + 0 => ((0..(array.len() as u32)).collect(), vec![]), + _ => { + let indices = 0..(array.len() as u32); + indices.partition(|index| array.is_valid(*index as usize)) + } + } } /// Sort elements from `ArrayRef` into an unsigned integer (`UInt32Array`) of indices. /// For floating point arrays any NaN values are considered to be greater than any other non-null value Review comment: The doc could be updated :) ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org