Dandandan commented on a change in pull request #9602:
URL: https://github.com/apache/arrow/pull/9602#discussion_r593904547



##########
File path: rust/arrow/src/compute/kernels/sort.rs
##########
@@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> 
Result<UInt32Array> {
     };
 
     let mut value_indices = (0..row_count).collect::<Vec<usize>>();
-    value_indices.sort_by(lex_comparator);
+    let mut len = value_indices.len();
+
+    if let Some(limit) = limit {
+        len = limit.min(len);
+    }
+    sort_by(&mut value_indices, len, lex_comparator);
 
     Ok(UInt32Array::from(
-        value_indices
-            .into_iter()
-            .map(|i| i as u32)
+        (&value_indices)[0..len]
+            .iter()
+            .map(|i| *i as u32)
             .collect::<Vec<u32>>(),
     ))
 }
 
+pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
+where
+    F: FnMut(&T, &T) -> Ordering,
+{
+    let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less);
+    before.sort_unstable_by(is_less);

Review comment:
       Clear, thanks @sundy-li.
   
   Maybe some docs can be added about this?
   
   I think it probably is not really a problem, but might be surprising by 
someone using the kernel. And we might want to see whether we could use a 
(faster) unstable version of the normal sort kernel as well (I think that could 
be used by DataFusion as well I think).
   
   What do you think @alamb 

##########
File path: rust/arrow/src/compute/kernels/sort.rs
##########
@@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> 
Result<UInt32Array> {
     };
 
     let mut value_indices = (0..row_count).collect::<Vec<usize>>();
-    value_indices.sort_by(lex_comparator);
+    let mut len = value_indices.len();
+
+    if let Some(limit) = limit {
+        len = limit.min(len);
+    }
+    sort_by(&mut value_indices, len, lex_comparator);
 
     Ok(UInt32Array::from(
-        value_indices
-            .into_iter()
-            .map(|i| i as u32)
+        (&value_indices)[0..len]
+            .iter()
+            .map(|i| *i as u32)
             .collect::<Vec<u32>>(),
     ))
 }
 
+pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
+where
+    F: FnMut(&T, &T) -> Ordering,
+{
+    let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less);
+    before.sort_unstable_by(is_less);

Review comment:
       Clear, thanks @sundy-li.
   
   Maybe some docs can be added about this?
   
   I think it probably is not really a problem, but might be surprising by 
someone using the kernel. And we might want to see whether we could use a 
(faster) unstable version of the normal sort kernel as well (Ithat could be 
used by DataFusion as well I think).
   
   What do you think @alamb 

##########
File path: rust/arrow/src/compute/kernels/sort.rs
##########
@@ -686,90 +815,124 @@ pub fn lexsort_to_indices(columns: &[SortColumn]) -> 
Result<UInt32Array> {
     };
 
     let mut value_indices = (0..row_count).collect::<Vec<usize>>();
-    value_indices.sort_by(lex_comparator);
+    let mut len = value_indices.len();
+
+    if let Some(limit) = limit {
+        len = limit.min(len);
+    }
+    sort_by(&mut value_indices, len, lex_comparator);
 
     Ok(UInt32Array::from(
-        value_indices
-            .into_iter()
-            .map(|i| i as u32)
+        (&value_indices)[0..len]
+            .iter()
+            .map(|i| *i as u32)
             .collect::<Vec<u32>>(),
     ))
 }
 
+pub fn partial_sort<T, F>(v: &mut [T], limit: usize, mut is_less: F)
+where
+    F: FnMut(&T, &T) -> Ordering,
+{
+    let (before, _mid, _after) = v.select_nth_unstable_by(limit, &mut is_less);
+    before.sort_unstable_by(is_less);

Review comment:
       Clear, thanks @sundy-li.
   
   Maybe some docs can be added about this?
   
   I think it probably is not really a problem, but might be surprising by 
someone using the kernel. And we might want to see whether we could use a 
(faster) unstable version of the normal sort kernel as well (that could be used 
by DataFusion as well I think).
   
   What do you think @alamb 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to