This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new a2e2fa762e Don't Reorder Nulls in sort_to_indices (#4545) (#4603)
a2e2fa762e is described below

commit a2e2fa762e0af4bfffe92aa650266efa279293f9
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Aug 1 10:26:28 2023 +0100

    Don't Reorder Nulls in sort_to_indices (#4545) (#4603)
---
 arrow-ord/src/sort.rs | 65 ++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 35 deletions(-)

diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index c623475c0b..c3e9e26ec0 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -455,7 +455,7 @@ fn sort_boolean(
             .map(|index| (index, values.value(index as usize)))
             .collect::<Vec<(u32, bool)>>();
 
-        sort_valids(descending, &mut valids, &mut null_indices, len, cmp);
+        sort_valids(descending, &mut valids, len, cmp);
         valids
     } else {
         // when limit is not present, we have a better way than sorting: we 
can just partition
@@ -576,7 +576,7 @@ fn sort_dictionary<K: ArrowDictionaryKeyType>(
 // sort is instantiated a lot so we only compile this inner version for each 
native type
 fn sort_primitive_inner<T, F>(
     value_len: usize,
-    null_indices: Vec<u32>,
+    nulls: Vec<u32>,
     cmp: F,
     options: &SortOptions,
     limit: Option<usize>,
@@ -587,8 +587,6 @@ where
     T: PartialOrd,
     F: Fn(T, T) -> Ordering,
 {
-    let mut nulls = null_indices;
-
     let valids_len = valids.len();
     let nulls_len = nulls.len();
     let mut len = value_len;
@@ -597,7 +595,7 @@ where
         len = limit.min(len);
     }
 
-    sort_valids(options.descending, &mut valids, &mut nulls, len, cmp);
+    sort_valids(options.descending, &mut valids, len, cmp);
 
     // collect results directly into a buffer instead of a vec to avoid 
another aligned allocation
     let result_capacity = len * std::mem::size_of::<u32>();
@@ -884,7 +882,7 @@ where
         len = limit.min(len);
     }
 
-    sort_valids(descending, &mut valids, &mut nulls, len, cmp);
+    sort_valids(descending, &mut valids, len, cmp);
     // collect the order of valid tuplies
     let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| 
tuple.0).collect();
 
@@ -1002,7 +1000,7 @@ where
         len = limit.min(len);
     }
 
-    sort_valids(descending, &mut valids, &mut null_indices, len, cmp);
+    sort_valids(descending, &mut valids, len, cmp);
 
     let mut valid_indices: Vec<u32> = valids.iter().map(|tuple| 
tuple.0).collect();
     if options.nulls_first {
@@ -1230,10 +1228,9 @@ impl LexicographicalComparator<'_> {
     }
 }
 
-fn sort_valids<T, U>(
+fn sort_valids<T>(
     descending: bool,
     valids: &mut [(u32, T)],
-    nulls: &mut [U],
     len: usize,
     mut cmp: impl FnMut(T, T) -> Ordering,
 ) where
@@ -1244,8 +1241,6 @@ fn sort_valids<T, U>(
         sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1, b.1));
     } else {
         sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1, 
b.1).reverse());
-        // reverse to keep a stable ordering
-        nulls.reverse();
     }
 }
 
@@ -1756,7 +1751,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0], // [2, 4, 1, 3, 5, 0]
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Int16Type>(
@@ -1766,7 +1761,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Int32Type>(
@@ -1776,7 +1771,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Int64Type>(
@@ -1786,7 +1781,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Float16Type>(
@@ -1803,7 +1798,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Float32Type>(
@@ -1820,7 +1815,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         test_sort_to_indices_primitive_arrays::<Float64Type>(
@@ -1830,7 +1825,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 1, 4, 3, 5, 0],
+            vec![2, 1, 4, 3, 0, 5],
         );
 
         // descending, nulls first
@@ -1841,7 +1836,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
         );
 
         test_sort_to_indices_primitive_arrays::<Int16Type>(
@@ -1851,7 +1846,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+            vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
         );
 
         test_sort_to_indices_primitive_arrays::<Int32Type>(
@@ -1861,7 +1856,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3],
+            vec![0, 5, 2, 1, 4, 3],
         );
 
         test_sort_to_indices_primitive_arrays::<Int64Type>(
@@ -1871,7 +1866,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3],
+            vec![0, 5, 2, 1, 4, 3],
         );
 
         test_sort_to_indices_primitive_arrays::<Float16Type>(
@@ -1888,7 +1883,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3],
+            vec![0, 5, 2, 1, 4, 3],
         );
 
         test_sort_to_indices_primitive_arrays::<Float32Type>(
@@ -1898,7 +1893,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3],
+            vec![0, 5, 2, 1, 4, 3],
         );
 
         test_sort_to_indices_primitive_arrays::<Float64Type>(
@@ -1908,7 +1903,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![5, 0, 2, 1, 4, 3],
+            vec![0, 5, 2, 1, 4, 3],
         );
 
         // valid values less than limit with extra nulls
@@ -2007,7 +2002,7 @@ mod tests {
                 nulls_first: true,
             }),
             Some(3),
-            vec![5, 0, 2],
+            vec![0, 5, 2],
         );
 
         // valid values less than limit with extra nulls
@@ -2070,7 +2065,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![1, 5, 3, 2, 4, 6, 0],
+            vec![1, 5, 3, 2, 4, 0, 6],
         );
         // decimal null_first and descending
         test_sort_to_indices_decimal128_array(
@@ -2080,7 +2075,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![6, 0, 1, 5, 3, 2, 4],
+            vec![0, 6, 1, 5, 3, 2, 4],
         );
         // decimal null_first
         test_sort_to_indices_decimal128_array(
@@ -2117,7 +2112,7 @@ mod tests {
                 nulls_first: true,
             }),
             Some(3),
-            vec![6, 0, 1],
+            vec![0, 6, 1],
         );
         // limit null_first
         test_sort_to_indices_decimal128_array(
@@ -2154,7 +2149,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![1, 5, 3, 2, 4, 6, 0],
+            vec![1, 5, 3, 2, 4, 0, 6],
         );
         // decimal null_first and descending
         test_sort_to_indices_decimal256_array(
@@ -2167,7 +2162,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![6, 0, 1, 5, 3, 2, 4],
+            vec![0, 6, 1, 5, 3, 2, 4],
         );
         // decimal null_first
         test_sort_to_indices_decimal256_array(
@@ -2216,7 +2211,7 @@ mod tests {
                 nulls_first: true,
             }),
             Some(3),
-            vec![6, 0, 1],
+            vec![0, 6, 1],
         );
         // limit null_first
         test_sort_to_indices_decimal256_array(
@@ -2938,7 +2933,7 @@ mod tests {
                 nulls_first: false,
             }),
             None,
-            vec![2, 4, 1, 5, 3, 0],
+            vec![2, 4, 1, 5, 0, 3],
         );
 
         test_sort_to_indices_string_arrays(
@@ -2972,7 +2967,7 @@ mod tests {
                 nulls_first: true,
             }),
             None,
-            vec![3, 0, 2, 4, 1, 5],
+            vec![0, 3, 2, 4, 1, 5],
         );
 
         test_sort_to_indices_string_arrays(
@@ -2989,7 +2984,7 @@ mod tests {
                 nulls_first: true,
             }),
             Some(3),
-            vec![3, 0, 2],
+            vec![0, 3, 2],
         );
 
         // valid values less than limit with extra nulls

Reply via email to