This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a2e2fa762e Don't Reorder Nulls in sort_to_indices (#4545) (#4603)
a2e2fa762e is described below
commit a2e2fa762e0af4bfffe92aa650266efa279293f9
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Tue Aug 1 10:26:28 2023 +0100
Don't Reorder Nulls in sort_to_indices (#4545) (#4603)
---
arrow-ord/src/sort.rs | 65 ++++++++++++++++++++++++---------------------------
1 file changed, 30 insertions(+), 35 deletions(-)
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index c623475c0b..c3e9e26ec0 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -455,7 +455,7 @@ fn sort_boolean(
.map(|index| (index, values.value(index as usize)))
.collect::<Vec<(u32, bool)>>();
- sort_valids(descending, &mut valids, &mut null_indices, len, cmp);
+ sort_valids(descending, &mut valids, len, cmp);
valids
} else {
// when limit is not present, we have a better way than sorting: we
can just partition
@@ -576,7 +576,7 @@ fn sort_dictionary<K: ArrowDictionaryKeyType>(
// sort is instantiated a lot so we only compile this inner version for each
native type
fn sort_primitive_inner<T, F>(
value_len: usize,
- null_indices: Vec<u32>,
+ nulls: Vec<u32>,
cmp: F,
options: &SortOptions,
limit: Option<usize>,
@@ -587,8 +587,6 @@ where
T: PartialOrd,
F: Fn(T, T) -> Ordering,
{
- let mut nulls = null_indices;
-
let valids_len = valids.len();
let nulls_len = nulls.len();
let mut len = value_len;
@@ -597,7 +595,7 @@ where
len = limit.min(len);
}
- sort_valids(options.descending, &mut valids, &mut nulls, len, cmp);
+ sort_valids(options.descending, &mut valids, len, cmp);
// collect results directly into a buffer instead of a vec to avoid
another aligned allocation
let result_capacity = len * std::mem::size_of::<u32>();
@@ -884,7 +882,7 @@ where
len = limit.min(len);
}
- sort_valids(descending, &mut valids, &mut nulls, len, cmp);
+ sort_valids(descending, &mut valids, len, cmp);
// collect the order of valid tuplies
let mut valid_indices: Vec<u32> = valids.iter().map(|tuple|
tuple.0).collect();
@@ -1002,7 +1000,7 @@ where
len = limit.min(len);
}
- sort_valids(descending, &mut valids, &mut null_indices, len, cmp);
+ sort_valids(descending, &mut valids, len, cmp);
let mut valid_indices: Vec<u32> = valids.iter().map(|tuple|
tuple.0).collect();
if options.nulls_first {
@@ -1230,10 +1228,9 @@ impl LexicographicalComparator<'_> {
}
}
-fn sort_valids<T, U>(
+fn sort_valids<T>(
descending: bool,
valids: &mut [(u32, T)],
- nulls: &mut [U],
len: usize,
mut cmp: impl FnMut(T, T) -> Ordering,
) where
@@ -1244,8 +1241,6 @@ fn sort_valids<T, U>(
sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1, b.1));
} else {
sort_unstable_by(valids, len.min(valids_len), |a, b| cmp(a.1,
b.1).reverse());
- // reverse to keep a stable ordering
- nulls.reverse();
}
}
@@ -1756,7 +1751,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0], // [2, 4, 1, 3, 5, 0]
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Int16Type>(
@@ -1766,7 +1761,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Int32Type>(
@@ -1776,7 +1771,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Int64Type>(
@@ -1786,7 +1781,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Float16Type>(
@@ -1803,7 +1798,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Float32Type>(
@@ -1820,7 +1815,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
test_sort_to_indices_primitive_arrays::<Float64Type>(
@@ -1830,7 +1825,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 1, 4, 3, 5, 0],
+ vec![2, 1, 4, 3, 0, 5],
);
// descending, nulls first
@@ -1841,7 +1836,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+ vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
);
test_sort_to_indices_primitive_arrays::<Int16Type>(
@@ -1851,7 +1846,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
+ vec![0, 5, 2, 1, 4, 3], // [5, 0, 2, 4, 1, 3]
);
test_sort_to_indices_primitive_arrays::<Int32Type>(
@@ -1861,7 +1856,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3],
+ vec![0, 5, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Int64Type>(
@@ -1871,7 +1866,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3],
+ vec![0, 5, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Float16Type>(
@@ -1888,7 +1883,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3],
+ vec![0, 5, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Float32Type>(
@@ -1898,7 +1893,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3],
+ vec![0, 5, 2, 1, 4, 3],
);
test_sort_to_indices_primitive_arrays::<Float64Type>(
@@ -1908,7 +1903,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![5, 0, 2, 1, 4, 3],
+ vec![0, 5, 2, 1, 4, 3],
);
// valid values less than limit with extra nulls
@@ -2007,7 +2002,7 @@ mod tests {
nulls_first: true,
}),
Some(3),
- vec![5, 0, 2],
+ vec![0, 5, 2],
);
// valid values less than limit with extra nulls
@@ -2070,7 +2065,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![1, 5, 3, 2, 4, 6, 0],
+ vec![1, 5, 3, 2, 4, 0, 6],
);
// decimal null_first and descending
test_sort_to_indices_decimal128_array(
@@ -2080,7 +2075,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![6, 0, 1, 5, 3, 2, 4],
+ vec![0, 6, 1, 5, 3, 2, 4],
);
// decimal null_first
test_sort_to_indices_decimal128_array(
@@ -2117,7 +2112,7 @@ mod tests {
nulls_first: true,
}),
Some(3),
- vec![6, 0, 1],
+ vec![0, 6, 1],
);
// limit null_first
test_sort_to_indices_decimal128_array(
@@ -2154,7 +2149,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![1, 5, 3, 2, 4, 6, 0],
+ vec![1, 5, 3, 2, 4, 0, 6],
);
// decimal null_first and descending
test_sort_to_indices_decimal256_array(
@@ -2167,7 +2162,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![6, 0, 1, 5, 3, 2, 4],
+ vec![0, 6, 1, 5, 3, 2, 4],
);
// decimal null_first
test_sort_to_indices_decimal256_array(
@@ -2216,7 +2211,7 @@ mod tests {
nulls_first: true,
}),
Some(3),
- vec![6, 0, 1],
+ vec![0, 6, 1],
);
// limit null_first
test_sort_to_indices_decimal256_array(
@@ -2938,7 +2933,7 @@ mod tests {
nulls_first: false,
}),
None,
- vec![2, 4, 1, 5, 3, 0],
+ vec![2, 4, 1, 5, 0, 3],
);
test_sort_to_indices_string_arrays(
@@ -2972,7 +2967,7 @@ mod tests {
nulls_first: true,
}),
None,
- vec![3, 0, 2, 4, 1, 5],
+ vec![0, 3, 2, 4, 1, 5],
);
test_sort_to_indices_string_arrays(
@@ -2989,7 +2984,7 @@ mod tests {
nulls_first: true,
}),
Some(3),
- vec![3, 0, 2],
+ vec![0, 3, 2],
);
// valid values less than limit with extra nulls