This is an automated email from the ASF dual-hosted git repository. tison pushed a commit to branch sort-pair-table in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
commit d497a1eae39e8d3e0970eeba6bcc5d974b499ddd Author: tison <[email protected]> AuthorDate: Sun Feb 8 23:23:58 2026 +0800 refactor: sort PairTable values with std sort_unstable Signed-off-by: tison <[email protected]> --- datasketches/src/cpc/compression.rs | 11 ++--- datasketches/src/cpc/pair_table.rs | 93 ------------------------------------- 2 files changed, 4 insertions(+), 100 deletions(-) diff --git a/datasketches/src/cpc/compression.rs b/datasketches/src/cpc/compression.rs index 2b3a08a..752ad9d 100644 --- a/datasketches/src/cpc/compression.rs +++ b/datasketches/src/cpc/compression.rs @@ -28,7 +28,6 @@ use crate::cpc::compression_data::LENGTH_LIMITED_UNARY_ENCODING_TABLE65; use crate::cpc::determine_correct_offset; use crate::cpc::determine_flavor; use crate::cpc::pair_table::PairTable; -use crate::cpc::pair_table::introspective_insertion_sort; #[derive(Default)] pub(super) struct CompressedState { @@ -70,7 +69,7 @@ impl CompressedState { fn compress_sparse_flavor(&mut self, source: &CpcSketch) { debug_assert!(source.sliding_window.is_empty()); let mut pairs = source.surprising_value_table().unwrapping_get_items(); - introspective_insertion_sort(&mut pairs); + pairs.sort_unstable(); self.compress_surprising_values(&pairs, source.lg_k()); } @@ -80,9 +79,7 @@ impl CompressedState { let k = 1 << source.lg_k(); let mut pairs = source.surprising_value_table().unwrapping_get_items(); - if !pairs.is_empty() { - introspective_insertion_sort(&mut pairs); - } + pairs.sort_unstable(); let num_pairs_from_table = pairs.len(); let num_pairs_from_window = (source.num_coupons() as usize) - num_pairs_from_table; @@ -141,7 +138,7 @@ impl CompressedState { *pair -= 8; } - introspective_insertion_sort(&mut pairs); + pairs.sort_unstable(); self.compress_surprising_values(&pairs, source.lg_k()); } } @@ -171,7 +168,7 @@ impl CompressedState { *pair = (row << 6) | (col as u32); } - introspective_insertion_sort(&mut pairs); + pairs.sort_unstable(); self.compress_surprising_values(&pairs, source.lg_k()); } } diff --git a/datasketches/src/cpc/pair_table.rs b/datasketches/src/cpc/pair_table.rs index 5111703..6e23067 100644 --- a/datasketches/src/cpc/pair_table.rs +++ b/datasketches/src/cpc/pair_table.rs @@ -247,96 +247,3 @@ impl PairTable { } } } - -/// This should be used pair with [`PairTable::unwrapping_get_items`]. -/// -/// In applications where the input array is already nearly sorted, insertion sort runs in linear -/// time with a very small constant. -/// -/// This introspective version of insertion sort protects against the quadratic cost of sorting bad -/// input arrays. It keeps track of how much work has been done, and if that exceeds a constant -/// times the array length, it switches to a different sorting algorithm. -pub fn introspective_insertion_sort(a: &mut [u32]) { - let cost_limit = 8 * a.len(); - - let mut cost = 0; - for i in 1..a.len() { - let mut j = i; - let v = a[i]; - while j >= 1 && v < a[j - 1] { - a[j] = a[j - 1]; - j -= 1; - } - a[j] = v; - cost += i - j; // distance moved is a measure of work - if cost > cost_limit { - knuth_shell_sort3(a); - return; - } - } -} - -fn knuth_shell_sort3(a: &mut [u32]) { - let len = a.len(); - - let mut h = 0; - while h < len / 9 { - h = 3 * h + 1; - } - - while h > 0 { - for i in h..len { - let v = a[i]; - let mut j = i; - while j >= h && v < a[j - h] { - a[j] = a[j - h]; - j -= h; - } - a[j] = v; - } - h /= 3; - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_sort_1() { - let data = (0..10) - .map(|_| rand::random_range(0..10000)) - .collect::<Vec<_>>(); - let mut sorted = data.clone(); - introspective_insertion_sort(&mut sorted); - assert!( - sorted.is_sorted(), - "data was not sorted correctly: origin={data:?}, sorted={sorted:?}" - ); - } - - #[test] - fn test_sort_2() { - let data = (0..10) - .map(|_| rand::random_range(0..10000) + 3_000_000_000) - .collect::<Vec<_>>(); - let mut sorted = data.clone(); - introspective_insertion_sort(&mut sorted); - assert!( - sorted.is_sorted(), - "data was not sorted correctly: origin={data:?}, sorted={sorted:?}" - ); - } - - #[test] - fn test_sort_3() { - let len = 20; - let data = (0..len).map(|i| len - i + 1).collect::<Vec<_>>(); - let mut sorted = data.clone(); - introspective_insertion_sort(&mut sorted); - assert!( - sorted.is_sorted(), - "data was not sorted correctly: origin={data:?}, sorted={sorted:?}" - ); - } -} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
