This is an automated email from the ASF dual-hosted git repository.
psiace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 346f7a2 refactor: sort PairTable values with std sort_unstable (#86)
346f7a2 is described below
commit 346f7a2587b2f941dd85fde319db875e4108e077
Author: tison <[email protected]>
AuthorDate: Mon Feb 9 00:11:46 2026 +0800
refactor: sort PairTable values with std sort_unstable (#86)
Signed-off-by: tison <[email protected]>
---
datasketches/src/cpc/compression.rs | 11 ++---
datasketches/src/cpc/pair_table.rs | 93 -------------------------------------
2 files changed, 4 insertions(+), 100 deletions(-)
diff --git a/datasketches/src/cpc/compression.rs
b/datasketches/src/cpc/compression.rs
index 2b3a08a..752ad9d 100644
--- a/datasketches/src/cpc/compression.rs
+++ b/datasketches/src/cpc/compression.rs
@@ -28,7 +28,6 @@ use
crate::cpc::compression_data::LENGTH_LIMITED_UNARY_ENCODING_TABLE65;
use crate::cpc::determine_correct_offset;
use crate::cpc::determine_flavor;
use crate::cpc::pair_table::PairTable;
-use crate::cpc::pair_table::introspective_insertion_sort;
#[derive(Default)]
pub(super) struct CompressedState {
@@ -70,7 +69,7 @@ impl CompressedState {
fn compress_sparse_flavor(&mut self, source: &CpcSketch) {
debug_assert!(source.sliding_window.is_empty());
let mut pairs = source.surprising_value_table().unwrapping_get_items();
- introspective_insertion_sort(&mut pairs);
+ pairs.sort_unstable();
self.compress_surprising_values(&pairs, source.lg_k());
}
@@ -80,9 +79,7 @@ impl CompressedState {
let k = 1 << source.lg_k();
let mut pairs = source.surprising_value_table().unwrapping_get_items();
- if !pairs.is_empty() {
- introspective_insertion_sort(&mut pairs);
- }
+ pairs.sort_unstable();
let num_pairs_from_table = pairs.len();
let num_pairs_from_window = (source.num_coupons() as usize) -
num_pairs_from_table;
@@ -141,7 +138,7 @@ impl CompressedState {
*pair -= 8;
}
- introspective_insertion_sort(&mut pairs);
+ pairs.sort_unstable();
self.compress_surprising_values(&pairs, source.lg_k());
}
}
@@ -171,7 +168,7 @@ impl CompressedState {
*pair = (row << 6) | (col as u32);
}
- introspective_insertion_sort(&mut pairs);
+ pairs.sort_unstable();
self.compress_surprising_values(&pairs, source.lg_k());
}
}
diff --git a/datasketches/src/cpc/pair_table.rs
b/datasketches/src/cpc/pair_table.rs
index 5111703..6e23067 100644
--- a/datasketches/src/cpc/pair_table.rs
+++ b/datasketches/src/cpc/pair_table.rs
@@ -247,96 +247,3 @@ impl PairTable {
}
}
}
-
-/// This should be used pair with [`PairTable::unwrapping_get_items`].
-///
-/// In applications where the input array is already nearly sorted, insertion
sort runs in linear
-/// time with a very small constant.
-///
-/// This introspective version of insertion sort protects against the
quadratic cost of sorting bad
-/// input arrays. It keeps track of how much work has been done, and if that
exceeds a constant
-/// times the array length, it switches to a different sorting algorithm.
-pub fn introspective_insertion_sort(a: &mut [u32]) {
- let cost_limit = 8 * a.len();
-
- let mut cost = 0;
- for i in 1..a.len() {
- let mut j = i;
- let v = a[i];
- while j >= 1 && v < a[j - 1] {
- a[j] = a[j - 1];
- j -= 1;
- }
- a[j] = v;
- cost += i - j; // distance moved is a measure of work
- if cost > cost_limit {
- knuth_shell_sort3(a);
- return;
- }
- }
-}
-
-fn knuth_shell_sort3(a: &mut [u32]) {
- let len = a.len();
-
- let mut h = 0;
- while h < len / 9 {
- h = 3 * h + 1;
- }
-
- while h > 0 {
- for i in h..len {
- let v = a[i];
- let mut j = i;
- while j >= h && v < a[j - h] {
- a[j] = a[j - h];
- j -= h;
- }
- a[j] = v;
- }
- h /= 3;
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_sort_1() {
- let data = (0..10)
- .map(|_| rand::random_range(0..10000))
- .collect::<Vec<_>>();
- let mut sorted = data.clone();
- introspective_insertion_sort(&mut sorted);
- assert!(
- sorted.is_sorted(),
- "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
- );
- }
-
- #[test]
- fn test_sort_2() {
- let data = (0..10)
- .map(|_| rand::random_range(0..10000) + 3_000_000_000)
- .collect::<Vec<_>>();
- let mut sorted = data.clone();
- introspective_insertion_sort(&mut sorted);
- assert!(
- sorted.is_sorted(),
- "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
- );
- }
-
- #[test]
- fn test_sort_3() {
- let len = 20;
- let data = (0..len).map(|i| len - i + 1).collect::<Vec<_>>();
- let mut sorted = data.clone();
- introspective_insertion_sort(&mut sorted);
- assert!(
- sorted.is_sorted(),
- "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
- );
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]