This is an automated email from the ASF dual-hosted git repository.

psiace pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 346f7a2  refactor: sort PairTable values with std sort_unstable (#86)
346f7a2 is described below

commit 346f7a2587b2f941dd85fde319db875e4108e077
Author: tison <[email protected]>
AuthorDate: Mon Feb 9 00:11:46 2026 +0800

    refactor: sort PairTable values with std sort_unstable (#86)
    
    Signed-off-by: tison <[email protected]>
---
 datasketches/src/cpc/compression.rs | 11 ++---
 datasketches/src/cpc/pair_table.rs  | 93 -------------------------------------
 2 files changed, 4 insertions(+), 100 deletions(-)

diff --git a/datasketches/src/cpc/compression.rs 
b/datasketches/src/cpc/compression.rs
index 2b3a08a..752ad9d 100644
--- a/datasketches/src/cpc/compression.rs
+++ b/datasketches/src/cpc/compression.rs
@@ -28,7 +28,6 @@ use 
crate::cpc::compression_data::LENGTH_LIMITED_UNARY_ENCODING_TABLE65;
 use crate::cpc::determine_correct_offset;
 use crate::cpc::determine_flavor;
 use crate::cpc::pair_table::PairTable;
-use crate::cpc::pair_table::introspective_insertion_sort;
 
 #[derive(Default)]
 pub(super) struct CompressedState {
@@ -70,7 +69,7 @@ impl CompressedState {
     fn compress_sparse_flavor(&mut self, source: &CpcSketch) {
         debug_assert!(source.sliding_window.is_empty());
         let mut pairs = source.surprising_value_table().unwrapping_get_items();
-        introspective_insertion_sort(&mut pairs);
+        pairs.sort_unstable();
         self.compress_surprising_values(&pairs, source.lg_k());
     }
 
@@ -80,9 +79,7 @@ impl CompressedState {
 
         let k = 1 << source.lg_k();
         let mut pairs = source.surprising_value_table().unwrapping_get_items();
-        if !pairs.is_empty() {
-            introspective_insertion_sort(&mut pairs);
-        }
+        pairs.sort_unstable();
         let num_pairs_from_table = pairs.len();
         let num_pairs_from_window = (source.num_coupons() as usize) - 
num_pairs_from_table;
 
@@ -141,7 +138,7 @@ impl CompressedState {
                 *pair -= 8;
             }
 
-            introspective_insertion_sort(&mut pairs);
+            pairs.sort_unstable();
             self.compress_surprising_values(&pairs, source.lg_k());
         }
     }
@@ -171,7 +168,7 @@ impl CompressedState {
                 *pair = (row << 6) | (col as u32);
             }
 
-            introspective_insertion_sort(&mut pairs);
+            pairs.sort_unstable();
             self.compress_surprising_values(&pairs, source.lg_k());
         }
     }
diff --git a/datasketches/src/cpc/pair_table.rs 
b/datasketches/src/cpc/pair_table.rs
index 5111703..6e23067 100644
--- a/datasketches/src/cpc/pair_table.rs
+++ b/datasketches/src/cpc/pair_table.rs
@@ -247,96 +247,3 @@ impl PairTable {
         }
     }
 }
-
-/// This should be used pair with [`PairTable::unwrapping_get_items`].
-///
-/// In applications where the input array is already nearly sorted, insertion 
sort runs in linear
-/// time with a very small constant.
-///
-/// This introspective version of insertion sort protects against the 
quadratic cost of sorting bad
-/// input arrays. It keeps track of how much work has been done, and if that 
exceeds a constant
-/// times the array length, it switches to a different sorting algorithm.
-pub fn introspective_insertion_sort(a: &mut [u32]) {
-    let cost_limit = 8 * a.len();
-
-    let mut cost = 0;
-    for i in 1..a.len() {
-        let mut j = i;
-        let v = a[i];
-        while j >= 1 && v < a[j - 1] {
-            a[j] = a[j - 1];
-            j -= 1;
-        }
-        a[j] = v;
-        cost += i - j; // distance moved is a measure of work
-        if cost > cost_limit {
-            knuth_shell_sort3(a);
-            return;
-        }
-    }
-}
-
-fn knuth_shell_sort3(a: &mut [u32]) {
-    let len = a.len();
-
-    let mut h = 0;
-    while h < len / 9 {
-        h = 3 * h + 1;
-    }
-
-    while h > 0 {
-        for i in h..len {
-            let v = a[i];
-            let mut j = i;
-            while j >= h && v < a[j - h] {
-                a[j] = a[j - h];
-                j -= h;
-            }
-            a[j] = v;
-        }
-        h /= 3;
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_sort_1() {
-        let data = (0..10)
-            .map(|_| rand::random_range(0..10000))
-            .collect::<Vec<_>>();
-        let mut sorted = data.clone();
-        introspective_insertion_sort(&mut sorted);
-        assert!(
-            sorted.is_sorted(),
-            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
-        );
-    }
-
-    #[test]
-    fn test_sort_2() {
-        let data = (0..10)
-            .map(|_| rand::random_range(0..10000) + 3_000_000_000)
-            .collect::<Vec<_>>();
-        let mut sorted = data.clone();
-        introspective_insertion_sort(&mut sorted);
-        assert!(
-            sorted.is_sorted(),
-            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
-        );
-    }
-
-    #[test]
-    fn test_sort_3() {
-        let len = 20;
-        let data = (0..len).map(|i| len - i + 1).collect::<Vec<_>>();
-        let mut sorted = data.clone();
-        introspective_insertion_sort(&mut sorted);
-        assert!(
-            sorted.is_sorted(),
-            "data was not sorted correctly: origin={data:?}, sorted={sorted:?}"
-        );
-    }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to