This is an automated email from the ASF dual-hosted git repository.

tison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 81e619f  feat: add halve and decay to countmin sketch of unsigned 
values (#71)
81e619f is described below

commit 81e619f3fb37ea1abaf9a8efdd5743d60d3b62e1
Author: Chojan Shang <[email protected]>
AuthorDate: Tue Jan 27 17:24:58 2026 +0800

    feat: add halve and decay to countmin sketch of unsigned values (#71)
    
    Co-authored-by: tison <[email protected]>
---
 datasketches/src/countmin/mod.rs       |  12 ++-
 datasketches/src/countmin/sketch.rs    | 174 +++++++++++++++++++-----------
 datasketches/src/countmin/value.rs     | 187 +++++++++++++++++++++++++++++++++
 datasketches/src/frequencies/sketch.rs |   2 +-
 datasketches/tests/countmin_test.rs    | 151 +++++++++++++++++++++-----
 5 files changed, 434 insertions(+), 92 deletions(-)

diff --git a/datasketches/src/countmin/mod.rs b/datasketches/src/countmin/mod.rs
index 9b427e9..254907e 100644
--- a/datasketches/src/countmin/mod.rs
+++ b/datasketches/src/countmin/mod.rs
@@ -24,7 +24,7 @@
 //!
 //! ```rust
 //! # use datasketches::countmin::CountMinSketch;
-//! let mut sketch = CountMinSketch::new(5, 256);
+//! let mut sketch = CountMinSketch::<i64>::new(5, 256);
 //! sketch.update("apple");
 //! sketch.update_with_weight("banana", 3);
 //! assert!(sketch.estimate("banana") >= 3);
@@ -34,12 +34,16 @@
 //!
 //! ```rust
 //! # use datasketches::countmin::CountMinSketch;
-//! let buckets = CountMinSketch::suggest_num_buckets(0.01);
-//! let hashes = CountMinSketch::suggest_num_hashes(0.99);
-//! let _sketch = CountMinSketch::new(hashes, buckets);
+//! let buckets = CountMinSketch::<i64>::suggest_num_buckets(0.01);
+//! let hashes = CountMinSketch::<i64>::suggest_num_hashes(0.99);
+//! let _sketch = CountMinSketch::<i64>::new(hashes, buckets);
 //! ```
 
 mod serialization;
 
 mod sketch;
 pub use self::sketch::CountMinSketch;
+
+mod value;
+pub use self::value::CountMinValue;
+pub use self::value::UnsignedCountMinValue;
diff --git a/datasketches/src/countmin/sketch.rs 
b/datasketches/src/countmin/sketch.rs
index 321c836..e2814d1 100644
--- a/datasketches/src/countmin/sketch.rs
+++ b/datasketches/src/countmin/sketch.rs
@@ -17,10 +17,11 @@
 
 use std::hash::Hash;
 use std::hash::Hasher;
-use std::mem::size_of;
 
 use crate::codec::SketchBytes;
 use crate::codec::SketchSlice;
+use crate::countmin::CountMinValue;
+use crate::countmin::UnsignedCountMinValue;
 use crate::countmin::serialization::COUNTMIN_FAMILY_ID;
 use crate::countmin::serialization::FLAGS_IS_EMPTY;
 use crate::countmin::serialization::LONG_SIZE_BYTES;
@@ -38,16 +39,16 @@ const MAX_TABLE_ENTRIES: usize = 1 << 30;
 /// The sketch provides upper and lower bounds on estimated item frequencies
 /// with configurable relative error and confidence.
 #[derive(Debug, Clone, PartialEq)]
-pub struct CountMinSketch {
+pub struct CountMinSketch<T: CountMinValue> {
     num_hashes: u8,
     num_buckets: u32,
     seed: u64,
-    total_weight: i64,
-    counts: Vec<i64>,
+    total_weight: T,
+    counts: Vec<T>,
     hash_seeds: Vec<u64>,
 }
 
-impl CountMinSketch {
+impl<T: CountMinValue> CountMinSketch<T> {
     /// Creates a new Count-Min sketch with the default seed.
     ///
     /// # Panics
@@ -59,7 +60,7 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let sketch = CountMinSketch::new(4, 128);
+    /// let sketch = CountMinSketch::<i64>::new(4, 128);
     /// assert_eq!(sketch.num_buckets(), 128);
     /// ```
     pub fn new(num_hashes: u8, num_buckets: u32) -> Self {
@@ -77,7 +78,7 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let sketch = CountMinSketch::with_seed(4, 64, 42);
+    /// let sketch = CountMinSketch::<i64>::with_seed(4, 64, 42);
     /// assert_eq!(sketch.seed(), 42);
     /// ```
     pub fn with_seed(num_hashes: u8, num_buckets: u32, seed: u64) -> Self {
@@ -101,7 +102,7 @@ impl CountMinSketch {
     }
 
     /// Returns the total weight inserted into the sketch.
-    pub fn total_weight(&self) -> i64 {
+    pub fn total_weight(&self) -> T {
         self.total_weight
     }
 
@@ -112,7 +113,7 @@ impl CountMinSketch {
 
     /// Returns true if the sketch has not seen any updates.
     pub fn is_empty(&self) -> bool {
-        self.total_weight == 0
+        self.total_weight == T::ZERO
     }
 
     /// Suggests the number of buckets to achieve the given relative error.
@@ -129,7 +130,7 @@ impl CountMinSketch {
     ///
     /// # Panics
     ///
-    /// Panics if `confidence` is not in [0, 1].
+    /// Panics if `confidence` is not in `[0, 1]`.
     pub fn suggest_num_hashes(confidence: f64) -> u8 {
         assert!(
             (0.0..=1.0).contains(&confidence),
@@ -148,12 +149,12 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// let mut sketch = CountMinSketch::<i64>::new(4, 128);
     /// sketch.update("apple");
     /// assert!(sketch.estimate("apple") >= 1);
     /// ```
-    pub fn update<T: Hash>(&mut self, item: T) {
-        self.update_with_weight(item, 1);
+    pub fn update<I: Hash>(&mut self, item: I) {
+        self.update_with_weight(item, T::ONE);
     }
 
     /// Updates the sketch with the given item and weight.
@@ -162,21 +163,21 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// let mut sketch = CountMinSketch::<i64>::new(4, 128);
     /// sketch.update_with_weight("banana", 3);
     /// assert!(sketch.estimate("banana") >= 3);
     /// ```
-    pub fn update_with_weight<T: Hash>(&mut self, item: T, weight: i64) {
-        if weight == 0 {
+    pub fn update_with_weight<I: Hash>(&mut self, item: I, weight: T) {
+        if weight == T::ZERO {
             return;
         }
-        let abs_weight = abs_i64(weight);
-        self.total_weight = self.total_weight.wrapping_add(abs_weight);
+        let abs_weight = weight.abs();
+        self.total_weight = self.total_weight.add(abs_weight);
         let num_buckets = self.num_buckets as usize;
         for (row, seed) in self.hash_seeds.iter().enumerate() {
             let bucket = self.bucket_index(&item, *seed);
             let index = row * num_buckets + bucket;
-            self.counts[index] = self.counts[index].wrapping_add(weight);
+            self.counts[index] = self.counts[index].add(weight);
         }
     }
 
@@ -186,13 +187,13 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// let mut sketch = CountMinSketch::<i64>::new(4, 128);
     /// sketch.update_with_weight("pear", 2);
     /// assert!(sketch.estimate("pear") >= 2);
     /// ```
-    pub fn estimate<T: Hash>(&self, item: T) -> i64 {
+    pub fn estimate<I: Hash>(&self, item: I) -> T {
         let num_buckets = self.num_buckets as usize;
-        let mut min = i64::MAX;
+        let mut min = T::MAX;
         for (row, seed) in self.hash_seeds.iter().enumerate() {
             let bucket = self.bucket_index(&item, *seed);
             let index = row * num_buckets + bucket;
@@ -205,15 +206,15 @@ impl CountMinSketch {
     }
 
     /// Returns the lower bound on the true frequency of the given item.
-    pub fn lower_bound<T: Hash>(&self, item: T) -> i64 {
+    pub fn lower_bound<I: Hash>(&self, item: I) -> T {
         self.estimate(item)
     }
 
     /// Returns the upper bound on the true frequency of the given item.
-    pub fn upper_bound<T: Hash>(&self, item: T) -> i64 {
+    pub fn upper_bound<I: Hash>(&self, item: I) -> T {
         let estimate = self.estimate(item);
-        let error = (self.relative_error() * self.total_weight as f64) as i64;
-        estimate.wrapping_add(error)
+        let error = T::from_f64(self.relative_error() * 
self.total_weight.to_f64());
+        estimate.add(error)
     }
 
     /// Merges another sketch into this one.
@@ -226,8 +227,8 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// let mut left = CountMinSketch::new(4, 128);
-    /// let mut right = CountMinSketch::new(4, 128);
+    /// let mut left = CountMinSketch::<i64>::new(4, 128);
+    /// let mut right = CountMinSketch::<i64>::new(4, 128);
     ///
     /// left.update("apple");
     /// right.update_with_weight("banana", 2);
@@ -235,20 +236,19 @@ impl CountMinSketch {
     /// left.merge(&right);
     /// assert!(left.estimate("banana") >= 2);
     /// ```
-    pub fn merge(&mut self, other: &CountMinSketch) {
+    pub fn merge(&mut self, other: &CountMinSketch<T>) {
         if std::ptr::eq(self, other) {
             panic!("Cannot merge a sketch with itself.");
         }
-        if self.num_hashes != other.num_hashes
-            || self.num_buckets != other.num_buckets
-            || self.seed != other.seed
-        {
-            panic!("Incompatible sketch configuration.");
+        assert_eq!(self.num_hashes, other.num_hashes);
+        assert_eq!(self.num_buckets, other.num_buckets);
+        assert_eq!(self.seed, other.seed);
+        assert_eq!(self.counts.len(), other.counts.len());
+        let counts_len = self.counts.len();
+        for i in 0..counts_len {
+            self.counts[i] = self.counts[i].add(other.counts[i]);
         }
-        for (dst, src) in self.counts.iter_mut().zip(other.counts.iter()) {
-            *dst = dst.wrapping_add(*src);
-        }
-        self.total_weight = self.total_weight.wrapping_add(other.total_weight);
+        self.total_weight = self.total_weight.add(other.total_weight);
     }
 
     /// Serializes this sketch into the DataSketches Count-Min format.
@@ -257,18 +257,19 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// # let mut sketch = CountMinSketch::new(4, 128);
+    /// # let mut sketch = CountMinSketch::<i64>::new(4, 128);
     /// # sketch.update("apple");
     /// let bytes = sketch.serialize();
-    /// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
+    /// let decoded = CountMinSketch::<i64>::deserialize(&bytes).unwrap();
     /// assert!(decoded.estimate("apple") >= 1);
     /// ```
     pub fn serialize(&self) -> Vec<u8> {
         let header_size = PREAMBLE_LONGS_SHORT as usize * LONG_SIZE_BYTES;
+        let value_size = LONG_SIZE_BYTES;
         let payload_size = if self.is_empty() {
             0
         } else {
-            LONG_SIZE_BYTES + (self.counts.len() * size_of::<i64>())
+            value_size + (self.counts.len() * value_size)
         };
         let mut bytes = SketchBytes::with_capacity(header_size + payload_size);
 
@@ -287,9 +288,9 @@ impl CountMinSketch {
             return bytes.into_bytes();
         }
 
-        bytes.write_i64_le(self.total_weight);
-        for count in self.counts.iter().copied() {
-            bytes.write_i64_le(count);
+        bytes.write(&self.total_weight.to_bytes());
+        for count in &self.counts {
+            bytes.write(&count.to_bytes());
         }
         bytes.into_bytes()
     }
@@ -300,10 +301,10 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// # let mut sketch = CountMinSketch::new(4, 64);
+    /// # let mut sketch = CountMinSketch::<i64>::new(4, 64);
     /// # sketch.update("apple");
     /// # let bytes = sketch.serialize();
-    /// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
+    /// let decoded = CountMinSketch::<i64>::deserialize(&bytes).unwrap();
     /// assert!(decoded.estimate("apple") >= 1);
     /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
@@ -316,10 +317,10 @@ impl CountMinSketch {
     ///
     /// ```rust
     /// # use datasketches::countmin::CountMinSketch;
-    /// # let mut sketch = CountMinSketch::with_seed(4, 64, 7);
+    /// # let mut sketch = CountMinSketch::<i64>::with_seed(4, 64, 7);
     /// # sketch.update("apple");
     /// # let bytes = sketch.serialize();
-    /// let decoded = CountMinSketch::deserialize_with_seed(&bytes, 
7).unwrap();
+    /// let decoded = CountMinSketch::<i64>::deserialize_with_seed(&bytes, 
7).unwrap();
     /// assert!(decoded.estimate("apple") >= 1);
     /// ```
     pub fn deserialize_with_seed(bytes: &[u8], seed: u64) -> Result<Self, 
Error> {
@@ -327,6 +328,15 @@ impl CountMinSketch {
             move |_| Error::insufficient_data(tag)
         }
 
+        fn read_value<T: CountMinValue>(
+            cursor: &mut SketchSlice<'_>,
+            tag: &'static str,
+        ) -> Result<T, Error> {
+            let mut bs = [0u8; 8];
+            cursor.read_exact(&mut bs).map_err(make_error(tag))?;
+            T::try_from_bytes(bs)
+        }
+
         let mut cursor = SketchSlice::new(bytes);
         let preamble_longs = 
cursor.read_u8().map_err(make_error("preamble_longs"))?;
         let serial_version = 
cursor.read_u8().map_err(make_error("serial_version"))?;
@@ -372,27 +382,27 @@ impl CountMinSketch {
             return Ok(sketch);
         }
 
-        sketch.total_weight = 
cursor.read_i64_le().map_err(make_error("total_weight"))?;
-        for count in sketch.counts.iter_mut() {
-            *count = cursor.read_i64_le().map_err(make_error("counts"))?;
+        sketch.total_weight = read_value(&mut cursor, "total_weight")?;
+        for count in &mut sketch.counts {
+            *count = read_value(&mut cursor, "counts")?;
         }
         Ok(sketch)
     }
 
     fn make(num_hashes: u8, num_buckets: u32, seed: u64, entries: usize) -> 
Self {
-        let counts = vec![0i64; entries];
+        let counts = vec![T::ZERO; entries];
         let hash_seeds = make_hash_seeds(seed, num_hashes);
         CountMinSketch {
             num_hashes,
             num_buckets,
             seed,
-            total_weight: 0,
+            total_weight: T::ZERO,
             counts,
             hash_seeds,
         }
     }
 
-    fn bucket_index<T: Hash>(&self, item: &T, seed: u64) -> usize {
+    fn bucket_index<I: Hash>(&self, item: &I, seed: u64) -> usize {
         let mut hasher = MurmurHash3X64128::with_seed(seed);
         item.hash(&mut hasher);
         let (h1, _) = hasher.finish128();
@@ -400,6 +410,54 @@ impl CountMinSketch {
     }
 }
 
+impl<T: UnsignedCountMinValue> CountMinSketch<T> {
+    /// Divides every counter by two, truncating toward zero.
+    ///
+    /// Useful for exponential decay where counts represent recent activity.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut sketch = CountMinSketch::<u64>::new(4, 128);
+    /// sketch.update_with_weight("apple", 3);
+    /// sketch.halve();
+    /// assert!(sketch.estimate("apple") >= 1);
+    /// ```
+    pub fn halve(&mut self) {
+        for c in &mut self.counts {
+            *c = c.halve()
+        }
+        self.total_weight = self.total_weight.halve();
+    }
+
+    /// Multiplies every counter by `decay` and truncates back into `T`.
+    ///
+    /// Values are truncated toward zero after multiplication; choose `decay` 
in `(0, 1]`.
+    /// The total weight is scaled by the same factor to keep bounds 
consistent.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut sketch = CountMinSketch::<u64>::new(4, 128);
+    /// sketch.update_with_weight("apple", 3);
+    /// sketch.decay(0.5);
+    /// assert!(sketch.estimate("apple") >= 1);
+    /// ```
+    ///
+    /// # Panics
+    ///
+    /// Panics if `decay` is not finite or is outside `(0, 1]`.
+    pub fn decay(&mut self, decay: f64) {
+        assert!(decay > 0.0 && decay <= 1.0, "decay must be within (0, 1]");
+        for c in &mut self.counts {
+            *c = c.decay(decay)
+        }
+        self.total_weight = self.total_weight.decay(decay);
+    }
+}
+
 fn entries_for_config(num_hashes: u8, num_buckets: u32) -> usize {
     assert!(num_hashes > 0, "num_hashes must be at least 1");
     assert!(num_buckets >= 3, "num_buckets must be at least 3");
@@ -443,11 +501,3 @@ fn make_hash_seeds(seed: u64, num_hashes: u8) -> Vec<u64> {
     }
     seeds
 }
-
-fn abs_i64(value: i64) -> i64 {
-    if value >= 0 {
-        value
-    } else {
-        value.wrapping_neg()
-    }
-}
diff --git a/datasketches/src/countmin/value.rs 
b/datasketches/src/countmin/value.rs
new file mode 100644
index 0000000..f236440
--- /dev/null
+++ b/datasketches/src/countmin/value.rs
@@ -0,0 +1,187 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::error::Error;
+
+mod private {
+    // Sealed trait to prevent external implementations of CountMinValue.
+    pub trait Sealed {}
+}
+
+/// Value type supported in a Count-Min sketch.
+pub trait CountMinValue: private::Sealed + Copy + Ord {
+    /// Zero value for counters and weights.
+    const ZERO: Self;
+
+    /// One value for unit updates.
+    const ONE: Self;
+
+    /// Maximum representable value for initializing minima.
+    const MAX: Self;
+
+    /// Performs the + operation.
+    fn add(self, other: Self) -> Self;
+
+    /// Computes the absolute value of `self`.
+    fn abs(self) -> Self;
+
+    /// Converts into `f64`.
+    fn to_f64(self) -> f64;
+
+    /// Converts from `f64` by truncating toward zero.
+    fn from_f64(value: f64) -> Self;
+
+    /// Returns the raw transmutation in little-endian 8 bytes.
+    fn to_bytes(self) -> [u8; 8];
+
+    /// Constructs from the raw transmutation in little-endian 8 bytes.
+    fn try_from_bytes(bytes: [u8; 8]) -> Result<Self, Error>;
+}
+
+/// Unsigned value type supported in a Count-Min sketch.
+pub trait UnsignedCountMinValue: CountMinValue {
+    /// Divides the value by two, truncating toward zero.
+    fn halve(self) -> Self;
+
+    /// Multiplies the value by decay and truncates back into `T`.
+    fn decay(self, decay: f64) -> Self;
+}
+
+macro_rules! impl_signed {
+    ($name:ty, $min:expr, $max:expr) => {
+        impl private::Sealed for $name {}
+
+        impl CountMinValue for $name {
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MAX: Self = $max;
+
+            #[inline(always)]
+            fn add(self, other: Self) -> Self {
+                self + other
+            }
+
+            #[inline(always)]
+            fn abs(self) -> Self {
+                if self >= 0 { self } else { -self }
+            }
+
+            #[inline(always)]
+            fn to_f64(self) -> f64 {
+                self as f64
+            }
+
+            #[inline(always)]
+            fn from_f64(value: f64) -> Self {
+                value.trunc() as $name
+            }
+
+            #[inline(always)]
+            fn to_bytes(self) -> [u8; 8] {
+                let value = self as i64;
+                value.to_le_bytes()
+            }
+
+            #[inline(always)]
+            fn try_from_bytes(bytes: [u8; 8]) -> Result<Self, Error> {
+                let value = i64::from_le_bytes(bytes);
+                if value < $min as i64 || value > $max as i64 {
+                    return Err(Error::deserial(format!(
+                        "value {} out of range for {}",
+                        value,
+                        stringify!($name)
+                    )));
+                }
+                Ok(value as $name)
+            }
+        }
+    };
+}
+
+impl_signed!(i8, i8::MIN, i8::MAX);
+impl_signed!(i16, i16::MIN, i16::MAX);
+impl_signed!(i32, i32::MIN, i32::MAX);
+impl_signed!(i64, i64::MIN, i64::MAX);
+
+macro_rules! impl_unsigned {
+    ($name:ty, $max:expr) => {
+        impl private::Sealed for $name {}
+
+        impl CountMinValue for $name {
+            const ZERO: Self = 0;
+            const ONE: Self = 1;
+            const MAX: Self = $max;
+
+            #[inline(always)]
+            fn add(self, other: Self) -> Self {
+                self + other
+            }
+
+            #[inline(always)]
+            fn abs(self) -> Self {
+                self
+            }
+
+            #[inline(always)]
+            fn to_f64(self) -> f64 {
+                self as f64
+            }
+
+            #[inline(always)]
+            fn from_f64(value: f64) -> Self {
+                value.trunc() as $name
+            }
+
+            #[inline(always)]
+            fn to_bytes(self) -> [u8; 8] {
+                let value = self as u64;
+                value.to_le_bytes()
+            }
+
+            #[inline(always)]
+            fn try_from_bytes(bytes: [u8; 8]) -> Result<Self, Error> {
+                let value = u64::from_le_bytes(bytes);
+                if value > $max as u64 {
+                    return Err(Error::deserial(format!(
+                        "value {} out of range for {}",
+                        value,
+                        stringify!($name)
+                    )));
+                }
+                Ok(value as $name)
+            }
+        }
+
+        impl UnsignedCountMinValue for $name {
+            #[inline(always)]
+            fn halve(self) -> Self {
+                self >> 1
+            }
+
+            #[inline(always)]
+            fn decay(self, decay: f64) -> Self {
+                let value = self.to_f64() * decay;
+                Self::from_f64(value)
+            }
+        }
+    };
+}
+
+impl_unsigned!(u8, u8::MAX);
+impl_unsigned!(u16, u16::MAX);
+impl_unsigned!(u32, u32::MAX);
+impl_unsigned!(u64, u64::MAX);
diff --git a/datasketches/src/frequencies/sketch.rs 
b/datasketches/src/frequencies/sketch.rs
index f399445..e0d9711 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -355,7 +355,7 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
                 });
             }
         }
-        rows.sort_by(|a, b| b.estimate.cmp(&a.estimate));
+        rows.sort_by_key(|row| std::cmp::Reverse(row.estimate));
         rows
     }
 
diff --git a/datasketches/tests/countmin_test.rs 
b/datasketches/tests/countmin_test.rs
index b4b3685..dddc72c 100644
--- a/datasketches/tests/countmin_test.rs
+++ b/datasketches/tests/countmin_test.rs
@@ -19,7 +19,7 @@ use datasketches::countmin::CountMinSketch;
 
 #[test]
 fn test_init_defaults() {
-    let sketch = CountMinSketch::new(3, 5);
+    let sketch = CountMinSketch::<i64>::new(3, 5);
     assert_eq!(sketch.num_hashes(), 3);
     assert_eq!(sketch.num_buckets(), 5);
     assert_eq!(sketch.seed(), 9001);
@@ -30,23 +30,23 @@ fn test_init_defaults() {
 
 #[test]
 fn test_parameter_suggestions() {
-    assert_eq!(CountMinSketch::suggest_num_buckets(0.2), 14);
-    assert_eq!(CountMinSketch::suggest_num_buckets(0.1), 28);
-    assert_eq!(CountMinSketch::suggest_num_buckets(0.05), 55);
-    assert_eq!(CountMinSketch::suggest_num_buckets(0.01), 272);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_buckets(0.2), 14);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_buckets(0.1), 28);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_buckets(0.05), 55);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_buckets(0.01), 272);
 
-    assert_eq!(CountMinSketch::suggest_num_hashes(0.682689492), 2);
-    assert_eq!(CountMinSketch::suggest_num_hashes(0.954499736), 4);
-    assert_eq!(CountMinSketch::suggest_num_hashes(0.997300204), 6);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_hashes(0.682689492), 2);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_hashes(0.954499736), 4);
+    assert_eq!(CountMinSketch::<i64>::suggest_num_hashes(0.997300204), 6);
 
-    let buckets = CountMinSketch::suggest_num_buckets(0.1);
-    let sketch = CountMinSketch::new(3, buckets);
+    let buckets = CountMinSketch::<i64>::suggest_num_buckets(0.1);
+    let sketch = CountMinSketch::<i64>::new(3, buckets);
     assert!(sketch.relative_error() <= 0.1);
 }
 
 #[test]
 fn test_update_and_bounds() {
-    let mut sketch = CountMinSketch::with_seed(3, 128, 123);
+    let mut sketch = CountMinSketch::<i64>::with_seed(3, 128, 123);
     sketch.update("x");
     sketch.update_with_weight("x", 9);
     assert_eq!(sketch.estimate("x"), 10);
@@ -58,9 +58,50 @@ fn test_update_and_bounds() {
     assert!(estimate <= upper);
 }
 
+#[test]
+fn test_update_and_bounds_with_scaling() {
+    let mut sketch = CountMinSketch::<u64>::with_seed(3, 128, 123);
+    sketch.update_with_weight("x", 10);
+
+    let estimate = sketch.estimate("x");
+    let upper = sketch.upper_bound("x");
+    let lower = sketch.lower_bound("x");
+    assert_eq!(estimate, 10);
+    assert!(lower <= estimate);
+    assert!(estimate <= upper);
+
+    let eps = sketch.relative_error();
+
+    sketch.halve();
+    let estimate = sketch.estimate("x");
+    let upper = sketch.upper_bound("x");
+    let lower = sketch.lower_bound("x");
+    assert_eq!(sketch.total_weight(), 5);
+    assert_eq!(estimate, 5);
+    assert!(lower <= estimate);
+    assert!(estimate <= upper);
+    assert_eq!(
+        upper,
+        estimate + (eps * sketch.total_weight() as f64) as u64
+    );
+
+    sketch.decay(0.5);
+    let estimate = sketch.estimate("x");
+    let upper = sketch.upper_bound("x");
+    let lower = sketch.lower_bound("x");
+    assert_eq!(sketch.total_weight(), 2);
+    assert_eq!(estimate, 2);
+    assert!(lower <= estimate);
+    assert!(estimate <= upper);
+    assert_eq!(
+        upper,
+        estimate + (eps * sketch.total_weight() as f64) as u64
+    );
+}
+
 #[test]
 fn test_negative_weights() {
-    let mut sketch = CountMinSketch::with_seed(2, 32, 123);
+    let mut sketch = CountMinSketch::<i64>::with_seed(2, 32, 123);
     sketch.update_with_weight("y", -1);
     assert_eq!(sketch.total_weight(), 1);
     assert_eq!(sketch.estimate("y"), -1);
@@ -68,10 +109,58 @@ fn test_negative_weights() {
     assert_eq!(sketch.total_weight(), 3);
 }
 
+#[test]
+fn test_halve() {
+    let buckets = CountMinSketch::<u64>::suggest_num_buckets(0.01);
+    let hashes = CountMinSketch::<u64>::suggest_num_hashes(0.9);
+    let mut sketch = CountMinSketch::<u64>::new(hashes, buckets);
+
+    for i in 0..1000usize {
+        for _ in 0..i {
+            sketch.update(i as u64);
+        }
+    }
+
+    for i in 0..1000usize {
+        assert!(sketch.estimate(i as u64) >= i as u64);
+    }
+
+    sketch.halve();
+
+    for i in 0..1000usize {
+        assert!(sketch.estimate(i as u64) >= (i as u64) / 2);
+    }
+}
+
+#[test]
+fn test_decay() {
+    let buckets = CountMinSketch::<u64>::suggest_num_buckets(0.01);
+    let hashes = CountMinSketch::<u64>::suggest_num_hashes(0.9);
+    let mut sketch = CountMinSketch::<u64>::new(hashes, buckets);
+
+    for i in 0..1000usize {
+        for _ in 0..i {
+            sketch.update(i as u64);
+        }
+    }
+
+    for i in 0..1000usize {
+        assert!(sketch.estimate(i as u64) >= i as u64);
+    }
+
+    const FACTOR: f64 = 0.5;
+    sketch.decay(FACTOR);
+
+    for i in 0..1000usize {
+        let expected = ((i as f64) * FACTOR).floor() as u64;
+        assert!(sketch.estimate(i as u64) >= expected);
+    }
+}
+
 #[test]
 fn test_merge() {
-    let mut left = CountMinSketch::new(3, 64);
-    let mut right = CountMinSketch::new(3, 64);
+    let mut left = CountMinSketch::<i64>::new(3, 64);
+    let mut right = CountMinSketch::<i64>::new(3, 64);
     for _ in 0..10 {
         left.update("a");
     }
@@ -87,9 +176,9 @@ fn test_merge() {
 
 #[test]
 fn test_serialize_deserialize_empty() {
-    let sketch = CountMinSketch::with_seed(2, 5, 123);
+    let sketch = CountMinSketch::<i64>::with_seed(2, 5, 123);
     let bytes = sketch.serialize();
-    let decoded = CountMinSketch::deserialize_with_seed(&bytes, 123).unwrap();
+    let decoded = CountMinSketch::<i64>::deserialize_with_seed(&bytes, 
123).unwrap();
     assert!(decoded.is_empty());
     assert_eq!(decoded.num_hashes(), 2);
     assert_eq!(decoded.num_buckets(), 5);
@@ -98,39 +187,51 @@ fn test_serialize_deserialize_empty() {
 
 #[test]
 fn test_serialize_deserialize_non_empty() {
-    let mut sketch = CountMinSketch::with_seed(3, 32, 123);
+    let mut sketch = CountMinSketch::<i64>::with_seed(3, 32, 123);
     for i in 0..100i64 {
         sketch.update(i);
     }
     let bytes = sketch.serialize();
-    let decoded = CountMinSketch::deserialize_with_seed(&bytes, 123).unwrap();
+    let decoded = CountMinSketch::<i64>::deserialize_with_seed(&bytes, 
123).unwrap();
     assert_eq!(decoded.total_weight(), sketch.total_weight());
     assert_eq!(decoded.estimate(42i64), sketch.estimate(42i64));
 }
 
+#[test]
+fn test_serialize_deserialize_non_empty_u64() {
+    let mut sketch = CountMinSketch::<u64>::with_seed(3, 32, 123);
+    for i in 0..100u64 {
+        sketch.update(i);
+    }
+    let bytes = sketch.serialize();
+    let decoded = CountMinSketch::<u64>::deserialize_with_seed(&bytes, 
123).unwrap();
+    assert_eq!(decoded.total_weight(), sketch.total_weight());
+    assert_eq!(decoded.estimate(42u64), sketch.estimate(42u64));
+}
+
 #[test]
 #[should_panic(expected = "num_hashes must be at least 1")]
 fn test_invalid_hashes() {
-    CountMinSketch::new(0, 5);
+    CountMinSketch::<i64>::new(0, 5);
 }
 
 #[test]
 #[should_panic(expected = "num_buckets must be at least 3")]
 fn test_invalid_buckets() {
-    CountMinSketch::new(1, 2);
+    CountMinSketch::<i64>::new(1, 2);
 }
 
 #[test]
-#[should_panic(expected = "Incompatible sketch configuration.")]
+#[should_panic]
 fn test_merge_incompatible() {
-    let mut left = CountMinSketch::new(3, 64);
-    let right = CountMinSketch::new(2, 64);
+    let mut left = CountMinSketch::<i64>::new(3, 64);
+    let right = CountMinSketch::<i64>::new(2, 64);
     left.merge(&right);
 }
 
 #[test]
 fn test_increment_single_key_like_rust_count_min_sketch() {
-    let mut sketch = CountMinSketch::new(4, 32);
+    let mut sketch = CountMinSketch::<i64>::new(4, 32);
     for _ in 0..300 {
         sketch.update("key");
     }
@@ -139,7 +240,7 @@ fn test_increment_single_key_like_rust_count_min_sketch() {
 
 #[test]
 fn test_increment_multi_like_rust_count_min_sketch() {
-    let mut sketch = CountMinSketch::new(6, 128);
+    let mut sketch = CountMinSketch::<i64>::new(6, 128);
     for i in 0..1_000_000u64 {
         sketch.update(i % 100);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to