This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 8855aa6  feat: implement count-min sketch (#38)
8855aa6 is described below

commit 8855aa6462da50979c0804f0ee63b23b30b79910
Author: Chojan Shang <[email protected]>
AuthorDate: Fri Dec 26 06:01:41 2025 +0800

    feat: implement count-min sketch (#38)
    
    * feat: implement count-min sketch
    
    * test: add count-min cases from rust-count-min-sketch
    
    * fix: move countmin module into datasketches crate
    
    * chore: make lint happy
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    ---------
    
    Signed-off-by: Chojan Shang <[email protected]>
---
 datasketches/src/{lib.rs => countmin/mod.rs}       |  24 +-
 .../src/{lib.rs => countmin/serialization.rs}      |  30 +-
 datasketches/src/countmin/sketch.rs                | 375 +++++++++++++++++++++
 datasketches/src/lib.rs                            |   1 +
 datasketches/tests/countmin_test.rs                | 150 +++++++++
 5 files changed, 546 insertions(+), 34 deletions(-)

diff --git a/datasketches/src/lib.rs b/datasketches/src/countmin/mod.rs
similarity index 53%
copy from datasketches/src/lib.rs
copy to datasketches/src/countmin/mod.rs
index c566727..b7de4b8 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/countmin/mod.rs
@@ -15,23 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! # Apache® DataSketches™ Core Rust Library Component
+//! Count-Min sketch implementation for frequency estimation.
 //!
-//! The Sketching Core Library provides a range of stochastic streaming 
algorithms and closely
-//! related Rust technologies that are particularly useful when integrating 
this technology into
-//! systems that must deal with massive data.
-//!
-//! This library is divided into modules that constitute distinct groups of 
functionality.
-
-#![cfg_attr(docsrs, feature(doc_cfg))]
-#![deny(missing_docs)]
-
-// See https://github.com/apache/datasketches-rust/issues/28 for more 
information.
-#[cfg(target_endian = "big")]
-compile_error!("datasketches does not support big-endian targets");
+//! The Count-Min sketch provides approximate frequency counts for streaming 
data
+//! with configurable relative error and confidence bounds.
 
-pub mod error;
-pub mod hll;
-pub mod tdigest;
+mod serialization;
 
-mod hash;
+mod sketch;
+pub use self::sketch::CountMinSketch;
+pub use self::sketch::DEFAULT_SEED;
diff --git a/datasketches/src/lib.rs 
b/datasketches/src/countmin/serialization.rs
similarity index 53%
copy from datasketches/src/lib.rs
copy to datasketches/src/countmin/serialization.rs
index c566727..bb6517d 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/countmin/serialization.rs
@@ -15,23 +15,19 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! # Apache® DataSketches™ Core Rust Library Component
-//!
-//! The Sketching Core Library provides a range of stochastic streaming 
algorithms and closely
-//! related Rust technologies that are particularly useful when integrating 
this technology into
-//! systems that must deal with massive data.
-//!
-//! This library is divided into modules that constitute distinct groups of 
functionality.
+use std::hash::Hasher;
 
-#![cfg_attr(docsrs, feature(doc_cfg))]
-#![deny(missing_docs)]
+use crate::hash::MurmurHash3X64128;
 
-// See https://github.com/apache/datasketches-rust/issues/28 for more 
information.
-#[cfg(target_endian = "big")]
-compile_error!("datasketches does not support big-endian targets");
+pub(super) const PREAMBLE_LONGS_SHORT: u8 = 2;
+pub(super) const SERIAL_VERSION: u8 = 1;
+pub(super) const COUNTMIN_FAMILY_ID: u8 = 18;
+pub(super) const FLAGS_IS_EMPTY: u8 = 1 << 0;
+pub(super) const LONG_SIZE_BYTES: usize = 8;
 
-pub mod error;
-pub mod hll;
-pub mod tdigest;
-
-mod hash;
+pub(super) fn compute_seed_hash(seed: u64) -> u16 {
+    let mut hasher = MurmurHash3X64128::with_seed(0);
+    hasher.write(&seed.to_le_bytes());
+    let (h1, _) = hasher.finish128();
+    (h1 & 0xffff) as u16
+}
diff --git a/datasketches/src/countmin/sketch.rs 
b/datasketches/src/countmin/sketch.rs
new file mode 100644
index 0000000..3fabfb4
--- /dev/null
+++ b/datasketches/src/countmin/sketch.rs
@@ -0,0 +1,375 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::hash::Hash;
+use std::hash::Hasher;
+use std::io::Cursor;
+use std::mem::size_of;
+
+use byteorder::LE;
+use byteorder::ReadBytesExt;
+
+use crate::countmin::serialization::COUNTMIN_FAMILY_ID;
+use crate::countmin::serialization::FLAGS_IS_EMPTY;
+use crate::countmin::serialization::LONG_SIZE_BYTES;
+use crate::countmin::serialization::PREAMBLE_LONGS_SHORT;
+use crate::countmin::serialization::SERIAL_VERSION;
+use crate::countmin::serialization::compute_seed_hash;
+use crate::error::SerdeError;
+use crate::hash::MurmurHash3X64128;
+
+const MAX_TABLE_ENTRIES: usize = 1 << 30;
+
+/// Default seed used by the sketch.
+pub const DEFAULT_SEED: u64 = 9001;
+
+/// Count-Min sketch for estimating item frequencies.
+///
+/// The sketch provides upper and lower bounds on estimated item frequencies
+/// with configurable relative error and confidence.
+#[derive(Debug, Clone, PartialEq)]
+pub struct CountMinSketch {
+    num_hashes: u8,
+    num_buckets: u32,
+    seed: u64,
+    total_weight: i64,
+    counts: Vec<i64>,
+    hash_seeds: Vec<u64>,
+}
+
+impl CountMinSketch {
+    /// Creates a new Count-Min sketch with the default seed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
+    /// total table size exceeds the supported limit.
+    pub fn new(num_hashes: u8, num_buckets: u32) -> Self {
+        Self::with_seed(num_hashes, num_buckets, DEFAULT_SEED)
+    }
+
+    /// Creates a new Count-Min sketch with the provided seed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
+    /// total table size exceeds the supported limit.
+    pub fn with_seed(num_hashes: u8, num_buckets: u32, seed: u64) -> Self {
+        let entries = entries_for_config(num_hashes, num_buckets);
+        Self::make(num_hashes, num_buckets, seed, entries)
+    }
+
+    /// Returns the number of hash functions used by the sketch.
+    pub fn num_hashes(&self) -> u8 {
+        self.num_hashes
+    }
+
+    /// Returns the number of buckets per hash function.
+    pub fn num_buckets(&self) -> u32 {
+        self.num_buckets
+    }
+
+    /// Returns the seed used by the sketch.
+    pub fn seed(&self) -> u64 {
+        self.seed
+    }
+
+    /// Returns the total weight inserted into the sketch.
+    pub fn total_weight(&self) -> i64 {
+        self.total_weight
+    }
+
+    /// Returns the relative error (epsilon) implied by the number of buckets.
+    pub fn relative_error(&self) -> f64 {
+        std::f64::consts::E / self.num_buckets as f64
+    }
+
+    /// Returns true if the sketch has not seen any updates.
+    pub fn is_empty(&self) -> bool {
+        self.total_weight == 0
+    }
+
+    /// Suggests the number of buckets to achieve the given relative error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `relative_error` is negative.
+    pub fn suggest_num_buckets(relative_error: f64) -> u32 {
+        assert!(relative_error >= 0.0, "relative_error must be at least 0");
+        (std::f64::consts::E / relative_error).ceil() as u32
+    }
+
+    /// Suggests the number of hashes to achieve the given confidence.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `confidence` is not in (0, 1].
+    pub fn suggest_num_hashes(confidence: f64) -> u8 {
+        assert!(
+            (0.0..=1.0).contains(&confidence),
+            "confidence must be between 0 and 1.0 (inclusive)"
+        );
+        if confidence == 1.0 {
+            return 127;
+        }
+        let hashes = (1.0 / (1.0 - confidence)).ln().ceil();
+        hashes.min(127.0) as u8
+    }
+
+    /// Updates the sketch with a single occurrence of the item.
+    pub fn update<T: Hash>(&mut self, item: T) {
+        self.update_with_weight(item, 1);
+    }
+
+    /// Updates the sketch with the given item and weight.
+    pub fn update_with_weight<T: Hash>(&mut self, item: T, weight: i64) {
+        if weight == 0 {
+            return;
+        }
+        let abs_weight = abs_i64(weight);
+        self.total_weight = self.total_weight.wrapping_add(abs_weight);
+        let num_buckets = self.num_buckets as usize;
+        for (row, seed) in self.hash_seeds.iter().enumerate() {
+            let bucket = self.bucket_index(&item, *seed);
+            let index = row * num_buckets + bucket;
+            self.counts[index] = self.counts[index].wrapping_add(weight);
+        }
+    }
+
+    /// Returns the estimated frequency of the given item.
+    pub fn estimate<T: Hash>(&self, item: T) -> i64 {
+        let num_buckets = self.num_buckets as usize;
+        let mut min = i64::MAX;
+        for (row, seed) in self.hash_seeds.iter().enumerate() {
+            let bucket = self.bucket_index(&item, *seed);
+            let index = row * num_buckets + bucket;
+            let value = self.counts[index];
+            if value < min {
+                min = value;
+            }
+        }
+        min
+    }
+
+    /// Returns the lower bound on the true frequency of the given item.
+    pub fn lower_bound<T: Hash>(&self, item: T) -> i64 {
+        self.estimate(item)
+    }
+
+    /// Returns the upper bound on the true frequency of the given item.
+    pub fn upper_bound<T: Hash>(&self, item: T) -> i64 {
+        let estimate = self.estimate(item);
+        let error = (self.relative_error() * self.total_weight as f64) as i64;
+        estimate.wrapping_add(error)
+    }
+
+    /// Merges another sketch into this one.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the sketches have incompatible configurations.
+    pub fn merge(&mut self, other: &CountMinSketch) {
+        if std::ptr::eq(self, other) {
+            panic!("Cannot merge a sketch with itself.");
+        }
+        if self.num_hashes != other.num_hashes
+            || self.num_buckets != other.num_buckets
+            || self.seed != other.seed
+        {
+            panic!("Incompatible sketch configuration.");
+        }
+        for (dst, src) in self.counts.iter_mut().zip(other.counts.iter()) {
+            *dst = dst.wrapping_add(*src);
+        }
+        self.total_weight = self.total_weight.wrapping_add(other.total_weight);
+    }
+
+    /// Serializes this sketch into the DataSketches Count-Min format.
+    pub fn serialize(&self) -> Vec<u8> {
+        let header_size = PREAMBLE_LONGS_SHORT as usize * LONG_SIZE_BYTES;
+        let payload_size = if self.is_empty() {
+            0
+        } else {
+            LONG_SIZE_BYTES + (self.counts.len() * size_of::<i64>())
+        };
+        let mut bytes = Vec::with_capacity(header_size + payload_size);
+
+        bytes.push(PREAMBLE_LONGS_SHORT);
+        bytes.push(SERIAL_VERSION);
+        bytes.push(COUNTMIN_FAMILY_ID);
+        bytes.push(if self.is_empty() { FLAGS_IS_EMPTY } else { 0 });
+        bytes.extend_from_slice(&0u32.to_le_bytes());
+
+        bytes.extend_from_slice(&self.num_buckets.to_le_bytes());
+        bytes.push(self.num_hashes);
+        bytes.extend_from_slice(&compute_seed_hash(self.seed).to_le_bytes());
+        bytes.push(0u8);
+
+        if self.is_empty() {
+            return bytes;
+        }
+
+        bytes.extend_from_slice(&self.total_weight.to_le_bytes());
+        for count in &self.counts {
+            bytes.extend_from_slice(&count.to_le_bytes());
+        }
+        bytes
+    }
+
+    /// Deserializes a sketch from bytes using the default seed.
+    pub fn deserialize(bytes: &[u8]) -> Result<Self, SerdeError> {
+        Self::deserialize_with_seed(bytes, DEFAULT_SEED)
+    }
+
+    /// Deserializes a sketch from bytes using the provided seed.
+    pub fn deserialize_with_seed(bytes: &[u8], seed: u64) -> Result<Self, 
SerdeError> {
+        fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> 
SerdeError {
+            move |_| SerdeError::InsufficientData(tag.to_string())
+        }
+
+        let mut cursor = Cursor::new(bytes);
+        let preamble_longs = 
cursor.read_u8().map_err(make_error("preamble_longs"))?;
+        let serial_version = 
cursor.read_u8().map_err(make_error("serial_version"))?;
+        let family_id = cursor.read_u8().map_err(make_error("family_id"))?;
+        let flags = cursor.read_u8().map_err(make_error("flags"))?;
+        cursor.read_u32::<LE>().map_err(make_error("unused32"))?;
+
+        if family_id != COUNTMIN_FAMILY_ID {
+            return Err(SerdeError::InvalidFamily(format!(
+                "expected {} (CountMinSketch), got {}",
+                COUNTMIN_FAMILY_ID, family_id
+            )));
+        }
+        if serial_version != SERIAL_VERSION {
+            return Err(SerdeError::UnsupportedVersion(format!(
+                "expected {}, got {}",
+                SERIAL_VERSION, serial_version
+            )));
+        }
+        if preamble_longs != PREAMBLE_LONGS_SHORT {
+            return Err(SerdeError::MalformedData(format!(
+                "unsupported preamble_longs {preamble_longs}"
+            )));
+        }
+
+        let num_buckets = 
cursor.read_u32::<LE>().map_err(make_error("num_buckets"))?;
+        let num_hashes = cursor.read_u8().map_err(make_error("num_hashes"))?;
+        let seed_hash = 
cursor.read_u16::<LE>().map_err(make_error("seed_hash"))?;
+        cursor.read_u8().map_err(make_error("unused8"))?;
+
+        let expected_seed_hash = compute_seed_hash(seed);
+        if seed_hash != expected_seed_hash {
+            return Err(SerdeError::InvalidParameter(format!(
+                "incompatible seed hash: expected {}, got {}",
+                expected_seed_hash, seed_hash
+            )));
+        }
+
+        let entries = entries_for_config_checked(num_hashes, num_buckets)?;
+        let mut sketch = Self::make(num_hashes, num_buckets, seed, entries);
+        if (flags & FLAGS_IS_EMPTY) != 0 {
+            return Ok(sketch);
+        }
+
+        sketch.total_weight = cursor
+            .read_i64::<LE>()
+            .map_err(make_error("total_weight"))?;
+        for count in sketch.counts.iter_mut() {
+            *count = cursor.read_i64::<LE>().map_err(make_error("counts"))?;
+        }
+        Ok(sketch)
+    }
+
+    fn make(num_hashes: u8, num_buckets: u32, seed: u64, entries: usize) -> 
Self {
+        let counts = vec![0i64; entries];
+        let hash_seeds = make_hash_seeds(seed, num_hashes);
+        CountMinSketch {
+            num_hashes,
+            num_buckets,
+            seed,
+            total_weight: 0,
+            counts,
+            hash_seeds,
+        }
+    }
+
+    fn bucket_index<T: Hash>(&self, item: &T, seed: u64) -> usize {
+        let mut hasher = MurmurHash3X64128::with_seed(seed);
+        item.hash(&mut hasher);
+        let (h1, _) = hasher.finish128();
+        (h1 % self.num_buckets as u64) as usize
+    }
+}
+
+fn entries_for_config(num_hashes: u8, num_buckets: u32) -> usize {
+    assert!(num_hashes > 0, "num_hashes must be at least 1");
+    assert!(num_buckets >= 3, "num_buckets must be at least 3");
+    let entries = (num_hashes as usize)
+        .checked_mul(num_buckets as usize)
+        .expect("num_hashes * num_buckets overflows usize");
+    assert!(
+        entries < MAX_TABLE_ENTRIES,
+        "num_hashes * num_buckets must be < {}",
+        MAX_TABLE_ENTRIES
+    );
+    entries
+}
+
+fn entries_for_config_checked(num_hashes: u8, num_buckets: u32) -> 
Result<usize, SerdeError> {
+    if num_hashes == 0 {
+        return Err(SerdeError::InvalidParameter(
+            "num_hashes must be at least 1".to_string(),
+        ));
+    }
+    if num_buckets < 3 {
+        return Err(SerdeError::InvalidParameter(
+            "num_buckets must be at least 3".to_string(),
+        ));
+    }
+    let entries = (num_hashes as usize)
+        .checked_mul(num_buckets as usize)
+        .ok_or_else(|| {
+            SerdeError::InvalidParameter("num_hashes * num_buckets overflows 
usize".to_string())
+        })?;
+    if entries >= MAX_TABLE_ENTRIES {
+        return Err(SerdeError::InvalidParameter(format!(
+            "num_hashes * num_buckets must be < {}",
+            MAX_TABLE_ENTRIES
+        )));
+    }
+    Ok(entries)
+}
+
+fn make_hash_seeds(seed: u64, num_hashes: u8) -> Vec<u64> {
+    let mut seeds = Vec::with_capacity(num_hashes as usize);
+    for i in 0..num_hashes {
+        // Derive per-row hash seeds deterministically from the sketch seed.
+        let mut hasher = MurmurHash3X64128::with_seed(seed);
+        hasher.write(&u64::from(i).to_le_bytes());
+        let (h1, _) = hasher.finish128();
+        seeds.push(h1);
+    }
+    seeds
+}
+
+fn abs_i64(value: i64) -> i64 {
+    if value >= 0 {
+        value
+    } else {
+        value.wrapping_neg()
+    }
+}
diff --git a/datasketches/src/lib.rs b/datasketches/src/lib.rs
index c566727..c7c2a14 100644
--- a/datasketches/src/lib.rs
+++ b/datasketches/src/lib.rs
@@ -30,6 +30,7 @@
 #[cfg(target_endian = "big")]
 compile_error!("datasketches does not support big-endian targets");
 
+pub mod countmin;
 pub mod error;
 pub mod hll;
 pub mod tdigest;
diff --git a/datasketches/tests/countmin_test.rs 
b/datasketches/tests/countmin_test.rs
new file mode 100644
index 0000000..06e1041
--- /dev/null
+++ b/datasketches/tests/countmin_test.rs
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datasketches::countmin::CountMinSketch;
+use datasketches::countmin::DEFAULT_SEED;
+
+#[test]
+fn test_init_defaults() {
+    let sketch = CountMinSketch::new(3, 5);
+    assert_eq!(sketch.num_hashes(), 3);
+    assert_eq!(sketch.num_buckets(), 5);
+    assert_eq!(sketch.seed(), DEFAULT_SEED);
+    assert!(sketch.is_empty());
+    assert_eq!(sketch.total_weight(), 0);
+    assert_eq!(sketch.estimate("missing"), 0);
+}
+
+#[test]
+fn test_parameter_suggestions() {
+    assert_eq!(CountMinSketch::suggest_num_buckets(0.2), 14);
+    assert_eq!(CountMinSketch::suggest_num_buckets(0.1), 28);
+    assert_eq!(CountMinSketch::suggest_num_buckets(0.05), 55);
+    assert_eq!(CountMinSketch::suggest_num_buckets(0.01), 272);
+
+    assert_eq!(CountMinSketch::suggest_num_hashes(0.682689492), 2);
+    assert_eq!(CountMinSketch::suggest_num_hashes(0.954499736), 4);
+    assert_eq!(CountMinSketch::suggest_num_hashes(0.997300204), 6);
+
+    let buckets = CountMinSketch::suggest_num_buckets(0.1);
+    let sketch = CountMinSketch::with_seed(3, buckets, DEFAULT_SEED);
+    assert!(sketch.relative_error() <= 0.1);
+}
+
+#[test]
+fn test_update_and_bounds() {
+    let mut sketch = CountMinSketch::with_seed(3, 128, 123);
+    sketch.update("x");
+    sketch.update_with_weight("x", 9);
+    assert_eq!(sketch.estimate("x"), 10);
+    assert_eq!(sketch.total_weight(), 10);
+    let estimate = sketch.estimate("x");
+    let upper = sketch.upper_bound("x");
+    let lower = sketch.lower_bound("x");
+    assert!(lower <= estimate);
+    assert!(estimate <= upper);
+}
+
+#[test]
+fn test_negative_weights() {
+    let mut sketch = CountMinSketch::with_seed(2, 32, 123);
+    sketch.update_with_weight("y", -1);
+    assert_eq!(sketch.total_weight(), 1);
+    assert_eq!(sketch.estimate("y"), -1);
+    sketch.update_with_weight("x", 2);
+    assert_eq!(sketch.total_weight(), 3);
+}
+
+#[test]
+fn test_merge() {
+    let mut left = CountMinSketch::with_seed(3, 64, DEFAULT_SEED);
+    let mut right = CountMinSketch::with_seed(3, 64, DEFAULT_SEED);
+    for _ in 0..10 {
+        left.update("a");
+    }
+    for _ in 0..4 {
+        right.update("a");
+        right.update("b");
+    }
+    left.merge(&right);
+    assert_eq!(left.total_weight(), 18);
+    assert!(left.estimate("a") >= 14);
+    assert!(left.estimate("b") >= 4);
+}
+
+#[test]
+fn test_serialize_deserialize_empty() {
+    let sketch = CountMinSketch::with_seed(2, 5, 123);
+    let bytes = sketch.serialize();
+    let decoded = CountMinSketch::deserialize_with_seed(&bytes, 123).unwrap();
+    assert!(decoded.is_empty());
+    assert_eq!(decoded.num_hashes(), 2);
+    assert_eq!(decoded.num_buckets(), 5);
+    assert_eq!(decoded.seed(), 123);
+}
+
+#[test]
+fn test_serialize_deserialize_non_empty() {
+    let mut sketch = CountMinSketch::with_seed(3, 32, 123);
+    for i in 0..100i64 {
+        sketch.update(i);
+    }
+    let bytes = sketch.serialize();
+    let decoded = CountMinSketch::deserialize_with_seed(&bytes, 123).unwrap();
+    assert_eq!(decoded.total_weight(), sketch.total_weight());
+    assert_eq!(decoded.estimate(42i64), sketch.estimate(42i64));
+}
+
+#[test]
+#[should_panic(expected = "num_hashes must be at least 1")]
+fn test_invalid_hashes() {
+    CountMinSketch::new(0, 5);
+}
+
+#[test]
+#[should_panic(expected = "num_buckets must be at least 3")]
+fn test_invalid_buckets() {
+    CountMinSketch::new(1, 2);
+}
+
+#[test]
+#[should_panic(expected = "Incompatible sketch configuration.")]
+fn test_merge_incompatible() {
+    let mut left = CountMinSketch::with_seed(3, 64, DEFAULT_SEED);
+    let right = CountMinSketch::with_seed(2, 64, DEFAULT_SEED);
+    left.merge(&right);
+}
+
+#[test]
+fn test_increment_single_key_like_rust_count_min_sketch() {
+    let mut sketch = CountMinSketch::with_seed(4, 32, DEFAULT_SEED);
+    for _ in 0..300 {
+        sketch.update("key");
+    }
+    assert_eq!(sketch.estimate("key"), 300);
+}
+
+#[test]
+fn test_increment_multi_like_rust_count_min_sketch() {
+    let mut sketch = CountMinSketch::with_seed(6, 128, DEFAULT_SEED);
+    for i in 0..1_000_000u64 {
+        sketch.update(i % 100);
+    }
+    for key in 0..100u64 {
+        assert!(sketch.estimate(key) >= 9_000);
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to