This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 9ade42d  docs: add rustdoc examples across sketches (#58)
9ade42d is described below

commit 9ade42d639afd013d19c3454a14773b6d54edc25
Author: Chojan Shang <[email protected]>
AuthorDate: Mon Jan 5 07:11:32 2026 +0800

    docs: add rustdoc examples across sketches (#58)
    
    * docs: add rustdoc examples across sketches
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    * docs: make docs clean and simple
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    * docs: align examples with Rust conventions
    
    Signed-off-by: Chojan Shang <[email protected]>
    
    ---------
    
    Signed-off-by: Chojan Shang <[email protected]>
---
 datasketches/src/countmin/mod.rs       |  19 ++++
 datasketches/src/countmin/sketch.rs    |  90 ++++++++++++++++
 datasketches/src/error.rs              |  10 ++
 datasketches/src/frequencies/mod.rs    |  24 +++++
 datasketches/src/frequencies/sketch.rs | 119 +++++++++++++++++++++
 datasketches/src/hll/mod.rs            |  31 ++++++
 datasketches/src/hll/sketch.rs         |  53 ++++++++++
 datasketches/src/hll/union.rs          |  49 +++++++++
 datasketches/src/resize.rs             |   9 ++
 datasketches/src/tdigest/mod.rs        |  12 +++
 datasketches/src/tdigest/sketch.rs     | 182 +++++++++++++++++++++++++++++++++
 datasketches/src/theta/mod.rs          |   9 ++
 datasketches/src/theta/sketch.rs       |  84 +++++++++++++++
 13 files changed, 691 insertions(+)

diff --git a/datasketches/src/countmin/mod.rs b/datasketches/src/countmin/mod.rs
index 2be9282..9b427e9 100644
--- a/datasketches/src/countmin/mod.rs
+++ b/datasketches/src/countmin/mod.rs
@@ -19,6 +19,25 @@
 //!
 //! The Count-Min sketch provides approximate frequency counts for streaming data
 //! with configurable relative error and confidence bounds.
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use datasketches::countmin::CountMinSketch;
+//! let mut sketch = CountMinSketch::new(5, 256);
+//! sketch.update("apple");
+//! sketch.update_with_weight("banana", 3);
+//! assert!(sketch.estimate("banana") >= 3);
+//! ```
+//!
+//! # Configuration Helpers
+//!
+//! ```rust
+//! # use datasketches::countmin::CountMinSketch;
+//! let buckets = CountMinSketch::suggest_num_buckets(0.01);
+//! let hashes = CountMinSketch::suggest_num_hashes(0.99);
+//! let _sketch = CountMinSketch::new(hashes, buckets);
+//! ```
 
 mod serialization;
 
diff --git a/datasketches/src/countmin/sketch.rs b/datasketches/src/countmin/sketch.rs
index ca08bff..4f8225b 100644
--- a/datasketches/src/countmin/sketch.rs
+++ b/datasketches/src/countmin/sketch.rs
@@ -54,6 +54,14 @@ impl CountMinSketch {
     ///
     /// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
     /// total table size exceeds the supported limit.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let sketch = CountMinSketch::new(4, 128);
+    /// assert_eq!(sketch.num_buckets(), 128);
+    /// ```
     pub fn new(num_hashes: u8, num_buckets: u32) -> Self {
         Self::with_seed(num_hashes, num_buckets, DEFAULT_UPDATE_SEED)
     }
@@ -64,6 +72,14 @@ impl CountMinSketch {
     ///
     /// Panics if `num_hashes` is 0, `num_buckets` is less than 3, or the
     /// total table size exceeds the supported limit.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let sketch = CountMinSketch::with_seed(4, 64, 42);
+    /// assert_eq!(sketch.seed(), 42);
+    /// ```
     pub fn with_seed(num_hashes: u8, num_buckets: u32, seed: u64) -> Self {
         let entries = entries_for_config(num_hashes, num_buckets);
         Self::make(num_hashes, num_buckets, seed, entries)
@@ -127,11 +143,29 @@ impl CountMinSketch {
     }
 
     /// Updates the sketch with a single occurrence of the item.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// sketch.update("apple");
+    /// assert!(sketch.estimate("apple") >= 1);
+    /// ```
     pub fn update<T: Hash>(&mut self, item: T) {
         self.update_with_weight(item, 1);
     }
 
     /// Updates the sketch with the given item and weight.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// sketch.update_with_weight("banana", 3);
+    /// assert!(sketch.estimate("banana") >= 3);
+    /// ```
     pub fn update_with_weight<T: Hash>(&mut self, item: T, weight: i64) {
         if weight == 0 {
             return;
@@ -147,6 +181,15 @@ impl CountMinSketch {
     }
 
     /// Returns the estimated frequency of the given item.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut sketch = CountMinSketch::new(4, 128);
+    /// sketch.update_with_weight("pear", 2);
+    /// assert!(sketch.estimate("pear") >= 2);
+    /// ```
     pub fn estimate<T: Hash>(&self, item: T) -> i64 {
         let num_buckets = self.num_buckets as usize;
         let mut min = i64::MAX;
@@ -178,6 +221,20 @@ impl CountMinSketch {
     /// # Panics
     ///
     /// Panics if the sketches have incompatible configurations.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// let mut left = CountMinSketch::new(4, 128);
+    /// let mut right = CountMinSketch::new(4, 128);
+    ///
+    /// left.update("apple");
+    /// right.update_with_weight("banana", 2);
+    ///
+    /// left.merge(&right);
+    /// assert!(left.estimate("banana") >= 2);
+    /// ```
     pub fn merge(&mut self, other: &CountMinSketch) {
         if std::ptr::eq(self, other) {
             panic!("Cannot merge a sketch with itself.");
@@ -195,6 +252,17 @@ impl CountMinSketch {
     }
 
     /// Serializes this sketch into the DataSketches Count-Min format.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// # let mut sketch = CountMinSketch::new(4, 128);
+    /// # sketch.update("apple");
+    /// let bytes = sketch.serialize();
+    /// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate("apple") >= 1);
+    /// ```
     pub fn serialize(&self) -> Vec<u8> {
         let header_size = PREAMBLE_LONGS_SHORT as usize * LONG_SIZE_BYTES;
         let payload_size = if self.is_empty() {
@@ -227,11 +295,33 @@ impl CountMinSketch {
     }
 
     /// Deserializes a sketch from bytes using the default seed.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// # let mut sketch = CountMinSketch::new(4, 64);
+    /// # sketch.update("apple");
+    /// # let bytes = sketch.serialize();
+    /// let decoded = CountMinSketch::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate("apple") >= 1);
+    /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
         Self::deserialize_with_seed(bytes, DEFAULT_UPDATE_SEED)
     }
 
     /// Deserializes a sketch from bytes using the provided seed.
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// # use datasketches::countmin::CountMinSketch;
+    /// # let mut sketch = CountMinSketch::with_seed(4, 64, 7);
+    /// # sketch.update("apple");
+    /// # let bytes = sketch.serialize();
+    /// let decoded = CountMinSketch::deserialize_with_seed(&bytes, 7).unwrap();
+    /// assert!(decoded.estimate("apple") >= 1);
+    /// ```
     pub fn deserialize_with_seed(bytes: &[u8], seed: u64) -> Result<Self, Error> {
         fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
             move |_| Error::insufficient_data(tag)
diff --git a/datasketches/src/error.rs b/datasketches/src/error.rs
index 624ee0a..e756f2b 100644
--- a/datasketches/src/error.rs
+++ b/datasketches/src/error.rs
@@ -46,6 +46,16 @@ impl fmt::Display for ErrorKind {
 }
 
 /// Error is the error struct returned by all datasketches functions.
+///
+/// # Examples
+///
+/// ```
+/// # use datasketches::error::Error;
+/// # use datasketches::error::ErrorKind;
+/// let err = Error::new(ErrorKind::InvalidArgument, "bad input");
+/// assert_eq!(err.kind(), ErrorKind::InvalidArgument);
+/// assert_eq!(err.message(), "bad input");
+/// ```
 pub struct Error {
     kind: ErrorKind,
     message: String,
diff --git a/datasketches/src/frequencies/mod.rs b/datasketches/src/frequencies/mod.rs
index e461b61..93fb5e4 100644
--- a/datasketches/src/frequencies/mod.rs
+++ b/datasketches/src/frequencies/mod.rs
@@ -23,6 +23,30 @@
 //!
 //! For background, see the Java documentation:
 //! <https://apache.github.io/datasketches-java/9.0.0/org/apache/datasketches/frequencies/FrequentItemsSketch.html>
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use datasketches::frequencies::ErrorType;
+//! # use datasketches::frequencies::FrequentItemsSketch;
+//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
+//! sketch.update_with_count(1, 3);
+//! sketch.update(2);
+//! let rows = sketch.frequent_items(ErrorType::NoFalseNegatives);
+//! assert!(rows.iter().any(|row| *row.item() == 1));
+//! ```
+//!
+//! # Serialization
+//!
+//! ```rust
+//! # use datasketches::frequencies::FrequentItemsSketch;
+//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
+//! sketch.update_with_count(42, 2);
+//!
+//! let bytes = sketch.serialize();
+//! let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+//! assert!(decoded.estimate(&42) >= 2);
+//! ```
 
 mod reverse_purge_item_hash_map;
 mod serialization;
diff --git a/datasketches/src/frequencies/sketch.rs b/datasketches/src/frequencies/sketch.rs
index 28f3325..f399445 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -104,6 +104,16 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     /// # Panics
     ///
     /// Panics if `max_map_size` is not a power of two.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update(1);
+    /// sketch.update(2);
+    /// assert_eq!(sketch.num_active_items(), 2);
+    /// ```
     pub fn new(max_map_size: usize) -> Self {
         let lg_max_map_size = exact_log2(max_map_size);
         Self::with_lg_map_sizes(lg_max_map_size, LG_MIN_MAP_SIZE)
@@ -129,6 +139,15 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     /// Returns the estimated frequency for an item.
     ///
     /// If the item is tracked, this is `item_count + offset`. Otherwise it is zero.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update_with_count(10, 2);
+    /// assert!(sketch.estimate(&10) >= 2);
+    /// ```
     pub fn estimate(&self, item: &T) -> u64 {
         let value = self.hash_map.get(item);
         if value > 0 { value + self.offset } else { 0 }
@@ -199,6 +218,15 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     }
 
     /// Updates the sketch with a count of one.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update(42);
+    /// assert!(sketch.estimate(&42) >= 1);
+    /// ```
     pub fn update(&mut self, item: T) {
         self.update_with_count(item, 1);
     }
@@ -206,6 +234,15 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     /// Updates the sketch with an item and count.
     ///
     /// A count of zero is a no-op.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update_with_count(10, 3);
+    /// assert!(sketch.estimate(&10) >= 3);
+    /// ```
     pub fn update_with_count(&mut self, item: T, count: u64) {
         if count == 0 {
             return;
@@ -220,6 +257,18 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     ///
     /// The other sketch may have a different map size. The merged sketch respects the
     /// larger error tolerance of the inputs.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut left = FrequentItemsSketch::<i64>::new(64);
+    /// let mut right = FrequentItemsSketch::<i64>::new(64);
+    /// left.update(1);
+    /// right.update_with_count(2, 2);
+    /// left.merge(&right);
+    /// assert!(left.estimate(&2) >= 2);
+    /// ```
     pub fn merge(&mut self, other: &Self)
     where
         T: Clone,
@@ -243,6 +292,18 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     /// Returns frequent items using the sketch maximum error as threshold.
     ///
     /// This is equivalent to `frequent_items_with_threshold(self.maximum_error(), error_type)`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::ErrorType;
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update_with_count(1, 5);
+    /// sketch.update(2);
+    /// let rows = sketch.frequent_items(ErrorType::NoFalseNegatives);
+    /// assert!(rows.iter().any(|row| *row.item() == 1));
+    /// ```
     pub fn frequent_items(&self, error_type: ErrorType) -> Vec<Row<T>>
     where
         T: Clone,
@@ -256,6 +317,18 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
     ///
     /// For [`ErrorType::NoFalseNegatives`], items are included when `upper_bound > threshold`.
     /// For [`ErrorType::NoFalsePositives`], items are included when `lower_bound > threshold`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::ErrorType;
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// sketch.update_with_count(1, 5);
+    /// sketch.update(2);
+    /// let rows = sketch.frequent_items_with_threshold(ErrorType::NoFalsePositives, 3);
+    /// assert!(rows.iter().any(|row| *row.item() == 1));
+    /// ```
     pub fn frequent_items_with_threshold(
         &self,
         error_type: ErrorType,
@@ -459,11 +532,33 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
 
 impl FrequentItemsSketch<i64> {
     /// Serializes this sketch into a byte vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// # sketch.update_with_count(7, 2);
+    /// let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&7) >= 2);
+    /// ```
     pub fn serialize(&self) -> Vec<u8> {
         self.serialize_inner(count_i64_items_bytes, serialize_i64_items)
     }
 
     /// Deserializes a sketch from bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
+    /// # sketch.update_with_count(7, 2);
+    /// # let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&7) >= 2);
+    /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
         Self::deserialize_inner(bytes, deserialize_i64_items)
     }
@@ -471,11 +566,35 @@ impl FrequentItemsSketch<i64> {
 
 impl FrequentItemsSketch<String> {
     /// Serializes this sketch into a byte vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<String>::new(64);
+    /// # let apple = "apple".to_string();
+    /// # sketch.update_with_count(apple.clone(), 2);
+    /// let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<String>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&apple) >= 2);
+    /// ```
     pub fn serialize(&self) -> Vec<u8> {
         self.serialize_inner(count_string_items_bytes, serialize_string_items)
     }
 
     /// Deserializes a sketch from bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::frequencies::FrequentItemsSketch;
+    /// # let mut sketch = FrequentItemsSketch::<String>::new(64);
+    /// # let apple = "apple".to_string();
+    /// # sketch.update_with_count(apple.clone(), 2);
+    /// # let bytes = sketch.serialize();
+    /// let decoded = FrequentItemsSketch::<String>::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate(&apple) >= 2);
+    /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
         Self::deserialize_inner(bytes, deserialize_string_items)
     }
diff --git a/datasketches/src/hll/mod.rs b/datasketches/src/hll/mod.rs
index 9bf2e7d..da61ccc 100644
--- a/datasketches/src/hll/mod.rs
+++ b/datasketches/src/hll/mod.rs
@@ -71,6 +71,37 @@
 //!
 //! The serialization format is compatible with Apache DataSketches implementations
 //! in Java and C++, enabling cross-platform sketch exchange.
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use datasketches::hll::HllSketch;
+//! # use datasketches::hll::HllType;
+//! # use datasketches::hll::NumStdDev;
+//! let mut sketch = HllSketch::new(12, HllType::Hll8);
+//! sketch.update("apple");
+//! let upper = sketch.upper_bound(NumStdDev::Two);
+//! assert!(upper >= sketch.estimate());
+//! ```
+//!
+//! # Union
+//!
+//! ```rust
+//! # use datasketches::hll::HllSketch;
+//! # use datasketches::hll::HllType;
+//! # use datasketches::hll::HllUnion;
+//! let mut left = HllSketch::new(10, HllType::Hll8);
+//! let mut right = HllSketch::new(10, HllType::Hll8);
+//! left.update("apple");
+//! right.update("banana");
+//!
+//! let mut union = HllUnion::new(10);
+//! union.update(&left);
+//! union.update(&right);
+//!
+//! let result = union.get_result(HllType::Hll8);
+//! assert!(result.estimate() >= 2.0);
+//! ```
 
 use std::hash::Hash;
 
diff --git a/datasketches/src/hll/sketch.rs b/datasketches/src/hll/sketch.rs
index 64626cd..7fb6c79 100644
--- a/datasketches/src/hll/sketch.rs
+++ b/datasketches/src/hll/sketch.rs
@@ -61,6 +61,15 @@ impl HllSketch {
     /// # Panics
     ///
     /// If lg_config_k is not in range [4, 21]
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// let sketch = HllSketch::new(12, HllType::Hll8);
+    /// assert_eq!(sketch.lg_config_k(), 12);
+    /// ```
     pub fn new(lg_config_k: u8, hll_type: HllType) -> Self {
         assert!(
             (4..=21).contains(&lg_config_k),
@@ -134,6 +143,16 @@ impl HllSketch {
     ///
     /// This accepts any type that implements `Hash`. The value is hashed
     /// and converted to a coupon, which is then inserted into the sketch.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// let mut sketch = HllSketch::new(10, HllType::Hll8);
+    /// sketch.update("apple");
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn update<T: Hash>(&mut self, value: T) {
         let coupon = coupon(value);
         self.update_with_coupon(coupon);
@@ -174,6 +193,16 @@ impl HllSketch {
     }
 
     /// Get the current cardinality estimate
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// let mut sketch = HllSketch::new(10, HllType::Hll8);
+    /// sketch.update("apple");
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn estimate(&self) -> f64 {
         match &self.mode {
             Mode::List { list, .. } => list.container().estimate(),
@@ -213,6 +242,18 @@ impl HllSketch {
     }
 
     /// Deserializes an HLL sketch from bytes
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// # let mut sketch = HllSketch::new(10, HllType::Hll8);
+    /// # sketch.update("apple");
+    /// # let bytes = sketch.serialize();
+    /// let decoded = HllSketch::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate() >= 1.0);
+    /// ```
     pub fn deserialize(bytes: &[u8]) -> Result<HllSketch, Error> {
         fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
             move |_| Error::insufficient_data(tag)
@@ -323,6 +364,18 @@ impl HllSketch {
     }
 
     /// Serializes the HLL sketch to bytes
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// # let mut sketch = HllSketch::new(10, HllType::Hll8);
+    /// # sketch.update("apple");
+    /// let bytes = sketch.serialize();
+    /// let decoded = HllSketch::deserialize(&bytes).unwrap();
+    /// assert!(decoded.estimate() >= 1.0);
+    /// ```
     pub fn serialize(&self) -> Vec<u8> {
         match &self.mode {
             Mode::List { list, hll_type } => list.serialize(self.lg_config_k, *hll_type),
diff --git a/datasketches/src/hll/union.rs b/datasketches/src/hll/union.rs
index 1d6c215..7946972 100644
--- a/datasketches/src/hll/union.rs
+++ b/datasketches/src/hll/union.rs
@@ -66,6 +66,16 @@ impl HllUnion {
     /// # Panics
     ///
     /// Panics if `lg_max_k` is not in the range [4, 21].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllType;
+    /// # use datasketches::hll::HllUnion;
+    /// let mut union = HllUnion::new(10);
+    /// union.update_value("apple");
+    /// let _result = union.get_result(HllType::Hll8);
+    /// ```
     pub fn new(lg_max_k: u8) -> Self {
         assert!(
             (4..=21).contains(&lg_max_k),
@@ -83,6 +93,16 @@ impl HllUnion {
     ///
     /// This accepts any type that implements `Hash`. The value is hashed
     /// and converted to a coupon, which is then inserted into the sketch.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllType;
+    /// # use datasketches::hll::HllUnion;
+    /// let mut union = HllUnion::new(10);
+    /// union.update_value("apple");
+    /// let _result = union.get_result(HllType::Hll8);
+    /// ```
     pub fn update_value<T: Hash>(&mut self, value: T) {
         self.gadget.update(value);
     }
@@ -93,6 +113,24 @@ impl HllUnion {
     /// - Sketches with different lg_k values (resizes/downsamples as needed)
     /// - Sketches in different modes (List, Set, Array4/6/8)
     /// - Sketches with different target HLL types
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllSketch;
+    /// # use datasketches::hll::HllType;
+    /// # use datasketches::hll::HllUnion;
+    /// let mut left = HllSketch::new(10, HllType::Hll8);
+    /// let mut right = HllSketch::new(10, HllType::Hll8);
+    /// left.update("apple");
+    /// right.update("banana");
+    ///
+    /// let mut union = HllUnion::new(10);
+    /// union.update(&left);
+    /// union.update(&right);
+    /// let result = union.get_result(HllType::Hll8);
+    /// assert!(result.estimate() >= 2.0);
+    /// ```
     pub fn update(&mut self, sketch: &HllSketch) {
         if sketch.is_empty() {
             return;
@@ -207,6 +245,17 @@ impl HllUnion {
     /// # Arguments
     ///
     /// * `hll_type` - The target HLL type for the result sketch (Hll4, Hll6, or Hll8)
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::hll::HllType;
+    /// # use datasketches::hll::HllUnion;
+    /// let mut union = HllUnion::new(10);
+    /// union.update_value("apple");
+    /// let result = union.get_result(HllType::Hll6);
+    /// assert!(result.estimate() >= 1.0);
+    /// ```
     pub fn get_result(&self, hll_type: HllType) -> HllSketch {
         let gadget_type = self.gadget.target_type();
 
diff --git a/datasketches/src/resize.rs b/datasketches/src/resize.rs
index caf87ab..1255bd7 100644
--- a/datasketches/src/resize.rs
+++ b/datasketches/src/resize.rs
@@ -32,6 +32,15 @@
 /// reached.
 ///
 /// Similarly, "X4" is a factor of 4 and "X8" is a factor of 8.
+///
+/// # Examples
+///
+/// ```
+/// # use datasketches::ResizeFactor;
+/// let factor = ResizeFactor::X4;
+/// assert_eq!(factor.value(), 4);
+/// assert_eq!(factor.lg_value(), 2);
+/// ```
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ResizeFactor {
     /// Do not resize. Sketch will be configured to full size.
diff --git a/datasketches/src/tdigest/mod.rs b/datasketches/src/tdigest/mod.rs
index ad9ca42..d1a80c5 100644
--- a/datasketches/src/tdigest/mod.rs
+++ b/datasketches/src/tdigest/mod.rs
@@ -47,6 +47,18 @@
 //! [Datasketches page on t-digest](https://datasketches.apache.org/docs/tdigest/tdigest.html).
 //!
 //! [paper]: https://arxiv.org/abs/1902.04023
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use datasketches::tdigest::TDigestMut;
+//! let mut sketch = TDigestMut::new(100);
+//! sketch.update(1.0);
+//! sketch.update(2.0);
+//! let median = sketch.quantile(0.5).unwrap();
+//! let frozen = sketch.freeze();
+//! assert!(frozen.rank(2.0).is_some());
+//! ```
 
 mod serialization;
 
diff --git a/datasketches/src/tdigest/sketch.rs b/datasketches/src/tdigest/sketch.rs
index ddf440f..037953d 100644
--- a/datasketches/src/tdigest/sketch.rs
+++ b/datasketches/src/tdigest/sketch.rs
@@ -63,6 +63,14 @@ impl TDigestMut {
     /// # Panics
     ///
     /// Panics if k is less than 10
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// let sketch = TDigestMut::new(100);
+    /// assert_eq!(sketch.k(), 100);
+    /// ```
     pub fn new(k: u16) -> Self {
         Self::make(
             k,
@@ -82,6 +90,14 @@ impl TDigestMut {
     /// # Errors
     ///
     /// If k is less than 10, returns [`ErrorKind::InvalidArgument`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// let sketch = TDigestMut::try_new(20).unwrap();
+    /// assert_eq!(sketch.k(), 20);
+    /// ```
     pub fn try_new(k: u16) -> Result<Self, Error> {
         if k < 10 {
             return Err(Error::new(
@@ -134,6 +150,15 @@ impl TDigestMut {
     /// Update this TDigest with the given value.
     ///
     /// [f64::NAN], [f64::INFINITY], and [f64::NEG_INFINITY] values are ignored.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// let mut sketch = TDigestMut::new(100);
+    /// sketch.update(1.0);
+    /// assert!(sketch.total_weight() >= 1);
+    /// ```
     pub fn update(&mut self, value: f64) {
         if value.is_nan() || value.is_infinite() {
             return;
@@ -182,6 +207,18 @@ impl TDigestMut {
     }
 
     /// Merge the given TDigest into this one
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// let mut left = TDigestMut::new(100);
+    /// let mut right = TDigestMut::new(100);
+    /// left.update(1.0);
+    /// right.update(2.0);
+    /// left.merge(&right);
+    /// assert_eq!(left.total_weight(), 2);
+    /// ```
     pub fn merge(&mut self, other: &TDigestMut) {
         if other.is_empty() {
             return;
@@ -209,6 +246,16 @@ impl TDigestMut {
     }
 
     /// Freezes this TDigest into an immutable one.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// let mut sketch = TDigestMut::new(100);
+    /// sketch.update(1.0);
+    /// let frozen = sketch.freeze();
+    /// assert!(!frozen.is_empty());
+    /// ```
     pub fn freeze(mut self) -> TDigest {
         self.compress();
         TDigest {
@@ -232,6 +279,18 @@ impl TDigestMut {
     }
 
     /// See [`TDigest::cdf`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let cdf = sketch.cdf(&[1.5]).unwrap();
+    /// assert_eq!(cdf.len(), 2);
+    /// ```
     pub fn cdf(&mut self, split_points: &[f64]) -> Option<Vec<f64>> {
         check_split_points(split_points);
 
@@ -243,6 +302,18 @@ impl TDigestMut {
     }
 
     /// See [`TDigest::pmf`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let pmf = sketch.pmf(&[1.5]).unwrap();
+    /// assert_eq!(pmf.len(), 2);
+    /// ```
     pub fn pmf(&mut self, split_points: &[f64]) -> Option<Vec<f64>> {
         check_split_points(split_points);
 
@@ -254,6 +325,18 @@ impl TDigestMut {
     }
 
     /// See [`TDigest::rank`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let rank = sketch.rank(2.0).unwrap();
+    /// assert!((0.0..=1.0).contains(&rank));
+    /// ```
     pub fn rank(&mut self, value: f64) -> Option<f64> {
         assert!(!value.is_nan(), "value must not be NaN");
 
@@ -275,6 +358,18 @@ impl TDigestMut {
     }
 
     /// See [`TDigest::quantile`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let median = sketch.quantile(0.5).unwrap();
+    /// assert!((1.0..=3.0).contains(&median));
+    /// ```
     pub fn quantile(&mut self, rank: f64) -> Option<f64> {
         assert!((0.0..=1.0).contains(&rank), "rank must be in [0.0, 1.0]");
 
@@ -286,6 +381,17 @@ impl TDigestMut {
     }
 
     /// Serializes this TDigest to bytes.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # sketch.update(1.0);
+    /// let bytes = sketch.serialize();
+    /// let decoded = TDigestMut::deserialize(&bytes, false).unwrap();
+    /// assert_eq!(decoded.max_value(), Some(1.0));
+    /// ```
     pub fn serialize(&mut self) -> Vec<u8> {
         self.compress();
 
@@ -367,6 +473,18 @@ impl TDigestMut {
     ///
     /// [^1]: This is to support reading the `tdigest<float>` format from the C++ implementation.
     /// [^2]: <https://github.com/tdunning/t-digest>
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # sketch.update(1.0);
+    /// # sketch.update(2.0);
+    /// # let bytes = sketch.serialize();
+    /// let decoded = TDigestMut::deserialize(&bytes, false).unwrap();
+    /// assert_eq!(decoded.max_value(), Some(2.0));
+    /// ```
     pub fn deserialize(bytes: &[u8], is_f32: bool) -> Result<Self, Error> {
         fn make_error(tag: &'static str) -> impl FnOnce(std::io::Error) -> Error {
             move |_| Error::insufficient_data(tag)
@@ -747,6 +865,19 @@ impl TDigest {
     ///
     /// Panics if `split_points` is not unique, not monotonically increasing, or contains `NaN`
     /// values.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let digest = sketch.freeze();
+    /// let cdf = digest.cdf(&[1.5]).unwrap();
+    /// assert_eq!(cdf.len(), 2);
+    /// ```
     pub fn cdf(&self, split_points: &[f64]) -> Option<Vec<f64>> {
         self.view().cdf(split_points)
     }
@@ -770,6 +901,19 @@ impl TDigest {
     ///
     /// Panics if `split_points` is not unique, not monotonically increasing, or contains `NaN`
     /// values.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let digest = sketch.freeze();
+    /// let pmf = digest.pmf(&[1.5]).unwrap();
+    /// assert_eq!(pmf.len(), 2);
+    /// ```
     pub fn pmf(&self, split_points: &[f64]) -> Option<Vec<f64>> {
         self.view().pmf(split_points)
     }
@@ -781,6 +925,19 @@ impl TDigest {
     /// # Panics
     ///
     /// Panics if the value is `NaN`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let digest = sketch.freeze();
+    /// let rank = digest.rank(2.0).unwrap();
+    /// assert!((0.0..=1.0).contains(&rank));
+    /// ```
     pub fn rank(&self, value: f64) -> Option<f64> {
         assert!(!value.is_nan(), "value must not be NaN");
         self.view().rank(value)
@@ -793,12 +950,37 @@ impl TDigest {
     /// # Panics
     ///
     /// Panics if rank is not in [0.0, 1.0].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # for value in [1.0, 2.0, 3.0] {
+    /// #     sketch.update(value);
+    /// # }
+    /// let digest = sketch.freeze();
+    /// let q = digest.quantile(0.5).unwrap();
+    /// assert!((1.0..=3.0).contains(&q));
+    /// ```
     pub fn quantile(&self, rank: f64) -> Option<f64> {
         assert!((0.0..=1.0).contains(&rank), "rank must be in [0.0, 1.0]");
         self.view().quantile(rank)
     }
 
     /// Converts this immutable TDigest into a mutable one.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::tdigest::TDigestMut;
+    /// # let mut sketch = TDigestMut::new(100);
+    /// # sketch.update(1.0);
+    /// # let digest = sketch.freeze();
+    /// let mut mutable = digest.unfreeze();
+    /// mutable.update(2.0);
+    /// assert_eq!(mutable.total_weight(), 2);
+    /// ```
     pub fn unfreeze(self) -> TDigestMut {
         TDigestMut::make(
             self.k,
diff --git a/datasketches/src/theta/mod.rs b/datasketches/src/theta/mod.rs
index 0d50348..ccaac52 100644
--- a/datasketches/src/theta/mod.rs
+++ b/datasketches/src/theta/mod.rs
@@ -28,6 +28,15 @@
 //! configurable accuracy and memory usage. The implementation supports:
 //!
 //! - **ThetaSketch**: Mutable sketch for building from input data
+//!
+//! # Usage
+//!
+//! ```rust
+//! # use datasketches::theta::ThetaSketch;
+//! let mut sketch = ThetaSketch::builder().build();
+//! sketch.update("apple");
+//! assert!(sketch.estimate() >= 1.0);
+//! ```
 
 mod hash_table;
 mod sketch;
diff --git a/datasketches/src/theta/sketch.rs b/datasketches/src/theta/sketch.rs
index 0ad5357..0baa0f7 100644
--- a/datasketches/src/theta/sketch.rs
+++ b/datasketches/src/theta/sketch.rs
@@ -38,11 +38,28 @@ pub struct ThetaSketch {
 
 impl ThetaSketch {
     /// Create a new builder for ThetaSketch
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let sketch = ThetaSketch::builder().lg_k(12).build();
+    /// assert_eq!(sketch.lg_k(), 12);
+    /// ```
     pub fn builder() -> ThetaSketchBuilder {
         ThetaSketchBuilder::default()
     }
 
     /// Update the sketch with a hashable value
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let mut sketch = ThetaSketch::builder().build();
+    /// sketch.update("apple");
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn update<T: Hash>(&mut self, value: T) {
         let hash = self.table.hash_and_screen(value);
         if hash != 0 {
@@ -51,6 +68,15 @@ impl ThetaSketch {
     }
 
     /// Update the sketch with a f64 value
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let mut sketch = ThetaSketch::builder().build();
+    /// sketch.update_f64(1.0);
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn update_f64(&mut self, value: f64) {
         // Canonicalize double for compatibility with Java
         let canonical = canonical_double(value);
@@ -58,11 +84,29 @@ impl ThetaSketch {
     }
 
     /// Update the sketch with a f32 value
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let mut sketch = ThetaSketch::builder().build();
+    /// sketch.update_f32(1.0);
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn update_f32(&mut self, value: f32) {
         self.update_f64(value as f64);
     }
 
     /// Return cardinality estimate
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// # let mut sketch = ThetaSketch::builder().build();
+    /// # sketch.update("apple");
+    /// assert!(sketch.estimate() >= 1.0);
+    /// ```
     pub fn estimate(&self) -> f64 {
         if self.is_empty() {
             return 0.0;
@@ -113,6 +157,16 @@ impl ThetaSketch {
     }
 
     /// Return iterator over hash values
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// # let mut sketch = ThetaSketch::builder().build();
+    /// # sketch.update("apple");
+    /// let mut iter = sketch.iter();
+    /// assert!(iter.next().is_some());
+    /// ```
     pub fn iter(&self) -> impl Iterator<Item = u64> + '_ {
         self.table.iter()
     }
@@ -144,6 +198,14 @@ impl ThetaSketchBuilder {
     /// # Panics
     ///
     /// If lg_k is not in range [5, 26]
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let sketch = ThetaSketch::builder().lg_k(12).build();
+    /// assert_eq!(sketch.lg_k(), 12);
+    /// ```
     pub fn lg_k(mut self, lg_k: u8) -> Self {
         assert!(
             (MIN_LG_K..=MAX_LG_K).contains(&lg_k),
@@ -167,6 +229,13 @@ impl ThetaSketchBuilder {
     /// # Panics
     ///
     /// If p is not in range [0.0, 1.0]
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let _sketch = ThetaSketch::builder().sampling_probability(0.5).build();
+    /// ```
     pub fn sampling_probability(mut self, probability: f32) -> Self {
         assert!(
             (0.0..=1.0).contains(&probability),
@@ -177,12 +246,27 @@ impl ThetaSketchBuilder {
     }
 
     /// Set hash seed.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let _sketch = ThetaSketch::builder().seed(7).build();
+    /// ```
     pub fn seed(mut self, seed: u64) -> Self {
         self.seed = seed;
         self
     }
 
     /// Build the ThetaSketch.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use datasketches::theta::ThetaSketch;
+    /// let sketch = ThetaSketch::builder().lg_k(10).build();
+    /// assert_eq!(sketch.lg_k(), 10);
+    /// ```
     pub fn build(self) -> ThetaSketch {
         let table = ThetaHashTable::new(
             self.lg_k,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to