This is an automated email from the ASF dual-hosted git repository.
tison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 6546d66 refactor: export FrequentItemValue and improve docs (#98)
6546d66 is described below
commit 6546d665c629e311b27279c2212a7b92b922fc09
Author: tison <[email protected]>
AuthorDate: Thu Feb 19 19:22:24 2026 +0800
refactor: export FrequentItemValue and improve docs (#98)
Signed-off-by: tison <[email protected]>
---
CHANGELOG.md | 3 +-
datasketches/src/bloom/builder.rs | 16 +--
datasketches/src/bloom/mod.rs | 20 ++--
datasketches/src/bloom/sketch.rs | 30 +++---
datasketches/src/common/binomial_bounds.rs | 52 ++++-----
datasketches/src/countmin/sketch.rs | 8 +-
datasketches/src/frequencies/mod.rs | 67 ++++++++++--
.../src/frequencies/reverse_purge_item_hash_map.rs | 4 +-
datasketches/src/frequencies/sketch.rs | 116 ++++-----------------
datasketches/src/hash/mod.rs | 1 -
datasketches/src/hll/array4.rs | 4 +-
datasketches/src/hll/array8.rs | 4 +-
datasketches/src/hll/estimator.rs | 44 ++++----
datasketches/src/hll/harmonic_numbers.rs | 4 +-
datasketches/src/hll/mod.rs | 26 ++---
datasketches/src/hll/serialization.rs | 4 +-
datasketches/src/hll/sketch.rs | 16 +--
datasketches/src/hll/union.rs | 24 ++---
datasketches/src/theta/bit_pack.rs | 12 +--
datasketches/src/theta/hash_table.rs | 4 +-
datasketches/src/theta/mod.rs | 4 +-
datasketches/src/theta/sketch.rs | 4 +-
datasketches/tests/bloom_serialization_test.rs | 9 --
datasketches/tests/hll_serialization_test.rs | 15 +--
datasketches/tests/hll_union_test.rs | 12 +--
25 files changed, 229 insertions(+), 274 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab7a242..af1928f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,7 +13,8 @@ All significant changes to this project will be documented in
this file.
* `CountMinSketch` with unsigned values now supports `halve` and `decay`
operations.
* `CpcSketch` and `CpcUnion` are now available for cardinality estimation.
-* `FrequentItemsSketch` now supports serde for `u64` value.
+* `FrequentItemsSketch` now supports serde for any value implementing
`FrequentItemValue` (built-in support for `i64`, `u64`, and `String`).
+* Expose `codec::SketchBytes`, `codec::SketchSlice`, and `FrequentItemValue`
as public API.
## v0.2.0 (2026-01-14)
diff --git a/datasketches/src/bloom/builder.rs
b/datasketches/src/bloom/builder.rs
index 1918a13..6cb0158 100644
--- a/datasketches/src/bloom/builder.rs
+++ b/datasketches/src/bloom/builder.rs
@@ -22,9 +22,9 @@ use crate::hash::DEFAULT_UPDATE_SEED;
/// Builder for creating [`BloomFilter`] instances.
///
/// Provides two construction modes:
-/// - [`with_accuracy()`](Self::with_accuracy): Specify target items and false
positive rate
+/// * [`with_accuracy()`](Self::with_accuracy): Specify target items and false
positive rate
/// (recommended)
-/// - [`with_size()`](Self::with_size): Specify requested bit count and hash
functions (manual)
+/// * [`with_size()`](Self::with_size): Specify requested bit count and hash
functions (manual)
#[derive(Debug, Clone)]
pub struct BloomFilterBuilder {
num_bits: u64,
@@ -52,8 +52,8 @@ impl BloomFilterBuilder {
///
/// # Arguments
///
- /// - `max_items`: Maximum expected number of distinct items
- /// - `fpp`: Target false positive probability (e.g., 0.01 for 1%)
+ /// * `max_items`: Maximum expected number of distinct items
+ /// * `fpp`: Target false positive probability (e.g., 0.01 for 1%)
///
/// # Panics
///
@@ -95,14 +95,14 @@ impl BloomFilterBuilder {
///
/// # Arguments
///
- /// - `num_bits`: Total number of bits in the filter
- /// - `num_hashes`: Number of hash functions to use
+ /// * `num_bits`: Total number of bits in the filter
+ /// * `num_hashes`: Number of hash functions to use
///
/// # Panics
///
/// Panics if any of:
- /// - `num_bits` < [`Self::MIN_NUM_BITS`] or `num_bits` >
[`Self::MAX_NUM_BITS`]
- /// - `num_hashes` < [`Self::MIN_NUM_HASHES`] or `num_hashes` >
[`Self::MIN_NUM_HASHES`]
+ /// * `num_bits` < [`Self::MIN_NUM_BITS`] or `num_bits` >
[`Self::MAX_NUM_BITS`]
+ /// * `num_hashes` < [`Self::MIN_NUM_HASHES`] or `num_hashes` >
[`Self::MAX_NUM_HASHES`]
///
/// # Examples
///
diff --git a/datasketches/src/bloom/mod.rs b/datasketches/src/bloom/mod.rs
index e5ac69e..8e58139 100644
--- a/datasketches/src/bloom/mod.rs
+++ b/datasketches/src/bloom/mod.rs
@@ -23,10 +23,10 @@
//!
//! # Properties
//!
-//! - **No false negatives**: If an item was inserted, `contains()` will
always return `true`
-//! - **Possible false positives**: `contains()` may return `true` for items
never inserted
-//! - **Fixed size**: Unlike typical sketches, Bloom filters do not resize
automatically
-//! - **Linear space**: Size is proportional to the expected number of
distinct items
+//! * **No false negatives**: If an item was inserted, `contains()` will
always return `true`
+//! * **Possible false positives**: `contains()` may return `true` for items
never inserted
+//! * **Fixed size**: Unlike typical sketches, Bloom filters do not resize
automatically
+//! * **Linear space**: Size is proportional to the expected number of
distinct items
//!
//! # Usage
//!
@@ -109,15 +109,15 @@
//!
//! # Implementation Details
//!
-//! - Uses XXHash64 for hashing
-//! - Implements double hashing (Kirsch-Mitzenmacher method) for k hash
functions
-//! - Bits packed efficiently in `u64` words
-//! - Compatible serialization format (family ID: 21)
+//! * Uses XXHash64 for hashing
+//! * Implements double hashing (Kirsch-Mitzenmacher method) for k hash
functions
+//! * Bits packed efficiently in `u64` words
+//! * Compatible serialization format (family ID: 21)
//!
//! # References
//!
-//! - Bloom, Burton H. (1970). "Space/time trade-offs in hash coding with
allowable errors"
-//! - Kirsch and Mitzenmacher (2008). "Less Hashing, Same Performance:
Building a Better Bloom
+//! * Bloom, Burton H. (1970). "Space/time trade-offs in hash coding with
allowable errors"
+//! * Kirsch and Mitzenmacher (2008). "Less Hashing, Same Performance:
Building a Better Bloom
//! Filter"
mod builder;
diff --git a/datasketches/src/bloom/sketch.rs b/datasketches/src/bloom/sketch.rs
index 304c450..d7332e0 100644
--- a/datasketches/src/bloom/sketch.rs
+++ b/datasketches/src/bloom/sketch.rs
@@ -33,9 +33,9 @@ const EMPTY_FLAG_MASK: u8 = 1 << 2;
/// A Bloom filter for probabilistic set membership testing.
///
/// Provides fast membership queries with:
-/// - No false negatives (inserted items always return `true`)
-/// - Tunable false positive rate
-/// - Constant space usage
+/// * No false negatives (inserted items always return `true`)
+/// * Tunable false positive rate
+/// * Constant space usage
///
/// Use [`super::BloomFilterBuilder`] to construct instances.
#[derive(Debug, Clone, PartialEq)]
@@ -54,8 +54,8 @@ impl BloomFilter {
/// Tests whether an item is possibly in the set.
///
/// Returns:
- /// - `true`: Item was **possibly** inserted (or false positive)
- /// - `false`: Item was **definitely not** inserted
+ /// * `true`: Item was **possibly** inserted (or false positive)
+ /// * `false`: Item was **definitely not** inserted
///
/// # Examples
///
@@ -290,8 +290,8 @@ impl BloomFilter {
///
/// Uses the approximation: `load_factor^k`
/// where:
- /// - load_factor = fraction of bits set (bits_used / capacity)
- /// - k = num_hashes
+ /// * load_factor = fraction of bits set (bits_used / capacity)
+ /// * k = num_hashes
///
/// This assumes uniform bit distribution and is more accurate than
/// trying to estimate insertion count from the load factor.
@@ -307,9 +307,9 @@ impl BloomFilter {
/// Checks if two filters are compatible for merging.
///
/// Filters are compatible if they have the same:
- /// - Capacity (number of bits)
- /// - Number of hash functions
- /// - Seed
+ /// * Capacity (number of bits)
+ /// * Number of hash functions
+ /// * Seed
pub fn is_compatible(&self, other: &Self) -> bool {
self.bit_array.len() == other.bit_array.len()
&& self.num_hashes == other.num_hashes
@@ -379,9 +379,9 @@ impl BloomFilter {
/// # Errors
///
/// Returns an error if:
- /// - The data is truncated or corrupted
- /// - The family ID doesn't match (not a Bloom filter)
- /// - The serial version is unsupported
+ /// * The data is truncated or corrupted
+ /// * The family ID doesn't match (not a Bloom filter)
+ /// * The serial version is unsupported
///
/// # Examples
///
@@ -501,8 +501,8 @@ impl BloomFilter {
/// Computes the two base hash values using XXHash64.
///
/// Uses a two-hash approach:
- /// - h0 = XXHash64(item, seed)
- /// - h1 = XXHash64(item, h0)
+ /// * h0 = XXHash64(item, seed)
+ /// * h1 = XXHash64(item, h0)
fn compute_hash<T: Hash>(&self, item: &T) -> (u64, u64) {
// First hash with the configured seed
let mut hasher = XxHash64::with_seed(self.seed);
diff --git a/datasketches/src/common/binomial_bounds.rs
b/datasketches/src/common/binomial_bounds.rs
index afeeddc..c2b7d74 100644
--- a/datasketches/src/common/binomial_bounds.rs
+++ b/datasketches/src/common/binomial_bounds.rs
@@ -274,9 +274,9 @@ static UB_EQUIV_TABLE: [f64; 363] = [
///
/// # Arguments
///
-/// * `num_samples` - The number of samples in the sample set.
-/// * `theta` - The sampling probability. Must be in the range (0.0, 1.0].
-/// * `num_std_dev` - The number of standard deviations for confidence bounds.
+/// * `num_samples`: The number of samples in the sample set.
+/// * `theta`: The sampling probability. Must be in the range (0.0, 1.0].
+/// * `num_std_dev`: The number of standard deviations for confidence bounds.
///
/// # Returns
///
@@ -301,11 +301,11 @@ pub(crate) fn lower_bound(
///
/// # Arguments
///
-/// * `num_samples` - The number of samples in the sample set.
-/// * `theta` - The sampling probability. Must be in the range `(0.0, 1.0]`.
-/// * `num_std_dev` - The number of standard deviations for confidence bounds.
-/// * `no_data_seen` - This is normally false. However, in the case where you
have zero samples and
-/// a theta < 1.0, this flag enables the distinction between a virgin case
when no actual data has
+/// * `num_samples`: The number of samples in the sample set.
+/// * `theta`: The sampling probability. Must be in the range `(0.0, 1.0]`.
+/// * `num_std_dev`: The number of standard deviations for confidence bounds.
+/// * `no_data_seen`: This is normally false. However, in the case where you
have zero samples and a
+/// theta < 1.0, this flag enables the distinction between a virgin case
when no actual data has
/// been seen and the case where the estimate may be zero but an upper error
bound may still
/// exist.
///
@@ -367,16 +367,16 @@ fn cont_classic_ub(num_samples: u64, theta: f64,
num_std_devs: f64) -> f64 {
///
/// # Arguments
///
-/// * `num_samples` - The number of observed samples (k). Must be >= 1.
-/// * `p` - The sampling probability. Must satisfy: 0 < p < 1.
-/// * `delta` - The tail probability. Must satisfy: 0 < delta < 1.
+/// * `num_samples`: The number of observed samples (k). Must be >= 1.
+/// * `p`: The sampling probability. Must satisfy: 0 < p < 1.
+/// * `delta`: The tail probability. Must satisfy: 0 < delta < 1.
///
/// # Invariants
///
-/// - `num_samples >= 1`
-/// - `0.0 < p < 1.0`
-/// - `0.0 < delta < 1.0`
-/// - `(num_samples / p) < 500.0` (enforced for performance and numerical
stability)
+/// * `num_samples >= 1`
+/// * `0.0 < p < 1.0`
+/// * `0.0 < delta < 1.0`
+/// * `(num_samples / p) < 500.0` (enforced for performance and numerical
stability)
///
/// # Returns
///
@@ -413,15 +413,15 @@ fn special_n_star(num_samples: u64, p: f64, delta: f64)
-> Result<u64, Error> {
///
/// # Arguments
///
-/// * `num_samples` - The number of observed samples (k). Must be >= 1.
-/// * `p` - The sampling probability. Must satisfy: 0 < p < 1.
-/// * `delta` - The tail probability. Must satisfy: 0 < delta < 1.
+/// * `num_samples`: The number of observed samples (k). Must be >= 1.
+/// * `p`: The sampling probability. Must satisfy: 0 < p < 1.
+/// * `delta`: The tail probability. Must satisfy: 0 < delta < 1.
///
/// # Invariants
///
-/// - `num_samples >= 1`
-/// - `0.0 < p < 1.0`
-/// - `0.0 < delta < 1.0`
+/// * `num_samples >= 1`
+/// * `0.0 < p < 1.0`
+/// * `0.0 < delta < 1.0`
///
/// # Returns
///
@@ -452,14 +452,14 @@ fn special_n_prime_b(num_samples: u64, p: f64, delta:
f64) -> Result<u64, Error>
///
/// # Arguments
///
-/// * `num_samples` - The number of observed samples (k). Must be >= 1.
-/// * `p` - The sampling probability. Must satisfy: 0 < p < 1.
-/// * `delta` - The tail probability. Must satisfy: 0 < delta < 1.
+/// * `num_samples`: The number of observed samples (k). Must be >= 1.
+/// * `p`: The sampling probability. Must satisfy: 0 < p < 1.
+/// * `delta`: The tail probability. Must satisfy: 0 < delta < 1.
///
/// # Invariants
///
-/// - `(num_samples / p) < 500.0` (enforced for performance)
-/// - A super-small delta could also make it slow.
+/// * `(num_samples / p) < 500.0` (enforced for performance)
+/// * A super-small delta could also make it slow.
fn special_n_prime_f(num_samples: u64, p: f64, delta: f64) -> Result<u64,
Error> {
// Use a different algorithm if the following is true; this one will be
too slow, or worse.
if (num_samples as f64 / p) >= 500.0 {
diff --git a/datasketches/src/countmin/sketch.rs
b/datasketches/src/countmin/sketch.rs
index 3bc50a3..2116b75 100644
--- a/datasketches/src/countmin/sketch.rs
+++ b/datasketches/src/countmin/sketch.rs
@@ -75,10 +75,10 @@ impl<T: CountMinValue> CountMinSketch<T> {
/// # Panics
///
/// Panics if any of:
- /// - `num_hashes` is 0
- /// - `num_buckets` is less than 3
- /// - the total table size exceeds the supported limit
- /// - the computed seed hash is zero
+ /// * `num_hashes` is 0
+ /// * `num_buckets` is less than 3
+ /// * the total table size exceeds the supported limit
+ /// * the computed seed hash is zero
///
/// # Examples
///
diff --git a/datasketches/src/frequencies/mod.rs
b/datasketches/src/frequencies/mod.rs
index 93fb5e4..d5e7cf0 100644
--- a/datasketches/src/frequencies/mod.rs
+++ b/datasketches/src/frequencies/mod.rs
@@ -17,16 +17,66 @@
//! Frequency sketches for finding heavy hitters in data streams.
//!
-//! This module implements the Frequent Items sketch from Apache DataSketches.
It tracks
-//! approximate frequencies in a stream and can report heavy hitters with
explicit
-//! error guarantees (no false negatives or no false positives).
+//! # Overview
//!
-//! For background, see the Java documentation:
-//!
<https://apache.github.io/datasketches-java/9.0.0/org/apache/datasketches/frequencies/FrequentItemsSketch.html>
+//! This sketch is based on the paper ["A High-Performance Algorithm for
Identifying Frequent Items
+//! in Data Streams"](https://arxiv.org/abs/1705.07001) by Daniel Anderson,
Pryce Bevan, Kevin Lang,
+//! Edo Liberty, Lee Rhodes, and Justin Thaler.
//!
-//! # Usage
+//! This sketch is useful for tracking approximate frequencies of items of
type `T` that implements
+//! [`FrequentItemValue`], with optional associated counts (`T` item, `u64`
count) that are members
+//! of a multiset of such items. The true frequency of an item is defined to
be the sum of
+//! associated counts.
//!
-//! ```rust
+//! This implementation provides the following capabilities:
+//! * Estimate the frequency of an item.
+//! * Return upper and lower bounds of any item, such that the true frequency
is always between the
+//! upper and lower bounds.
+//! * Return a global maximum error that holds for all items in the stream.
+//! * Return an array of frequent items that qualify either
[`ErrorType::NoFalsePositives`] or
+//! [`ErrorType::NoFalseNegatives`].
+//! * Merge itself with another sketch created from this module.
+//! * Serialize to bytes, or deserialize from bytes, for storage or
transmission.
+//!
+//! # Accuracy
+//!
+//! If fewer than `0.75 * max_map_size` different items are inserted into the
sketch the estimated
+//! frequencies returned by the sketch will be exact.
+//!
+//! The logic of the frequent items sketch is such that the stored counts and
true counts are never
+//! too different. More specifically, for any item, the sketch can return an
estimate of the true
+//! frequency of item, along with upper and lower bounds on the frequency
(that hold
+//! deterministically).
+//!
+//! For this implementation and for a specific active item, it is guaranteed
that the true frequency
+//! will be between the Upper Bound (UB) and the Lower Bound (LB) computed for
that item.
+//! Specifically, `(UB - LB) ≤ W * epsilon`, where `W` denotes the sum of all
item counts, and
+//! `epsilon = 3.5/M`, where `M` is the `max_map_size`.
+//!
+//! This is the worst case guarantee that applies to arbitrary inputs. [^1]
+//! For inputs typically seen in practice (`UB - LB`) is usually much smaller.
+//!
+//! [^1]: For speed we do employ some randomization that introduces a small
probability that our
+//! proof of the worst-case bound might not apply to a given run. However, we
have ensured that this
+//! probability is extremely small. For example, if the stream causes one
table purge (rebuild),
+//! our proof of the worst case bound applies with probability at least `1 -
1E-14`. If the stream
+//! causes `1E9` purges, our proof applies with probability at least `1 -
1E-5`.
+//!
+//! # Background
+//!
+//! This code implements a variant of what is commonly known as the
"Misra-Gries algorithm".
+//! Variants of it were discovered and rediscovered and redesigned several
times over the years:
+//! * "Finding repeated elements", Misra, Gries, 1982
+//! * "Frequency estimation of Internet packet streams with limited space"
Demaine, Lopez-Ortiz,
+//! Munro, 2002
+//! * "A simple algorithm for finding frequent elements in streams and bags"
Karp, Shenker,
+//! Papadimitriou, 2003
+//! * "Efficient Computation of Frequent and Top-k Elements in Data Streams"
Metwally, Agrawal,
+//! Abbadi, 2006
+//!
+//! # Examples
+//!
+//! ```
//! # use datasketches::frequencies::ErrorType;
//! # use datasketches::frequencies::FrequentItemsSketch;
//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
@@ -38,7 +88,7 @@
//!
//! # Serialization
//!
-//! ```rust
+//! ```
//! # use datasketches::frequencies::FrequentItemsSketch;
//! let mut sketch = FrequentItemsSketch::<i64>::new(64);
//! sketch.update_with_count(42, 2);
@@ -52,6 +102,7 @@ mod reverse_purge_item_hash_map;
mod serialization;
mod sketch;
+pub use self::serialization::FrequentItemValue;
pub use self::sketch::ErrorType;
pub use self::sketch::FrequentItemsSketch;
pub use self::sketch::Row;
diff --git a/datasketches/src/frequencies/reverse_purge_item_hash_map.rs
b/datasketches/src/frequencies/reverse_purge_item_hash_map.rs
index f934b87..79ed290 100644
--- a/datasketches/src/frequencies/reverse_purge_item_hash_map.rs
+++ b/datasketches/src/frequencies/reverse_purge_item_hash_map.rs
@@ -192,7 +192,7 @@ impl<T: Eq + Hash> ReversePurgeItemHashMap<T> {
T: Clone,
{
if self.num_active == 0 {
- return Vec::new();
+ return vec![];
}
let mut keys = Vec::with_capacity(self.num_active);
for i in 0..self.keys.len() {
@@ -208,7 +208,7 @@ impl<T: Eq + Hash> ReversePurgeItemHashMap<T> {
/// Returns the active values in the map.
pub fn active_values(&self) -> Vec<u64> {
if self.num_active == 0 {
- return Vec::new();
+ return vec![];
}
let mut values = Vec::with_capacity(self.num_active);
for i in 0..self.values.len() {
diff --git a/datasketches/src/frequencies/sketch.rs
b/datasketches/src/frequencies/sketch.rs
index 13c79f9..83de1cf 100644
--- a/datasketches/src/frequencies/sketch.rs
+++ b/datasketches/src/frequencies/sketch.rs
@@ -85,7 +85,7 @@ impl<T> Row<T> {
/// The sketch tracks approximate item frequencies and can return estimates
with
/// guaranteed upper and lower bounds.
///
-/// See [`crate::frequencies`] for an overview and error guarantees.
+/// See the [module level documentation](super) for an overview and error
guarantees.
#[derive(Debug, Clone)]
pub struct FrequentItemsSketch<T> {
lg_max_map_size: u8,
@@ -296,7 +296,7 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
/// Returns frequent items using the sketch maximum error as threshold.
///
- /// This is equivalent to
`frequent_items_with_threshold(self.maximum_error(), error_type)`.
+ /// This is equivalent to `frequent_items_with_threshold(error_type,
self.maximum_error())`.
///
/// # Examples
///
@@ -343,7 +343,7 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
T: Clone,
{
let threshold = threshold.max(self.offset);
- let mut rows = Vec::new();
+ let mut rows = vec![];
for (item, count) in self.hash_map.iter() {
let lower = count;
let upper = count + self.offset;
@@ -510,74 +510,36 @@ impl<T: Eq + Hash> FrequentItemsSketch<T> {
}
}
-impl FrequentItemsSketch<i64> {
+impl<T: FrequentItemValue> FrequentItemsSketch<T> {
/// Serializes this sketch into a byte vector.
///
/// # Examples
///
- /// ```
- /// # use datasketches::frequencies::FrequentItemsSketch;
- /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
- /// # sketch.update_with_count(7, 2);
- /// let bytes = sketch.serialize();
- /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
- /// assert!(decoded.estimate(&7) >= 2);
- /// ```
- pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(
- |items| items.iter().map(i64::serialize_size).sum(),
- |bytes, items| {
- for item in items {
- item.serialize_value(bytes);
- }
- },
- )
- }
-
- /// Deserializes a sketch from bytes.
- ///
- /// # Examples
+ /// Built-in support for `i64`:
///
/// ```
/// # use datasketches::frequencies::FrequentItemsSketch;
/// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
/// # sketch.update_with_count(7, 2);
- /// # let bytes = sketch.serialize();
+ /// let bytes = sketch.serialize();
/// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
/// assert!(decoded.estimate(&7) >= 2);
/// ```
- pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
- Self::deserialize_inner(bytes, |mut cursor, num_items| {
- let mut items = Vec::with_capacity(num_items);
- for i in 0..num_items {
- let item = i64::deserialize_value(&mut cursor).map_err(|_| {
- Error::insufficient_data(format!(
- "expected {num_items} items, failed to read item at
index {i}"
- ))
- })?;
- items.push(item);
- }
- Ok(items)
- })
- }
-}
-
-impl FrequentItemsSketch<u64> {
- /// Serializes this sketch into a byte vector.
///
- /// # Examples
+ /// Built-in support for `String`:
///
/// ```
/// # use datasketches::frequencies::FrequentItemsSketch;
- /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
- /// # sketch.update_with_count(7, 2);
+ /// # let mut sketch = FrequentItemsSketch::<String>::new(64);
+ /// # let apple = "apple".to_string();
+ /// # sketch.update_with_count(apple.clone(), 2);
/// let bytes = sketch.serialize();
- /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
- /// assert!(decoded.estimate(&7) >= 2);
+ /// let decoded =
FrequentItemsSketch::<String>::deserialize(&bytes).unwrap();
+ /// assert!(decoded.estimate(&apple) >= 2);
/// ```
pub fn serialize(&self) -> Vec<u8> {
self.serialize_inner(
- |items| items.iter().map(u64::serialize_size).sum(),
+ |items| items.iter().map(T::serialize_size).sum(),
|bytes, items| {
for item in items {
item.serialize_value(bytes);
@@ -590,58 +552,18 @@ impl FrequentItemsSketch<u64> {
///
/// # Examples
///
+ /// Built-in support for `i64`:
+ ///
/// ```
/// # use datasketches::frequencies::FrequentItemsSketch;
- /// # let mut sketch = FrequentItemsSketch::<u64>::new(64);
+ /// # let mut sketch = FrequentItemsSketch::<i64>::new(64);
/// # sketch.update_with_count(7, 2);
/// # let bytes = sketch.serialize();
- /// let decoded = FrequentItemsSketch::<u64>::deserialize(&bytes).unwrap();
+ /// let decoded = FrequentItemsSketch::<i64>::deserialize(&bytes).unwrap();
/// assert!(decoded.estimate(&7) >= 2);
/// ```
- pub fn deserialize(bytes: &[u8]) -> Result<Self, Error> {
- Self::deserialize_inner(bytes, |mut cursor, num_items| {
- let mut items = Vec::with_capacity(num_items);
- for i in 0..num_items {
- let item = u64::deserialize_value(&mut cursor).map_err(|_| {
- Error::insufficient_data(format!(
- "expected {num_items} items, failed to read item at
index {i}"
- ))
- })?;
- items.push(item);
- }
- Ok(items)
- })
- }
-}
-
-impl FrequentItemsSketch<String> {
- /// Serializes this sketch into a byte vector.
- ///
- /// # Examples
- ///
- /// ```
- /// # use datasketches::frequencies::FrequentItemsSketch;
- /// # let mut sketch = FrequentItemsSketch::<String>::new(64);
- /// # let apple = "apple".to_string();
- /// # sketch.update_with_count(apple.clone(), 2);
- /// let bytes = sketch.serialize();
- /// let decoded =
FrequentItemsSketch::<String>::deserialize(&bytes).unwrap();
- /// assert!(decoded.estimate(&apple) >= 2);
- /// ```
- pub fn serialize(&self) -> Vec<u8> {
- self.serialize_inner(
- |items| items.iter().map(String::serialize_size).sum(),
- |bytes, items| {
- for item in items {
- item.serialize_value(bytes);
- }
- },
- )
- }
-
- /// Deserializes a sketch from bytes.
///
- /// # Examples
+ /// Built-in support for `String`:
///
/// ```
/// # use datasketches::frequencies::FrequentItemsSketch;
@@ -656,7 +578,7 @@ impl FrequentItemsSketch<String> {
Self::deserialize_inner(bytes, |mut cursor, num_items| {
let mut items = Vec::with_capacity(num_items);
for i in 0..num_items {
- let item = String::deserialize_value(&mut cursor).map_err(|_| {
+ let item = T::deserialize_value(&mut cursor).map_err(|_| {
Error::insufficient_data(format!(
"expected {num_items} items, failed to read item at
index {i}"
))
diff --git a/datasketches/src/hash/mod.rs b/datasketches/src/hash/mod.rs
index 87eaf22..99d2cca 100644
--- a/datasketches/src/hash/mod.rs
+++ b/datasketches/src/hash/mod.rs
@@ -19,7 +19,6 @@ mod murmurhash;
mod xxhash;
pub(crate) use self::murmurhash::MurmurHash3X64128;
-#[allow(unused_imports)]
pub(crate) use self::xxhash::XxHash64;
/// The seed 9001 used in the sketch update methods is a prime number that was
chosen very early
diff --git a/datasketches/src/hll/array4.rs b/datasketches/src/hll/array4.rs
index a17b4da..073c335 100644
--- a/datasketches/src/hll/array4.rs
+++ b/datasketches/src/hll/array4.rs
@@ -79,8 +79,8 @@ impl Array4 {
/// Get the actual value at a slot (adjusted for cur_min and aux_map)
///
/// Returns the true register value:
- /// - If raw < 15: value = cur_min + raw
- /// - If raw == 15 (AUX_TOKEN): value is in aux_map
+ /// * If raw < 15: value = cur_min + raw
+ /// * If raw == 15 (AUX_TOKEN): value is in aux_map
pub(super) fn get(&self, slot: u32) -> u8 {
let raw = self.get_raw(slot);
diff --git a/datasketches/src/hll/array8.rs b/datasketches/src/hll/array8.rs
index 00faf16..2bd1509 100644
--- a/datasketches/src/hll/array8.rs
+++ b/datasketches/src/hll/array8.rs
@@ -187,8 +187,8 @@ impl Array8 {
///
/// # Parameters
///
- /// * `src` - Source register values (length must be 2^src_lg_k)
- /// * `src_lg_k` - Log2 of source register count
+ /// * `src`: Source register values (length must be 2^src_lg_k)
+ /// * `src_lg_k`: Log2 of source register count
///
/// # Panics
///
diff --git a/datasketches/src/hll/estimator.rs
b/datasketches/src/hll/estimator.rs
index a9bd63b..7c9ca08 100644
--- a/datasketches/src/hll/estimator.rs
+++ b/datasketches/src/hll/estimator.rs
@@ -33,9 +33,9 @@ use crate::hll::harmonic_numbers;
/// allowing it to be composed into Array4, Array6, and Array8.
///
/// The estimator supports two modes:
-/// - **In-order mode**: Uses HIP (Historical Inverse Probability) accumulator
for accurate
+/// * **In-order mode**: Uses HIP (Historical Inverse Probability) accumulator
for accurate
/// sequential updates
-/// - **Out-of-order mode**: Uses composite estimator (raw HLL + linear
counting) after
+/// * **Out-of-order mode**: Uses composite estimator (raw HLL + linear
counting) after
/// deserialization or merging
#[derive(Debug, Clone, PartialEq)]
pub struct HipEstimator {
@@ -71,8 +71,8 @@ impl HipEstimator {
/// 2. Update KxQ registers (always)
///
/// The KxQ registers are split for numerical precision:
- /// - kxq0: sum of 1/2^v for v < 32
- /// - kxq1: sum of 1/2^v for v >= 32
+ /// * kxq0: sum of 1/2^v for v < 32
+ /// * kxq1: sum of 1/2^v for v >= 32
pub fn update(&mut self, lg_config_k: u8, old_value: u8, new_value: u8) {
let k = (1 << lg_config_k) as f64;
@@ -109,9 +109,9 @@ impl HipEstimator {
///
/// # Arguments
///
- /// * `lg_config_k` - Log2 of number of registers (k)
- /// * `cur_min` - Current minimum register value (for Array4, 0 for
Array6/8)
- /// * `num_at_cur_min` - Number of registers at cur_min value
+ /// * `lg_config_k`: Log2 of number of registers (k)
+ /// * `cur_min`: Current minimum register value (for Array4, 0 for
Array6/8)
+ /// * `num_at_cur_min`: Number of registers at cur_min value
pub fn estimate(&self, lg_config_k: u8, cur_min: u8, num_at_cur_min: u32)
-> f64 {
if self.out_of_order {
self.get_composite_estimate(lg_config_k, cur_min, num_at_cur_min)
@@ -126,10 +126,10 @@ impl HipEstimator {
///
/// # Arguments
///
- /// * `lg_config_k` - Log2 of number of registers (k)
- /// * `cur_min` - Current minimum register value (for Array4, 0 for
Array6/8)
- /// * `num_at_cur_min` - Number of registers at cur_min value
- /// * `num_std_dev` - Number of standard deviations (1, 2, or 3)
+ /// * `lg_config_k`: Log2 of number of registers (k)
+ /// * `cur_min`: Current minimum register value (for Array4, 0 for
Array6/8)
+ /// * `num_at_cur_min`: Number of registers at cur_min value
+ /// * `num_std_dev`: Number of standard deviations (1, 2, or 3)
pub fn upper_bound(
&self,
lg_config_k: u8,
@@ -149,10 +149,10 @@ impl HipEstimator {
///
/// # Arguments
///
- /// * `lg_config_k` - Log2 of number of registers (k)
- /// * `cur_min` - Current minimum register value (for Array4, 0 for
Array6/8)
- /// * `num_at_cur_min` - Number of registers at cur_min value
- /// * `num_std_dev` - Number of standard deviations (1, 2, or 3)
+ /// * `lg_config_k`: Log2 of number of registers (k)
+ /// * `cur_min`: Current minimum register value (for Array4, 0 for
Array6/8)
+ /// * `num_at_cur_min`: Number of registers at cur_min value
+ /// * `num_std_dev`: Number of standard deviations (1, 2, or 3)
pub fn lower_bound(
&self,
lg_config_k: u8,
@@ -286,8 +286,8 @@ impl HipEstimator {
/// Set the out-of-order flag
///
/// This should be set to true when:
- /// - Deserializing a sketch from bytes
- /// - After a merge/union operation
+ /// * Deserializing a sketch from bytes
+ /// * After a merge/union operation
pub fn set_out_of_order(&mut self, ooo: bool) {
self.out_of_order = ooo;
if ooo {
@@ -331,10 +331,10 @@ fn inv_pow2(value: u8) -> f64 {
///
/// # Arguments
///
-/// * `lg_config_k` - Log2 of number of registers (must be 4-21)
-/// * `upper_bound` - Whether computing upper bound (vs lower bound)
-/// * `ooo` - Whether sketch is out-of-order (merged/deserialized)
-/// * `num_std_dev` - Number of standard deviations (1, 2, or 3)
+/// * `lg_config_k`: Log2 of number of registers (must be 4-21)
+/// * `upper_bound`: Whether computing upper bound (vs lower bound)
+/// * `ooo`: Whether sketch is out-of-order (merged/deserialized)
+/// * `num_std_dev`: Number of standard deviations (1, 2, or 3)
///
/// # Returns
///
@@ -357,7 +357,7 @@ fn get_rel_err(lg_config_k: u8, upper_bound: bool, ooo:
bool, num_std_dev: NumSt
return sign * (num_std_dev as u8 as f64) * rse_factor / k.sqrt();
}
- // For lg_k <= 12, use empirically measured lookup tables
+ // For lg_k <= 12, use empirically measured lookup tables.
// Tables are indexed by: ((lg_k - 4) * 3) + (num_std_dev - 1)
let idx = ((lg_config_k as usize) - 4) * 3 + ((num_std_dev as usize) - 1);
diff --git a/datasketches/src/hll/harmonic_numbers.rs
b/datasketches/src/hll/harmonic_numbers.rs
index cdc4161..dea7141 100644
--- a/datasketches/src/hll/harmonic_numbers.rs
+++ b/datasketches/src/hll/harmonic_numbers.rs
@@ -86,8 +86,8 @@ fn harmonic_number(n: usize) -> f64 {
///
/// # Arguments
///
-/// * `bit_vector_length` - Total length of bit vector (k for HLL)
-/// * `num_bits_set` - Number of bits set (non-zero registers)
+/// * `bit_vector_length`: Total length of bit vector (k for HLL)
+/// * `num_bits_set`: Number of bits set (non-zero registers)
///
/// # Returns
///
diff --git a/datasketches/src/hll/mod.rs b/datasketches/src/hll/mod.rs
index f9476fe..6f99a49 100644
--- a/datasketches/src/hll/mod.rs
+++ b/datasketches/src/hll/mod.rs
@@ -26,9 +26,9 @@
//! This implementation follows the Apache DataSketches specification and
supports multiple
//! storage modes that automatically adapt based on cardinality:
//!
-//! - **List mode**: Stores individual values for small cardinalities
-//! - **Set mode**: Uses a hash set for medium cardinalities
-//! - **HLL mode**: Uses compact arrays for large cardinalities
+//! * **List mode**: Stores individual values for small cardinalities
+//! * **Set mode**: Uses a hash set for medium cardinalities
+//! * **HLL mode**: Uses compact arrays for large cardinalities
//!
//! Mode transitions are automatic and transparent to the user. Each promotion
preserves
//! all previously observed values and maintains estimation accuracy.
@@ -44,9 +44,9 @@
//!
//! Three target HLL types are supported, trading precision for memory:
//!
-//! - [`HllType::Hll4`]: 4 bits per bucket (most compact)
-//! - [`HllType::Hll6`]: 6 bits per bucket (balanced)
-//! - [`HllType::Hll8`]: 8 bits per bucket (highest precision)
+//! * [`HllType::Hll4`]: 4 bits per bucket (most compact)
+//! * [`HllType::Hll6`]: 6 bits per bucket (balanced)
+//! * [`HllType::Hll8`]: 8 bits per bucket (highest precision)
//!
//! # Union Operations
//!
@@ -54,9 +54,9 @@
//! It maintains an internal "gadget" sketch that accumulates the union of all
input sketches
//! and automatically handles:
//!
-//! - Sketches with different `lg_k` precision levels (resizes/downsamples as
needed)
-//! - Sketches in different modes (List, Set, or Array)
-//! - Sketches with different target HLL types
+//! * Sketches with different `lg_k` precision levels (resizes/downsamples as
needed)
+//! * Sketches in different modes (List, Set, or Array)
+//! * Sketches with different target HLL types
//!
//! The union operation preserves cardinality estimation accuracy while
enabling distributed
//! computation patterns where sketches are built independently and merged
later.
@@ -64,10 +64,10 @@
//! # Serialization
//!
//! Sketches can be serialized and deserialized while preserving all state,
including:
-//! - Current mode and HLL type
-//! - All observed values (coupons or register values)
-//! - HIP accumulator state for accurate estimation
-//! - Out-of-order flag for merged/deserialized sketches
+//! * Current mode and HLL type
+//! * All observed values (coupons or register values)
+//! * HIP accumulator state for accurate estimation
+//! * Out-of-order flag for merged/deserialized sketches
//!
//! The serialization format is compatible with Apache DataSketches
implementations
//! in Java and C++, enabling cross-platform sketch exchange.
diff --git a/datasketches/src/hll/serialization.rs
b/datasketches/src/hll/serialization.rs
index 014b890..30740a9 100644
--- a/datasketches/src/hll/serialization.rs
+++ b/datasketches/src/hll/serialization.rs
@@ -64,8 +64,8 @@ pub fn extract_tgt_hll_type(mode_byte: u8) -> u8 {
///
/// # Arguments
///
-/// * `cur_mode` - 0 = LIST, 1 = SET, 2 = HLL
-/// * `tgt_type` - 0 = HLL4, 1 = HLL6, 2 = HLL8
+/// * `cur_mode`: 0 = LIST, 1 = SET, 2 = HLL
+/// * `tgt_type`: 0 = HLL4, 1 = HLL6, 2 = HLL8
#[inline]
pub fn encode_mode_byte(cur_mode: u8, tgt_type: u8) -> u8 {
(cur_mode & 0x3) | ((tgt_type & 0x3) << 2)
diff --git a/datasketches/src/hll/sketch.rs b/datasketches/src/hll/sketch.rs
index 484e16a..ecf3ff1 100644
--- a/datasketches/src/hll/sketch.rs
+++ b/datasketches/src/hll/sketch.rs
@@ -54,15 +54,15 @@ impl HllSketch {
///
/// # Arguments
///
- /// * `lg_config_k` - Log2 of the number of buckets (K). Must be in [4,
21].
- /// - lg_k=4: 16 buckets, ~26% relative error
- /// - lg_k=12: 4096 buckets, ~1.6% relative error (common choice)
- /// - lg_k=21: 2M buckets, ~0.4% relative error
- /// * `hll_type` - Target HLL array type (Hll4, Hll6, or Hll8)
+ /// * `lg_config_k`: Log2 of the number of buckets (K). Must be in `[4,
21]`.
+ /// * lg_k=4: 16 buckets, ~26% relative error
+ /// * lg_k=12: 4096 buckets, ~1.6% relative error (common choice)
+ /// * lg_k=21: 2M buckets, ~0.4% relative error
+ /// * `hll_type`: Target HLL array type (Hll4, Hll6, or Hll8)
///
/// # Panics
///
- /// If lg_config_k is not in range [4, 21]
+ /// If lg_config_k is not in range `[4, 21]`
///
/// # Examples
///
@@ -94,8 +94,8 @@ impl HllSketch {
///
/// # Arguments
///
- /// * `lg_config_k` - Log2 of the number of buckets (K)
- /// * `mode` - The mode to initialize the sketch with
+ /// * `lg_config_k`: Log2 of the number of buckets (K)
+ /// * `mode`: The mode to initialize the sketch with
pub(super) fn from_mode(lg_config_k: u8, mode: Mode) -> Self {
Self { lg_config_k, mode }
}
diff --git a/datasketches/src/hll/union.rs b/datasketches/src/hll/union.rs
index 03fb4ea..5f3929d 100644
--- a/datasketches/src/hll/union.rs
+++ b/datasketches/src/hll/union.rs
@@ -24,9 +24,9 @@
//!
//! The union maintains an internal "gadget" sketch that accumulates the union
//! of all input sketches. It can handle sketches with:
-//! - Different lg_k values (automatically resizes as needed)
-//! - Different modes (List, Set, Array4/6/8)
-//! - Different target HLL types
+//! * Different lg_k values (automatically resizes as needed)
+//! * Different modes (List, Set, Array4/6/8)
+//! * Different target HLL types
use std::hash::Hash;
@@ -59,13 +59,13 @@ impl HllUnion {
///
/// # Arguments
///
- /// * `lg_max_k` - Maximum log2 of the number of buckets. Must be in [4,
21]. This determines
+ /// * `lg_max_k`: Maximum log2 of the number of buckets. Must be in `[4,
21]`. This determines
/// the maximum precision the union can handle. Input sketches with
larger lg_k will be
/// down-sampled.
///
/// # Panics
///
- /// Panics if `lg_max_k` is not in the range [4, 21].
+ /// Panics if `lg_max_k` is not in the range `[4, 21]`.
///
/// # Examples
///
@@ -110,9 +110,9 @@ impl HllUnion {
/// Update the union with another sketch
///
/// Merges the input sketch into the union's internal gadget, handling:
- /// - Sketches with different lg_k values (resizes/downsamples as needed)
- /// - Sketches in different modes (List, Set, Array4/6/8)
- /// - Sketches with different target HLL types
+ /// * Sketches with different lg_k values (resizes/downsamples as needed)
+ /// * Sketches in different modes (List, Set, Array4/6/8)
+ /// * Sketches with different target HLL types
///
/// # Examples
///
@@ -244,7 +244,7 @@ impl HllUnion {
///
/// # Arguments
///
- /// * `hll_type` - The target HLL type for the result sketch (Hll4, Hll6,
or Hll8)
+ /// * `hll_type`: The target HLL type for the result sketch (Hll4, Hll6,
or Hll8)
///
/// # Examples
///
@@ -401,9 +401,9 @@ fn merge_coupons_into_mode(dst: &mut Array8, src_mode:
&Mode) {
/// Merge an HLL array into an Array8
///
/// Handles merging from Array4, Array6, or Array8 sources. Dispatches based
on lg_k:
-/// - Same lg_k: optimized bulk merge
-/// - src lg_k > dst lg_k: downsample src into dst
-/// - src lg_k < dst lg_k: handled by caller (requires gadget replacement)
+/// * Same lg_k: optimized bulk merge
+/// * src lg_k > dst lg_k: downsample src into dst
+/// * src lg_k < dst lg_k: handled by caller (requires gadget replacement)
fn merge_array_into_array8(dst_array8: &mut Array8, dst_lg_k: u8, src_mode:
&Mode, src_lg_k: u8) {
assert!(
src_lg_k >= dst_lg_k,
diff --git a/datasketches/src/theta/bit_pack.rs
b/datasketches/src/theta/bit_pack.rs
index 031afce..2a59351 100644
--- a/datasketches/src/theta/bit_pack.rs
+++ b/datasketches/src/theta/bit_pack.rs
@@ -4972,9 +4972,9 @@ fn unpack_bits_63(values: &mut [u64], bytes: &[u8]) {
///
/// # Panics
///
-/// - Panics if `values.len()` is not equal to `BLOCK_WIDTH`.
-/// - Panics if `bits` is not in the range `1..=63`.
-/// - Panics if `bytes.len()` is less than `bits * BLOCK_WIDTH`.
+/// * Panics if `values.len()` is not equal to `BLOCK_WIDTH`.
+/// * Panics if `bits` is not in the range `1..=63`.
+/// * Panics if `bytes.len()` is less than `bits * BLOCK_WIDTH`.
pub(crate) fn pack_bits_block(values: &[u64], bytes: &mut [u8], bits: u8) {
assert_eq!(values.len(), BLOCK_WIDTH, "values length must be 8");
assert!(
@@ -5058,9 +5058,9 @@ pub(crate) fn pack_bits_block(values: &[u64], bytes: &mut
[u8], bits: u8) {
///
/// # Panics
///
-/// - Panics if `values.len()` is not equal to `BLOCK_WIDTH`.
-/// - Panics if `bits` is not in the range `1..=63`.
-/// - Panics if `bytes.len()` is less than `bits * BLOCK_WIDTH`.
+/// * Panics if `values.len()` is not equal to `BLOCK_WIDTH`.
+/// * Panics if `bits` is not in the range `1..=63`.
+/// * Panics if `bytes.len()` is less than `bits * BLOCK_WIDTH`.
pub(crate) fn unpack_bits_block(values: &mut [u64], bytes: &[u8], bits: u8) {
assert_eq!(values.len(), BLOCK_WIDTH, "values length must be 8");
assert!(
diff --git a/datasketches/src/theta/hash_table.rs
b/datasketches/src/theta/hash_table.rs
index d77304e..b06c695 100644
--- a/datasketches/src/theta/hash_table.rs
+++ b/datasketches/src/theta/hash_table.rs
@@ -587,7 +587,7 @@ mod tests {
let mut table = ThetaHashTable::new(8, ResizeFactor::X8, 1.0,
DEFAULT_UPDATE_SEED);
// Insert some values
- let mut inserted_hashes = Vec::new();
+ let mut inserted_hashes = vec![];
for i in 0..10 {
let hash = table.hash_and_screen(format!("value_{}", i));
if hash != 0 && table.try_insert(hash) {
@@ -633,7 +633,7 @@ mod tests {
// Insert many values to trigger rebuild
let mut i = 0;
- let mut inserted_hashes = Vec::new();
+ let mut inserted_hashes = vec![];
loop {
let hash = table.hash_and_screen(format!("value_{}", i));
i += 1;
diff --git a/datasketches/src/theta/mod.rs b/datasketches/src/theta/mod.rs
index 1d33a71..fdde037 100644
--- a/datasketches/src/theta/mod.rs
+++ b/datasketches/src/theta/mod.rs
@@ -27,8 +27,8 @@
//! Theta sketches provide approximate distinct count (cardinality) estimation
with
//! configurable accuracy and memory usage. The implementation supports:
//!
-//! - **ThetaSketch**: Mutable sketch for building from input data
-//! - **CompactThetaSketch**: Immutable sketch with compact memory layout
+//! * **ThetaSketch**: Mutable sketch for building from input data
+//! * **CompactThetaSketch**: Immutable sketch with compact memory layout
//!
//! # Usage
//!
diff --git a/datasketches/src/theta/sketch.rs b/datasketches/src/theta/sketch.rs
index a56d3c4..32f6e9a 100644
--- a/datasketches/src/theta/sketch.rs
+++ b/datasketches/src/theta/sketch.rs
@@ -236,7 +236,7 @@ impl ThetaSketch {
///
/// # Arguments
///
- /// * `num_std_dev` - The number of standard deviations for confidence
bounds.
+ /// * `num_std_dev`: The number of standard deviations for confidence
bounds.
///
/// # Examples
///
@@ -270,7 +270,7 @@ impl ThetaSketch {
///
/// # Arguments
///
- /// * `num_std_dev` - The number of standard deviations for confidence
bounds.
+ /// * `num_std_dev`: The number of standard deviations for confidence
bounds.
///
/// # Examples
///
diff --git a/datasketches/tests/bloom_serialization_test.rs
b/datasketches/tests/bloom_serialization_test.rs
index 5370f89..15daba2 100644
--- a/datasketches/tests/bloom_serialization_test.rs
+++ b/datasketches/tests/bloom_serialization_test.rs
@@ -15,15 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//! Bloom Filter Serialization Compatibility Tests
-//!
-//! These tests verify binary compatibility with Apache DataSketches
implementations:
-//! - Java (datasketches-java)
-//! - C++ (datasketches-cpp)
-//!
-//! Test data is generated by the reference implementations and stored in:
-//! `tests/serialization_test_data/`
-
mod common;
use std::fs;
diff --git a/datasketches/tests/hll_serialization_test.rs
b/datasketches/tests/hll_serialization_test.rs
index 9c8200f..a7e00e6 100644
--- a/datasketches/tests/hll_serialization_test.rs
+++ b/datasketches/tests/hll_serialization_test.rs
@@ -15,15 +15,6 @@
// specific language governing permissions and limitations
// under the License.
-//! HLL Sketch Serialization Compatibility Tests
-//!
-//! These tests verify binary compatibility with Apache DataSketches
implementations:
-//! - Java (datasketches-java)
-//! - C++ (datasketches-cpp)
-//!
-//! Test data is generated by the reference implementations and stored in:
-//! `tests/serialization_test_data/`
-
mod common;
use std::fs;
@@ -48,9 +39,9 @@ fn test_sketch_file(path: PathBuf, expected_cardinality:
usize, expected_lg_k: u
// Check cardinality estimate with error bounds
// For lg_k=12, theoretical RSE ≈ 1.625%, but we use 2% margin to account
for:
- // - Small sample sizes (especially n < 100)
- // - Out-of-order mode (composite estimator)
- // - Variation across implementations
+ // * Small sample sizes (especially n < 100)
+ // * Out-of-order mode (composite estimator)
+ // * Variation across implementations
if expected > 0.0 {
let error_margin = 0.02; // 2% error margin
let lower_bound = expected * (1.0 - error_margin);
diff --git a/datasketches/tests/hll_union_test.rs
b/datasketches/tests/hll_union_test.rs
index 2f17a29..91080bf 100644
--- a/datasketches/tests/hll_union_test.rs
+++ b/datasketches/tests/hll_union_test.rs
@@ -18,12 +18,12 @@
//! HyperLogLog Union Integration Tests
//!
//! These tests verify the public API behavior of HllUnion, focusing on:
-//! - Basic union operations
-//! - Mode transitions and mixed-mode unions
-//! - Different HLL types and lg_k values
-//! - Bounds and statistical properties
-//! - Mathematical properties (commutativity, associativity, idempotency)
-//! - Reset and reuse patterns
+//! * Basic union operations
+//! * Mode transitions and mixed-mode unions
+//! * Different HLL types and lg_k values
+//! * Bounds and statistical properties
+//! * Mathematical properties (commutativity, associativity, idempotency)
+//! * Reset and reuse patterns
//!
//! This mirrors the testing strategy used in hll_update_test.rs
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]