This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datasketches-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 4f5f266 test cleanup (#26)
4f5f266 is described below
commit 4f5f2665b590be992e75eb5d6d5f8ee6b2cc75b4
Author: Filippo Rossi <[email protected]>
AuthorDate: Wed Dec 17 15:52:00 2025 +0100
test cleanup (#26)
---
src/hll/sketch.rs | 5 +-
src/hll/union.rs | 468 --------------------------------------
tests/hll_union_test.rs | 583 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 586 insertions(+), 470 deletions(-)
diff --git a/src/hll/sketch.rs b/src/hll/sketch.rs
index 5aab18b..f4ca064 100644
--- a/src/hll/sketch.rs
+++ b/src/hll/sketch.rs
@@ -58,8 +58,9 @@ impl HllSketch {
/// If lg_config_k is not in range [4, 21]
pub fn new(lg_config_k: u8, hll_type: HllType) -> Self {
assert!(
- lg_config_k > 4 && lg_config_k < 21,
- "lg_config_k must be in [4, 21]"
+ (4..=21).contains(&lg_config_k),
+ "lg_config_k must be in [4, 21], got {}",
+ lg_config_k
);
let list = List::default();
diff --git a/src/hll/union.rs b/src/hll/union.rs
index 89795a3..8b9b666 100644
--- a/src/hll/union.rs
+++ b/src/hll/union.rs
@@ -552,471 +552,3 @@ fn copy_or_downsample(src_mode: &Mode, src_lg_k: u8,
tgt_lg_k: u8) -> Array8 {
result
}
}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn test_union_basic_list_mode() {
- // Test the simplest case: union of two sketches in List mode
- let mut union = HllUnion::new(12);
-
- // Create first sketch and add some values
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- sketch1.update("foo");
- sketch1.update("bar");
- sketch1.update("baz");
-
- // Create second sketch with overlapping and new values
- let mut sketch2 = HllSketch::new(12, HllType::Hll8);
- sketch2.update("bar"); // duplicate
- sketch2.update("qux"); // new
- sketch2.update("quux"); // new
-
- // Union them
- union.update(&sketch1);
- union.update(&sketch2);
-
- // Get result
- let result = union.get_result(HllType::Hll8);
-
- // Should have ~5 unique values (foo, bar, baz, qux, quux)
- let estimate = result.estimate();
- assert!(
- (4.0..=6.0).contains(&estimate),
- "Expected estimate around 5, got {}",
- estimate
- );
-
- // Should not be empty
- assert!(!result.is_empty());
- }
-
- #[test]
- fn test_union_empty_sketch() {
- let mut union = HllUnion::new(10);
- let empty_sketch = HllSketch::new(10, HllType::Hll8);
-
- // Updating with empty sketch should not panic
- union.update(&empty_sketch);
-
- // Union should still be empty
- assert!(union.is_empty());
- }
-
- #[test]
- fn test_union_estimate_accuracy() {
- let mut union = HllUnion::new(12);
-
- // Add 1000 unique values across multiple sketches
- // This will cause sketches to promote to Array mode
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- for i in 0..500 {
- sketch1.update(i);
- }
-
- let mut sketch2 = HllSketch::new(12, HllType::Hll8);
- for i in 400..900 {
- // 400-500 overlap with sketch1
- sketch2.update(i);
- }
-
- union.update(&sketch1);
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~900 unique values (0-899)
- // With lg_k=12, we expect ~1.6% relative error
- // So estimate should be within 900 ± 50 or so
- assert!(
- estimate > 800.0 && estimate < 1000.0,
- "Expected estimate around 900, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_array_to_array_same_lgk() {
- // Test merging two Array mode sketches with same lg_k
- let mut union = HllUnion::new(12);
-
- // Create two sketches that will be in Array mode (add enough values)
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- for i in 0..10_000 {
- sketch1.update(i);
- }
-
- let mut sketch2 = HllSketch::new(12, HllType::Hll8);
- for i in 5_000..15_000 {
- sketch2.update(i);
- }
-
- // Both should be in Array mode now
- assert!(matches!(sketch1.mode(), Mode::Array8(_)));
- assert!(matches!(sketch2.mode(), Mode::Array8(_)));
-
- union.update(&sketch1);
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~15,000 unique values (0-14,999)
- // With lg_k=12, we expect ~1.6% relative error
- assert!(
- estimate > 14_000.0 && estimate < 16_000.0,
- "Expected estimate around 15000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_downsampling_src_larger() {
- // Test src_lg_k > dst_lg_k (downsampling)
- let mut union = HllUnion::new(10); // Union at lg_k=10
-
- // Create sketch at lg_k=12 (higher precision)
- let mut sketch = HllSketch::new(12, HllType::Hll8);
- for i in 0..5_000 {
- sketch.update(i);
- }
-
- // Union should downsample sketch to lg_k=10
- union.update(&sketch);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should still estimate ~5,000 unique values
- assert!(
- estimate > 4_000.0 && estimate < 6_000.0,
- "Expected estimate around 5000, got {}",
- estimate
- );
- assert_eq!(result.lg_config_k(), 10, "Result should be at lg_k=10");
- }
-
- #[test]
- fn test_union_gadget_downsizing_src_smaller() {
- // Test src_lg_k < dst_lg_k (gadget downsizing)
- let mut union = HllUnion::new(12);
-
- // First update with lg_k=12 sketch to establish gadget at lg_k=12
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- for i in 0..10_000 {
- sketch1.update(i);
- }
- union.update(&sketch1);
- assert_eq!(union.lg_config_k(), 12, "Gadget should be at lg_k=12");
-
- // Now update with lg_k=10 sketch (lower precision)
- let mut sketch2 = HllSketch::new(10, HllType::Hll8);
- for i in 5_000..15_000 {
- sketch2.update(i);
- }
-
- // This should trigger gadget downsizing to lg_k=10
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~15,000 unique values
- assert!(
- estimate > 13_000.0 && estimate < 17_000.0,
- "Expected estimate around 15000, got {}",
- estimate
- );
- assert_eq!(
- result.lg_config_k(),
- 10,
- "Gadget should have downsized to lg_k=10"
- );
- }
-
- #[test]
- fn test_union_list_to_array() {
- // Test union where first sketch is List and second is Array
- let mut union = HllUnion::new(12);
-
- // First sketch: small (List mode)
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- sketch1.update("a");
- sketch1.update("b");
- sketch1.update("c");
- assert!(matches!(sketch1.mode(), Mode::List { .. }));
-
- // Second sketch: large (Array mode)
- let mut sketch2 = HllSketch::new(12, HllType::Hll8);
- for i in 0..10_000 {
- sketch2.update(i);
- }
- assert!(matches!(sketch2.mode(), Mode::Array8(_)));
-
- union.update(&sketch1);
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~10,003 unique values
- assert!(
- estimate > 9_500.0 && estimate < 10_500.0,
- "Expected estimate around 10000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_array_to_list() {
- // Test union where first sketch is Array and second is List
- let mut union = HllUnion::new(12);
-
- // First sketch: large (Array mode)
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- for i in 0..10_000 {
- sketch1.update(i);
- }
- assert!(matches!(sketch1.mode(), Mode::Array8(_)));
-
- // Second sketch: small (List mode)
- let mut sketch2 = HllSketch::new(12, HllType::Hll8);
- sketch2.update("a");
- sketch2.update("b");
- sketch2.update("c");
- assert!(matches!(sketch2.mode(), Mode::List { .. }));
-
- union.update(&sketch1);
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~10,003 unique values
- assert!(
- estimate > 9_500.0 && estimate < 10_500.0,
- "Expected estimate around 10000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_mixed_hll_types() {
- // Test union with different HLL types (Hll4, Hll6, Hll8)
- let mut union = HllUnion::new(12);
-
- // Sketch with Hll4
- let mut sketch1 = HllSketch::new(12, HllType::Hll4);
- for i in 0..3_000 {
- sketch1.update(i);
- }
-
- // Sketch with Hll6
- let mut sketch2 = HllSketch::new(12, HllType::Hll6);
- for i in 2_000..5_000 {
- sketch2.update(i);
- }
-
- // Sketch with Hll8
- let mut sketch3 = HllSketch::new(12, HllType::Hll8);
- for i in 4_000..7_000 {
- sketch3.update(i);
- }
-
- union.update(&sketch1);
- union.update(&sketch2);
- union.update(&sketch3);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~7,000 unique values (0-6,999)
- assert!(
- estimate > 6_000.0 && estimate < 8_000.0,
- "Expected estimate around 7000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_multiple_downsizing_operations() {
- // Test multiple gadget downsizing operations
- let mut union = HllUnion::new(12);
-
- // Start with lg_k=12
- let mut sketch1 = HllSketch::new(12, HllType::Hll8);
- for i in 0..5_000 {
- sketch1.update(i);
- }
- union.update(&sketch1);
- assert_eq!(union.lg_config_k(), 12);
-
- // Downsize to lg_k=10
- let mut sketch2 = HllSketch::new(10, HllType::Hll8);
- for i in 4_000..8_000 {
- sketch2.update(i);
- }
- union.update(&sketch2);
- assert_eq!(union.lg_config_k(), 10);
-
- // Downsize again to lg_k=8
- let mut sketch3 = HllSketch::new(8, HllType::Hll8);
- for i in 7_000..10_000 {
- sketch3.update(i);
- }
- union.update(&sketch3);
- assert_eq!(union.lg_config_k(), 8);
-
- let result = union.get_result(HllType::Hll8);
- let estimate = result.estimate();
-
- // Should estimate ~10,000 unique values (0-9,999)
- // Lower precision means higher error
- assert!(
- estimate > 8_000.0 && estimate < 12_000.0,
- "Expected estimate around 10000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_get_result_type_conversion_hll6() {
- // Test getting result as Hll6
- let mut union = HllUnion::new(12);
-
- let mut sketch = HllSketch::new(12, HllType::Hll8);
- for i in 0..5_000 {
- sketch.update(i);
- }
-
- union.update(&sketch);
-
- // Get result as Hll6
- let result = union.get_result(HllType::Hll6);
-
- // Verify it's Hll6
- assert_eq!(result.target_type(), HllType::Hll6);
-
- // Estimate should be similar
- let estimate = result.estimate();
- assert!(
- estimate > 4_000.0 && estimate < 6_000.0,
- "Expected estimate around 5000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_get_result_type_conversion_hll4() {
- // Test getting result as Hll4
- let mut union = HllUnion::new(12);
-
- let mut sketch = HllSketch::new(12, HllType::Hll8);
- for i in 0..5_000 {
- sketch.update(i);
- }
-
- union.update(&sketch);
-
- // Get result as Hll4
- let result = union.get_result(HllType::Hll4);
-
- // Verify it's Hll4
- assert_eq!(result.target_type(), HllType::Hll4);
-
- // Estimate should be similar (Hll4 may have slightly different
precision)
- let estimate = result.estimate();
- assert!(
- estimate > 4_000.0 && estimate < 6_000.0,
- "Expected estimate around 5000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_get_result_no_conversion_needed() {
- // Test that requesting Hll8 when gadget is Hll8 just clones
- let mut union = HllUnion::new(12);
-
- let mut sketch = HllSketch::new(12, HllType::Hll8);
- for i in 0..1_000 {
- sketch.update(i);
- }
-
- union.update(&sketch);
-
- // Get result as Hll8 (no conversion needed)
- let result = union.get_result(HllType::Hll8);
-
- // Verify it's Hll8
- assert_eq!(result.target_type(), HllType::Hll8);
-
- // Estimate should match
- let estimate = result.estimate();
- assert!(
- estimate > 900.0 && estimate < 1_100.0,
- "Expected estimate around 1000, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_get_result_from_list_mode() {
- // Test type conversion when gadget is still in List mode
- let mut union = HllUnion::new(12);
-
- // Add just a few values so gadget stays in List mode
- let mut sketch = HllSketch::new(12, HllType::Hll8);
- sketch.update("a");
- sketch.update("b");
- sketch.update("c");
-
- union.update(&sketch);
-
- // Get result as Hll6 - should just change the target type
- let result = union.get_result(HllType::Hll6);
-
- assert_eq!(result.target_type(), HllType::Hll6);
- assert!(matches!(result.mode(), Mode::List { .. }));
-
- let estimate = result.estimate();
- assert!(
- (3.0..=5.0).contains(&estimate),
- "Expected estimate around 3, got {}",
- estimate
- );
- }
-
- #[test]
- fn test_union_hll6_arrays_with_overlap() {
- // Test unioning Hll6 sketches (which will be in Array6 mode)
- let mut union = HllUnion::new(12);
-
- let mut sketch1 = HllSketch::new(12, HllType::Hll6);
- for i in 0..10_000 {
- sketch1.update(i);
- }
-
- let mut sketch2 = HllSketch::new(12, HllType::Hll6);
- for i in 5_000..15_000 {
- sketch2.update(i);
- }
-
- union.update(&sketch1);
- union.update(&sketch2);
-
- let result = union.get_result(HllType::Hll6);
- let estimate = result.estimate();
-
- // Should estimate ~15,000 unique values (0-14,999)
- // Both sketches are Hll6, so we expect the full union
- assert!(
- estimate > 13_000.0 && estimate < 17_000.0,
- "Expected estimate around 15000, got {}. This suggests sketch2
overwrote sketch1 instead of merging.",
- estimate
- );
- }
-}
diff --git a/tests/hll_union_test.rs b/tests/hll_union_test.rs
new file mode 100644
index 0000000..1df4165
--- /dev/null
+++ b/tests/hll_union_test.rs
@@ -0,0 +1,583 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! HyperLogLog Union Integration Tests
+//!
+//! These tests verify the public API behavior of HllUnion, focusing on:
+//! - Basic union operations
+//! - Mode transitions and mixed-mode unions
+//! - Different HLL types and lg_k values
+//! - Bounds and statistical properties
+//! - Mathematical properties (commutativity, associativity, idempotency)
+//! - Reset and reuse patterns
+//!
+//! This mirrors the testing strategy used in hll_update_test.rs
+
+use datasketches::hll::{HllSketch, HllType, HllUnion, NumStdDev};
+
+#[test]
+fn test_union_basic_operations() {
+ let mut union = HllUnion::new(12);
+
+ // Empty union
+ assert!(union.is_empty());
+ assert_eq!(union.estimate(), 0.0);
+
+ // Update with empty sketch should not change state
+ let empty_sketch = HllSketch::new(12, HllType::Hll8);
+ union.update(&empty_sketch);
+ assert!(union.is_empty());
+
+ // Create sketches with overlapping values
+ let mut sketch1 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..500 {
+ sketch1.update(i);
+ }
+
+ let mut sketch2 = HllSketch::new(12, HllType::Hll8);
+ for i in 400..900 {
+ sketch2.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+
+ // Should estimate ~900 unique values (0-899)
+ let estimate = union.estimate();
+ assert!(
+ estimate > 800.0 && estimate < 1000.0,
+ "Expected estimate around 900, got {}",
+ estimate
+ );
+ assert!(!union.is_empty());
+
+ // Adding empty sketch should not affect estimate
+ let estimate_before = union.estimate();
+ union.update(&empty_sketch);
+ assert_eq!(estimate_before, union.estimate());
+
+ // Test update_value with different types
+ union.update_value("hello");
+ union.update_value(42i32);
+ union.update_value(vec![1, 2, 3]);
+ assert!(union.estimate() > estimate_before);
+
+ // Test duplicate handling - same sketch added multiple times
+ let mut dup_union = HllUnion::new(12);
+ let mut sketch = HllSketch::new(12, HllType::Hll8);
+ for i in 0..100 {
+ sketch.update(i);
+ }
+ for _ in 0..5 {
+ dup_union.update(&sketch);
+ }
+ let dup_estimate = dup_union.estimate();
+ assert!(
+ (dup_estimate - 100.0).abs() < 20.0,
+ "Duplicates should not inflate estimate, got {}",
+ dup_estimate
+ );
+}
+
+#[test]
+fn test_union_mode_transitions() {
+ let mut union = HllUnion::new(12);
+
+ // Start with List mode (small cardinality)
+ let mut sketch1 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..10 {
+ sketch1.update(i);
+ }
+
+ let mut sketch2 = HllSketch::new(12, HllType::Hll8);
+ for i in 5..15 {
+ sketch2.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+
+ let estimate = union.estimate();
+ assert!(
+ (estimate - 15.0).abs() < 5.0,
+ "List mode: expected estimate around 15, got {}",
+ estimate
+ );
+
+ // Trigger Set mode promotion
+ let mut sketch3 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..600 {
+ sketch3.update(i);
+ }
+ union.update(&sketch3);
+
+ let estimate = union.estimate();
+ assert!(
+ (estimate - 600.0).abs() < 100.0,
+ "Set mode: estimate should be close to 600, got {}",
+ estimate
+ );
+
+ // Trigger HLL mode promotion
+ let mut sketch4 = HllSketch::new(12, HllType::Hll8);
+ for i in 500..10_000 {
+ sketch4.update(i);
+ }
+ union.update(&sketch4);
+
+ let estimate = union.estimate();
+ assert!(
+ estimate > 9_000.0 && estimate < 11_000.0,
+ "HLL mode: expected estimate around 10000, got {}",
+ estimate
+ );
+}
+
+#[test]
+fn test_union_mixed_modes() {
+ let mut union = HllUnion::new(12);
+
+ // Small sketch (List mode)
+ let mut sketch1 = HllSketch::new(12, HllType::Hll8);
+ sketch1.update("a");
+ sketch1.update("b");
+ sketch1.update("c");
+
+ // Large sketch (Array mode)
+ let mut sketch2 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..10_000 {
+ sketch2.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+
+ let result = union.get_result(HllType::Hll8);
+ let estimate = result.estimate();
+
+ // Should estimate ~10,003 unique values
+ assert!(
+ estimate > 9_500.0 && estimate < 10_500.0,
+ "Expected estimate around 10000, got {}",
+ estimate
+ );
+}
+
+#[test]
+fn test_union_mixed_hll_types() {
+ let mut union = HllUnion::new(12);
+
+ // Mix Hll4, Hll6, and Hll8 sketches
+ let mut sketch1 = HllSketch::new(12, HllType::Hll4);
+ for i in 0..3_000 {
+ sketch1.update(i);
+ }
+
+ let mut sketch2 = HllSketch::new(12, HllType::Hll6);
+ for i in 2_000..5_000 {
+ sketch2.update(i);
+ }
+
+ let mut sketch3 = HllSketch::new(12, HllType::Hll8);
+ for i in 4_000..7_000 {
+ sketch3.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+ union.update(&sketch3);
+
+ // Test getting result in different types
+ let result4 = union.get_result(HllType::Hll4);
+ let result6 = union.get_result(HllType::Hll6);
+ let result8 = union.get_result(HllType::Hll8);
+
+ assert_eq!(result4.target_type(), HllType::Hll4);
+ assert_eq!(result6.target_type(), HllType::Hll6);
+ assert_eq!(result8.target_type(), HllType::Hll8);
+
+ // Should estimate ~7,000 unique values (0-6,999)
+ for (result, type_name) in [
+ (result4.estimate(), "Hll4"),
+ (result6.estimate(), "Hll6"),
+ (result8.estimate(), "Hll8"),
+ ] {
+ assert!(
+ result > 6_000.0 && result < 8_000.0,
+ "{}: expected estimate around 7000, got {}",
+ type_name,
+ result
+ );
+ }
+}
+
+#[test]
+fn test_union_lg_k_handling() {
+ // Test multiple downsizing operations: 12 → 10 → 8
+ let mut union = HllUnion::new(12);
+
+ // Start with lg_k=12
+ let mut sketch1 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..5_000 {
+ sketch1.update(i);
+ }
+ union.update(&sketch1);
+ assert_eq!(union.lg_config_k(), 12);
+
+ // Add sketch with lg_k=10 (triggers downsizing)
+ let mut sketch2 = HllSketch::new(10, HllType::Hll8);
+ for i in 4_000..8_000 {
+ sketch2.update(i);
+ }
+ union.update(&sketch2);
+ assert_eq!(union.lg_config_k(), 10, "Gadget should downsize to lg_k=10");
+
+ // Add sketch with lg_k=8 (triggers another downsizing)
+ let mut sketch3 = HllSketch::new(8, HllType::Hll8);
+ for i in 7_000..10_000 {
+ sketch3.update(i);
+ }
+ union.update(&sketch3);
+ assert_eq!(union.lg_config_k(), 8, "Gadget should downsize to lg_k=8");
+
+ let result = union.get_result(HllType::Hll8);
+ let estimate = result.estimate();
+
+ // Should estimate ~10,000 unique values (0-9,999)
+ // Lower precision means higher error tolerance
+ assert!(
+ estimate > 8_000.0 && estimate < 12_000.0,
+ "Expected estimate around 10000, got {}",
+ estimate
+ );
+
+ // Test downsampling: union at lower precision than sketch
+ let mut union2 = HllUnion::new(10);
+ let mut sketch_high_precision = HllSketch::new(12, HllType::Hll8);
+ for i in 0..5_000 {
+ sketch_high_precision.update(i);
+ }
+
+ union2.update(&sketch_high_precision);
+ let result2 = union2.get_result(HllType::Hll8);
+ assert_eq!(result2.lg_config_k(), 10, "Result should be at lg_k=10");
+
+ let estimate2 = result2.estimate();
+ assert!(
+ estimate2 > 4_000.0 && estimate2 < 6_000.0,
+ "Downsampling should still estimate ~5000, got {}",
+ estimate2
+ );
+}
+
+#[test]
+fn test_union_bounds() {
+ let mut union = HllUnion::new(12);
+
+ // Empty union
+ assert_eq!(union.estimate(), 0.0);
+ let empty_lower = union.lower_bound(NumStdDev::Two);
+ let empty_upper = union.upper_bound(NumStdDev::Two);
+ assert!(empty_lower >= 0.0, "Lower bound should be non-negative");
+ assert!(empty_upper >= 0.0, "Upper bound should be non-negative");
+ assert!(empty_lower <= empty_upper);
+
+ // Add sketches
+ let mut sketch1 = HllSketch::new(12, HllType::Hll8);
+ for i in 0..500 {
+ sketch1.update(i);
+ }
+
+ let mut sketch2 = HllSketch::new(12, HllType::Hll8);
+ for i in 400..900 {
+ sketch2.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+
+ let estimate = union.estimate();
+ let upper1 = union.upper_bound(NumStdDev::One);
+ let lower1 = union.lower_bound(NumStdDev::One);
+ let upper2 = union.upper_bound(NumStdDev::Two);
+ let lower2 = union.lower_bound(NumStdDev::Two);
+ let upper3 = union.upper_bound(NumStdDev::Three);
+ let lower3 = union.lower_bound(NumStdDev::Three);
+
+ // Basic sanity checks
+ assert!(lower1 <= estimate);
+ assert!(estimate <= upper1);
+
+ // Bounds should widen with more standard deviations
+ assert!(lower2 <= lower1);
+ assert!(upper1 <= upper2);
+ assert!(lower3 <= lower2);
+ assert!(upper2 <= upper3);
+
+ // Bounds should be reasonable
+ assert!(lower3 > estimate * 0.5);
+ assert!(upper3 < estimate * 1.5);
+
+ // Test that smaller lg_k has wider bounds (higher RSE)
+ let mut union_small = HllUnion::new(8);
+ let mut union_large = HllUnion::new(14);
+
+ let mut sketch_small = HllSketch::new(8, HllType::Hll8);
+ let mut sketch_large = HllSketch::new(14, HllType::Hll8);
+
+ for i in 0..1000 {
+ sketch_small.update(i);
+ sketch_large.update(i);
+ }
+
+ union_small.update(&sketch_small);
+ union_large.update(&sketch_large);
+
+ let est_small = union_small.estimate();
+ let est_large = union_large.estimate();
+
+ let width_small = (union_small.upper_bound(NumStdDev::Two)
+ - union_small.lower_bound(NumStdDev::Two))
+ / est_small;
+ let width_large = (union_large.upper_bound(NumStdDev::Two)
+ - union_large.lower_bound(NumStdDev::Two))
+ / est_large;
+
+ assert!(
+ width_small > width_large,
+ "Smaller union should have wider confidence interval: {} vs {}",
+ width_small,
+ width_large
+ );
+}
+
+#[test]
+fn test_union_reset() {
+ let mut union = HllUnion::new(12);
+
+ let mut sketch = HllSketch::new(12, HllType::Hll8);
+ for i in 0..1000 {
+ sketch.update(i);
+ }
+
+ union.update(&sketch);
+ assert!(!union.is_empty());
+ assert!(union.estimate() > 900.0);
+
+ // Reset should clear all state
+ union.reset();
+ assert!(union.is_empty());
+ assert_eq!(union.estimate(), 0.0);
+ assert_eq!(union.lg_config_k(), 12, "lg_max_k should be preserved");
+
+ // Reuse after reset - multiple iterations
+ for iteration in 0..3 {
+ let mut sketch = HllSketch::new(12, HllType::Hll8);
+ for i in (iteration * 100)..((iteration + 1) * 100) {
+ sketch.update(i);
+ }
+
+ union.update(&sketch);
+ assert!(!union.is_empty());
+
+ union.reset();
+ assert!(union.is_empty());
+ }
+}
+
+#[test]
+fn test_union_commutativity() {
+ // Verify A∪B = B∪A
+ let mut sketch_a = HllSketch::new(12, HllType::Hll8);
+ let mut sketch_b = HllSketch::new(12, HllType::Hll8);
+
+ for i in 0..1000 {
+ sketch_a.update(i);
+ }
+ for i in 500..1500 {
+ sketch_b.update(i);
+ }
+
+ // Union in order A, B
+ let mut union1 = HllUnion::new(12);
+ union1.update(&sketch_a);
+ union1.update(&sketch_b);
+
+ // Union in order B, A
+ let mut union2 = HllUnion::new(12);
+ union2.update(&sketch_b);
+ union2.update(&sketch_a);
+
+ let est1 = union1.estimate();
+ let est2 = union2.estimate();
+
+ let relative_diff = (est1 - est2).abs() / est1.max(est2);
+ assert!(
+ relative_diff < 0.001,
+ "Union not commutative: {} vs {} (diff: {:.4}%)",
+ est1,
+ est2,
+ relative_diff * 100.0
+ );
+}
+
+#[test]
+fn test_union_associativity() {
+ // Verify (A∪B)∪C = A∪(B∪C)
+ let mut sketch_a = HllSketch::new(12, HllType::Hll8);
+ let mut sketch_b = HllSketch::new(12, HllType::Hll8);
+ let mut sketch_c = HllSketch::new(12, HllType::Hll8);
+
+ for i in 0..1000 {
+ sketch_a.update(i);
+ }
+ for i in 500..1500 {
+ sketch_b.update(i);
+ }
+ for i in 1000..2000 {
+ sketch_c.update(i);
+ }
+
+ // Compute (A∪B)∪C
+ let mut union1 = HllUnion::new(12);
+ union1.update(&sketch_a);
+ union1.update(&sketch_b);
+ let ab_sketch = union1.get_result(HllType::Hll8);
+
+ let mut union2 = HllUnion::new(12);
+ union2.update(&ab_sketch);
+ union2.update(&sketch_c);
+ let est1 = union2.estimate();
+
+ // Compute A∪(B∪C)
+ let mut union3 = HllUnion::new(12);
+ union3.update(&sketch_b);
+ union3.update(&sketch_c);
+ let bc_sketch = union3.get_result(HllType::Hll8);
+
+ let mut union4 = HllUnion::new(12);
+ union4.update(&sketch_a);
+ union4.update(&bc_sketch);
+ let est2 = union4.estimate();
+
+ let relative_diff = (est1 - est2).abs() / est1.max(est2);
+ assert!(
+ relative_diff < 0.01,
+ "Union not associative: {} vs {} (diff: {:.4}%)",
+ est1,
+ est2,
+ relative_diff * 100.0
+ );
+}
+
+#[test]
+fn test_union_idempotency() {
+ // Verify A∪A = A
+ let mut sketch = HllSketch::new(12, HllType::Hll8);
+ for i in 0..1000 {
+ sketch.update(i);
+ }
+
+ let mut union = HllUnion::new(12);
+ union.update(&sketch);
+ let est1 = union.estimate();
+
+ // Union with itself
+ union.update(&sketch);
+ let est2 = union.estimate();
+
+ let relative_diff = (est1 - est2).abs() / est1;
+ assert!(
+ relative_diff < 0.01,
+ "Union not idempotent: {} vs {} (diff: {:.4}%)",
+ est1,
+ est2,
+ relative_diff * 100.0
+ );
+}
+
+#[test]
+fn test_union_large_cardinality() {
+ let mut union = HllUnion::new(14);
+
+ // Create three large sketches with overlap
+ let mut sketch1 = HllSketch::new(14, HllType::Hll8);
+ for i in 0..100_000 {
+ sketch1.update(i);
+ }
+
+ let mut sketch2 = HllSketch::new(14, HllType::Hll8);
+ for i in 50_000..150_000 {
+ sketch2.update(i);
+ }
+
+ let mut sketch3 = HllSketch::new(14, HllType::Hll8);
+ for i in 100_000..200_000 {
+ sketch3.update(i);
+ }
+
+ union.update(&sketch1);
+ union.update(&sketch2);
+ union.update(&sketch3);
+
+ let estimate = union.estimate();
+ let relative_error = (estimate - 200_000.0).abs() / 200_000.0;
+
+ // For lg_k=14, relative error should be ~1.04%
+ assert!(
+ relative_error < 0.05,
+ "Relative error should be < 5%, got {:.2}%",
+ relative_error * 100.0
+ );
+}
+
+#[test]
+#[should_panic(expected = "lg_max_k must be in [4, 21]")]
+fn test_union_invalid_lg_k_low() {
+ HllUnion::new(3);
+}
+
+#[test]
+#[should_panic(expected = "lg_max_k must be in [4, 21]")]
+fn test_union_invalid_lg_k_high() {
+ HllUnion::new(22);
+}
+
+#[test]
+fn test_union_validation() {
+ // Test valid boundaries
+ let union_min = HllUnion::new(4);
+ let union_max = HllUnion::new(21);
+
+ assert_eq!(union_min.lg_max_k(), 4);
+ assert_eq!(union_max.lg_max_k(), 21);
+ assert!(union_min.is_empty());
+ assert!(union_max.is_empty());
+
+ // Test lg_max_k is preserved
+ let mut union = HllUnion::new(15);
+ let mut sketch = HllSketch::new(12, HllType::Hll8);
+ for i in 0..1000 {
+ sketch.update(i);
+ }
+
+ union.update(&sketch);
+ assert_eq!(union.lg_max_k(), 15, "lg_max_k should not change");
+
+ union.reset();
+ assert_eq!(union.lg_max_k(), 15, "lg_max_k should persist after reset");
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]