This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch bf_test_fix
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/bf_test_fix by this push:
     new c020e8f  fix seed for bloom filter test where we use probabilistic bounds
c020e8f is described below

commit c020e8f5a2957ae688c927e30da399298c08fb92
Author: Jon Malkin <[email protected]>
AuthorDate: Mon Oct 21 17:27:12 2024 -0700

    fix seed for bloom filter test where we use probabilistic bounds
---
 filters/test/bloom_filter_test.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/filters/test/bloom_filter_test.cpp b/filters/test/bloom_filter_test.cpp
index eb7157b..41b63e6 100644
--- a/filters/test/bloom_filter_test.cpp
+++ b/filters/test/bloom_filter_test.cpp
@@ -79,8 +79,9 @@ TEST_CASE("bloom_filter: standard constructors", "[bloom_filter]") {
 TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") {
   uint64_t num_items = 5000;
   double fpp = 0.01;
+  uint64_t seed = 4897301548054ULL;
 
-  auto bf = bloom_filter::builder::create_by_accuracy(num_items, fpp);
+  auto bf = bloom_filter::builder::create_by_accuracy(num_items, fpp, seed);
   REQUIRE(bf.is_empty());
   REQUIRE(bf.get_bits_used() == 0);
 
@@ -90,7 +91,10 @@ TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") {
 
   REQUIRE(!bf.is_empty());
   // filter is about 50% full at target capacity
-  REQUIRE(bf.get_bits_used() == Approx(0.5 * bf.get_capacity()).epsilon(0.05));
+  // since seed is fixed we expect an exact value every time
+  // but leaving the approximate test in since that's more the "expectation"
+  REQUIRE(bf.get_bits_used() == 24793); // exact value is not important but should be consistent
+  REQUIRE(bf.get_bits_used() == Approx(0.5 * bf.get_capacity()).epsilon(0.05)); // just over 3.3% in practice
 
   uint32_t num_found = 0;
   for (uint64_t i = num_items; i < bf.get_capacity(); ++i) {
@@ -98,8 +102,9 @@ TEST_CASE("bloom_filter: basic operations", "[bloom_filter]") {
       ++num_found;
     }
   }
-  // fpp is average with significant variance
-  REQUIRE(num_found == Approx((bf.get_capacity() - num_items) * fpp).epsilon(0.12));
+  // fpp is average with significant variance -- even at 12% it would fail occasionally
+  REQUIRE(num_found == 423);
+  //REQUIRE(num_found == Approx((bf.get_capacity() - num_items) * fpp).epsilon(0.12));
   auto bytes = bf.serialize();
 
   // initialize in memory and run the same tests


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to