This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch bloom
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 533cb81c67dc88644917a18411e50c31c926002c
Author: jmalkin <[email protected]>
AuthorDate: Mon Feb 26 18:36:52 2024 -0800

    finish javadocs for builder class
---
 .../filters/bloomfilter/BloomFilterBuilder.java    | 52 +++++++++++++++++++++-
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
 
b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
index 46d48a26..938889d8 100644
--- 
a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
+++ 
b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilterBuilder.java
@@ -19,31 +19,79 @@
 
 package org.apache.datasketches.filters.bloomfilter;
 
+import org.apache.datasketches.common.SketchesArgumentException;
+
 public final class BloomFilterBuilder {
 
+  /**
+   * Returns the optimal number of hash functions to given target numbers of 
distinct items
+   * and the BloomFliter size in bits.
+   * @param maxDistinctItems The maximum expected number of distinct items to 
add to the filter
+   * @param numFilterBits The target size, in bits, of the Bloom Filter
+   * @return The suggested number of hash functions to use with the filter
+   */
   public static short suggestNumHashes(final long maxDistinctItems, final long 
numFilterBits) {
     // ceil to ensure we never average worse than the target
     return (short) Math.max(1, (int) Math.ceil((double) numFilterBits / 
maxDistinctItems * Math.log(2.0)));
   }
 
+  /**
+   * Returns the optimal number of hash functions to achieve a target false 
positive probability.
+   * @param targetFalsePositiveProb A desired false positive probability per 
item
+   * @return The suggested number of hash functions to use with the filter.
+   */
   public static short suggestNumHashes(final double targetFalsePositiveProb) {
+    if (targetFalsePositiveProb <= 0.0 || targetFalsePositiveProb > 1.0) {
+      throw new SketchesArgumentException("targetFalsePositiveProb must be a 
valid probability and strictly greater than 0");
+    }
     // ceil to ensure we never average worse than the target
     return (short) Math.ceil((- Math.log(targetFalsePositiveProb) / 
Math.log(2)));
   }
 
+  /**
+   * Returns the optimal number of bits to use in a Bloom Filter given a 
target number of distinct
+   * items and a target false positive probability.
+   * @param maxDistinctItems The maximum expected number of distinct items to 
add to the filter
+   * @param targetFalsePositiveProb A desired false positive probability per 
item
+   * @return The suggested number of bits to use with the filter
+   */
   public static long suggestNumFilterBits(final long maxDistinctItems, final 
double targetFalsePositiveProb) {
+    if (maxDistinctItems <= 0) {
+      throw new SketchesArgumentException("maxDistinctItems must be strictly 
positive");
+    }
+    if (targetFalsePositiveProb <= 0.0 || targetFalsePositiveProb > 1.0) {
+      throw new SketchesArgumentException("targetFalsePositiveProb must be a 
valid probability and strictly greater than 0");
+    }
     return (long) Math.round(-maxDistinctItems * 
Math.log(targetFalsePositiveProb) / (Math.log(2) * Math.log(2)));
   }
 
+  /**
+   * Creates a new BloomFilter with an optimal number of bits and hash 
functions for the given inputs.
+   * @param maxDistinctItems The maximum expected number of distinct items to 
add to the filter
+   * @param targetFalsePositiveProb A desired false positive probability per 
item
+   * @return A new BloomFilter configured for the given input parameters
+   */
   public static BloomFilter newBloomFilter(final long maxDistinctItems, final 
double targetFalsePositiveProb) {
-    // TODO validate inputs
+    if (maxDistinctItems <= 0) {
+      throw new SketchesArgumentException("maxDistinctItems must be strictly 
positive");
+    }
+    if (targetFalsePositiveProb <= 0.0 || targetFalsePositiveProb > 1.0) {
+      throw new SketchesArgumentException("targetFalsePositiveProb must be a 
valid probability and strictly greater than 0");
+    }
     final long numBits = suggestNumFilterBits(maxDistinctItems, 
targetFalsePositiveProb);
     final short numHashes = suggestNumHashes(maxDistinctItems, numBits);
     return new BloomFilter(numBits, numHashes);
   }
 
+  /**
+   * Creates a new BloomFilter with an optimal number of bits and hash 
functions for the given inputs,
+   * using the provided base seed for the hash function.
+   * @param maxDistinctItems The maximum expected number of distinct items to 
add to the filter
+   * @param targetFalsePositiveProb A desired false positive probability per 
item
+   * @param seed A base hash seed 
+   * @return A new BloomFilter configured for the given input parameters
+   */
   public static BloomFilter newBloomFilter(final long maxDistinctItems, final 
double targetFalsePositiveProb, final long seed) {
-    // TODO validate inputs
     final long numBits = suggestNumFilterBits(maxDistinctItems, 
targetFalsePositiveProb);
     final short numHashes = suggestNumHashes(maxDistinctItems, numBits);
     return new BloomFilter(numBits, numHashes, seed);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to