Claudenw commented on a change in pull request #140: Bloomfilter updates2
URL: 
https://github.com/apache/commons-collections/pull/140#discussion_r391773836
 
 

 ##########
 File path: 
src/main/java/org/apache/commons/collections4/bloomfilter/hasher/Shape.java
 ##########
 @@ -124,103 +128,188 @@ public Shape(final HashFunctionIdentity 
hashFunctionIdentity, final double proba
         // similarly we can not produce a number greater than numberOfBits so 
we
         // do not have to check for Integer.MAX_VALUE either.
         this.numberOfItems = (int) n;
-        this.hashCode = generateHashCode();
         // check that probability is within range
-        getProbability();
+        checkCalculatedProbability(getProbability());
+        this.hashCode = generateHashCode();
     }
 
     /**
-     * Constructs a filter configuration with the specified number of items and
-     * probability. <p> The actual probability will be approximately equal to 
the
-     * desired probability but will be dependent upon the calculated bloom 
filter size
-     * and function count. </p>
+     * Constructs a filter configuration with the specified number of items 
({@code n}) and
+     * desired false-positive probability ({@code p}).
+     *
+     * <p>The number of bits ({@code m}) for the filter is computed.
+     * <pre>m = ceil(n * log(p) / log(1 / 2^log(2)))</pre>
      *
-     * @param hashFunctionIdentity The HashFunctionIdentity of the hash 
function this shape uses.
-     * @param numberOfItems Number of items to be placed in the filter.
-     * @param probability The desired probability of duplicates. Must be in 
the range
-     * (0.0,1.0).
+     * <p>The optimal number of hash functions ({@code k}) is computed.
+     * <pre>k = round((m / n) * log(2))</pre>
+     *
+     * <p>The actual probability will be approximately equal to the
+     * desired probability but will be dependent upon the calculated number of 
bits and hash
+     * functions. An exception is raised if this is greater than or equal to 1 
(i.e. the
+     * shape is invalid for use as a Bloom filter).
+     *
+     * @param hashFunctionIdentity The identity of the hash function this 
shape uses
+     * @param numberOfItems Number of items to be placed in the filter
+     * @param probability The desired false-positive probability in the range 
{@code (0, 1)}
+     * @throws NullPointerException if the hash function identity is null
+     * @throws IllegalArgumentException if the number of items is not strictly 
positive
+     * @throws IllegalArgumentException if the desired probability is not in 
the range {@code (0, 1)}
+     * @throws IllegalArgumentException if the calculated probability is not 
below 1
+     * @see #getProbability()
      */
     public Shape(final HashFunctionIdentity hashFunctionIdentity, final int 
numberOfItems, final double probability) {
-        Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
-        if (numberOfItems < 1) {
-            throw new IllegalArgumentException("Number of Items must be 
greater than 0");
-        }
-        if (probability <= 0.0) {
-            throw new IllegalArgumentException("Probability must be greater 
than 0.0");
-        }
-        if (probability >= 1.0) {
-            throw new IllegalArgumentException("Probability must be less than 
1.0");
-        }
-        this.hashFunctionIdentity = hashFunctionIdentity;
-        this.numberOfItems = numberOfItems;
-        /*
-         * number of bits is called "m" in most mathematical statement 
describing
-         * bloom filters so we use it here.
-         */
+        this.hashFunctionIdentity = 
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
+        this.numberOfItems = checkNumberOfItems(numberOfItems);
+        checkProbability(probability);
+
+        // Number of bits (m)
         final double m = Math.ceil(numberOfItems * Math.log(probability) / 
DENOMINATOR);
         if (m > Integer.MAX_VALUE) {
-            throw new IllegalArgumentException("Resulting filter has more than 
" + Integer.MAX_VALUE + " bits");
+            throw new IllegalArgumentException("Resulting filter has more than 
" + Integer.MAX_VALUE + " bits: " + m);
         }
         this.numberOfBits = (int) m;
+
         this.numberOfHashFunctions = 
calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
-        this.hashCode = generateHashCode();
         // check that probability is within range
-        getProbability();
+        checkCalculatedProbability(getProbability());
+        this.hashCode = generateHashCode();
     }
 
     /**
-     * Constructs a filter configuration with the specified number of items and
-     * probability.
+     * Constructs a filter configuration with the specified number of items 
({@code n}) and
+     * bits ({@code m}).
      *
-     * @param hashFunctionIdentity The HashFunctionIdentity of the hash 
function this shape uses.
-     * @param numberOfItems Number of items to be placed in the filter.
-     * @param numberOfBits The number of bits in the filter.
+     * <p>The optimal number of hash functions ({@code k}) is computed.
+     * <pre>k = round((m / n) * log(2))</pre>
+     *
+     * <p>The false-positive probability is computed using the number of 
items, bits and hash
+     * functions. An exception is raised if this is greater than or equal to 1 
(i.e. the
+     * shape is invalid for use as a Bloom filter).
+     *
+     * @param hashFunctionIdentity The identity of the hash function this 
shape uses
+     * @param numberOfItems Number of items to be placed in the filter
+     * @param numberOfBits The number of bits in the filter
+     * @throws NullPointerException if the hash function identity is null
+     * @throws IllegalArgumentException if the number of items is not strictly 
positive
+     * @throws IllegalArgumentException if the number of bits is not above 8
+     * @throws IllegalArgumentException if the calculated number of hash 
function is below 1
+     * @throws IllegalArgumentException if the calculated probability is not 
below 1
+     * @see #getProbability()
      */
     public Shape(final HashFunctionIdentity hashFunctionIdentity, final int 
numberOfItems, final int numberOfBits) {
-        Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
-        if (numberOfItems < 1) {
-            throw new IllegalArgumentException("Number of Items must be 
greater than 0");
-        }
-        if (numberOfBits < 8) {
-            throw new IllegalArgumentException("Number of Bits must be greater 
than or equal to 8");
-        }
-        this.hashFunctionIdentity = hashFunctionIdentity;
-        this.numberOfItems = numberOfItems;
-        this.numberOfBits = numberOfBits;
+        this.hashFunctionIdentity = 
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
+        this.numberOfItems = checkNumberOfItems(numberOfItems);
+        this.numberOfBits = checkNumberOfBits(numberOfBits);
         this.numberOfHashFunctions = 
calculateNumberOfHashFunctions(numberOfItems, numberOfBits);
-        this.hashCode = generateHashCode();
         // check that probability is within range
-        getProbability();
+        checkCalculatedProbability(getProbability());
+        this.hashCode = generateHashCode();
     }
 
     /**
-     * Constructs a filter configuration with the specified number of items and
-     * probability.
+     * Constructs a filter configuration with the specified number of items, 
bits
+     * and hash functions.
+     *
+     * <p>The false-positive probability is computed using the number of 
items, bits and hash
+     * functions. An exception is raised if this is greater than or equal to 1 
(i.e. the
+     * shape is invalid for use as a Bloom filter).
      *
-     * @param hashFunctionIdentity The HashFunctionIdentity of the hash 
function this shape uses.
-     * @param numberOfItems Number of items to be placed in the filter.
+     * @param hashFunctionIdentity The identity of the hash function this 
shape uses
+     * @param numberOfItems Number of items to be placed in the filter
      * @param numberOfBits The number of bits in the filter.
-     * @param numberOfHashFunctions The number of hash functions in the filter.
+     * @param numberOfHashFunctions The number of hash functions in the filter
+     * @throws NullPointerException if the hash function identity is null
+     * @throws IllegalArgumentException if the number of items is not strictly 
positive
+     * @throws IllegalArgumentException if the number of bits is not above 8
+     * @throws IllegalArgumentException if the number of hash functions is not 
strictly positive
+     * @throws IllegalArgumentException if the calculated probability is not 
below 1
+     * @see #getProbability()
      */
     public Shape(final HashFunctionIdentity hashFunctionIdentity, final int 
numberOfItems, final int numberOfBits,
         final int numberOfHashFunctions) {
-        Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
+        this.hashFunctionIdentity = 
Objects.requireNonNull(hashFunctionIdentity, "hashFunctionIdentity");
+        this.numberOfItems = checkNumberOfItems(numberOfItems);
+        this.numberOfBits = checkNumberOfBits(numberOfBits);
+        this.numberOfHashFunctions = 
checkNumberOfHashFunctions(numberOfHashFunctions);
+        // check that probability is within range
+        checkCalculatedProbability(getProbability());
+        this.hashCode = generateHashCode();
+    }
+
+    /**
+     * Check number of items is strictly positive.
+     *
+     * @param numberOfItems the number of items
+     * @return the number of items
+     * @throws IllegalArgumentException if the number of items is not strictly 
positive
+     */
+    private static int checkNumberOfItems(final int numberOfItems) {
         if (numberOfItems < 1) {
-            throw new IllegalArgumentException("Number of Items must be 
greater than 0");
+            throw new IllegalArgumentException("Number of items must be 
greater than 0: " + numberOfItems);
         }
+        return numberOfItems;
+    }
+
+    /**
+     * Check number of bits is above 8.
+     *
+     * @param numberOfBits the number of bits
+     * @return the number of bits
+     * @throws IllegalArgumentException if the number of bits is not above 8
+     */
+    private static int checkNumberOfBits(final int numberOfBits) {
         if (numberOfBits < 8) {
-            throw new IllegalArgumentException("Number of Bits must be greater 
than or equal to 8");
+            throw new IllegalArgumentException("Number of bits must be greater 
than or equal to 8: " + numberOfBits);
         }
+        return numberOfBits;
+    }
+
+    /**
+     * Check number of hash functions is strictly positive
+     *
+     * @param numberOfHashFunctions the number of hash functions
+     * @return the number of hash functions
+     * @throws IllegalArgumentException if the number of hash functions is not 
strictly positive
+     */
+    private static int checkNumberOfHashFunctions(final int 
numberOfHashFunctions) {
         if (numberOfHashFunctions < 1) {
-            throw new IllegalArgumentException("Number of Hash Functions must 
be greater than or equal to 8");
+            throw new IllegalArgumentException("Number of hash functions must 
be greater than 0: " + numberOfHashFunctions);
+        }
+        return numberOfHashFunctions;
+    }
+
+    /**
+     * Check the probability is in the range 0.0, exclusive, to 1.0, exclusive.
+     *
+     * @param probability the probability
+     * @throws IllegalArgumentException if the probability is not in the range 
{@code (0, 1)}
+     */
+    private static void checkProbability(final double probability) {
+        // Using the negation of within the desired range will catch NaN
+        if (!(probability > 0.0 && probability < 1.0)) {
+            throw new IllegalArgumentException("Probability must be greater 
than 0 and less than 1: " + probability);
+        }
+    }
+
+    /**
+     * Check the calculated probability is below 1.0.
+     *
+     * <p>This function is used to verify that the dynamically calculated 
probability for the
+     * Shape is in the valid range 0 to 1 exclusive. This need only be 
performed once upon
+     * construction.
+     *
+     * @param probability the probability
+     * @throws IllegalArgumentException if the calculated probability is not 
below 1
+     */
+    private static void checkCalculatedProbability(final double probability) {
+        // We do not need to check for p < = since we only allow positive 
values for
 
 Review comment:
   should be " p <= 0"

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to