Claudenw commented on a change in pull request #258: URL: https://github.com/apache/commons-collections/pull/258#discussion_r800094885
########## File path: src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java ########## @@ -0,0 +1,486 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.Objects; + +/** + * The definition of a Bloom filter shape. + * + * <p> This class contains the values for the filter configuration and is used to + * convert a Hasher into a BloomFilter as well as verify that two Bloom filters are + * compatible. (i.e. can be compared or merged)</p> + * + * <h2>Interrelatedness of values</h2> + * + * <dl> <dt>Number of Items ({@code n})</dt> + * <dd>{@code n = ceil(m / (-k / ln(1 - exp(ln(p) / k))))}</dd> <dt>Probability of + * False Positives ({@code p})</dt> <dd>{@code p = pow(1 - exp(-k / (m / n)), k)}</dd> <dt>Number + * of Bits ({@code m})</dt> + * <dd>{@code m = ceil((n * ln(p)) / ln(1 / pow(2, ln(2))))}</dd> <dt>Number of + * Functions ({@code k})</dt> <dd>{@code k = round((m / n) * ln(2))}</dd> </dl> + * + * @see <a href="http://hur.st/bloomfilter?n=3&p=1.0E-5">Bloom Filter calculator</a> + * @see <a href="https://en.wikipedia.org/wiki/Bloom_filter">Bloom filter + * [Wikipedia]</a> + * @since 4.5 + */ +public final class Shape implements Comparable<Shape> { + + /** + * Number of hash functions to create a filter ({@code k}). + */ + private final int numberOfHashFunctions; + + /** + * Number of bits in the filter ({@code m}). + */ + private final int numberOfBits; + + /** + * Constructs a filter configuration with the specified number of items ({@code n}) and + * bits ({@code m}). + * + * <p>The optimal number of hash functions ({@code k}) is computed. + * <pre>k = round((m / n) * ln(2))</pre> + * + * <p>The false-positive probability is computed using the number of items, bits and hash + * functions. An exception is raised if this is greater than or equal to 1 (i.e. the + * shape is invalid for use as a Bloom filter). + * + * @param numberOfHashFunctions Number of hash functions to use for each item placed in the filter. + * @param numberOfBits The number of bits in the filter + * @throws IllegalArgumentException if {@code numberOfHashFunctions < 1} or {@code numberOfBits < 1} + */ + public Shape(final int numberOfHashFunctions, final int numberOfBits) { + this.numberOfBits = checkNumberOfBits(numberOfBits); + this.numberOfHashFunctions = checkNumberOfHashFunctions(numberOfHashFunctions); + } + + /** + * Check number of bits is strictly positive. + * + * @param numberOfBits the number of bits + * @return the number of bits + * @throws IllegalArgumentException if the number of bits is {@code < 1} + */ + private static int checkNumberOfBits(final int numberOfBits) { + if (numberOfBits < 1) { + throw new IllegalArgumentException("Number of bits must be greater than 0: " + numberOfBits); + } + return numberOfBits; + } + + /** + * Check number of hash functions is strictly positive + * + * @param numberOfHashFunctions the number of hash functions + * @return the number of hash functions + * @throws IllegalArgumentException if the number of hash functions is {@code < 1} + */ + private static int checkNumberOfHashFunctions(final int numberOfHashFunctions) { + if (numberOfHashFunctions < 1) { + throw new IllegalArgumentException( + "Number of hash functions must be greater than 0: " + numberOfHashFunctions); + } + return numberOfHashFunctions; + } + + @Override + public int compareTo(Shape other) { Review comment: It is not absolutely needed. I did use it in some multidimensional stuff and since Shape is immutable it made sense to me to implement hash(), equals() and compareTo(). I suppose compareTo establishes an arbitrary order and can be removed if you feel strongly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
