Claudenw commented on a change in pull request #258:
URL:
https://github.com/apache/commons-collections/pull/258#discussion_r798326968
##########
File path:
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
##########
@@ -16,138 +16,265 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.Objects;
+import java.util.function.IntPredicate;
+
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
/**
* The interface that describes a Bloom filter.
* @since 4.5
*/
-public interface BloomFilter {
+public interface BloomFilter extends IndexProducer, BitMapProducer {
- // Query Operations
+ /**
+ * Return the Bloom filter data as a BitMap array.
+ * @param filter the filter to get the data from.
+ * @return An array of BitMap long.
+ */
+ static long[] asBitMapArray(BloomFilter filter) {
+ BitMapProducer.ArrayBuilder builder = new
BitMapProducer.ArrayBuilder(filter.getShape());
+ filter.forEachBitMap(builder);
+ return builder.getArray();
+ }
/**
- * Gets the shape of this filter.
- *
- * @return the shape of this filter
+ * Return the Bloom filter data as an array of indices for the enabled
bits.
+ * @param filter the Filter to get the data from.
+ * @return An array of indices for enabled bits in the Bloom filter.
*/
- Shape getShape();
+ static int[] asIndexArray(BloomFilter filter) {
+ int[] result = new int[filter.cardinality()];
+
+ filter.forEachIndex(new IntPredicate() {
+ int i = 0;
+
+ @Override
+ public boolean test(int idx) {
+ result[i++] = idx;
+ return true;
+ }
+ });
+ return result;
+ }
+
+ // Query Operations
/**
- * Gets an array of little-endian long values representing the bits of
this filter.
+ * This method is used to determine the best method for matching.
*
- * <p>The returned array will have length {@code ceil(m / 64)} where
{@code m} is the
- * number of bits in the filter and {@code ceil} is the ceiling function.
- * Bits 0-63 are in the first long. A value of 1 at a bit position
indicates the bit
- * index is enabled.
+ * <p>For `sparse` implementations
+ * the {@code forEachIndex(IntConsumer consumer)} method is more
efficient. For non `sparse` implementations
+ * the {@code forEachBitMap(LongConsumer consumer)} is more efficient.
Implementers should determine if it is easier
+ * for the implementation to produce indexes of BitMap blocks.</p>
*
- * @return the {@code long[]} representation of this filter
+ * @return {@code true} if the implementation is sparse {@code false}
otherwise.
+ * @see BitMap
*/
- long[] getBits();
+ boolean isSparse();
/**
- * Creates a StaticHasher that contains the indexes of the bits that are
on in this
- * filter.
- *
- * @return a StaticHasher for that produces this Bloom filter
+ * Gets the shape that was used when the filter was built.
+ * @return The shape the filter was built with.
*/
- StaticHasher getHasher();
+ Shape getShape();
/**
- * Returns {@code true} if this filter contains the specified filter.
Specifically this
+ * Returns {@code true} if this filter contains the specified filter.
+ *
+ * <p>Specifically this
* returns {@code true} if this filter is enabled for all bits that are
enabled in the
* {@code other} filter. Using the bit representations this is
- * effectively {@code (this AND other) == other}.
+ * effectively {@code (this AND other) == other}.</p>
*
* @param other the other Bloom filter
- * @return true if this filter is enabled for all enabled bits in the
other filter
- * @throws IllegalArgumentException if the shape of the other filter does
not match
- * the shape of this filter
+ * @return true if all enabled bits in the other filter are enabled in
this filter.
*/
- boolean contains(BloomFilter other);
+ default boolean contains(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return isSparse() ? contains((IndexProducer) other) :
contains((BitMapProducer) other);
+ }
/**
- * Returns {@code true} if this filter contains the specified decomposed
Bloom filter.
- * Specifically this returns {@code true} if this filter is enabled for
all bit indexes
- * identified by the {@code hasher}. Using the bit representations this is
- * effectively {@code (this AND hasher) == hasher}.
+ * Returns {@code true} if this filter contains the bits specified in the
hasher.
+ *
+ * <p>Specifically this returns {@code true} if this filter is enabled for
all bit indexes
+ * identified by the {@code hasher}. Using the BitMap representations this
is
+ * effectively {@code (this AND hasher) == hasher}.</p>
*
* @param hasher the hasher to provide the indexes
* @return true if this filter is enabled for all bits specified by the
hasher
- * @throws IllegalArgumentException if the hasher cannot generate indices
for the shape of
- * this filter
*/
- boolean contains(Hasher hasher);
+ default boolean contains(Hasher hasher) {
+ Objects.requireNonNull(hasher, "Hasher");
+ Shape shape = getShape();
+ return contains(hasher.indices(shape));
+ }
+
+ /**
+ * Returns {@code true} if this filter contains the indices specified
IndexProducer.
+ *
+ * <p>Specifically this returns {@code true} if this filter is enabled for
all bit indexes
+ * identified by the {@code IndexProducer}.</p>
+ *
+ * @param indexProducer the IndexProducer to provide the indexes
+ * @return {@code true} if this filter is enabled for all bits specified
by the IndexProducer
+ */
+ boolean contains(IndexProducer indexProducer);
- // Modification Operations
+ /**
+ * Returns {@code true} if this filter contains the bits specified in the
BitMaps produced by the
+ * bitMapProducer.
+ *
+ * @param bitMapProducer the the {@code BitMapProducer} to provide the
BitMaps.
+ * @return {@code true} if this filter is enabled for all bits specified
by the BitMaps
+ */
+ boolean contains(BitMapProducer bitMapProducer);
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically
all bit indexes
- * that are enabled in the {@code other} filter will be enabled in this
filter.
+ * Merges the specified Bloom filter with this Bloom filter creating a new
Bloom filter.
*
- * <p>Note: This method should return {@code true} even if no additional
bit indexes were
- * enabled. A {@code false} result indicates that this filter is not
ensured to contain
- * the {@code other} Bloom filter.
+ * <p>Specifically all bit indexes that are enabled in the {@code other}
and in @code this} filter will be
+ * enabled in the resulting filter.</p>
*
* @param other the other Bloom filter
+ * @return The new Bloom filter.
+ */
+ default BloomFilter merge(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ Shape shape = getShape();
+ BloomFilter result = shape.isSparse((cardinality() +
other.cardinality())) ? new SparseBloomFilter(shape)
+ : new SimpleBloomFilter(shape);
+
+ result.mergeInPlace(this);
+ result.mergeInPlace(other);
+ return result;
+ }
+
+ /**
+ * Merges the specified Hasher with this Bloom filter and returns a new
Bloom filter.
+ *
+ * <p>Specifically all bit indexes that are identified by the {@code
hasher} and in {@code this} Bloom filter
+ * be enabled in the resulting filter.</p>
+ *
+ * @param hasher the hasher to provide the indices
+ * @return the new Bloom filter.
+ */
+ default BloomFilter merge(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ BloomFilter result = shape.isSparse((hasher.size() *
shape.getNumberOfHashFunctions()) + cardinality())
+ ? new SparseBloomFilter(shape, hasher)
+ : new SimpleBloomFilter(shape, hasher);
+ result.mergeInPlace(this);
+ return result;
+ }
+
+ /**
+ * Merges the specified Bloom filter into this Bloom filter.
+ *
+ * <p>Specifically all
+ * bit indexes that are identified by the {@code other} will be enabled in
this filter.</p>
+ *
+ * <p><em>Note: This method should return {@code true} even if no
additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may
not contain
+ * the {@code other} Bloom filter.</em> This state may occur in complex
Bloom filter implementations like
+ * counting Bloom filters.</p>
+ *
+ * @param other The bloom filter to merge into this one.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the shape of the other filter does
not match
- * the shape of this filter
*/
- boolean merge(BloomFilter other);
+ boolean mergeInPlace(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter.
Specifically all
+ * Merges the specified hasher into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code hasher} will be enabled
in this filter.
*
- * <p>Note: This method should return {@code true} even if no additional
bit indexes were
- * enabled. A {@code false} result indicates that this filter is not
ensured to contain
- * the specified decomposed Bloom filter.
+ * <p><em>Note: This method should return {@code true} even if no
additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may
not contain
+ * the {@code other} Bloom filter.</em> This state may occur in complex
Bloom filter implementations like
+ * counting Bloom filters.</p>
*
- * @param hasher the hasher to provide the indexes
+ * @param hasher The hasher to merge.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the hasher cannot generate indices
for the shape of
- * this filter
*/
- boolean merge(Hasher hasher);
+ default boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ BloomFilter result = shape.isSparse((hasher.size() *
shape.getNumberOfHashFunctions()) + cardinality())
+ ? new SparseBloomFilter(shape, hasher)
+ : new SimpleBloomFilter(shape, hasher);
+ return mergeInPlace(result);
+ }
+
+ /**
+ * Determines if the bloom filter is "full".
+ *
+ * <p>Full is defined as having no unset bits.</p>
+ *
+ * @return {@code true} if the filter is full, {@code false} otherwise.
+ */
+ default boolean isFull() {
+ return cardinality() == getShape().getNumberOfBits();
+ }
// Counting Operations
/**
* Gets the cardinality (number of enabled bits) of this Bloom filter.
*
- * <p>This is also known as the Hamming value.</p>
+ * <p>This is also known as the Hamming value or Hamming number.</p>
*
* @return the cardinality of this filter
*/
int cardinality();
/**
- * Performs a logical "AND" with the other Bloom filter and returns the
cardinality
- * (number of enabled bits) of the result.
+ * Estimates the number of items in the Bloom filter.
*
- * @param other the other Bloom filter
- * @return the cardinality of the result of {@code (this AND other)}
+ * <p>By default this is the rounding of the {@code
Shape.estimateN(cardinality)} calculation for the
+ * shape and cardinality of this filter.</p>
+ *
+ * <p>An item is roughly equivalent to the number of Hashers that have
been merged. As the Bloom filter
Review comment:
updated and clarified
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]