Claudenw commented on a change in pull request #258:
URL:
https://github.com/apache/commons-collections/pull/258#discussion_r804878551
##########
File path:
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java
##########
@@ -16,138 +16,239 @@
*/
package org.apache.commons.collections4.bloomfilter;
+import java.util.Objects;
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
-import org.apache.commons.collections4.bloomfilter.hasher.Shape;
-import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;
/**
* The interface that describes a Bloom filter.
+ * <p>
+ * <em>See implementation notes for BitMapProducer and IndexProducer.</em>
+ * </p>
+ * @see BitMapProducer
+ * @see IndexProducer
* @since 4.5
*/
-public interface BloomFilter {
-
- // Query Operations
+public interface BloomFilter extends IndexProducer, BitMapProducer {
/**
- * Gets the shape of this filter.
- *
- * @return the shape of this filter
+ * Creates a new instance of the BloomFilter with the same properties as
the current one.
+ * @return a copy of this BloomFilter
*/
- Shape getShape();
+ BloomFilter copy();
+
+ // Query Operations
/**
- * Gets an array of little-endian long values representing the bits of
this filter.
+ * This method is used to determine the best method for matching.
*
- * <p>The returned array will have length {@code ceil(m / 64)} where
{@code m} is the
- * number of bits in the filter and {@code ceil} is the ceiling function.
- * Bits 0-63 are in the first long. A value of 1 at a bit position
indicates the bit
- * index is enabled.
+ * <p>For `sparse` implementations
+ * the {@code forEachIndex(IntConsumer consumer)} method is more
efficient. For non `sparse` implementations
+ * the {@code forEachBitMap(LongConsumer consumer)} is more efficient.
Implementers should determine if it is easier
+ * for the implementation to produce indexes of bit map blocks.</p>
*
- * @return the {@code long[]} representation of this filter
+ * @return {@code true} if the implementation is sparse {@code false}
otherwise.
+ * @see BitMap
*/
- long[] getBits();
+ boolean isSparse();
/**
- * Creates a StaticHasher that contains the indexes of the bits that are
on in this
- * filter.
- *
- * @return a StaticHasher for that produces this Bloom filter
+ * Gets the shape that was used when the filter was built.
+ * @return The shape the filter was built with.
*/
- StaticHasher getHasher();
+ Shape getShape();
/**
- * Returns {@code true} if this filter contains the specified filter.
Specifically this
+ * Returns {@code true} if this filter contains the specified filter.
+ *
+ * <p>Specifically this
* returns {@code true} if this filter is enabled for all bits that are
enabled in the
* {@code other} filter. Using the bit representations this is
- * effectively {@code (this AND other) == other}.
+ * effectively {@code (this AND other) == other}.</p>
*
* @param other the other Bloom filter
- * @return true if this filter is enabled for all enabled bits in the
other filter
- * @throws IllegalArgumentException if the shape of the other filter does
not match
- * the shape of this filter
+ * @return true if all enabled bits in the other filter are enabled in
this filter.
*/
- boolean contains(BloomFilter other);
+ default boolean contains(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ return isSparse() ? contains((IndexProducer) other) :
contains((BitMapProducer) other);
+ }
/**
- * Returns {@code true} if this filter contains the specified decomposed
Bloom filter.
- * Specifically this returns {@code true} if this filter is enabled for
all bit indexes
- * identified by the {@code hasher}. Using the bit representations this is
- * effectively {@code (this AND hasher) == hasher}.
+ * Returns {@code true} if this filter contains the bits specified in the
hasher.
+ *
+ * <p>Specifically this returns {@code true} if this filter is enabled for
all bit indexes
+ * identified by the {@code hasher}. Using the bit map representations
this is
+ * effectively {@code (this AND hasher) == hasher}.</p>
*
* @param hasher the hasher to provide the indexes
* @return true if this filter is enabled for all bits specified by the
hasher
- * @throws IllegalArgumentException if the hasher cannot generate indices
for the shape of
- * this filter
*/
- boolean contains(Hasher hasher);
+ default boolean contains(Hasher hasher) {
+ Objects.requireNonNull(hasher, "Hasher");
+ Shape shape = getShape();
+ return contains(hasher.indices(shape));
+ }
- // Modification Operations
+ /**
+ * Returns {@code true} if this filter contains the indices specified
IndexProducer.
+ *
+ * <p>Specifically this returns {@code true} if this filter is enabled for
all bit indexes
+ * identified by the {@code IndexProducer}.</p>
+ *
+ * @param indexProducer the IndexProducer to provide the indexes
+ * @return {@code true} if this filter is enabled for all bits specified
by the IndexProducer
+ */
+ boolean contains(IndexProducer indexProducer);
/**
- * Merges the specified Bloom filter into this Bloom filter. Specifically
all bit indexes
- * that are enabled in the {@code other} filter will be enabled in this
filter.
+ * Returns {@code true} if this filter contains the bits specified in the
bit maps produced by the
+ * bitMapProducer.
*
- * <p>Note: This method should return {@code true} even if no additional
bit indexes were
- * enabled. A {@code false} result indicates that this filter is not
ensured to contain
- * the {@code other} Bloom filter.
+ * @param bitMapProducer the the {@code BitMapProducer} to provide the bit
maps.
+ * @return {@code true} if this filter is enabled for all bits specified
by the bit maps
+ */
+ default boolean contains(BitMapProducer bitMapProducer) {
+ return bitMapProducer.forEachBitMap(this.makePredicate((x, y) -> (x &
y) == y));
+ }
+
+ // update operations
+
+ /**
+ * Merges the specified Bloom filter with this Bloom filter creating a new
Bloom filter.
+ *
+ * <p>Specifically all bit indexes that are enabled in the {@code other}
and in @code this} filter will be
+ * enabled in the resulting filter.</p>
*
* @param other the other Bloom filter
+ * @return The new Bloom filter.
+ */
+ default BloomFilter merge(BloomFilter other) {
+ Objects.requireNonNull(other, "other");
+ BloomFilter result = copy();
+ result.mergeInPlace(other);
+ return result;
+ }
+
+ /**
+ * Merges the specified Hasher with this Bloom filter and returns a new
Bloom filter.
+ *
+ * <p>Specifically all bit indexes that are identified by the {@code
hasher} and in {@code this} Bloom filter
+ * be enabled in the resulting filter.</p>
+ *
+ * @param hasher the hasher to provide the indices
+ * @return the new Bloom filter.
+ */
+ default BloomFilter merge(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ BloomFilter result = copy();
+ result.mergeInPlace(hasher);
+ return result;
+ }
+
+ /**
+ * Merges the specified Bloom filter into this Bloom filter.
+ *
+ * <p>Specifically all
+ * bit indexes that are identified by the {@code other} will be enabled in
this filter.</p>
+ *
+ * <p><em>Note: This method should return {@code true} even if no
additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may
not contain
+ * the {@code other} Bloom filter.</em> This state may occur in complex
Bloom filter implementations like
+ * counting Bloom filters.</p>
+ *
+ * @param other The bloom filter to merge into this one.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the shape of the other filter does
not match
- * the shape of this filter
*/
- boolean merge(BloomFilter other);
+ boolean mergeInPlace(BloomFilter other);
/**
- * Merges the specified decomposed Bloom filter into this Bloom filter.
Specifically all
+ * Merges the specified hasher into this Bloom filter. Specifically all
* bit indexes that are identified by the {@code hasher} will be enabled
in this filter.
*
- * <p>Note: This method should return {@code true} even if no additional
bit indexes were
- * enabled. A {@code false} result indicates that this filter is not
ensured to contain
- * the specified decomposed Bloom filter.
+ * <p><em>Note: This method should return {@code true} even if no
additional bit indexes were
+ * enabled. A {@code false} result indicates that this filter may or may
not contain
+ * the {@code other} Bloom filter.</em> This state may occur in complex
Bloom filter implementations like
+ * counting Bloom filters.</p>
*
- * @param hasher the hasher to provide the indexes
+ * @param hasher The hasher to merge.
* @return true if the merge was successful
- * @throws IllegalArgumentException if the hasher cannot generate indices
for the shape of
- * this filter
*/
- boolean merge(Hasher hasher);
+ default boolean mergeInPlace(Hasher hasher) {
+ Objects.requireNonNull(hasher, "hasher");
+ Shape shape = getShape();
+ BloomFilter result = shape.isSparse((hasher.size() *
shape.getNumberOfHashFunctions()) + cardinality())
Review comment:
changed. Now builds Bloomfilter based on whether the current one is
sparse on the assumption that the merge is faster for filters that are both
sparse or both not sparse.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]