This is an automated email from the ASF dual-hosted git repository. aherbert pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/commons-collections.git
commit 9a58c1bbdf7c65e71b33ab1ba406f1de5a8191df Author: Claude Warren, Jr <[email protected]> AuthorDate: Fri Sep 9 22:18:32 2022 +0100 Collections-763: Remove BloomFilter constructors that create initial entry --- .../bloomfilter/ArrayCountingBloomFilter.java | 33 ----- .../collections4/bloomfilter/BloomFilter.java | 41 +++++- .../bloomfilter/CountingBloomFilter.java | 137 +++++++++++++++++++- .../bloomfilter/SimpleBloomFilter.java | 79 ++---------- .../bloomfilter/SparseBloomFilter.java | 82 ++---------- .../collections4/bloomfilter/package-info.java | 90 +++++++------- .../bloomfilter/AbstractBloomFilterTest.java | 129 ++++++++++++------- .../AbstractCountingBloomFilterTest.java | 68 +++++++--- .../bloomfilter/AbstractHasherTest.java | 8 +- .../bloomfilter/ArrayCountingBloomFilterTest.java | 24 ---- ...ultBitMapProducerTest.java => ArrayHasher.java} | 42 ++++--- .../BitMapProducerFromSimpleBloomFilterTest.java | 4 +- .../BitMapProducerFromSparseBloomFilterTest.java | 4 +- .../bloomfilter/DefaultBitMapProducerTest.java | 42 ++++++- .../bloomfilter/DefaultBloomFilterTest.java | 86 +++---------- .../bloomfilter/DefaultIndexProducerTest.java | 113 +++++++++++++++++ .../IndexProducerFromSimpleBloomFilterTest.java | 6 +- .../IndexProducerFromSparseBloomFilterTest.java | 7 +- .../bloomfilter/SetOperationsTest.java | 138 +++++++++++---------- .../bloomfilter/SimpleBloomFilterTest.java | 60 --------- .../bloomfilter/SparseBloomFilterTest.java | 63 +--------- 21 files changed, 662 insertions(+), 594 deletions(-) diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java index ba13e6645..e76ddda44 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilter.java @@ -125,39 +125,6 @@ public final class ArrayCountingBloomFilter implements CountingBloomFilter { return (int) IntStream.range(0, counts.length).filter(i -> counts[i] > 0).count(); } - @Override - public boolean merge(final BloomFilter other) { - Objects.requireNonNull(other, "other"); - try { - return add(BitCountProducer.from(other)); - } catch (IndexOutOfBoundsException e) { - throw new IllegalArgumentException( e ); - } - } - - @Override - public boolean merge(final Hasher hasher) { - Objects.requireNonNull(hasher, "hasher"); - try { - return add(BitCountProducer.from(hasher.uniqueIndices(shape))); - } catch (IndexOutOfBoundsException e) { - throw new IllegalArgumentException( - String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits())); - } - } - - @Override - public boolean remove(final BloomFilter other) { - Objects.requireNonNull(other, "other"); - return subtract(BitCountProducer.from(other)); - } - - @Override - public boolean remove(final Hasher hasher) { - Objects.requireNonNull(hasher, "hasher"); - return subtract(BitCountProducer.from(hasher.uniqueIndices(shape))); - } - @Override public boolean add(final BitCountProducer other) { Objects.requireNonNull(other, "other"); diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java index 9471e5dcd..f939538b0 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java @@ -135,7 +135,9 @@ public interface BloomFilter extends IndexProducer, BitMapProducer { * @param other The bloom filter to merge into this one. * @return true if the merge was successful */ - boolean merge(BloomFilter other); + default boolean merge(BloomFilter other) { + return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other ) : merge((BitMapProducer) other); + } /** * Merges the specified hasher into this Bloom filter. Specifically all @@ -143,7 +145,7 @@ public interface BloomFilter extends IndexProducer, BitMapProducer { * * <p><em>Note: This method should return {@code true} even if no additional bit indexes were * enabled. A {@code false} result indicates that this filter may or may not contain - * the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like + * the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like * counting Bloom filters.</p> * * @param hasher The hasher to merge. @@ -151,12 +153,39 @@ public interface BloomFilter extends IndexProducer, BitMapProducer { */ default boolean merge(Hasher hasher) { Objects.requireNonNull(hasher, "hasher"); - Shape shape = getShape(); - // create the Bloom filter that is most likely to merge quickly with this one - BloomFilter result = (characteristics() & SPARSE) != 0 ? new SparseBloomFilter(shape, hasher) : new SimpleBloomFilter(shape, hasher); - return merge(result); + return merge(hasher.indices(getShape())); } + /** + * Merges the specified IndexProducer into this Bloom filter. Specifically all + * bit indexes that are identified by the {@code producer} will be enabled in this filter. + * + * <p><em>Note: This method should return {@code true} even if no additional bit indexes were + * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of + * the {@code producer}.</em> This state may occur in complex Bloom filter implementations like + * counting Bloom filters.</p> + * + * @param indexProducer The IndexProducer to merge. + * @return true if the merge was successful + * @throws IllegalArgumentException if producer sends illegal value. + */ + boolean merge(IndexProducer indexProducer); + + /** + * Merges the specified hasher into this Bloom filter. Specifically all + * bit indexes that are identified by the {@code producer} will be enabled in this filter. + * + * <p><em>Note: This method should return {@code true} even if no additional bit indexes were + * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes + * enabled in the {@code producer}.</em> This state may occur in complex Bloom filter implementations like + * counting Bloom filters.</p> + * + * @param bitMapProducer The producer to merge. + * @return true if the merge was successful + * @throws IllegalArgumentException if producer sends illegal value. + */ + boolean merge(BitMapProducer bitMapProducer); + // Counting Operations /** diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java index 186c284e9..1d6f65cf5 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/CountingBloomFilter.java @@ -16,6 +16,8 @@ */ package org.apache.commons.collections4.bloomfilter; +import java.util.Objects; + /** * The interface that describes a Bloom filter that associates a count with each * bit index to allow reversal of merge operations with remove operations. @@ -77,6 +79,84 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer { // Modification Operations + /** + * Merges the specified Bloom filter into this Bloom filter. + * + * <p>Specifically: all counts for the indexes identified by the {@code other} filter will be incremented by 1,</p> + * + * <p>Note: If the other filter is a counting Bloom filter the index counts are ignored and it is treated as an + * IndexProducer.</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * @param other the other Bloom filter + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #add(BitCountProducer) + */ + default boolean merge(final BloomFilter other) { + Objects.requireNonNull(other, "other"); + return merge((IndexProducer) other); + } + + /** + * Merges the specified Hasher into this Bloom filter. + * + * <p>Specifically: all counts for the unique indexes identified by the {@code hasher} will be incremented by 1,</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * @param hasher the hasher + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #add(BitCountProducer) + */ + default boolean merge(final Hasher hasher) { + Objects.requireNonNull(hasher, "hasher"); + return merge(hasher.uniqueIndices(getShape())); + } + + /** + * Merges the specified index producer into this Bloom filter. + * + * <p>Specifically: all counts for the indexes identified by the {@code indexProducer} will be incremented by 1,</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * <p>Note: Indices that are returned multiple times will be incremented multiple times.</p> + * + * @param indexProducer the IndexProducer + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #add(BitCountProducer) + */ + default boolean merge(final IndexProducer indexProducer) { + Objects.requireNonNull(indexProducer, "indexProducer"); + try { + return add(BitCountProducer.from(indexProducer)); + } catch (IndexOutOfBoundsException e) { + throw new IllegalArgumentException( + String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e); + } + } + + /** + * Merges the specified BitMap producer into this Bloom filter. + * + * <p>Specifically: all counts for the indexes identified by the {@code bitMapProducer} will be incremented by 1,</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * @param bitMapProducer the BitMapProducer + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #add(BitCountProducer) + */ + default boolean merge(final BitMapProducer bitMapProducer) { + Objects.requireNonNull(bitMapProducer, "bitMapProducer"); + return merge(IndexProducer.fromBitMapProducer(bitMapProducer)); + } + /** * Removes the specified Bloom filter from this Bloom filter. * @@ -92,12 +172,15 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer { * @see #isValid() * @see #subtract(BitCountProducer) */ - boolean remove(BloomFilter other); + default boolean remove(final BloomFilter other) { + Objects.requireNonNull(other, "other"); + return remove((IndexProducer) other); + } /** - * Removes the specified hasher from the Bloom filter from this Bloom filter. + * Removes the unique values from the specified hasher from this Bloom filter. * - * <p>Specifically all counts for the indices produced by the {@code hasher} will be + * <p>Specifically all counts for the unique indices produced by the {@code hasher} will be * decremented by 1.</p> * * <p>For HasherCollections each enclosed Hasher will be considered a single item and decremented @@ -110,7 +193,53 @@ public interface CountingBloomFilter extends BloomFilter, BitCountProducer { * @see #isValid() * @see #subtract(BitCountProducer) */ - boolean remove(Hasher hasher); + default boolean remove(final Hasher hasher) { + Objects.requireNonNull(hasher, "hasher"); + return remove(hasher.uniqueIndices(getShape())); + } + + /** + * Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter. + * + * <p>Specifically all counts for the unique indices produced by the {@code hasher} will be + * decremented by 1.</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * <p>Node: This method expects index producers that produce unique values.</p> + * + * @param indexProducer the IndexProducer to provide the indexes + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #subtract(BitCountProducer) + */ + default boolean remove(final IndexProducer indexProducer) { + Objects.requireNonNull(indexProducer, "indexProducer"); + try { + return subtract(BitCountProducer.from(indexProducer)); + } catch (IndexOutOfBoundsException e) { + throw new IllegalArgumentException( + String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits())); + } + } + + /** + * Removes the specified BitMapProducer from this Bloom filter. + * + * <p>Specifically all counts for the indices produced by the {@code bitMapProducer} will be + * decremented by 1.</p> + * + * <p>This method will return {@code true} if the filter is valid after the operation.</p> + * + * @param bitMapProducer the BitMapProducer to provide the indexes + * @return {@code true} if the removal was successful and the state is valid + * @see #isValid() + * @see #subtract(BitCountProducer) + */ + default boolean remove(final BitMapProducer bitMapProducer) { + Objects.requireNonNull(bitMapProducer, "bitMapProducer"); + return remove(IndexProducer.fromBitMapProducer(bitMapProducer)); + } /** * Adds the specified BitCountProducer to this Bloom filter. diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java index eb777f726..df5ee707b 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilter.java @@ -55,58 +55,6 @@ public final class SimpleBloomFilter implements BloomFilter { this.cardinality = 0; } - /** - * Creates an instance that is equivalent to {@code other}. - * - * @param other The bloom filter to copy. - */ - public SimpleBloomFilter(BloomFilter other) { - Objects.requireNonNull(other, "other"); - this.shape = other.getShape(); - this.bitMap = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())]; - this.cardinality = 0; - if ((other.characteristics() & SPARSE) != 0) { - merge((IndexProducer) other); - } else { - merge((BitMapProducer) other); - } - } - - /** - * Creates a populated instance. - * @param shape The shape for the filter. - * @param hasher the Hasher to initialize the filter with. - */ - public SimpleBloomFilter(final Shape shape, Hasher hasher) { - this(shape); - Objects.requireNonNull(hasher, "hasher"); - merge(hasher); - } - - /** - * Creates a populated instance. - * @param shape The shape for the filter. - * @param indices the IndexProducer to initialize the filter with. - * @throws IllegalArgumentException if producer sends illegal value. - */ - public SimpleBloomFilter(final Shape shape, IndexProducer indices) { - this(shape); - Objects.requireNonNull(indices, "indices"); - merge(indices); - } - - /** - * Creates a populated instance. - * @param shape The shape for the filter. - * @param bitMaps the BitMapProducer to initialize the filter with. - * @throws IllegalArgumentException if the producer returns too many or too few bit maps. - */ - public SimpleBloomFilter(final Shape shape, BitMapProducer bitMaps) { - this(shape); - Objects.requireNonNull(bitMaps, "bitMaps"); - merge(bitMaps); - } - /** * Copy constructor for {@code copy()} use. * @param source @@ -139,29 +87,24 @@ public final class SimpleBloomFilter implements BloomFilter { return new SimpleBloomFilter(this); } - /** - * Performs a merge using an IndexProducer. - * @param indexProducer the IndexProducer to merge from. - * @throws IllegalArgumentException if producer sends illegal value. - */ - private void merge(IndexProducer indexProducer) { + @Override + public boolean merge(IndexProducer indexProducer) { + Objects.requireNonNull(indexProducer, "indexProducer"); indexProducer.forEachIndex(idx -> { if (idx < 0 || idx >= shape.getNumberOfBits()) { throw new IllegalArgumentException(String.format( - "IndexProducer should only send values in the range[0,%s]", shape.getNumberOfBits() - 1)); + "IndexProducer should only send values in the range[0,%s)", shape.getNumberOfBits())); } BitMap.set(bitMap, idx); return true; }); cardinality = -1; + return true; } - /** - * Performs a merge using an BitMapProducer. - * @param bitMapProducer the BitMapProducer to merge from. - * @throws IllegalArgumentException if producer sends illegal value. - */ - private void merge(BitMapProducer bitMapProducer) { + @Override + public boolean merge(BitMapProducer bitMapProducer) { + Objects.requireNonNull(bitMapProducer, "bitMapProducer"); try { int[] idx = new int[1]; bitMapProducer.forEachBitMap(value -> { @@ -173,7 +116,7 @@ public final class SimpleBloomFilter implements BloomFilter { int idxLimit = BitMap.getLongIndex(shape.getNumberOfBits()); if (idxLimit < idx[0]) { throw new IllegalArgumentException(String.format( - "BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits())); + "BitMapProducer set a bit higher than the limit for the shape: %s", shape.getNumberOfBits() - 1)); } if (idxLimit == idx[0]) { long excess = (bitMap[idxLimit] >> shape.getNumberOfBits()); @@ -188,13 +131,13 @@ public final class SimpleBloomFilter implements BloomFilter { throw new IllegalArgumentException( String.format("BitMapProducer should send at most %s maps", bitMap.length), e); } + return true; } @Override public boolean merge(Hasher hasher) { Objects.requireNonNull(hasher, "hasher"); - merge(hasher.indices(shape)); - return true; + return merge(hasher.indices(shape)); } @Override diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java index 9359e6a39..fdea9b469 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilter.java @@ -49,68 +49,6 @@ public final class SparseBloomFilter implements BloomFilter { this.indices = new TreeSet<>(); } - /** - * Creates an instance that is equivalent to {@code other}. - * - * @param other The bloom filter to copy. - */ - public SparseBloomFilter(BloomFilter other) { - Objects.requireNonNull(other, "other"); - this.shape = other.getShape(); - this.indices = new TreeSet<>(); - if ((other.characteristics() & SPARSE) != 0) { - merge((IndexProducer) other); - } else { - merge(IndexProducer.fromBitMapProducer(other)); - } - } - - private void checkIndices(Shape shape) { - if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) { - throw new IllegalArgumentException( - String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits())); - } - } - - /** - * Constructs a populated Bloom filter. - * @param shape the shape for the bloom filter. - * @param hasher the hasher to provide the initial data. - */ - public SparseBloomFilter(final Shape shape, Hasher hasher) { - this(shape); - Objects.requireNonNull(hasher, "hasher"); - hasher.indices(shape).forEachIndex(this::add); - checkIndices(shape); - } - - /** - * Constructs a populated Bloom filter. - * @param shape the shape of the filter. - * @param indices an index producer for the indices to to enable. - * @throws IllegalArgumentException if indices contains a value greater than the number - * of bits in the shape. - */ - public SparseBloomFilter(Shape shape, IndexProducer indices) { - this(shape); - Objects.requireNonNull(indices, "indices"); - indices.forEachIndex(this::add); - checkIndices(shape); - } - - /** - * Constructs a populated Bloom filter. - * @param shape the shape of the filter. - * @param bitMaps a BitMapProducer for the bit maps to add. - * @throws IllegalArgumentException if the bit maps contain a value greater than the number - * of bits in the shape. - */ - public SparseBloomFilter(Shape shape, BitMapProducer bitMaps) { - this(shape); - Objects.requireNonNull(bitMaps, "bitMaps"); - merge(IndexProducer.fromBitMapProducer(bitMaps)); - } - private SparseBloomFilter(SparseBloomFilter source) { shape = source.shape; indices = new TreeSet<>(source.indices); @@ -140,23 +78,27 @@ public final class SparseBloomFilter implements BloomFilter { return true; } - /** - * Performs a merge using an IndexProducer. - * @param indexProducer the IndexProducer to merge from. - * @throws IllegalArgumentException if producer sends illegal value. - */ - private void merge(IndexProducer indexProducer) { + @Override + public boolean merge(IndexProducer indexProducer) { + Objects.requireNonNull(indexProducer, "indexProducer"); indexProducer.forEachIndex(this::add); if (!this.indices.isEmpty()) { if (this.indices.last() >= shape.getNumberOfBits()) { throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)", - this.indices.last(), shape.getNumberOfBits())); + this.indices.last(), shape.getNumberOfBits() - 1)); } if (this.indices.first() < 0) { throw new IllegalArgumentException( String.format("Value in list %s is less than 0", this.indices.first())); } } + return true; + } + + @Override + public boolean merge(BitMapProducer bitMapProducer) { + Objects.requireNonNull(bitMapProducer, "bitMapProducer"); + return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer)); } @Override @@ -213,7 +155,7 @@ public final class SparseBloomFilter implements BloomFilter { * because our indices are always in order we can shorten the time necessary to * create the longs for the consumer */ - // the currenlty constructed bitMap + // the currently constructed bitMap long bitMap = 0; // the bitmap we are working on int idx = 0; diff --git a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java index c20725456..a4f5ce4f2 100644 --- a/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java +++ b/src/main/java/org/apache/commons/collections4/bloomfilter/package-info.java @@ -20,58 +20,57 @@ * * <h2>Background:</h2> * - * <p>The Bloom filter is a probabilistic data structure that indicates where things are not. - * Conceptually it is a bit vector. You create a Bloom filter by creating hashes - * and converting those to enabled bits in the vector. Multiple Bloom filters may be merged - * together into one Bloom filter. It is possible to test if a filter {@code B} has merged into - * another filter {@code A} by verifying that {@code (A & B) == B}.</p> - * - * <p>Bloom filters are generally used where hash - * tables would be too large, or as a filter front end for longer processes. For example - * most browsers have a Bloom filter that is built from all known bad URLs (ones that - * serve up malware). When you enter a URL the browser builds a Bloom filter and checks to - * see if it is "in" the bad URL filter. If not the URL is good, if it matches, then the - * expensive lookup on a remote system is made to see if it actually is in the list. There - * are lots of other uses, and in most cases the reason is to perform a fast check as a - * gateway for a longer operation. </p> + * <p>The Bloom filter is a probabilistic data structure that indicates where things are not. Conceptually it is a bit + * vector. You create a Bloom filter by creating hashes and converting those to enabled bits in the vector. Multiple + * Bloom filters may be merged together into one Bloom filter. It is possible to test if a filter {@code B} has merged + * into another filter {@code A} by verifying that {@code (A & B) == B}.</p> + * + * <p>Bloom filters are generally used where hash tables would be too large, or as a filter front end for longer processes. + * For example most browsers have a Bloom filter that is built from all known bad URLs (ones that serve up malware). + * When you enter a URL the browser builds a Bloom filter and checks to see if it is "in" the bad URL filter. If not the + * URL is good, if it matches, then the expensive lookup on a remote system is made to see if it actually is in the + * list. There are lots of other uses, and in most cases the reason is to perform a fast check as a gateway for a longer + * operation.</p> * * <h3>BloomFilter</h3> * - * <p>The Bloom filter architecture here is designed so that the implementation of the storage of bits is abstracted. + * <p>The Bloom filter architecture here is designed for speed of execution, so some methods like {@code merge}, {@code remove}, + * {@code add}, and {@code subtract} may throw exceptions. Once an exception is thrown the state of the Bloom filter is unknown. + * The choice to use not use atomic transactions was made to achieve maximum performance under correct usage.</p> + * + * <p>In addition the architecture is designed so that the implementation of the storage of bits is abstracted. * Programs that utilize the Bloom filters may use the {@code BitMapProducer} or {@code IndexProducer} to retrieve a - * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in + * representation of the internal structure. Additional methods are available in the {@code BitMap} to assist in * manipulation of the representations.</p> * - * <p>The bloom filter code is an interface that requires implementation of 6 methods:</p> + * <p>The bloom filter code is an interface that requires implementation of 9 methods:</p> * <ul> - * <li>{@code cardinality()} - * returns the number of bits enabled in the Bloom filter.</li> + * <li>{@link BloomFilter#cardinality()} returns the number of bits enabled in the Bloom filter.</li> * - * <li>{@code contains(BitMapProducer)} which - * returns true if the bits specified by the bit maps generated by the BitMapProducer are enabled in the Bloom filter.</li> + * <li>{@link BloomFilter#characteristics()} which returns a integer of characteristics flags.</li> * - * <li>{@code contains(IndexProducer)} which - * returns true if the bits specified by the indices generated by IndexProducer are enabled in the Bloom filter.</li> + * <li>{@link BloomFilter#clear()} which resets the Bloomfilter to its initial empty state.</li> * - * <li>{@code getShape()} which - * returns the shape the Bloom filter was created with.</li> - - * <li>{@code isSparse()} which - * returns true if an the implementation tracks indices natively, false if bit maps are used. In cases where - * neither are used the {@code isSparse} return value should reflect which is faster to produce.</li> + * <li>{@link BloomFilter#contains(IndexProducer)} which returns true if the bits specified by the indices generated by + * IndexProducer are enabled in the Bloom filter.</li> + * + * <li>{@link BloomFilter#copy()} which returns a fresh copy of the bitmap.</li> + * + * <li>{@link BloomFilter#getShape()} which returns the shape the Bloom filter was created with.</li> + * + * <li>{@link BloomFilter#merge(BitMapProducer)} which merges the BitMaps from the BitMapProducer into the internal + * representation of the Bloom filter.</li> * - * <li>{@code mergeInPlace(BloomFilter)} which - * utilizes either the {@code BitMapProducer} or {@code IndexProducer} from the argument to enable extra bits - * in the internal representation of the Bloom filter.</li> + * <li>{@link BloomFilter#merge(IndexProducer)} which merges the indices from the IndexProducer into the internal + * representation of the Bloom filter.</li> * </ul> * - * <p>Other methods should be implemented where they can be done so more efficiently than the default implementations. - * </p> + * <p>Other methods should be implemented where they can be done so more efficiently than the default implementations.</p> * * <h3>CountingBloomFilter</h3> * * <p>The counting bloom filter extends the Bloom filter by counting the number of times a specific bit has been - * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional + * enabled or disabled. This allows the removal (opposite of merge) of Bloom filters at the expense of additional * overhead.</p> * * <h3>Shape</h3> @@ -80,22 +79,23 @@ * * <h3>Hasher</h3> * - * <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton, - * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the - * number of items as the sum of the number of items represented by the Hashers in the collection.</p> + * <p>A Hasher converts bytes into a series of integers based on a Shape. With the exception of the HasherCollecton, + * each hasher represents one item being added to the Bloom filter. The HasherCollection represents the number of + * items as the sum of the number of items represented by the Hashers in the collection.</p> * - * <p>The SimpleHasher uses a combinatorial generation technique to create the integers. It is easily - * initialized by using a standard {@code MessageDigest} or other Hash function to hash the item to insert and - * then splitting the hash bytes in half and considering each as a long value.</p> + * <p>The EnhancedDoubleHasher uses a combinatorial generation technique to create the integers. It is easily + * initialized by using a byte array returned by the standard {@code MessageDigest} or other hash function to + * initialize the Hasher. Alternatively a pair of a long values may also be used.</p> * - * <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter} - * and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices.</p> + * <p>Other implementations of the Hasher are easy to implement, and should make use of the {@code Hasher.Filter} + * and/or {@code Hasher.FileredIntConsumer} classes to filter out duplicate indices when implementing + * {@code Hasher.uniqueIndices(Shape)}.</p> * * <h2>References</h2> * * <ol> - * <li> https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li> - * <li> https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li> + * <li>https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf</li> + * <li>https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java#L60</li> * </ol> * * @since 4.5 diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java index da2aa897c..91eb763af 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractBloomFilterTest.java @@ -16,6 +16,7 @@ */ package org.apache.commons.collections4.bloomfilter; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -24,6 +25,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.List; +import java.util.SortedSet; + import org.junit.jupiter.api.Test; /** @@ -70,7 +73,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { * @param hasher the hasher to use to create the filter. * @return a BloomFilter implementation. */ - protected abstract T createFilter(Shape shape, Hasher hasher); + protected final T createFilter(Shape shape, Hasher hasher) { + T bf = createEmptyFilter(shape); + bf.merge(hasher); + return bf; + } /** * Create the BloomFilter implementation we are testing. @@ -79,7 +86,11 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { * @param producer A BitMap producer to build the filter with. * @return a BloomFilter implementation. */ - protected abstract T createFilter(Shape shape, BitMapProducer producer); + protected final T createFilter(Shape shape, BitMapProducer producer) { + T bf = createEmptyFilter(shape); + bf.merge(producer); + return bf; + } /** * Create the BloomFilter implementation we are testing. @@ -88,57 +99,85 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { * @param producer An Index producer to build the filter with. * @return a BloomFilter implementation. */ - protected abstract T createFilter(Shape shape, IndexProducer producer); + protected final T createFilter(Shape shape, IndexProducer producer) { + T bf = createEmptyFilter(shape); + bf.merge(producer); + return bf; + } /** * */ @Test - public void testConstructWithBadHasher() { + public void testMergeWithBadHasher() { // value too large + final BloomFilter f = createEmptyFilter(getTestShape()); assertThrows(IllegalArgumentException.class, - () -> createFilter(getTestShape(), new BadHasher(getTestShape().getNumberOfBits()))); + () -> f.merge(new BadHasher(getTestShape().getNumberOfBits()))); // negative value - assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), new BadHasher(-1))); + BloomFilter f2 = createEmptyFilter(getTestShape()); + assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1))); } @Test - public void testConstructWitBitMapProducer() { - long[] values = { from11Value, 0x9L }; - BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values)); - List<Long> lst = new ArrayList<>(); - for (long l : values) { - lst.add(l); + public void testMergeWithHasher() { + for (int i=0; i<5000; i++) { + final BloomFilter f = createEmptyFilter(getTestShape()); + int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits()); + Hasher hasher = new ArrayHasher(expected); + f.merge(hasher); + // create sorted unique array of expected values + assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray( )); } - assertTrue(f.forEachBitMap(l -> { - return lst.remove(Long.valueOf(l)); - })); - assertTrue(lst.isEmpty()); + } - BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE); + @Test + public void testMergeWithBitMapProducer() { + for (int i=0; i<5000; i++) { + long[] values = new long[2]; + for (int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) { + BitMap.set(values, idx); + } + BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values)); + List<Long> lst = new ArrayList<>(); + for (long l : values) { + lst.add(l); + } + assertTrue(f.forEachBitMap(l -> { + return lst.remove(Long.valueOf(l)); + })); + assertTrue(lst.isEmpty()); + } // values too large - assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), badProducer)); + final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(0L, Long.MAX_VALUE); + final BloomFilter bf = createEmptyFilter(getTestShape()); + assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer)); + + // test where merged bits exceed expected bits but both bitmaps are the same length. + final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L); + final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32)); + assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2)); } @Test - public void testConstructWithIndexProducer() { - int[] values = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 }; - BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values)); - List<Integer> lst = new ArrayList<>(); - for (int i : values) { - lst.add(i); + public void testMergeWithIndexProducer() { + for (int i=0; i<5000; i++) { + int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits()); + BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values)); + SortedSet<Integer> uniqueValues = DefaultIndexProducerTest.uniqueSet(values); + assertTrue(f.forEachIndex(idx -> { + return uniqueValues.remove(Integer.valueOf(idx)); + })); + assertTrue(uniqueValues.isEmpty()); } - assertTrue(f.forEachIndex(i -> { - return lst.remove(Integer.valueOf(i)); - })); - assertTrue(lst.isEmpty()); - // value to large - assertThrows(IllegalArgumentException.class, () -> createFilter(getTestShape(), - IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() }))); + final BloomFilter f1 = createEmptyFilter(getTestShape()); + assertThrows(IllegalArgumentException.class, + () -> f1.merge(IndexProducer.fromIndexArray(new int[] { getTestShape().getNumberOfBits() }))); // negative value + final BloomFilter f2 = createEmptyFilter(getTestShape()); assertThrows(IllegalArgumentException.class, - () -> createFilter(getTestShape(), IndexProducer.fromIndexArray(new int[] { -1 }))); + () -> f2.merge(IndexProducer.fromIndexArray(new int[] { -1 }))); } @Test @@ -228,7 +267,7 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { @Test public final void testEstimateN() { // build a filter - BloomFilter filter1 = new SimpleBloomFilter(getTestShape(), from1); + BloomFilter filter1 = createFilter(getTestShape(), from1); assertEquals(1, filter1.estimateN()); // the data provided above do not generate an estimate that is equivalent to the @@ -316,20 +355,20 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1))); // test error when bloom filter returns values out of range - final BloomFilter bf5 = new SimpleBloomFilter( - Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE), - new IncrementingHasher(Long.SIZE * 2, 1)); + BloomFilter bf5 = new SimpleBloomFilter( + Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE)); + bf5.merge(new IncrementingHasher(Long.SIZE * 2, 1)); assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5)); - final BloomFilter bf6 = new SparseBloomFilter( - Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE), - new IncrementingHasher(Long.SIZE * 2, 1)); + BloomFilter bf6 = new SparseBloomFilter( + Shape.fromKM(getTestShape().getNumberOfHashFunctions(), 3 * Long.SIZE)); + bf6.merge(new IncrementingHasher(Long.SIZE * 2, 1)); assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6)); } - private static void assertIndexProducerConstructor(Shape shape, int[] values, int[] expected) { + private void assertIndexProducerMerge(Shape shape, int[] values, int[] expected) { IndexProducer indices = IndexProducer.fromIndexArray(values); - SparseBloomFilter filter = new SparseBloomFilter(shape, indices); + BloomFilter filter = createFilter(shape, indices); List<Integer> lst = new ArrayList<>(); filter.forEachIndex(x -> { lst.add(x); @@ -347,18 +386,18 @@ public abstract class AbstractBloomFilterTest<T extends BloomFilter> { } @Test - public void testIndexProducerConstructor() { + public void testIndexProducerMerge() { Shape shape = Shape.fromKM(5, 10); - assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 }); + assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 6, 8 }, new int[] { 0, 2, 4, 6, 8 }); // test duplicate values - assertIndexProducerConstructor(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 }); + assertIndexProducerMerge(shape, new int[] { 0, 2, 4, 2, 8 }, new int[] { 0, 2, 4, 8 }); // test negative values assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, -2, 8 }); // test index too large assertFailedIndexProducerConstructor(shape, new int[] { 0, 2, 4, 12, 8 }); // test no indices - assertIndexProducerConstructor(shape, new int[0], new int[0]); + assertIndexProducerMerge(shape, new int[0], new int[0]); } @Test diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java index b7ca7dd37..48fbd92f4 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractCountingBloomFilterTest.java @@ -17,6 +17,7 @@ package org.apache.commons.collections4.bloomfilter; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -94,7 +95,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil @Test public final void testCountingBloomFilterSpecificContains() { - final BloomFilter bf = new SimpleBloomFilter(getTestShape(), from1); + final BloomFilter bf = new SimpleBloomFilter(getTestShape()); + bf.merge(from1); final CountingBloomFilter bf2 = createFilter(getTestShape(), bigHasher); assertTrue(bf.contains(bf), "BF Should contain itself"); @@ -112,7 +114,8 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil public final void testCountingSpecificMerge() { final BloomFilter bf1 = createFilter(getTestShape(), from1); - final BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from11); + final BloomFilter bf2 = new SimpleBloomFilter(getTestShape()); + bf2.merge(from11); final BloomFilter bf3 = bf1.copy(); bf3.merge(bf2); @@ -133,7 +136,9 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil assertTrue(bf5.isValid(), "Should be valid"); CountingBloomFilter bf6 = bf5.copy(); - bf6.merge(new SimpleBloomFilter(getTestShape(), from1)); + BloomFilter bf7 = new SimpleBloomFilter(getTestShape()); + bf7.merge(from1); + bf6.merge(bf7); assertFalse(bf6.isValid(), "Should not be valid"); } @@ -195,10 +200,13 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil */ @Test public final void testRemove() { + BloomFilter simple = new SimpleBloomFilter(getTestShape()); + simple.merge(from11); + final CountingBloomFilter bf1 = createFilter(getTestShape(), from1); bf1.add(BitCountProducer.from(from11.indices(getTestShape()))); - assertTrue(bf1.remove(new SimpleBloomFilter(getTestShape(), from11)), "Remove should work"); + assertTrue(bf1.remove(simple), "Remove should work"); assertFalse(bf1.contains(from11), "Should not contain"); assertTrue(bf1.contains(from1), "Should contain"); @@ -215,17 +223,45 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil assertCounts(bf2, from1Counts); // test underflow - final CountingBloomFilter bf3 = createFilter(getTestShape(), from1); - - final BloomFilter bf4 = new SimpleBloomFilter(getTestShape(), from11); - - assertFalse(bf3.remove(bf4), "Subtract should not work"); + assertFalse(bf3.remove(simple), "Subtract should not work"); assertFalse(bf3.isValid(), "isValid should return false"); assertFalse(bf3.contains(from1), "Should not contain"); - assertFalse(bf3.contains(bf4), "Should not contain"); + assertFalse(bf3.contains(simple), "Should not contain"); assertCounts(bf3, new int[] { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }); + + // with IndexProducer + IndexProducer ip = from11.indices(getTestShape()); + + final CountingBloomFilter bf4 = createFilter(getTestShape(), from1); + bf4.add(BitCountProducer.from(from11.indices(getTestShape()))); + + assertTrue(bf4.remove(ip), "Remove should work"); + assertFalse(bf4.contains(from11), "Should not contain"); + assertTrue(bf4.contains(from1), "Should contain"); + + assertCounts(bf4, from1Counts); + + // with BitMapProducer + final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits()); + final CountingBloomFilter bf5 = createFilter(getTestShape(), from1); + bf5.add(BitCountProducer.from(from11.indices(getTestShape()))); + + assertTrue(bf5.remove(bmp), "Remove should work"); + assertFalse(bf5.contains(from11), "Should not contain"); + assertTrue(bf5.contains(from1), "Should contain"); + + assertCounts(bf5, from1Counts); + + // test producer errors + IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits()); + final CountingBloomFilter bf6 = createFilter(getTestShape(), from1); + assertThrows( IllegalArgumentException.class, () -> bf6.remove(ip2)); + + final CountingBloomFilter bf7 = createFilter(getTestShape(), from1); + final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits()); + assertThrows( IllegalArgumentException.class, () -> bf7.remove(bmp2)); } @Test @@ -247,20 +283,12 @@ public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFil bf1.merge(hasher); assertEquals(6, bf1.cardinality()); bf1.forEachCount((x, y) -> { - assertEquals(1, y, "Hasher in mergeInPlace results in value not equal to 1"); - return true; - }); - - bf1 = createEmptyFilter(shape); - CountingBloomFilter bf2 = bf1.copy(); - bf2.merge(hasher); - assertEquals(6, bf2.cardinality()); - bf2.forEachCount((x, y) -> { assertEquals(1, y, "Hasher in merge results in value not equal to 1"); return true; }); - bf1 = createFilter(shape, hasher); + bf1 = createEmptyFilter(shape); + bf1.merge(hasher); bf1.remove(hasher); assertEquals(0, bf1.cardinality()); assertTrue(bf1.forEachCount((x, y) -> (false)), "Hasher in removes results in value not equal to 0"); diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java index cd5eda18c..0e9fae410 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/AbstractHasherTest.java @@ -85,7 +85,7 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest { count[0]++; return false; }); - assertEquals(1, count[0], "did not exit early" ); + assertEquals(1, count[0], "did not exit early"); } @Test @@ -97,8 +97,8 @@ public abstract class AbstractHasherTest extends AbstractIndexProducerTest { List<Integer> full = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList()); producer = hasher.uniqueIndices(shape); List<Integer> unique = Arrays.stream(producer.asIndexArray()).boxed().collect(Collectors.toList()); - assertTrue( full.size() > unique.size() ); - Set<Integer> set = new HashSet<Integer>( unique ); - assertEquals( set.size(), unique.size() ); + assertTrue(full.size() > unique.size()); + Set<Integer> set = new HashSet<>(unique); + assertEquals(set.size(), unique.size()); } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java index 86bd638b7..e7141dade 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayCountingBloomFilterTest.java @@ -25,28 +25,4 @@ public class ArrayCountingBloomFilterTest extends AbstractCountingBloomFilterTes protected ArrayCountingBloomFilter createEmptyFilter(Shape shape) { return new ArrayCountingBloomFilter(shape); } - - @Override - protected ArrayCountingBloomFilter createFilter(Shape shape, Hasher hasher) { - return createFilter(shape, hasher.uniqueIndices(shape)); - } - - @Override - protected ArrayCountingBloomFilter createFilter(Shape shape, BitMapProducer producer) { - return createFilter(shape, IndexProducer.fromBitMapProducer(producer)); - } - - @Override - protected ArrayCountingBloomFilter createFilter(Shape shape, IndexProducer producer) { - ArrayCountingBloomFilter filter = createEmptyFilter(shape); - try { - filter.add(BitCountProducer.from(producer)); - return filter; - } catch (ArrayIndexOutOfBoundsException e) { - // since ArrayCountingBloomFilter does not ahave a constructor that takes a - // hasher - // we have to duplicate the expected results here. - throw new IllegalArgumentException(e); - } - } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java similarity index 51% copy from src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java copy to src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java index e7af149b1..babd13d2c 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/ArrayHasher.java @@ -16,36 +16,46 @@ */ package org.apache.commons.collections4.bloomfilter; -import java.util.function.LongPredicate; +import java.util.Objects; +import java.util.function.IntPredicate; -public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest { +/** + * A Testing Hasher that returns the array values % shape.getNumberOfBits(). + * + * @since 4.5 + */ +public final class ArrayHasher implements Hasher { + final int[] values; - @Override - protected BitMapProducer createProducer() { - return new DefaultBitMapProducer(new long[] { 1L, 2L }); + ArrayHasher(final int... values) { + this.values = values; } @Override - protected BitMapProducer createEmptyProducer() { - return new DefaultBitMapProducer(new long[0]); + public IndexProducer indices(final Shape shape) { + Objects.requireNonNull(shape, "shape"); + return new Producer(shape); } @Override - protected boolean emptyIsZeroLength() { - return true; + public IndexProducer uniqueIndices(Shape shape) { + return new Producer(shape); } - class DefaultBitMapProducer implements BitMapProducer { - long[] bitMaps; + private class Producer implements IndexProducer { + Shape shape; - DefaultBitMapProducer(long[] bitMaps) { - this.bitMaps = bitMaps; + Producer(Shape shape) { + this.shape = shape; } @Override - public boolean forEachBitMap(LongPredicate predicate) { - for (long bitmap : bitMaps) { - if (!predicate.test(bitmap)) { + public boolean forEachIndex(IntPredicate consumer) { + int pos = 0; + for (int i=0; i<shape.getNumberOfHashFunctions(); i++) { + int result = values[pos++] % shape.getNumberOfBits(); + pos = pos % values.length; + if (!consumer.test(result)) { return false; } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java index aa000797e..96f121f0a 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSimpleBloomFilterTest.java @@ -23,7 +23,9 @@ public class BitMapProducerFromSimpleBloomFilterTest extends AbstractBitMapProdu @Override protected BitMapProducer createProducer() { Hasher hasher = new IncrementingHasher(0, 1); - return new SimpleBloomFilter(shape, hasher); + BloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(hasher); + return bf; } @Override diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java index 0c80b6d0d..7d22dee12 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/BitMapProducerFromSparseBloomFilterTest.java @@ -23,7 +23,9 @@ public class BitMapProducerFromSparseBloomFilterTest extends AbstractBitMapProdu @Override protected BitMapProducer createProducer() { Hasher hasher = new IncrementingHasher(0, 1); - return new SparseBloomFilter(shape, hasher); + BloomFilter bf = new SparseBloomFilter(shape); + bf.merge(hasher); + return bf; } @Override diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java index e7af149b1..c14f15212 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBitMapProducerTest.java @@ -16,13 +16,21 @@ */ package org.apache.commons.collections4.bloomfilter; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Random; import java.util.function.LongPredicate; +import org.junit.jupiter.api.Test; + public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest { + long[] values = generateLongArray( 5 ); + @Override protected BitMapProducer createProducer() { - return new DefaultBitMapProducer(new long[] { 1L, 2L }); + return new DefaultBitMapProducer(values); } @Override @@ -52,4 +60,36 @@ public class DefaultBitMapProducerTest extends AbstractBitMapProducerTest { return true; } } + + /** + * Generates an array of random long values. + * @param size the number of values to generate + * @return the array of random values. + */ + public static long[] generateLongArray( int size ) { + Random rnd = new Random(); + long[] expected = new long[size]; + for (int i=0; i<size; i++) { + expected[i] = rnd.nextLong(); + } + return expected; + } + + @Test + public void testFromIndexProducer() { + int[] expected = DefaultIndexProducerTest.generateIntArray(10, 256); + IndexProducer ip = IndexProducer.fromIndexArray(expected); + long[] ary = BitMapProducer.fromIndexProducer(ip, 256).asBitMapArray(); + for (int idx : expected) { + assertTrue( BitMap.contains(ary, idx)); + } + } + + @Test + public void testFromBitMapArray() { + int nOfBitMaps = BitMap.numberOfBitMaps(256); + long[] expected = generateLongArray( nOfBitMaps ); + long[] ary = BitMapProducer.fromBitMapArray(expected).asBitMapArray(); + assertArrayEquals( expected, ary ); + } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java index ea7bd25f6..eb23d84a3 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultBloomFilterTest.java @@ -34,21 +34,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom return new SparseDefaultBloomFilter(shape); } - @Override - protected AbstractDefaultBloomFilter createFilter(final Shape shape, final Hasher hasher) { - return new SparseDefaultBloomFilter(shape, hasher); - } - - @Override - protected AbstractDefaultBloomFilter createFilter(final Shape shape, final BitMapProducer producer) { - return new SparseDefaultBloomFilter(shape, producer); - } - - @Override - protected AbstractDefaultBloomFilter createFilter(final Shape shape, final IndexProducer producer) { - return new SparseDefaultBloomFilter(shape, producer); - } - @Test public void testDefaultBloomFilterSimpleSpecificMerge() { AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150)); @@ -62,14 +47,14 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom public void testDefaultBloomFilterSparseSpecificMerge() { Shape shape = Shape.fromKM(3, 150); AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape); - AbstractDefaultBloomFilter filter2 = new SparseDefaultBloomFilter(shape, new IncrementingHasher(0, 1)); + AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1)); BloomFilter newFilter = filter.copy(); newFilter.merge(filter2); assertEquals(3, newFilter.cardinality()); } @Test - public void testHasherBasedMergeInPlaceWithDifferingSparseness() { + public void testHasherBasedMergeWithDifferingSparseness() { Hasher hasher = new IncrementingHasher(1, 1); BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape()); @@ -92,26 +77,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom this.indices = new TreeSet<>(); } - AbstractDefaultBloomFilter(Shape shape, Hasher hasher) { - this(shape, hasher.indices(shape)); - } - - AbstractDefaultBloomFilter(Shape shape, BitMapProducer producer) { - this(shape, IndexProducer.fromBitMapProducer(producer)); - } - - AbstractDefaultBloomFilter(Shape shape, IndexProducer producer) { - this(shape); - producer.forEachIndex((i) -> { - indices.add(i); - return true; - }); - if (this.indices.floor(-1) != null || this.indices.ceiling(shape.getNumberOfBits()) != null) { - throw new IllegalArgumentException( - String.format("Filter only accepts values in the [0,%d) range", shape.getNumberOfBits())); - } - } - @Override public void clear() { indices.clear(); @@ -147,12 +112,7 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom return contains(IndexProducer.fromBitMapProducer(bitMapProducer)); } - @Override - public boolean merge(BloomFilter other) { - other.forEachIndex((i) -> { - indices.add(i); - return true; - }); + private void checkIndicesRange() { if (!indices.isEmpty()) { if (indices.last() >= shape.getNumberOfBits()) { throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)", @@ -163,28 +123,30 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom String.format("Value in list %s is less than 0", indices.first())); } } - return true; } @Override - public int cardinality() { - return indices.size(); + public boolean merge(IndexProducer indexProducer) { + boolean result = indexProducer.forEachIndex(x -> { + indices.add(x); + return true; + }); + checkIndicesRange(); + return result; } - } - static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter { - - SparseDefaultBloomFilter(Shape shape, BitMapProducer producer) { - super(shape, producer); + @Override + public boolean merge(BitMapProducer bitMapProducer){ + return merge(IndexProducer.fromBitMapProducer(bitMapProducer)); } - SparseDefaultBloomFilter(Shape shape, Hasher hasher) { - super(shape, hasher); + @Override + public int cardinality() { + return indices.size(); } + } - SparseDefaultBloomFilter(Shape shape, IndexProducer producer) { - super(shape, producer); - } + static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter { SparseDefaultBloomFilter(Shape shape) { super(shape); @@ -205,18 +167,6 @@ public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloom static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter { - NonSparseDefaultBloomFilter(Shape shape, BitMapProducer producer) { - super(shape, producer); - } - - NonSparseDefaultBloomFilter(Shape shape, Hasher hasher) { - super(shape, hasher); - } - - NonSparseDefaultBloomFilter(Shape shape, IndexProducer producer) { - super(shape, producer); - } - NonSparseDefaultBloomFilter(Shape shape) { super(shape); } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java new file mode 100644 index 000000000..a36f545d0 --- /dev/null +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/DefaultIndexProducerTest.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; + +import java.util.Random; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.function.IntPredicate; + +import org.junit.jupiter.api.Test; + +public class DefaultIndexProducerTest extends AbstractIndexProducerTest { + + private int[] values = generateIntArray( 10, 512 ); + + @Override + protected IndexProducer createProducer() { + return IndexProducer.fromIndexArray( values ); + } + + @Override + protected IndexProducer createEmptyProducer() { + return new IndexProducer() { + + @Override + public boolean forEachIndex(IntPredicate predicate) { + return true; + } + }; + } + + /** + * Generates an array of integers. + * @param size the size of the array + * @param bound the upper bound (exclusive) of the values in the array. + * @return an array of int. + */ + public static int[] generateIntArray( int size, int bound ) { + Random rnd = new Random(); + int[] expected = new int[size]; + for (int i=0; i<size; i++) { + expected[i] = rnd.nextInt(bound); + } + return expected; + } + + /** + * Creates a sorted set of Integers. + * @param ary the array to sort and make unique + * @return the sorted Set. + */ + public static SortedSet<Integer> uniqueSet(int[] ary) { + SortedSet<Integer> uniq = new TreeSet<Integer>(); + for (int idx : ary) { + uniq.add(idx); + } + return uniq; + } + + /** + * Creates a sorted unique array of ints. + * @param ary the array to sort and make unique + * @return the sorted unique array. + */ + public static int[] unique(int[] ary) { + Set<Integer> uniq = uniqueSet(ary); + int[] result = new int[uniq.size()]; + int i=0; + for (int idx : uniq) { + result[i++] = idx; + } + return result; + } + + @Test + public void testFromBitMapProducer() { + for (int i=0; i<5000; i++) { + int[] expected = generateIntArray( 7, 256 ); + long[] bits = new long[BitMap.numberOfBitMaps(256)]; + for (int bitIndex : expected) { + BitMap.set(bits, bitIndex); + } + IndexProducer ip = IndexProducer.fromBitMapProducer(BitMapProducer.fromBitMapArray(bits)); + assertArrayEquals(unique(expected), ip.asIndexArray()); + } + } + + @Test + public void testFromIndexArray() { + for (int i=0; i<5000; i++) { + int[] expected = generateIntArray(10, 256); + IndexProducer ip = IndexProducer.fromIndexArray(expected); + assertArrayEquals(unique(expected), ip.asIndexArray()); + } + } +} diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java index d62ac959e..40fded6dd 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSimpleBloomFilterTest.java @@ -23,11 +23,13 @@ public class IndexProducerFromSimpleBloomFilterTest extends AbstractIndexProduce @Override protected IndexProducer createProducer() { Hasher hasher = new IncrementingHasher(0, 1); - return new SparseBloomFilter(shape, hasher); + BloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(hasher); + return bf; } @Override protected IndexProducer createEmptyProducer() { - return new SparseBloomFilter(shape); + return new SimpleBloomFilter(shape); } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java index b51e01b40..bd1e34836 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/IndexProducerFromSparseBloomFilterTest.java @@ -23,11 +23,14 @@ public class IndexProducerFromSparseBloomFilterTest extends AbstractIndexProduce @Override protected IndexProducer createProducer() { Hasher hasher = new IncrementingHasher(0, 1); - return new SimpleBloomFilter(shape, hasher); + BloomFilter bf = new SparseBloomFilter(shape); + bf.merge(hasher); + return bf; + } @Override protected IndexProducer createEmptyProducer() { - return new SimpleBloomFilter(shape); + return new SparseBloomFilter(shape); } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java index 890e22f8f..98a62604f 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SetOperationsTest.java @@ -49,22 +49,34 @@ public class SetOperationsTest { assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)"); } + private BloomFilter createFilter(Shape shape, Hasher hasher) { + BloomFilter bf = new SimpleBloomFilter(shape); + bf.merge(hasher); + return bf; + } + + private BloomFilter createFilter(Shape shape, IndexProducer producer) { + BloomFilter bf = new SparseBloomFilter(shape); + bf.merge(producer); + return bf; + } + /** * Tests that the Cosine similarity is correctly calculated. */ @Test public final void testCosineDistance() { - BloomFilter filter1 = new SimpleBloomFilter(shape, from1); - BloomFilter filter2 = new SimpleBloomFilter(shape, from1); + BloomFilter filter1 = createFilter(shape, from1); + BloomFilter filter2 = createFilter(shape, from1); // identical filters should have no distance. double expected = 0; assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2); Shape shape2 = Shape.fromKM(2, 72); - filter1 = new SimpleBloomFilter(shape2, from1); - filter2 = new SimpleBloomFilter(shape2, new IncrementingHasher(2, 1)); + filter1 = createFilter(shape2, from1); + filter2 = createFilter(shape2, new IncrementingHasher(2, 1)); int dotProduct = /* [1,2] & [2,3] = [2] = */ 1; int cardinalityA = 2; @@ -72,8 +84,8 @@ public class SetOperationsTest { expected = 1 - (dotProduct / Math.sqrt(cardinalityA * cardinalityB)); assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2); - filter1 = new SimpleBloomFilter(shape, from1); - filter2 = new SimpleBloomFilter(shape, from11); + filter1 = createFilter(shape, from1); + filter2 = createFilter(shape, from11); dotProduct = /* [1..17] & [11..27] = [] = */ 7; cardinalityA = 17; cardinalityB = 17; @@ -81,20 +93,19 @@ public class SetOperationsTest { assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2); // test with no values - filter1 = new SimpleBloomFilter(shape, from1); + filter1 = createFilter(shape, from1); filter2 = new SimpleBloomFilter(shape); - BloomFilter filter3 = new SimpleBloomFilter(shape); dotProduct = /* [1,2] & [] = [] = */ 0; cardinalityA = 2; cardinalityB = 0; - expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0; + expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0; assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2); dotProduct = /* [] & [] = [] = */ 0; cardinalityA = 0; cardinalityB = 0; - expected = /* 1 - (dotProduct/Math.sqrt( cardinalityA * cardinalityB )) = */ 1.0; + expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0; assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2); } @@ -103,27 +114,27 @@ public class SetOperationsTest { */ @Test public final void testCosineSimilarity() { - BloomFilter filter1 = new SimpleBloomFilter(shape, from1); - BloomFilter filter2 = new SimpleBloomFilter(shape, from1); + BloomFilter filter1 = createFilter(shape, from1); + BloomFilter filter2 = createFilter(shape, from1); int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17; int cardinalityA = 17; int cardinalityB = 17; - double expected = /* dotProduct/Sqrt( cardinalityA * cardinalityB ) = */ 1.0; + double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0; assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2); dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7; cardinalityA = 17; cardinalityB = 17; expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB); - filter2 = new SimpleBloomFilter(shape, from11); + filter2 = createFilter(shape, from11); assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2); // test no values filter1 = new SimpleBloomFilter(shape); filter2 = new SimpleBloomFilter(shape); // build a filter - BloomFilter filter3 = new SimpleBloomFilter(shape, from1); + BloomFilter filter3 = createFilter(shape, from1); assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2); assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3); } @@ -133,13 +144,13 @@ public class SetOperationsTest { */ @Test public final void testHammingDistance() { - final BloomFilter filter1 = new SimpleBloomFilter(shape, from1); - BloomFilter filter2 = new SimpleBloomFilter(shape, from1); + final BloomFilter filter1 = createFilter(shape, from1); + BloomFilter filter2 = createFilter(shape, from1); int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0; assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2); - filter2 = new SimpleBloomFilter(shape, from11); + filter2 = createFilter(shape, from11); hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20; assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2); } @@ -149,13 +160,13 @@ public class SetOperationsTest { */ @Test public final void testJaccardDistance() { - BloomFilter filter1 = new SimpleBloomFilter(shape, from1); - BloomFilter filter2 = new SimpleBloomFilter(shape, from1); + BloomFilter filter1 = createFilter(shape, from1); + BloomFilter filter2 = createFilter(shape, from1); // 1 - jaccardSimilarity -- see jaccardSimilarityTest assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2); - filter2 = new SimpleBloomFilter(shape, from11); + filter2 = createFilter(shape, from11); double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0; int union = /* [1..17] | [11..27] = [1..27] = */ 27; double expected = 1 - (intersection / union); @@ -164,7 +175,7 @@ public class SetOperationsTest { // test no values filter1 = new SimpleBloomFilter(shape); filter2 = new SimpleBloomFilter(shape); - BloomFilter filter3 = new SimpleBloomFilter(shape, from1); + BloomFilter filter3 = createFilter(shape, from1); // 1 - jaccardSimilarity -- see jaccardSimilarityTest assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2); @@ -176,15 +187,15 @@ public class SetOperationsTest { */ @Test public final void testJaccardSimilarity() { - BloomFilter filter1 = new SimpleBloomFilter(shape, from1); - BloomFilter filter2 = new SimpleBloomFilter(shape, from1); + BloomFilter filter1 = createFilter(shape, from1); + BloomFilter filter2 = createFilter(shape, from1); double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0; int union = /* [1..17] | [1..17] = [1..17] = */ 17; double expected = intersection / union; assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2); - filter2 = new SimpleBloomFilter(shape, from11); + filter2 = createFilter(shape, from11); intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0; union = /* [1..17] | [11..27] = [1..27] = */ 27; expected = intersection / union; @@ -193,7 +204,6 @@ public class SetOperationsTest { // test no values filter1 = new SimpleBloomFilter(shape); filter2 = new SimpleBloomFilter(shape); - BloomFilter filter3 = new SimpleBloomFilter(shape, from1); assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2); intersection = /* [] & [1..17] = [] = */ 0.0; @@ -205,16 +215,16 @@ public class SetOperationsTest { @Test public final void testOrCardinality() { Shape shape = Shape.fromKM(3, 128); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2); } @@ -222,32 +232,32 @@ public class SetOperationsTest { public final void testOrCardinalityWithDifferentLengthFilters() { Shape shape = Shape.fromKM(3, 128); Shape shape2 = Shape.fromKM(3, 192); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2); } @Test public final void testAndCardinality() { Shape shape = Shape.fromKM(3, 128); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2); } @@ -255,37 +265,37 @@ public class SetOperationsTest { public final void testAndCardinalityWithDifferentLengthFilters() { Shape shape = Shape.fromKM(3, 128); Shape shape2 = Shape.fromKM(3, 192); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2); } @Test public final void testXorCardinality() { Shape shape = Shape.fromKM(3, 128); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 64, 69 })); assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2); Shape bigShape = Shape.fromKM(3, 192); - filter1 = new SparseBloomFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185})); - filter2 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 })); + filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(new int[] { 1, 63, 185})); + filter2 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63, 69 })); assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2); } @@ -294,16 +304,16 @@ public class SetOperationsTest { Shape shape = Shape.fromKM(3, 128); Shape shape2 = Shape.fromKM(3, 192); - SparseBloomFilter filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); - SparseBloomFilter filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63, 64 })); + BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 1, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2); - filter1 = new SparseBloomFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); - filter2 = new SparseBloomFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); + filter1 = createFilter(shape, IndexProducer.fromIndexArray(new int[] { 5, 63 })); + filter2 = createFilter(shape2, IndexProducer.fromIndexArray(new int[] { 5, 64, 169 })); assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2); } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java index 15aeea62c..b37c0ffe8 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SimpleBloomFilterTest.java @@ -16,7 +16,6 @@ */ package org.apache.commons.collections4.bloomfilter; -import org.junit.jupiter.api.Test; /** * Tests for the {@link SimpleBloomFilter}. @@ -26,63 +25,4 @@ public class SimpleBloomFilterTest extends AbstractBloomFilterTest<SimpleBloomFi protected SimpleBloomFilter createEmptyFilter(final Shape shape) { return new SimpleBloomFilter(shape); } - - @Override - protected SimpleBloomFilter createFilter(final Shape shape, final Hasher hasher) { - return new SimpleBloomFilter(shape, hasher); - } - - @Override - protected SimpleBloomFilter createFilter(final Shape shape, final BitMapProducer producer) { - return new SimpleBloomFilter(shape, producer); - } - - @Override - protected SimpleBloomFilter createFilter(final Shape shape, final IndexProducer producer) { - return new SimpleBloomFilter(shape, producer); - } - - private void executeNestedTest(SimpleBloomFilterTest nestedTest) { - nestedTest.testAsBitMapArray(); - nestedTest.testContains(); - nestedTest.testEstimateIntersection(); - nestedTest.testEstimateN(); - nestedTest.testEstimateUnion(); - nestedTest.testIsFull(); - nestedTest.testMerge(); - } - - @Test - public void testConstructors() { - - // // copy of Sparse - SimpleBloomFilterTest nestedTest = new SimpleBloomFilterTest() { - - @Override - protected SimpleBloomFilter createEmptyFilter(Shape shape) { - return new SimpleBloomFilter(new SparseBloomFilter(shape)); - } - - @Override - protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) { - return new SimpleBloomFilter(new SparseBloomFilter(shape, hasher)); - } - }; - executeNestedTest(nestedTest); - - // copy of Simple - nestedTest = new SimpleBloomFilterTest() { - - @Override - protected SimpleBloomFilter createEmptyFilter(Shape shape) { - return new SimpleBloomFilter(new SimpleBloomFilter(shape)); - } - - @Override - protected SimpleBloomFilter createFilter(Shape shape, Hasher hasher) { - return new SimpleBloomFilter(new SimpleBloomFilter(shape, hasher)); - } - }; - executeNestedTest(nestedTest); - } } diff --git a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java index d3645f1e8..95484d967 100644 --- a/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java +++ b/src/test/java/org/apache/commons/collections4/bloomfilter/SparseBloomFilterTest.java @@ -31,64 +31,6 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi return new SparseBloomFilter(shape); } - @Override - protected SparseBloomFilter createFilter(final Shape shape, final Hasher hasher) { - return new SparseBloomFilter(shape, hasher); - } - - @Override - protected SparseBloomFilter createFilter(final Shape shape, final BitMapProducer producer) { - return new SparseBloomFilter(shape, producer); - } - - @Override - protected SparseBloomFilter createFilter(final Shape shape, final IndexProducer producer) { - return new SparseBloomFilter(shape, producer); - } - - private void executeNestedTest(SparseBloomFilterTest nestedTest) { - nestedTest.testContains(); - nestedTest.testEstimateIntersection(); - nestedTest.testEstimateN(); - nestedTest.testEstimateUnion(); - nestedTest.testIsFull(); - nestedTest.testMerge(); - } - - @Test - public void testConstructors() { - - // copy of Sparse - SparseBloomFilterTest nestedTest = new SparseBloomFilterTest() { - - @Override - protected SparseBloomFilter createEmptyFilter(Shape shape) { - return new SparseBloomFilter(new SparseBloomFilter(shape)); - } - - @Override - protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) { - return new SparseBloomFilter(new SparseBloomFilter(shape, hasher)); - } - }; - executeNestedTest(nestedTest); - - // copy of Simple - nestedTest = new SparseBloomFilterTest() { - - @Override - protected SparseBloomFilter createEmptyFilter(Shape shape) { - return new SparseBloomFilter(new SimpleBloomFilter(shape)); - } - - @Override - protected SparseBloomFilter createFilter(Shape shape, Hasher hasher) { - return new SparseBloomFilter(new SimpleBloomFilter(shape, hasher)); - } - }; - executeNestedTest(nestedTest); - } - @Test public void testBitMapProducerEdgeCases() { int[] values = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 65, 66, 67, 68, 69, 70, 71 }; @@ -140,9 +82,10 @@ public class SparseBloomFilterTest extends AbstractBloomFilterTest<SparseBloomFi } @Test - public void testBloomFilterBasedMergeInPlaceEdgeCases() { + public void testBloomFilterBasedMergeEdgeCases() { BloomFilter bf1 = createEmptyFilter(getTestShape()); - BloomFilter bf2 = new SimpleBloomFilter(getTestShape(), from1); + BloomFilter bf2 = new SimpleBloomFilter(getTestShape()); + bf2.merge(from1); bf1.merge(bf2); assertTrue(bf2.forEachBitMapPair(bf1, (x, y) -> x == y)); }
