aherbert commented on code in PR #402:
URL:
https://github.com/apache/commons-collections/pull/402#discussion_r1339164850
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @return An array of Bloom filters.
*/
default BloomFilter[] asBloomFilterArray() {
- class Filters {
- private BloomFilter[] data = new BloomFilter[16];
- private int size;
-
- boolean add(final BloomFilter filter) {
- if (size == data.length) {
- // This will throw an out-of-memory error if there are too
many Bloom filters.
- data = Arrays.copyOf(data, size * 2);
- }
- data[size++] = filter.copy();
- return true;
- }
-
- BloomFilter[] toArray() {
- // Edge case to avoid a large array copy
- return size == data.length ? data : Arrays.copyOf(data, size);
- }
- }
- final Filters filters = new Filters();
- forEachBloomFilter(filters::add);
- return filters.toArray();
+ final List<BloomFilter> filters = new ArrayList<>();
+ forEachBloomFilter(f -> filters.add(f.copy()));
+ return filters.toArray(new BloomFilter[filters.size()]);
}
/**
* Applies the {@code func} to each Bloom filter pair in order. Will apply
all
* of the Bloom filters from the other BloomFilterProducer to this
producer. If
- * this producer does not have as many BloomFilters it will provide
- * {@code null} for all excess calls to the BiPredicate.
+ * either {@code this} producer or {@code other} producer has fewer
BloomFilters
+ * ths method will provide {@code null} for all excess calls to the {@code
func}.
+ *
+ * <p><em>This implementation returns references to the Bloom filter.
Other implementations
+ * should specify if the array contains deep copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @param other The other BloomFilterProducer that provides the y values
in the
* (x,y) pair.
* @param func The function to apply.
- * @return A LongPredicate that tests this BitMapProducers bitmap values in
- * order.
+ * @return {@code true} if the {@code func} returned {@code true} for
every pair,
+ * {@code false} otherwise.
*/
default boolean forEachBloomFilterPair(final BloomFilterProducer other,
final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new
CountingPredicate<>(asBloomFilterArray(), func);
return other.forEachBloomFilter(p) && p.forEachRemaining();
}
+ /**
+ * Create a standard (non-layered) Bloom filter by merging all of the
layers. If
+ * the filter is empty this method will return an empty Bloom filter.
+ *
+ * @return the merged bloom filter.
+ */
+ default BloomFilter flatten() {
+ BloomFilter bf[] = {null};
+ forEachBloomFilter( x -> {
+ if (bf[0]==null) {
Review Comment:
whitespace:
```java
forEachBloomFilter(x -> {
if (bf[0] == null) {
bf[0] = new SimpleBloomFilter(x.getShape());
}
return bf[0].merge(x);
```
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @return An array of Bloom filters.
*/
default BloomFilter[] asBloomFilterArray() {
- class Filters {
- private BloomFilter[] data = new BloomFilter[16];
- private int size;
-
- boolean add(final BloomFilter filter) {
- if (size == data.length) {
- // This will throw an out-of-memory error if there are too
many Bloom filters.
- data = Arrays.copyOf(data, size * 2);
- }
- data[size++] = filter.copy();
- return true;
- }
-
- BloomFilter[] toArray() {
- // Edge case to avoid a large array copy
- return size == data.length ? data : Arrays.copyOf(data, size);
- }
- }
- final Filters filters = new Filters();
- forEachBloomFilter(filters::add);
- return filters.toArray();
+ final List<BloomFilter> filters = new ArrayList<>();
+ forEachBloomFilter(f -> filters.add(f.copy()));
+ return filters.toArray(new BloomFilter[filters.size()]);
}
/**
* Applies the {@code func} to each Bloom filter pair in order. Will apply
all
* of the Bloom filters from the other BloomFilterProducer to this
producer. If
- * this producer does not have as many BloomFilters it will provide
- * {@code null} for all excess calls to the BiPredicate.
+ * either {@code this} producer or {@code other} producer has fewer
BloomFilters
+ * ths method will provide {@code null} for all excess calls to the {@code
func}.
+ *
+ * <p><em>This implementation returns references to the Bloom filter.
Other implementations
+ * should specify if the array contains deep copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @param other The other BloomFilterProducer that provides the y values
in the
* (x,y) pair.
* @param func The function to apply.
- * @return A LongPredicate that tests this BitMapProducers bitmap values in
- * order.
+ * @return {@code true} if the {@code func} returned {@code true} for
every pair,
+ * {@code false} otherwise.
*/
default boolean forEachBloomFilterPair(final BloomFilterProducer other,
final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new
CountingPredicate<>(asBloomFilterArray(), func);
return other.forEachBloomFilter(p) && p.forEachRemaining();
}
+ /**
+ * Create a standard (non-layered) Bloom filter by merging all of the
layers. If
+ * the filter is empty this method will return an empty Bloom filter.
+ *
+ * @return the merged bloom filter.
+ */
+ default BloomFilter flatten() {
+ BloomFilter bf[] = {null};
Review Comment:
No c-style arrays: `BloomFilter[] bf`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
Review Comment:
This should indicate that the default implementation returns copies.
##########
src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java:
##########
@@ -20,29 +20,27 @@
import java.util.function.Predicate;
/**
- * A predicate that applies the test func to each member of the @{code ary} in
- * sequence for each call to @{code test()}. if the @{code ary} is exhausted,
- * the subsequent calls to @{code test} are executed with a {@code null} value.
- * If the calls to @{code test} do not exhaust the @{code ary} the @{code
+ * A predicate that applies the test {@code func} to each member of the {@code
ary} in
+ * sequence for each call to {@code test()}. if the {@code ary} is exhausted,
+ * the subsequent calls to {@code test} are executed with a {@code null} value.
+ * If the calls to {@code test} do not exhaust the {@code ary} the {@code
* forEachRemaining} method can be called to execute the @code{text} with a
Review Comment:
`@code{text} -> {@code test}`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -53,35 +52,47 @@
public class LayerManager implements BloomFilterProducer {
/**
- * Static methods an variable for standard extend checks.
- *
+ * A collection of common ExtendCheck implementations to test whether to
extend
+ * the depth of a LayerManager.
*/
public static class ExtendCheck {
private ExtendCheck() {
}
+ private static final Predicate<LayerManager> ADVANCE_ON_POPULATED = lm
-> {
+ return !lm.filters.peekLast().isEmpty();
+ };
+
+ private static final Predicate<LayerManager> NEVER_ADVANCE = x ->
false;
+
/**
* Advances the target once a merge has been performed.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
*/
- public static final Predicate<LayerManager> ADVANCE_ON_POPULATED = lm
-> {
- return !lm.filters.isEmpty() &&
!lm.filters.peekLast().forEachBitMap(y -> y == 0);
- };
+ public static final Predicate<LayerManager> advanceOnPopulated() {
+ return ADVANCE_ON_POPULATED;
+ }
/**
- * Does not automatically advance the target. next() must be called
directly to
+ * Does not automatically advance the target. @{code next()} must be
called directly to
* perform the advance.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
*/
- public static final Predicate<LayerManager> NEVER_ADVANCE = x -> false;
+ public static final Predicate<LayerManager> neverAdvance() {
+ return NEVER_ADVANCE;
+ }
/**
* Calculates the estimated number of Bloom filters (n) that have been
merged
- * into the target and compares that with the estimated maximum
expected n based
- * on the shape. If the target is full then a new target is created.
+ * into the target and compares that with the estimated maximum
expected
+ * {@code n} based on the shape. If the target is saturated (Bloom
filter {@code n} >= Shape {@code n})
+ * a new target is created.
*
* @param shape The shape of the filters in the LayerManager.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
+ * @see ExtendCheck#advanceOnSaturation(double)
*/
- public static final Predicate<LayerManager>
advanceOnCalculatedFull(Shape shape) {
+ public static final Predicate<LayerManager>
advanceOnCalculatedSaturation(Shape shape) {
Review Comment:
This method is a one liner. Is it required. Could the same not be achieved
by adding this pattern as an example to the `advanceOnSaturation` method?
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -53,35 +52,47 @@
public class LayerManager implements BloomFilterProducer {
/**
- * Static methods an variable for standard extend checks.
- *
+ * A collection of common ExtendCheck implementations to test whether to
extend
+ * the depth of a LayerManager.
*/
public static class ExtendCheck {
Review Comment:
With the private constructor, this should be final.
##########
src/main/java/org/apache/commons/collections4/bloomfilter/Shape.java:
##########
@@ -228,12 +228,24 @@ public double estimateN(final int cardinality) {
}
/**
- * Estimates the maximum number of elements that can be merged into a
filter of this shape before
- * it begins to return excessive false positives.
+ * Estimates the maximum number of elements that can be merged into a
filter of
+ * this shape before the false positive rate exceeds the desired rate. <p>
The
+ * formula for deriving {@code k} when {@code m} and {@code n} are known
is:
+ *
+ * <pre>
+ * k = ln2 * m / n
+ * </pre>
+ *
+ * solving for {@code n} yields:
Review Comment:
Should have a `<p>`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilter.java:
##########
@@ -209,6 +209,21 @@ default boolean isFull() {
*/
int cardinality();
+ /**
+ * Determines if all the bits are off. This is equivalent to
+ * {@code cardinality() == 0}.
+ *
+ * <p>
+ * <em>Note: This method is optimised for non-sparse filters.</em>
Implementers
+ * are encouraged to implement faster checks if possible.
+ * </p>
+ *
+ * @return {@code true} if no bites are enabled, {@code false} otherwise.
Review Comment:
`bits`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @return An array of Bloom filters.
*/
default BloomFilter[] asBloomFilterArray() {
- class Filters {
- private BloomFilter[] data = new BloomFilter[16];
- private int size;
-
- boolean add(final BloomFilter filter) {
- if (size == data.length) {
- // This will throw an out-of-memory error if there are too
many Bloom filters.
- data = Arrays.copyOf(data, size * 2);
- }
- data[size++] = filter.copy();
- return true;
- }
-
- BloomFilter[] toArray() {
- // Edge case to avoid a large array copy
- return size == data.length ? data : Arrays.copyOf(data, size);
- }
- }
- final Filters filters = new Filters();
- forEachBloomFilter(filters::add);
- return filters.toArray();
+ final List<BloomFilter> filters = new ArrayList<>();
+ forEachBloomFilter(f -> filters.add(f.copy()));
+ return filters.toArray(new BloomFilter[filters.size()]);
}
/**
* Applies the {@code func} to each Bloom filter pair in order. Will apply
all
* of the Bloom filters from the other BloomFilterProducer to this
producer. If
- * this producer does not have as many BloomFilters it will provide
- * {@code null} for all excess calls to the BiPredicate.
+ * either {@code this} producer or {@code other} producer has fewer
BloomFilters
+ * ths method will provide {@code null} for all excess calls to the {@code
func}.
+ *
+ * <p><em>This implementation returns references to the Bloom filter.
Other implementations
+ * should specify if the array contains deep copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @param other The other BloomFilterProducer that provides the y values
in the
* (x,y) pair.
* @param func The function to apply.
- * @return A LongPredicate that tests this BitMapProducers bitmap values in
- * order.
+ * @return {@code true} if the {@code func} returned {@code true} for
every pair,
+ * {@code false} otherwise.
*/
default boolean forEachBloomFilterPair(final BloomFilterProducer other,
final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new
CountingPredicate<>(asBloomFilterArray(), func);
return other.forEachBloomFilter(p) && p.forEachRemaining();
}
+ /**
+ * Create a standard (non-layered) Bloom filter by merging all of the
layers. If
+ * the filter is empty this method will return an empty Bloom filter.
+ *
+ * @return the merged bloom filter.
+ */
+ default BloomFilter flatten() {
+ BloomFilter bf[] = {null};
+ forEachBloomFilter( x -> {
+ if (bf[0]==null) {
+ bf[0] = new SimpleBloomFilter( x.getShape());
+ }
+ return bf[0].merge( x );
+ });
+ return bf[0];
+ }
+
+ /**
+ * Creates a BloomFilterProducer from an array of Bloom filters.
+ *
+ * <ul>
+ * <li>The asBloomFilterArray() method returns a copy of the original array
+ * with references to the original filters.</li>
+ * <li>The forEachBloomFilterPair() method uses references to the original
filters.</li>
+ * </ul>
+ * <p><em>All modifications to the Bloom filters are reflected in the
original filters</em></p>
+ *
+ * @param filters The filters to be returned by the producer.
+ * @return THe BloomFilterProducer containing the filters.
+ */
+ static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) {
+ return new BloomFilterProducer() {
+ @Override
+ public boolean forEachBloomFilter(final Predicate<BloomFilter>
predicate) {
+ for (final BloomFilter filter : filters) {
+ if (!predicate.test(filter)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * This implementation returns a copy the original array, the
contained Bloom filters
+ * are references to the originals, any modifications to them are
reflected in the original
+ * filters.
+ */
+ @Override
+ public BloomFilter[] asBloomFilterArray() {
+ return Arrays.copyOf(filters, filters.length);
Review Comment:
For an array of the same length you can use `filters.clone()`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/CountingLongPredicate.java:
##########
@@ -19,20 +19,21 @@
import java.util.function.LongPredicate;
/**
- * A long predicate that applies the test func to each member of the @{code
ary} in sequence for each call to @{code test()}.
- * if the @{code ary} is exhausted, the subsequent calls to @{code test} are
executed with a zero value.
- * If the calls to @{code test} do not exhaust the @{code ary} the @{code
forEachRemaining} method can be called to
- * execute the @code{text} with a zero value for each remaining @{code idx}
value.
+ * A long predicate that applies the test func to each member of the {@code
ary} in sequence for each call to {@code test()}.
+ * if the {@code ary} is exhausted, the subsequent calls to {@code test} are
executed with a zero value.
+ * If the calls to {@code test} do not exhaust the {@code ary} the {@code
forEachRemaining} method can be called to
+ * execute the @code{text} with a zero value for each remaining {@code idx}
value.
Review Comment:
`@code{text} -> {@code test}`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @return An array of Bloom filters.
*/
default BloomFilter[] asBloomFilterArray() {
- class Filters {
- private BloomFilter[] data = new BloomFilter[16];
- private int size;
-
- boolean add(final BloomFilter filter) {
- if (size == data.length) {
- // This will throw an out-of-memory error if there are too
many Bloom filters.
- data = Arrays.copyOf(data, size * 2);
- }
- data[size++] = filter.copy();
- return true;
- }
-
- BloomFilter[] toArray() {
- // Edge case to avoid a large array copy
- return size == data.length ? data : Arrays.copyOf(data, size);
- }
- }
- final Filters filters = new Filters();
- forEachBloomFilter(filters::add);
- return filters.toArray();
+ final List<BloomFilter> filters = new ArrayList<>();
+ forEachBloomFilter(f -> filters.add(f.copy()));
+ return filters.toArray(new BloomFilter[filters.size()]);
Review Comment:
This call is optimised by the JVM if using `filters.toArray(new
BloomFilter[0])` (since a zero fill is not required in the new array allocated
by the JVM for the correct length).
##########
src/main/java/org/apache/commons/collections4/bloomfilter/BloomFilterProducer.java:
##########
@@ -38,54 +40,102 @@ public interface BloomFilterProducer {
boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate);
/**
- * Return a deep copy of the BloomFilterProducer data as a Bloom filter
array.
- * <p>
- * The default implementation of this method is slow. It is recommended
that
- * implementing classes reimplement this method.
- * </p>
+ * Return an array of the Bloom filters in the collection.
+ * <p><em>Implementations should specify if the array contains deep
copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @return An array of Bloom filters.
*/
default BloomFilter[] asBloomFilterArray() {
- class Filters {
- private BloomFilter[] data = new BloomFilter[16];
- private int size;
-
- boolean add(final BloomFilter filter) {
- if (size == data.length) {
- // This will throw an out-of-memory error if there are too
many Bloom filters.
- data = Arrays.copyOf(data, size * 2);
- }
- data[size++] = filter.copy();
- return true;
- }
-
- BloomFilter[] toArray() {
- // Edge case to avoid a large array copy
- return size == data.length ? data : Arrays.copyOf(data, size);
- }
- }
- final Filters filters = new Filters();
- forEachBloomFilter(filters::add);
- return filters.toArray();
+ final List<BloomFilter> filters = new ArrayList<>();
+ forEachBloomFilter(f -> filters.add(f.copy()));
+ return filters.toArray(new BloomFilter[filters.size()]);
}
/**
* Applies the {@code func} to each Bloom filter pair in order. Will apply
all
* of the Bloom filters from the other BloomFilterProducer to this
producer. If
- * this producer does not have as many BloomFilters it will provide
- * {@code null} for all excess calls to the BiPredicate.
+ * either {@code this} producer or {@code other} producer has fewer
BloomFilters
+ * ths method will provide {@code null} for all excess calls to the {@code
func}.
+ *
+ * <p><em>This implementation returns references to the Bloom filter.
Other implementations
+ * should specify if the array contains deep copies, immutable instances,
+ * or references to the filters in the collection.</em></p>
*
* @param other The other BloomFilterProducer that provides the y values
in the
* (x,y) pair.
* @param func The function to apply.
- * @return A LongPredicate that tests this BitMapProducers bitmap values in
- * order.
+ * @return {@code true} if the {@code func} returned {@code true} for
every pair,
+ * {@code false} otherwise.
*/
default boolean forEachBloomFilterPair(final BloomFilterProducer other,
final BiPredicate<BloomFilter, BloomFilter> func) {
final CountingPredicate<BloomFilter> p = new
CountingPredicate<>(asBloomFilterArray(), func);
return other.forEachBloomFilter(p) && p.forEachRemaining();
}
+ /**
+ * Create a standard (non-layered) Bloom filter by merging all of the
layers. If
+ * the filter is empty this method will return an empty Bloom filter.
+ *
+ * @return the merged bloom filter.
+ */
+ default BloomFilter flatten() {
+ BloomFilter bf[] = {null};
+ forEachBloomFilter( x -> {
+ if (bf[0]==null) {
+ bf[0] = new SimpleBloomFilter( x.getShape());
+ }
+ return bf[0].merge( x );
+ });
+ return bf[0];
+ }
+
+ /**
+ * Creates a BloomFilterProducer from an array of Bloom filters.
+ *
+ * <ul>
+ * <li>The asBloomFilterArray() method returns a copy of the original array
+ * with references to the original filters.</li>
+ * <li>The forEachBloomFilterPair() method uses references to the original
filters.</li>
+ * </ul>
+ * <p><em>All modifications to the Bloom filters are reflected in the
original filters</em></p>
+ *
+ * @param filters The filters to be returned by the producer.
+ * @return THe BloomFilterProducer containing the filters.
+ */
+ static BloomFilterProducer fromBloomFilterArray(BloomFilter... filters) {
Review Comment:
We should pre-check for null to fail-fast:
`Objects.requireNonNull(filters, "filters");`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/CountingPredicate.java:
##########
@@ -57,10 +55,10 @@ public boolean test(final T other) {
}
/**
- * Call the T-T consuming bi-predicate for each remaining unpaired T in the
+ * Call BiPredicate<T,T> for each remaining unpaired <T> in the
Review Comment:
This block of changes with HTML escape chars makes the source less human
readable. This is a package-private class that will not be rendered to javadoc.
Perhaps just put any HTML gt and lt tags into `@code` blocks, e.g.
```
{@code BiPredicate<T, T>}
```
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -53,35 +52,47 @@
public class LayerManager implements BloomFilterProducer {
/**
- * Static methods an variable for standard extend checks.
- *
+ * A collection of common ExtendCheck implementations to test whether to
extend
+ * the depth of a LayerManager.
*/
public static class ExtendCheck {
private ExtendCheck() {
}
+ private static final Predicate<LayerManager> ADVANCE_ON_POPULATED = lm
-> {
Review Comment:
These constants are only used on one place. They could be moved to the
methods and returned directly as a lambda function.
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -90,9 +101,13 @@ public static final Predicate<LayerManager>
advanceOnCalculatedFull(Shape shape)
* the current target.
*
* @param breakAt the number of filters to merge into each filter in
the list.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
+ * @throws IllegalArgumentException if {@code breakAt} is <= 0
Review Comment:
Avoid HTML encoding: `{@code breakAt <= 0}`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -107,80 +122,76 @@ public boolean test(LayerManager filter) {
* Creates a new target after the current target is saturated.
Saturation is
* defined as the estimated N of the target Bloom filter being greater
than the
* maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
- *
- * @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
- */
- public static final Predicate<LayerManager> advanceOnSaturation(int
maxN) {
- return new Predicate<LayerManager>() {
- @Override
- public boolean test(LayerManager manager) {
- if (manager.filters.isEmpty()) {
- return false;
- }
- return maxN <= manager.filters.peekLast().estimateN();
- }
-
- };
- }
-
- /**
- * Creates a new target after the current target is saturated.
Saturation is
- * defined as the estimated N of the target Bloom filter being greater
than the
- * maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
*
* @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
+ * @throws IllegalArgumentException if maxN is <= 0
Review Comment:
Avoid HTML encoding: `{@code maxN <= 0}`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -314,34 +347,61 @@ public static class Builder {
private Consumer<LinkedList<BloomFilter>> cleanup;
private Builder() {
- extendCheck = ExtendCheck.NEVER_ADVANCE;
- cleanup = Cleanup.NO_CLEANUP;
+ extendCheck = ExtendCheck.neverAdvance();
+ cleanup = Cleanup.noCleanup();
}
+ /**
+ * Builds the layer manager with the specified properties.
+ *
+ * @return a new LayerManager.
+ */
public LayerManager build() {
if (supplier == null) {
- throw new IllegalStateException("Supplier must not be null");
+ throw new NullPointerException("Supplier must not be null");
Review Comment:
Switch to `Objects.requireNonNull`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -107,80 +122,76 @@ public boolean test(LayerManager filter) {
* Creates a new target after the current target is saturated.
Saturation is
* defined as the estimated N of the target Bloom filter being greater
than the
* maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
- *
- * @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
- */
- public static final Predicate<LayerManager> advanceOnSaturation(int
maxN) {
- return new Predicate<LayerManager>() {
- @Override
- public boolean test(LayerManager manager) {
- if (manager.filters.isEmpty()) {
- return false;
- }
- return maxN <= manager.filters.peekLast().estimateN();
- }
-
- };
- }
-
- /**
- * Creates a new target after the current target is saturated.
Saturation is
- * defined as the estimated N of the target Bloom filter being greater
than the
- * maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
*
* @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
+ * @throws IllegalArgumentException if maxN is <= 0
*/
public static final Predicate<LayerManager> advanceOnSaturation(double
maxN) {
- return new Predicate<LayerManager>() {
- @Override
- public boolean test(LayerManager manager) {
- if (manager.filters.isEmpty()) {
- return false;
- }
- BloomFilter bf = manager.filters.peekLast();
- return maxN <= bf.getShape().estimateN(bf.cardinality());
- }
+ if (maxN <= 0) {
+ throw new IllegalArgumentException("'maxN' must be greater
than 0");
+ }
+ return manager -> {
+ BloomFilter bf = manager.filters.peekLast();
+ return maxN <= bf.getShape().estimateN(bf.cardinality());
};
}
}
/**
- * Static methods to create a Consumer of a LinkedList of BloomFilter to
manage
- * the size of the list.
- *
+ * Static methods to create a Consumer of a LinkedList of BloomFilter
perform
+ * tests on whether to reduce the collection of Bloom filters.
*/
public static class Cleanup {
Review Comment:
With the private constructor, this should be final.
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -107,80 +122,76 @@ public boolean test(LayerManager filter) {
* Creates a new target after the current target is saturated.
Saturation is
* defined as the estimated N of the target Bloom filter being greater
than the
* maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
- *
- * @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
- */
- public static final Predicate<LayerManager> advanceOnSaturation(int
maxN) {
- return new Predicate<LayerManager>() {
- @Override
- public boolean test(LayerManager manager) {
- if (manager.filters.isEmpty()) {
- return false;
- }
- return maxN <= manager.filters.peekLast().estimateN();
- }
-
- };
- }
-
- /**
- * Creates a new target after the current target is saturated.
Saturation is
- * defined as the estimated N of the target Bloom filter being greater
than the
- * maxN specified.
- * <p>
- * This method uses the integer estimation found in the Bloom filter.
To use the
- * estimation from the Shape use the double version of this function.
*
* @param maxN the maximum number of estimated items in the filter.
- * @return A Predicate suitable for the LayerManager extendCheck
parameter.
+ * @return A Predicate suitable for the LayerManager {@code
extendCheck} parameter.
+ * @throws IllegalArgumentException if maxN is <= 0
*/
public static final Predicate<LayerManager> advanceOnSaturation(double
maxN) {
- return new Predicate<LayerManager>() {
- @Override
- public boolean test(LayerManager manager) {
- if (manager.filters.isEmpty()) {
- return false;
- }
- BloomFilter bf = manager.filters.peekLast();
- return maxN <= bf.getShape().estimateN(bf.cardinality());
- }
+ if (maxN <= 0) {
+ throw new IllegalArgumentException("'maxN' must be greater
than 0");
+ }
+ return manager -> {
+ BloomFilter bf = manager.filters.peekLast();
+ return maxN <= bf.getShape().estimateN(bf.cardinality());
};
}
}
/**
- * Static methods to create a Consumer of a LinkedList of BloomFilter to
manage
- * the size of the list.
- *
+ * Static methods to create a Consumer of a LinkedList of BloomFilter
perform
+ * tests on whether to reduce the collection of Bloom filters.
*/
public static class Cleanup {
private Cleanup() {
}
+ private static final Consumer<LinkedList<BloomFilter>> NO_CLEANUP = x
-> {
Review Comment:
As before, these constants can be moved directly to the single methods that
use them.
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -204,44 +218,63 @@ public static Builder builder() {
* when necessary.
* @param extendCheck The predicate that checks if a new filter should
be
* added to the list.
- * @param filterCleanup the consumer the removes any old filters from the
list.
+ * @param filterCleanup the consumer that removes any old filters from the
+ * list.
+ * @param initialize true if the filter list should be initialized.
*/
private LayerManager(Supplier<BloomFilter> filterSupplier,
Predicate<LayerManager> extendCheck,
- Consumer<LinkedList<BloomFilter>> filterCleanup) {
+ Consumer<LinkedList<BloomFilter>> filterCleanup, boolean
initialize) {
this.filterSupplier = filterSupplier;
this.extendCheck = extendCheck;
this.filterCleanup = filterCleanup;
- filters.add(this.filterSupplier.get());
+ if (initialize) {
+ addFilter();
+ }
+ }
+
+ /**
+ * Adds a new Bloom filter to the list.
+ */
+ private void addFilter() {
+ BloomFilter bf = filterSupplier.get();
+ if (bf == null) {
+ throw new NullPointerException("filterSupplier returned null.");
+ }
+ filters.add(bf);
}
/**
* Creates a deep copy of this LayerManager.
+ * <p><em>Filters in the copy are deep copies, not references, so changes
in the copy
+ * are NOT reflected in the original.</em></p>
*
* @return a copy of this layer Manager.
*/
public LayerManager copy() {
- LayerManager newMgr = new LayerManager(filterSupplier, extendCheck,
filterCleanup);
- newMgr.filters.clear();
+ LayerManager newMgr = new LayerManager(filterSupplier, extendCheck,
filterCleanup, false);
for (BloomFilter bf : filters) {
newMgr.filters.add(bf.copy());
}
return newMgr;
}
/**
- * Forces an advance to the next depth for subsequent merges. Executes the
same
- * logic as when {@code ExtendCheck} returns {@code true}
+ * Forces an advance to the next depth. This method will clean-up the
current
+ * layers and generate a new filter layer. In most cases is it unnecessary
to
+ * call this method directly.
+ * <p>
+ * Ths method is used within the {@link #getTarget()} when the configured
Review Comment:
`within {@link #getTarget()}`
##########
src/main/java/org/apache/commons/collections4/bloomfilter/LayerManager.java:
##########
@@ -204,44 +218,63 @@ public static Builder builder() {
* when necessary.
* @param extendCheck The predicate that checks if a new filter should
be
* added to the list.
- * @param filterCleanup the consumer the removes any old filters from the
list.
+ * @param filterCleanup the consumer that removes any old filters from the
+ * list.
+ * @param initialize true if the filter list should be initialized.
*/
private LayerManager(Supplier<BloomFilter> filterSupplier,
Predicate<LayerManager> extendCheck,
- Consumer<LinkedList<BloomFilter>> filterCleanup) {
+ Consumer<LinkedList<BloomFilter>> filterCleanup, boolean
initialize) {
this.filterSupplier = filterSupplier;
this.extendCheck = extendCheck;
this.filterCleanup = filterCleanup;
- filters.add(this.filterSupplier.get());
+ if (initialize) {
+ addFilter();
+ }
+ }
+
+ /**
+ * Adds a new Bloom filter to the list.
+ */
+ private void addFilter() {
+ BloomFilter bf = filterSupplier.get();
+ if (bf == null) {
+ throw new NullPointerException("filterSupplier returned null.");
+ }
+ filters.add(bf);
}
/**
* Creates a deep copy of this LayerManager.
+ * <p><em>Filters in the copy are deep copies, not references, so changes
in the copy
+ * are NOT reflected in the original.</em></p>
Review Comment:
Do we need a note that the filter supplier/extend check/filter clean-up are
shared between this instance and the copy. Could there be concurrency issues
using two layer managers in parallel? If so then we should note that there
could be concurrency problems.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]