Claudenw commented on code in PR #402: URL: https://github.com/apache/commons-collections/pull/402#discussion_r1256043841
########## src/main/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilter.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.IntPredicate; +import java.util.function.LongPredicate; +import java.util.function.Predicate; + +/** + * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun + * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd + * International Conference on Advanced Computer Theory and Engineering (ICACTE + * 2010), vol. 1, pp. V1–586–V1–591, doi:10.1109/ICACTE.2010.5578947, ISBN + * 978-1-4244-6539-2, S2CID 3108985 + * <p> + * In short, Layered Bloom filter contains several bloom filters arranged in + * layers. + * </p> + * <ul> + * <li>When membership in the filter is checked each layer in turn is checked + * and if a match is found {@code true} is returned.</li> + * <li>When merging each bloom filter is merged into the last newest filter in + * the list of layers.</li> + * <li>When questions of cardinality are asked the cardinality of the union of + * the enclosed Bloom filters is used.</li> + * </ul> + * The net result is that the layered Bloom filter can be populated with more + * items than the Shape would indicate and yet still return a false positive + * rate in line with the Shape and not the over population. + * <p> + * This implementation uses a LayerManager to handle the manipulation of the + * layers. + * </p> + * <ul> + * <li>Level 0 is the oldest layer and the highest level is the newest.</li> + * <li>There is always at least one enclosed filter.</li> + * <li>The newest filter is the {@code target} into which merges are performed. + * <li>Whenever the target is retrieved, or a {@code merge} operation is + * performed the code checks if any older layers should be removed, and if so + * removes them. It also checks it a new layer should be added, and if so adds + * it and sets the {@code target} before the operation.</li> + * </ul> + */ +public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer { + private final Shape shape; + private LayerManager layerManager; + + /** + * Creates a fixed size layered bloom filter that adds new filters to the list, + * but never merges them. List will never exceed maxDepth. As additional filters + * are added earlier filters are removed. + * + * @param shape The shape for the enclosed Bloom filters + * @param maxDepth The maximum depth of layers. + * @return An empty layered Bloom filter of the specified shape and depth. + */ + public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) { + LayerManager manager = LayerManager.builder().withExtendCheck(LayerManager.ExtendCheck.ADVANCE_ON_POPULATED) + .withCleanup(LayerManager.Cleanup.onMaxSize(maxDepth)).withSuplier(() -> new SimpleBloomFilter(shape)) + .build(); + return new LayeredBloomFilter(shape, manager); + } + + /** + * Constructor. + * + * @param shape the Shape of the enclosed Bloom filters + * @param layerManager the LayerManager to manage the layers. + */ + public LayeredBloomFilter(Shape shape, LayerManager layerManager) { + this.shape = shape; + this.layerManager = layerManager; + } + + @Override + public LayeredBloomFilter copy() { + return new LayeredBloomFilter(shape, layerManager.copy()); + } + + /** + * Gets the depth of the deepest layer. + * + * @return the depth of the deepest layer. + */ + public final int getDepth() { + return layerManager.getDepth(); + } + + /** + * Gets the Bloom filter at the specified depth + * + * @param depth the depth of the filter to return. + * @return the Bloom filter at the specified depth. + * @throws NoSuchElementException if depth is not in the range [0,getDepth()) + */ + public BloomFilter get(int depth) { + return layerManager.get(depth); + } + + @Override + public int cardinality() { + return SetOperations.cardinality(this); + } + + @Override + public final void clear() { + layerManager.clear(); + } + + /** + * Clears the Bloom filter (removes all set bits) at the specified level. + * + * @param level the level to clear. + */ + public final void clear(int level) { + layerManager.get(level).clear(); + } + + /** + * Get the Bloom filter that is currently being merged into. This method ensures + * that the {@code target} filter meets the criteria specified within the + * {@code LayerManager}. if the {@code LayerManager} determines that a new + * target filter is required it is constructed and used. + * + * @return the current Bloom filter. + * @see LayerManager + */ + public final BloomFilter target() { + return layerManager.target(); + } + + /** + * Processes the Bloom filters in depth order with the most recent filters + * first. Each filter is passed to the predicate in turn. The function exits on + * the first {@code false} returned by the predicate. + * + * @param bloomFilterPredicate the predicate to execute. + * @return {@code true} if all filters passed the predicate, {@code false} + * otherwise. + */ + @Override + public final boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) { + return layerManager.forEachBloomFilter(bloomFilterPredicate); + } + + /** + * Create a standard (non-layered) Bloom filter by merging all of the layers. + * + * @return the merged bloom filter. + */ + public BloomFilter flatten() { Review Comment: I updated the comment to reflect that an empty filter should be returned if there are no filters in the collection. I don;t have a problem adding flatten() to the BloomFilterProducer. I will do that shortly. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
