Claudenw commented on code in PR #402: URL: https://github.com/apache/commons-collections/pull/402#discussion_r1256041156
########## src/main/java/org/apache/commons/collections4/bloomfilter/LayeredBloomFilter.java: ########## @@ -0,0 +1,416 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.collections4.bloomfilter; + +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.IntPredicate; +import java.util.function.LongPredicate; +import java.util.function.Predicate; + +/** + * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun + * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd + * International Conference on Advanced Computer Theory and Engineering (ICACTE + * 2010), vol. 1, pp. V1–586–V1–591, doi:10.1109/ICACTE.2010.5578947, ISBN + * 978-1-4244-6539-2, S2CID 3108985 + * <p> + * In short, Layered Bloom filter contains several bloom filters arranged in + * layers. + * </p> + * <ul> + * <li>When membership in the filter is checked each layer in turn is checked + * and if a match is found {@code true} is returned.</li> + * <li>When merging each bloom filter is merged into the last newest filter in + * the list of layers.</li> + * <li>When questions of cardinality are asked the cardinality of the union of + * the enclosed Bloom filters is used.</li> + * </ul> + * The net result is that the layered Bloom filter can be populated with more + * items than the Shape would indicate and yet still return a false positive + * rate in line with the Shape and not the over population. + * <p> + * This implementation uses a LayerManager to handle the manipulation of the + * layers. + * </p> + * <ul> + * <li>Level 0 is the oldest layer and the highest level is the newest.</li> + * <li>There is always at least one enclosed filter.</li> + * <li>The newest filter is the {@code target} into which merges are performed. + * <li>Whenever the target is retrieved, or a {@code merge} operation is + * performed the code checks if any older layers should be removed, and if so + * removes them. It also checks it a new layer should be added, and if so adds + * it and sets the {@code target} before the operation.</li> + * </ul> + */ +public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer { + private final Shape shape; + private LayerManager layerManager; + + /** + * Creates a fixed size layered bloom filter that adds new filters to the list, + * but never merges them. List will never exceed maxDepth. As additional filters + * are added earlier filters are removed. + * + * @param shape The shape for the enclosed Bloom filters + * @param maxDepth The maximum depth of layers. + * @return An empty layered Bloom filter of the specified shape and depth. + */ + public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) { + LayerManager manager = LayerManager.builder().withExtendCheck(LayerManager.ExtendCheck.ADVANCE_ON_POPULATED) + .withCleanup(LayerManager.Cleanup.onMaxSize(maxDepth)).withSuplier(() -> new SimpleBloomFilter(shape)) + .build(); + return new LayeredBloomFilter(shape, manager); + } + + /** + * Constructor. + * + * @param shape the Shape of the enclosed Bloom filters + * @param layerManager the LayerManager to manage the layers. + */ + public LayeredBloomFilter(Shape shape, LayerManager layerManager) { + this.shape = shape; + this.layerManager = layerManager; + } + + @Override + public LayeredBloomFilter copy() { + return new LayeredBloomFilter(shape, layerManager.copy()); + } + + /** + * Gets the depth of the deepest layer. + * + * @return the depth of the deepest layer. + */ + public final int getDepth() { + return layerManager.getDepth(); + } + + /** + * Gets the Bloom filter at the specified depth + * + * @param depth the depth of the filter to return. + * @return the Bloom filter at the specified depth. + * @throws NoSuchElementException if depth is not in the range [0,getDepth()) + */ + public BloomFilter get(int depth) { Review Comment: Because there can be additional info associated with the Bloom filter. In the paper and in many implementations the find returns the deepest node that matches. Our finds returns the indexes that match so that the application may select the deepest, the shallowest, the one from the middle, or perhaps all of them and do some subsequent processing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
