Claudenw commented on a change in pull request #83: Initial bloom filter code 
contribution
URL: https://github.com/apache/commons-collections/pull/83#discussion_r365576786
 
 

 ##########
 File path: 
src/main/java/org/apache/commons/collections4/bloomfilter/SetOperations.java
 ##########
 @@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.collections4.bloomfilter;
+
+import org.apache.commons.collections4.bloomfilter.BloomFilter.Shape;
+
+/**
+ * Implementations of set operations on Bloom filters.
+ *
+ */
+public final class SetOperations {
+
+    /**
+     * Private constructor; this class is not meant to be instantiated.
+     */
+    private SetOperations() {
+        // Do not instantiate.
+    }
+
+    /**
+     * Verifies the Bloom filters have the same shape.
+     *
+     * @param first the first filter to check.
+     * @param second the second filter to check.
+     * @throws IllegalArgumentException if the shapes are not the same.
+     */
+    private static void verifyShape(BloomFilter first, BloomFilter second) {
+        if (!first.getShape().equals(second.getShape())) {
+            throw new IllegalArgumentException(String.format("Shape %s is not 
the same as %s",
+                first.getShape(), second.getShape()));
+        }
+    }
+
+    /**
+     * Computes the Hamming distance between two Bloom filters, taken as the
+     * XOR cardinality of the first filter with the second.
+     *
+     * @param first the first Bloom filter.
+     * @param second the second Bloom filter.
+     * @return the Hamming distance.
+     * @throws IllegalArgumentException if the filters do not have the same shape.
+     */
+    public static int hammingDistance(BloomFilter first, BloomFilter second) {
+        verifyShape(first, second);
+        final int xorCard = first.xorCardinality(second);
+        return xorCard;
+    }
+
+
+    /**
+     * Calculates the Jaccard similarity between two Bloom filters.
+     *
+     * <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient.
+     * It is computed as the AND cardinality of the two filters divided by their OR cardinality.</p>
+     *
+     * <p>If the OR cardinality is zero the ratio is undefined and the result is 0 (zero).</p>
+     *
+     * @param first the first Bloom filter.
+     * @param second the second Bloom filter.
+     * @return the Jaccard similarity.
+     * @throws IllegalArgumentException if the filters do not have the same shape.
+     */
+    public static double jaccardSimilarity(BloomFilter first, BloomFilter second) {
+        verifyShape(first, second);
+        final int orCard = first.orCardinality(second);
+        if (orCard == 0) {
+            // Nothing in the union: avoid dividing by zero and report 0.
+            return 0;
+        }
+        // Similarity is intersection over union. Dividing the Hamming distance (XOR)
+        // by the union would produce the Jaccard distance instead.
+        return first.andCardinality(second) / (double) orCard;
+    }
+
+    /**
+     * Computes the Jaccard distance between two Bloom filters.
+     *
+     * <p>The Jaccard distance is {@code 1 - Jaccard similarity}.</p>
+     *
+     * @param first the first Bloom filter.
+     * @param second the second Bloom filter.
+     * @return the Jaccard distance.
+     */
+    public static double jaccardDistance(BloomFilter first, BloomFilter second) {
+        final double similarity = jaccardSimilarity(first, second);
+        return 1.0 - similarity;
+    }
+
+    /**
+     * Computes the Cosine similarity between two Bloom filters.
+     *
+     * <p>Also known as Orchini similarity and the Tucker coefficient of congruence or
+     * Ochiai similarity. The value is the AND cardinality of the two filters divided by
+     * the product of the square roots of their individual cardinalities.</p>
+     *
+     * <p>If either filter is empty (no enabled bits) the result is 0 (zero).</p>
+     *
+     * @param first the first Bloom filter.
+     * @param second the second Bloom filter.
+     * @return the Cosine similarity.
+     * @throws IllegalArgumentException if the filters do not have the same shape.
+     */
+    public static double cosineSimilarity(BloomFilter first, BloomFilter second) {
+        verifyShape(first, second);
+        final int andCard = first.andCardinality(second);
+        if (andCard == 0) {
+            // No shared bits: return 0 without touching the denominator.
+            return 0;
+        }
+        final double firstRoot = Math.sqrt(first.cardinality());
+        final double secondRoot = Math.sqrt(second.cardinality());
+        return andCard / (firstRoot * secondRoot);
+    }
+
+    /**
+     * Computes the Cosine distance between two Bloom filters.
+     *
+     * <p>The Cosine distance is {@code 1 - Cosine similarity}.</p>
+     *
+     * @param first the first Bloom filter.
+     * @param second the second Bloom filter.
+     * @return the Cosine distance.
+     */
+    public static double cosineDistance(BloomFilter first, BloomFilter second) {
+        final double similarity = cosineSimilarity(first, second);
+        return 1.0 - similarity;
+    }
+
+    /**
+     * Estimates the number of items in the Bloom filter based on the shape 
and the number
+     * of bits that are enabled.
+     *
+     * @param filter the Bloom filter to estimate size for.
+     * @return an estimate of the number of items that were placed in the 
Bloom filter.
 
 Review comment:
   fixed

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to