This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch quotient-filter
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


The following commit(s) were added to refs/heads/quotient-filter by this push:
     new 2fd83132 no insertions of duplicates
2fd83132 is described below

commit 2fd83132c5b74a60ae07018f5ce161dc06fe6fc4
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Mon Jun 3 13:40:21 2024 -0700

    no insertions of duplicates
---
 .../filters/quotientfilter/Filter.java             |   6 +-
 .../filters/quotientfilter/QuotientFilter.java     |  41 +++-----
 .../filters/quotientfilter/DeletionTests.java      |  96 +++++++++--------
 .../filters/quotientfilter/QuotientFilterTest.java | 114 +++++++++++----------
 4 files changed, 127 insertions(+), 130 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
index 8f079673..bfb1ad25 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
@@ -34,7 +34,7 @@ public abstract class Filter {
     //abstract boolean rejuvenate(long key);
     //abstract boolean expand();
     //protected abstract boolean _delete(long large_hash);
-    abstract protected boolean _insert(long large_hash, boolean 
insert_only_if_no_match);
+    abstract protected boolean _insert(long large_hash);
     abstract protected boolean _search(long large_hash);
 
 
@@ -53,11 +53,11 @@ public abstract class Filter {
 //        return _delete(HashFunctions.xxhash(input_buffer));
 //    }
 //
-    public boolean insert(long input, boolean insert_only_if_no_match) {
+    public boolean insert(long input) {
         //System.out.println("The ABC input is " + input);
         long hash = get_hash(input);
         //System.out.println("The ABC hash  is " + hash);
-        return _insert(hash, insert_only_if_no_match);
+        return _insert(hash);
     }
 //
 //    public boolean insert(String input, boolean insert_only_if_no_match) {
diff --git 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
index 40406dc4..e0a442fc 100644
--- 
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
+++ 
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
@@ -400,30 +400,19 @@ public class QuotientFilter extends Filter {
         return insert_fingerprint_and_push_all_else(long_fp, 
start_of_this_new_run, false);
     }
 
-    boolean insert(long long_fp, long index, boolean insert_only_if_no_match) {
-        //System.out.println("Inserting Fingerprint " + long_fp);
-        //System.out.println("Inserting @ index     " + index);
-        //System.out.println("BoolMatch? " + insert_only_if_no_match);
-        //System.out.println("**********");
-        //System.out.println("Num items: " + num_entries);
-        //System.out.println("Max items: " + max_entries_before_expansion);
-
-        if (index >= get_num_slots() || num_entries == get_num_slots()) {
-            return false;
-        }
-        boolean does_run_exist = is_occupied(index);
-        if (!does_run_exist) {
-            return insert_new_run(index, long_fp);
-        }
-
-        long run_start_index = find_run_start(index);
-        if (insert_only_if_no_match) {
-            long found_index = find_first_fingerprint_in_run(run_start_index, 
long_fp);
-            if (found_index > -1) {
-                return false;
-            }
-        }
-        return insert_fingerprint_and_push_all_else(long_fp, run_start_index, 
true);
+    boolean insert(long long_fp, long index) {
+      if (index >= get_num_slots() || num_entries == get_num_slots()) {
+        return false;
+      }
+      if (!is_occupied(index)) {
+        return insert_new_run(index, long_fp);
+      }
+      long run_start_index = find_run_start(index);
+      final long found_index = find_first_fingerprint_in_run(run_start_index, 
long_fp);
+      if (found_index > -1) {
+        return false;
+      }
+      return insert_fingerprint_and_push_all_else(long_fp, run_start_index, 
true);
     }
 
     // insert a fingerprint as the last fingerprint of the run and push all 
other entries in the cluster to the right.
@@ -601,7 +590,7 @@ public class QuotientFilter extends Filter {
     Hence, the `large_hash` argument is already a hash key that has been 
generated
     by the hashing library (eg xxhash).
      */
-    protected boolean _insert(long large_hash, boolean 
insert_only_if_no_match) {
+    protected boolean _insert(long large_hash) {
         //System.out.println("Inserting long hash " + large_hash);
         if (is_full) {
             return false;
@@ -615,7 +604,7 @@ public class QuotientFilter extends Filter {
                System.out.println(slot_index + "  " + fingerprint );
                System.out.println(); */
 
-        boolean success = insert(fingerprint, slot_index, false);
+        boolean success = insert(fingerprint, slot_index);
                /*if (!success) {
                        System.out.println("insertion failure");
                        System.out.println(input + "\t" + slot_index + "\t" + 
get_fingerprint_str(fingerprint, fingerprintLength));
diff --git 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
index 1ed94b8a..6e1beb9f 100644
--- 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
+++ 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
@@ -45,13 +45,13 @@ public class DeletionTests {
         long fp3 = 1 << 2;
         long fp4 = 31;
 
-        qf.insert(fp4, 1, false);
-        qf.insert(fp1, 1, false);
-        qf.insert(fp1, 1, false);
-        qf.insert(fp2, 2, false);
-        qf.insert(fp1, 1, false);
-        qf.insert(fp1, 1, false);
-        qf.insert(fp3, 4, false);
+        qf.insert(fp4, 1);
+        qf.insert(fp1, 1);
+        qf.insert(fp1, 1);
+        qf.insert(fp2, 2);
+        qf.insert(fp1, 1);
+        qf.insert(fp1, 1);
+        qf.insert(fp3, 4);
 
 
         qf.delete(31, 1);
@@ -75,39 +75,36 @@ public class DeletionTests {
      * The expected outcome is that after deletion, the remaining keys should 
be in their canonical slots.
      */
     @Test
-    static public void DeletionsWithSameFingerprint() {
+    static public void Deletions() {
         int bits_per_entry = 8;
         int num_entries_power = 3;
         int num_entries = (int)Math.pow(2, num_entries_power);
         QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
 
-
-        // All keys have the same fingerprint but are mapped into (mostly) 
different slots
-        qf.insert(0, 1, false);
-        qf.insert(0, 1, false);
-        qf.insert(0, 2, false);
-        qf.insert(0, 2, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 6, false);
-        qf.insert(0, 6, false); // these are ignored
-        qf.insert(0, 6, false);
-        qf.insert(0, 7, false);
-
-        qf.delete(0, 2);
-        qf.delete(0, 3);
+        qf.insert(1, 1);
+        qf.insert(2, 1);
+        qf.insert(3, 2);
+        qf.insert(4, 2);
+        qf.insert(5, 3);
+        qf.insert(6, 3);
+        qf.insert(7, 3);
+        qf.insert(8, 6);
+        qf.insert(9, 6); // these are ignored
+        qf.insert(10, 6);
+        qf.insert(11, 7);
+
+        qf.delete(3, 2);
+        qf.delete(5, 3);
 
         BitSet result = new BitSet(num_entries * bits_per_entry);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
1, true, false, false, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
2, true, true, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
3, true, false, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
4, false, false, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
5, false, true, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
6, true, false, false, 0);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
0, false, false, false, 0);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
1, true, false, false, 1);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
2, true, true, true, 2);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
3, true, false, true, 4);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
4, false, false, true, 6);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
5, false, true, true, 7);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
6, true, false, false, 8);
         result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
7, false, false, false, 0);
-//        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
8, false, true, true, 0);
-//        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
9, false, false, true, 0);
 
         assertTrue(QuotientFilterTest.check_equality(qf, result, true));
     }
@@ -123,33 +120,34 @@ public class DeletionTests {
      *
      * The expected outcome is that after deletion, the remaining keys should 
be in their canonical slots.
      */
-    static public void DeletionsWithOverflow() {
+    static public void DeletionsWithWrap() {
         int bits_per_entry = 8;
         int num_entries_power = 3;
         int num_entries = (int)Math.pow(2, num_entries_power);
         QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
 
-        qf.insert(0, 1, false);
-        qf.insert(0, 1, false);
-        qf.insert(0, 2, false);
-        qf.insert(0, 2, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 4, false);
-        qf.insert(0, 4, false);
-        qf.insert(0, 5, false);
+        qf.insert(1, 1);
+        qf.insert(2, 1);
+        qf.insert(3, 2);
+        qf.insert(4, 2);
+        qf.insert(5, 3);
+        qf.insert(6, 4);
+        qf.insert(7, 4);
+        qf.insert(8, 5);
 
         //qf.pretty_print();
-        qf.delete(0, 3);
+        qf.delete(5, 3);
         //qf.pretty_print();
 
         BitSet result = new BitSet(num_entries * bits_per_entry);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
1, true, false, false, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
2, true, true, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
3, false, false, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
4, true, true, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
5, true, false, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
6, false, true, true, 0);
-        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
7, false, false, true, 0);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
0, false, false, false, 0);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
1, true, false, false, 1);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
2, true, true, true, 2);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
3, false, false, true, 3);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
4, true, true, true, 4);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
5, true, false, true, 6);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
6, false, true, true, 7);
+        result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry, 
7, false, false, true, 8);
         assertTrue(QuotientFilterTest.check_equality(qf, result, true));
     }
 }
diff --git 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
index 64f92ddc..4f6486ea 100644
--- 
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
+++ 
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
@@ -56,12 +56,12 @@ public class QuotientFilterTest {
         final int E = 5;
         final int F = 6;
 
-        qf.insert(B, 1, false);
-        qf.insert(E, 4, false);
-        qf.insert(F, 7, false);
-        qf.insert(C, 1, false);
-        qf.insert(D, 2, false);
-        qf.insert(A, 1, false);
+        qf.insert(B, 1);
+        qf.insert(E, 4);
+        qf.insert(F, 7);
+        qf.insert(C, 1);
+        qf.insert(D, 2);
+        qf.insert(A, 1);
         assertEquals(qf.get_num_entries(), 6);
 
         assertEquals(getState(qf, 0), 0);
@@ -99,26 +99,35 @@ public class QuotientFilterTest {
         int num_entries = (int)Math.pow(2, num_entries_power);
         QuotientFilter qf = new QuotientFilter(4, 8);
 
-        // (key, slot): {(a, 1), (b,1), (c ,3), (d, 3), (e, 3), (f, 4), (g, 
6), (h, 6)}
-        qf.insert(0, 1, false);
-        qf.insert(0, 1, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 3, false);
-        qf.insert(0, 4, false);
-        qf.insert(0, 6, false);
-        qf.insert(0, 6, false);
+        final int A = 1;
+        final int B = 2;
+        final int C = 3;
+        final int D = 4;
+        final int E = 5;
+        final int F = 6;
+        final int G = 7;
+        final int H = 8;
+
+        // (key, slot): {(a, 1), (b, 1), (c, 3), (d, 3), (e, 3), (f, 4), (g, 
6), (h, 6)}
+        qf.insert(A, 1);
+        qf.insert(B, 1);
+        qf.insert(C, 3);
+        qf.insert(D, 3);
+        qf.insert(E, 3);
+        qf.insert(F, 4);
+        qf.insert(G, 6);
+        qf.insert(H, 6);
 
         BitSet result = new BitSet(num_entries * bits_per_entry);
         result = set_slot_in_test(result, bits_per_entry, 0, false, false, 
false, 0);
-        result = set_slot_in_test(result, bits_per_entry, 1, true, false, 
false, 0);
-        result = set_slot_in_test(result, bits_per_entry, 2, false, true, 
true, 0);
-        result = set_slot_in_test(result, bits_per_entry, 3, true, false, 
false, 0);
-        result = set_slot_in_test(result, bits_per_entry, 4, true, true, true, 
0);
-        result = set_slot_in_test(result, bits_per_entry, 5, false, true, 
true, 0);
-        result = set_slot_in_test(result, bits_per_entry, 6, true, false, 
true, 0);
-        result = set_slot_in_test(result, bits_per_entry, 7, false, false, 
true, 0);
-        result = set_slot_in_test(result, bits_per_entry, 8, false, true, 
true, 0);
+        result = set_slot_in_test(result, bits_per_entry, 1, true, false, 
false, A);
+        result = set_slot_in_test(result, bits_per_entry, 2, false, true, 
true, B);
+        result = set_slot_in_test(result, bits_per_entry, 3, true, false, 
false, C);
+        result = set_slot_in_test(result, bits_per_entry, 4, true, true, true, 
D);
+        result = set_slot_in_test(result, bits_per_entry, 5, false, true, 
true, E);
+        result = set_slot_in_test(result, bits_per_entry, 6, true, false, 
true, F);
+        result = set_slot_in_test(result, bits_per_entry, 7, false, false, 
true, G);
+        result = set_slot_in_test(result, bits_per_entry, 8, false, true, 
true, H);
         assertTrue(check_equality(qf, result, false));
     }
 
@@ -139,23 +148,24 @@ public class QuotientFilterTest {
      */
     @Test
     public void OverflowTest() {
-        int bits_per_entry = 8;
-        int num_entries_power = 3;
-        int num_entries = (int)Math.pow(2, num_entries_power);
-        int fingerprint_size = bits_per_entry - 3;
-        QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
-
-        long fp2 = 1 << fingerprint_size - 1;
-        qf.insert(fp2, num_entries - 1, false);
-        assertEquals(qf.get_fingerprint(7), fp2);
-        assertEquals(getState(qf, 7), 0b100);
-        qf.insert(fp2, num_entries - 1, false);
+        final int bits_per_entry = 8;
+        final int num_entries_power = 3;
+        final int num_entries = (int)Math.pow(2, num_entries_power);
+        final int fingerprint_size = bits_per_entry - 3;
+        final QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
+
+        final long fp1 = 1;
+        final long fp2 = 1 << fingerprint_size - 1;
+        qf.insert(fp1, num_entries - 1);
+        assertEquals(qf.get_fingerprint(num_entries - 1), fp1);
+        assertEquals(getState(qf, num_entries - 1), 0b100);
+        qf.insert(fp2, num_entries - 1);
         assertEquals(qf.get_fingerprint(0), fp2);
         assertEquals(getState(qf, 0), 0b011);
         qf.delete(fp2, num_entries - 1);
         assertEquals(qf.get_fingerprint(0), 0);
         assertEquals(getState(qf, 0), 0b000);
-        boolean found = qf.search(fp2, num_entries - 1);
+        final boolean found = qf.search(fp1, num_entries - 1);
         assertTrue(found);
     }
 
@@ -174,16 +184,16 @@ public class QuotientFilterTest {
         //int fingerprint_size = bits_per_entry - 3;
         QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
 
-        qf.insert(0x1F, 2, false);
-        qf.insert(0x1F, 3, false);
-        qf.insert(0x1F, 3, false);
-        qf.insert(0x1F, 4, false);
-        qf.insert(0x1F, 15, false); // last slot in the filter
-        qf.insert(0x1F, 16, false); // outside the bounds
+        qf.insert(0x1F, 2);
+        qf.insert(0x1F, 3);
+        qf.insert(0x1F, 3);
+        qf.insert(0x1F, 4);
+        qf.insert(0x1F, 15); // last slot in the filter
+        qf.insert(0x1F, 16); // outside the bounds
         qf.pretty_print() ;
 
         Iterator it = new Iterator(qf);
-        int[] arr = new int[] {2, 3, 3, 4, 15};
+        int[] arr = new int[] {2, 3, 4, 15};
         int arr_index = 0;
         while (it.next()) {assertEquals(it.bucket_index, arr[arr_index++]);}
     }
@@ -195,16 +205,16 @@ public class QuotientFilterTest {
         int num_entries_power = 4;
         QuotientFilter qf = new QuotientFilter(num_entries_power, 
bits_per_entry);
 
-        qf.insert(0, 1, false);
-        qf.insert(0, 4, false);
-        qf.insert(0, 7, false);
-        qf.insert(0, 1, false);
-        qf.insert(0, 2, false);
-        qf.insert(0, 1, false);
-        qf.insert(0, 15, false);
+        qf.insert(0, 1);
+        qf.insert(0, 4);
+        qf.insert(0, 7);
+        qf.insert(0, 1);
+        qf.insert(0, 2);
+        qf.insert(0, 1);
+        qf.insert(0, 15);
 
         Iterator it = new Iterator(qf);
-        int[] arr = new int[] {1, 1, 1, 2, 4, 7, 15};
+        int[] arr = new int[] {1, 2, 4, 7, 15};
         int arr_index = 0;
         while (it.next()) {assertEquals(arr[arr_index++], it.bucket_index);}
     }
@@ -270,7 +280,7 @@ public class QuotientFilterTest {
 
         for (int i = 0; i < num_entries; i++) {
             int rand_num = rand.nextInt();
-            boolean success = filter.insert(rand_num, false);
+            boolean success = filter.insert(rand_num);
             if (success) {
                 added.add(rand_num);
             }
@@ -279,7 +289,7 @@ public class QuotientFilterTest {
             }
         }
 
-        for (Integer i : added) {
+        for (Integer i: added) {
             boolean found = filter.search((long)i);
             if (!found) {
                 return false;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to