This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch quotient-filter
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
The following commit(s) were added to refs/heads/quotient-filter by this push:
new 2fd83132 no insertions of duplicates
2fd83132 is described below
commit 2fd83132c5b74a60ae07018f5ce161dc06fe6fc4
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Mon Jun 3 13:40:21 2024 -0700
no insertions of duplicates
---
.../filters/quotientfilter/Filter.java | 6 +-
.../filters/quotientfilter/QuotientFilter.java | 41 +++-----
.../filters/quotientfilter/DeletionTests.java | 96 +++++++++--------
.../filters/quotientfilter/QuotientFilterTest.java | 114 +++++++++++----------
4 files changed, 127 insertions(+), 130 deletions(-)
diff --git
a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
index 8f079673..bfb1ad25 100644
--- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
+++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/Filter.java
@@ -34,7 +34,7 @@ public abstract class Filter {
//abstract boolean rejuvenate(long key);
//abstract boolean expand();
//protected abstract boolean _delete(long large_hash);
- abstract protected boolean _insert(long large_hash, boolean
insert_only_if_no_match);
+ abstract protected boolean _insert(long large_hash);
abstract protected boolean _search(long large_hash);
@@ -53,11 +53,11 @@ public abstract class Filter {
// return _delete(HashFunctions.xxhash(input_buffer));
// }
//
- public boolean insert(long input, boolean insert_only_if_no_match) {
+ public boolean insert(long input) {
//System.out.println("The ABC input is " + input);
long hash = get_hash(input);
//System.out.println("The ABC hash is " + hash);
- return _insert(hash, insert_only_if_no_match);
+ return _insert(hash);
}
//
// public boolean insert(String input, boolean insert_only_if_no_match) {
diff --git
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
index 40406dc4..e0a442fc 100644
---
a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
+++
b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java
@@ -400,30 +400,19 @@ public class QuotientFilter extends Filter {
return insert_fingerprint_and_push_all_else(long_fp,
start_of_this_new_run, false);
}
- boolean insert(long long_fp, long index, boolean insert_only_if_no_match) {
- //System.out.println("Inserting Fingerprint " + long_fp);
- //System.out.println("Inserting @ index " + index);
- //System.out.println("BoolMatch? " + insert_only_if_no_match);
- //System.out.println("**********");
- //System.out.println("Num items: " + num_entries);
- //System.out.println("Max items: " + max_entries_before_expansion);
-
- if (index >= get_num_slots() || num_entries == get_num_slots()) {
- return false;
- }
- boolean does_run_exist = is_occupied(index);
- if (!does_run_exist) {
- return insert_new_run(index, long_fp);
- }
-
- long run_start_index = find_run_start(index);
- if (insert_only_if_no_match) {
- long found_index = find_first_fingerprint_in_run(run_start_index,
long_fp);
- if (found_index > -1) {
- return false;
- }
- }
- return insert_fingerprint_and_push_all_else(long_fp, run_start_index,
true);
+ boolean insert(long long_fp, long index) {
+ if (index >= get_num_slots() || num_entries == get_num_slots()) {
+ return false;
+ }
+ if (!is_occupied(index)) {
+ return insert_new_run(index, long_fp);
+ }
+ long run_start_index = find_run_start(index);
+ final long found_index = find_first_fingerprint_in_run(run_start_index,
long_fp);
+ if (found_index > -1) {
+ return false;
+ }
+ return insert_fingerprint_and_push_all_else(long_fp, run_start_index,
true);
}
// insert a fingerprint as the last fingerprint of the run and push all
other entries in the cluster to the right.
@@ -601,7 +590,7 @@ public class QuotientFilter extends Filter {
Hence, the `large_hash` argument is already a hash key that has been
generated
by the hashing library (eg xxhash).
*/
- protected boolean _insert(long large_hash, boolean
insert_only_if_no_match) {
+ protected boolean _insert(long large_hash) {
//System.out.println("Inserting long hash " + large_hash);
if (is_full) {
return false;
@@ -615,7 +604,7 @@ public class QuotientFilter extends Filter {
System.out.println(slot_index + " " + fingerprint );
System.out.println(); */
- boolean success = insert(fingerprint, slot_index, false);
+ boolean success = insert(fingerprint, slot_index);
/*if (!success) {
System.out.println("insertion failure");
System.out.println(input + "\t" + slot_index + "\t" +
get_fingerprint_str(fingerprint, fingerprintLength));
diff --git
a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
index 1ed94b8a..6e1beb9f 100644
---
a/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
+++
b/src/test/java/org/apache/datasketches/filters/quotientfilter/DeletionTests.java
@@ -45,13 +45,13 @@ public class DeletionTests {
long fp3 = 1 << 2;
long fp4 = 31;
- qf.insert(fp4, 1, false);
- qf.insert(fp1, 1, false);
- qf.insert(fp1, 1, false);
- qf.insert(fp2, 2, false);
- qf.insert(fp1, 1, false);
- qf.insert(fp1, 1, false);
- qf.insert(fp3, 4, false);
+ qf.insert(fp4, 1);
+ qf.insert(fp1, 1);
+ qf.insert(fp1, 1);
+ qf.insert(fp2, 2);
+ qf.insert(fp1, 1);
+ qf.insert(fp1, 1);
+ qf.insert(fp3, 4);
qf.delete(31, 1);
@@ -75,39 +75,36 @@ public class DeletionTests {
* The expected outcome is that after deletion, the remaining keys should
be in their canonical slots.
*/
@Test
- static public void DeletionsWithSameFingerprint() {
+ static public void Deletions() {
int bits_per_entry = 8;
int num_entries_power = 3;
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
-
- // All keys have the same fingerprint but are mapped into (mostly)
different slots
- qf.insert(0, 1, false);
- qf.insert(0, 1, false);
- qf.insert(0, 2, false);
- qf.insert(0, 2, false);
- qf.insert(0, 3, false);
- qf.insert(0, 3, false);
- qf.insert(0, 3, false);
- qf.insert(0, 6, false);
- qf.insert(0, 6, false); // these are ignored
- qf.insert(0, 6, false);
- qf.insert(0, 7, false);
-
- qf.delete(0, 2);
- qf.delete(0, 3);
+ qf.insert(1, 1);
+ qf.insert(2, 1);
+ qf.insert(3, 2);
+ qf.insert(4, 2);
+ qf.insert(5, 3);
+ qf.insert(6, 3);
+ qf.insert(7, 3);
+ qf.insert(8, 6);
+ qf.insert(9, 6); // these are ignored
+ qf.insert(10, 6);
+ qf.insert(11, 7);
+
+ qf.delete(3, 2);
+ qf.delete(5, 3);
BitSet result = new BitSet(num_entries * bits_per_entry);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
1, true, false, false, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
2, true, true, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
3, true, false, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
4, false, false, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
5, false, true, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
6, true, false, false, 0);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
0, false, false, false, 0);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
1, true, false, false, 1);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
2, true, true, true, 2);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
3, true, false, true, 4);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
4, false, false, true, 6);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
5, false, true, true, 7);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
6, true, false, false, 8);
result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
7, false, false, false, 0);
-// result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
8, false, true, true, 0);
-// result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
9, false, false, true, 0);
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
}
@@ -123,33 +120,34 @@ public class DeletionTests {
*
* The expected outcome is that after deletion, the remaining keys should
be in their canonical slots.
*/
- static public void DeletionsWithOverflow() {
+ static public void DeletionsWithWrap() {
int bits_per_entry = 8;
int num_entries_power = 3;
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
- qf.insert(0, 1, false);
- qf.insert(0, 1, false);
- qf.insert(0, 2, false);
- qf.insert(0, 2, false);
- qf.insert(0, 3, false);
- qf.insert(0, 4, false);
- qf.insert(0, 4, false);
- qf.insert(0, 5, false);
+ qf.insert(1, 1);
+ qf.insert(2, 1);
+ qf.insert(3, 2);
+ qf.insert(4, 2);
+ qf.insert(5, 3);
+ qf.insert(6, 4);
+ qf.insert(7, 4);
+ qf.insert(8, 5);
//qf.pretty_print();
- qf.delete(0, 3);
+ qf.delete(5, 3);
//qf.pretty_print();
BitSet result = new BitSet(num_entries * bits_per_entry);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
1, true, false, false, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
2, true, true, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
3, false, false, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
4, true, true, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
5, true, false, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
6, false, true, true, 0);
- result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
7, false, false, true, 0);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
0, false, false, false, 0);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
1, true, false, false, 1);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
2, true, true, true, 2);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
3, false, false, true, 3);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
4, true, true, true, 4);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
5, true, false, true, 6);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
6, false, true, true, 7);
+ result = QuotientFilterTest.set_slot_in_test(result, bits_per_entry,
7, false, false, true, 8);
assertTrue(QuotientFilterTest.check_equality(qf, result, true));
}
}
diff --git
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
index 64f92ddc..4f6486ea 100644
---
a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
+++
b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java
@@ -56,12 +56,12 @@ public class QuotientFilterTest {
final int E = 5;
final int F = 6;
- qf.insert(B, 1, false);
- qf.insert(E, 4, false);
- qf.insert(F, 7, false);
- qf.insert(C, 1, false);
- qf.insert(D, 2, false);
- qf.insert(A, 1, false);
+ qf.insert(B, 1);
+ qf.insert(E, 4);
+ qf.insert(F, 7);
+ qf.insert(C, 1);
+ qf.insert(D, 2);
+ qf.insert(A, 1);
assertEquals(qf.get_num_entries(), 6);
assertEquals(getState(qf, 0), 0);
@@ -99,26 +99,35 @@ public class QuotientFilterTest {
int num_entries = (int)Math.pow(2, num_entries_power);
QuotientFilter qf = new QuotientFilter(4, 8);
- // (key, slot): {(a, 1), (b,1), (c ,3), (d, 3), (e, 3), (f, 4), (g,
6), (h, 6)}
- qf.insert(0, 1, false);
- qf.insert(0, 1, false);
- qf.insert(0, 3, false);
- qf.insert(0, 3, false);
- qf.insert(0, 3, false);
- qf.insert(0, 4, false);
- qf.insert(0, 6, false);
- qf.insert(0, 6, false);
+ final int A = 1;
+ final int B = 2;
+ final int C = 3;
+ final int D = 4;
+ final int E = 5;
+ final int F = 6;
+ final int G = 7;
+ final int H = 8;
+
+ // (key, slot): {(a, 1), (b, 1), (c, 3), (d, 3), (e, 3), (f, 4), (g,
6), (h, 6)}
+ qf.insert(A, 1);
+ qf.insert(B, 1);
+ qf.insert(C, 3);
+ qf.insert(D, 3);
+ qf.insert(E, 3);
+ qf.insert(F, 4);
+ qf.insert(G, 6);
+ qf.insert(H, 6);
BitSet result = new BitSet(num_entries * bits_per_entry);
result = set_slot_in_test(result, bits_per_entry, 0, false, false,
false, 0);
- result = set_slot_in_test(result, bits_per_entry, 1, true, false,
false, 0);
- result = set_slot_in_test(result, bits_per_entry, 2, false, true,
true, 0);
- result = set_slot_in_test(result, bits_per_entry, 3, true, false,
false, 0);
- result = set_slot_in_test(result, bits_per_entry, 4, true, true, true,
0);
- result = set_slot_in_test(result, bits_per_entry, 5, false, true,
true, 0);
- result = set_slot_in_test(result, bits_per_entry, 6, true, false,
true, 0);
- result = set_slot_in_test(result, bits_per_entry, 7, false, false,
true, 0);
- result = set_slot_in_test(result, bits_per_entry, 8, false, true,
true, 0);
+ result = set_slot_in_test(result, bits_per_entry, 1, true, false,
false, A);
+ result = set_slot_in_test(result, bits_per_entry, 2, false, true,
true, B);
+ result = set_slot_in_test(result, bits_per_entry, 3, true, false,
false, C);
+ result = set_slot_in_test(result, bits_per_entry, 4, true, true, true,
D);
+ result = set_slot_in_test(result, bits_per_entry, 5, false, true,
true, E);
+ result = set_slot_in_test(result, bits_per_entry, 6, true, false,
true, F);
+ result = set_slot_in_test(result, bits_per_entry, 7, false, false,
true, G);
+ result = set_slot_in_test(result, bits_per_entry, 8, false, true,
true, H);
assertTrue(check_equality(qf, result, false));
}
@@ -139,23 +148,24 @@ public class QuotientFilterTest {
*/
@Test
public void OverflowTest() {
- int bits_per_entry = 8;
- int num_entries_power = 3;
- int num_entries = (int)Math.pow(2, num_entries_power);
- int fingerprint_size = bits_per_entry - 3;
- QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
-
- long fp2 = 1 << fingerprint_size - 1;
- qf.insert(fp2, num_entries - 1, false);
- assertEquals(qf.get_fingerprint(7), fp2);
- assertEquals(getState(qf, 7), 0b100);
- qf.insert(fp2, num_entries - 1, false);
+ final int bits_per_entry = 8;
+ final int num_entries_power = 3;
+ final int num_entries = (int)Math.pow(2, num_entries_power);
+ final int fingerprint_size = bits_per_entry - 3;
+ final QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
+
+ final long fp1 = 1;
+ final long fp2 = 1 << fingerprint_size - 1;
+ qf.insert(fp1, num_entries - 1);
+ assertEquals(qf.get_fingerprint(num_entries - 1), fp1);
+ assertEquals(getState(qf, num_entries - 1), 0b100);
+ qf.insert(fp2, num_entries - 1);
assertEquals(qf.get_fingerprint(0), fp2);
assertEquals(getState(qf, 0), 0b011);
qf.delete(fp2, num_entries - 1);
assertEquals(qf.get_fingerprint(0), 0);
assertEquals(getState(qf, 0), 0b000);
- boolean found = qf.search(fp2, num_entries - 1);
+ final boolean found = qf.search(fp1, num_entries - 1);
assertTrue(found);
}
@@ -174,16 +184,16 @@ public class QuotientFilterTest {
//int fingerprint_size = bits_per_entry - 3;
QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
- qf.insert(0x1F, 2, false);
- qf.insert(0x1F, 3, false);
- qf.insert(0x1F, 3, false);
- qf.insert(0x1F, 4, false);
- qf.insert(0x1F, 15, false); // last slot in the filter
- qf.insert(0x1F, 16, false); // outside the bounds
+ qf.insert(0x1F, 2);
+ qf.insert(0x1F, 3);
+ qf.insert(0x1F, 3);
+ qf.insert(0x1F, 4);
+ qf.insert(0x1F, 15); // last slot in the filter
+ qf.insert(0x1F, 16); // outside the bounds
qf.pretty_print() ;
Iterator it = new Iterator(qf);
- int[] arr = new int[] {2, 3, 3, 4, 15};
+ int[] arr = new int[] {2, 3, 4, 15};
int arr_index = 0;
while (it.next()) {assertEquals(it.bucket_index, arr[arr_index++]);}
}
@@ -195,16 +205,16 @@ public class QuotientFilterTest {
int num_entries_power = 4;
QuotientFilter qf = new QuotientFilter(num_entries_power,
bits_per_entry);
- qf.insert(0, 1, false);
- qf.insert(0, 4, false);
- qf.insert(0, 7, false);
- qf.insert(0, 1, false);
- qf.insert(0, 2, false);
- qf.insert(0, 1, false);
- qf.insert(0, 15, false);
+ qf.insert(0, 1);
+ qf.insert(0, 4);
+ qf.insert(0, 7);
+ qf.insert(0, 1);
+ qf.insert(0, 2);
+ qf.insert(0, 1);
+ qf.insert(0, 15);
Iterator it = new Iterator(qf);
- int[] arr = new int[] {1, 1, 1, 2, 4, 7, 15};
+ int[] arr = new int[] {1, 2, 4, 7, 15};
int arr_index = 0;
while (it.next()) {assertEquals(arr[arr_index++], it.bucket_index);}
}
@@ -270,7 +280,7 @@ public class QuotientFilterTest {
for (int i = 0; i < num_entries; i++) {
int rand_num = rand.nextInt();
- boolean success = filter.insert(rand_num, false);
+ boolean success = filter.insert(rand_num);
if (success) {
added.add(rand_num);
}
@@ -279,7 +289,7 @@ public class QuotientFilterTest {
}
}
- for (Integer i : added) {
+ for (Integer i: added) {
boolean found = filter.search((long)i);
if (!found) {
return false;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]