c-dickens commented on code in PR #571:
URL: https://github.com/apache/datasketches-java/pull/571#discussion_r1628084666


##########
src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java:
##########
@@ -373,74 +376,49 @@ Set<Long> get_all_fingerprints(long bucket_index) {
         return set;
     }
 
-    // Swaps the fingerprint in a given slot with a new one. Return the 
pre-existing fingerprint
-    long swap_fingerprints(long index, long new_fingerprint) {
-        long existing = get_fingerprint(index);
-        set_fingerprint(index, new_fingerprint);
-        return existing;
-    }
-
-    boolean insert_new_run(long canonical_slot, long long_fp) {
-        long start_of_this_new_run = find_run_start(canonical_slot);
-        boolean slot_initially_empty = is_slot_empty(start_of_this_new_run);
-
-        // modify some metadata flags to mark the new run
-        set_occupied(canonical_slot, true);
-        if (start_of_this_new_run != canonical_slot) {
-            set_shifted(start_of_this_new_run, true);
-        }
-        set_continuation(start_of_this_new_run, false);
-
-        // if the slot was initially empty, we can just terminate, as there is 
nothing to push to the right
-        if (slot_initially_empty) {
-            set_fingerprint(start_of_this_new_run, long_fp);
-            num_entries++;
-            return true;
-        }
-        return insert_fingerprint_and_push_all_else(long_fp, 
start_of_this_new_run, false);
-    }
-
-    boolean insert(long long_fp, long index) {
+    boolean insert(long fingerprint, long index) {
       if (index >= get_num_slots() || num_entries == get_num_slots()) {
         return false;
       }
+      final long run_start = find_run_start(index);
       if (!is_occupied(index)) {
-        return insert_new_run(index, long_fp);
+        return insert_fingerprint_and_push_all_else(fingerprint, run_start, 
index, true, true);
       }
-      long run_start_index = find_run_start(index);
-      final long found_index = find_first_fingerprint_in_run(run_start_index, 
long_fp);
-      if (found_index > -1) {
+      final long found_index = find_first_fingerprint_in_run(run_start, 
fingerprint);
+      if (found_index >= 0) {
         return false;
       }
-      return insert_fingerprint_and_push_all_else(long_fp, run_start_index, 
true);
+      return insert_fingerprint_and_push_all_else(fingerprint, ~found_index, 
index, false, ~found_index == run_start);
     }
 
-    // insert a fingerprint as the last fingerprint of the run and push all 
other entries in the cluster to the right.
-    boolean insert_fingerprint_and_push_all_else(long long_fp, long 
run_start_index, boolean is_same_run) {
-        long current_index = run_start_index;
-        boolean is_this_slot_empty;
-        boolean temp_continuation = false;
-
-        do {
-            is_this_slot_empty = is_slot_empty(current_index);
-            if (current_index != run_start_index) {
-                set_shifted(current_index, true);
-            }
-            if (current_index != run_start_index && is_same_run && 
!is_continuation(current_index)) {
-              is_same_run = false;
-                set_continuation(current_index, true);
-                long_fp = swap_fingerprints(current_index, long_fp);
-            }
-            else if (!is_same_run) {
-                boolean current_continuation = is_continuation(current_index);
-                set_continuation(current_index, temp_continuation);
-                temp_continuation = current_continuation;
-                long_fp = swap_fingerprints(current_index, long_fp);
-            }
-            current_index = (current_index + 1) & getMask();
-        } while (!is_this_slot_empty);
-        num_entries++;
-        return true;
+    boolean insert_fingerprint_and_push_all_else(long fingerprint, long index, 
long canonical, boolean is_new_run, boolean is_run_start) {
+      boolean existing_is_continuation = is_continuation(index);
+      boolean is_continuation = !is_run_start;
+      boolean is_shifted = index != canonical;
+      boolean force_continuation = !is_new_run && is_run_start;

Review Comment:
   Can you add a comment explaining why the `force_continuation` flag is 
necessary?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to