leerho commented on a change in pull request #319:
URL: 
https://github.com/apache/incubator-datasketches-java/pull/319#discussion_r433405011



##########
File path: src/main/java/org/apache/datasketches/tuple/Intersection.java
##########
@@ -19,98 +19,176 @@
 
 package org.apache.datasketches.tuple;
 
+import static java.lang.Math.ceil;
+import static java.lang.Math.max;
 import static java.lang.Math.min;
+import static org.apache.datasketches.HashOperations.hashInsertOnly;
+import static org.apache.datasketches.HashOperations.hashSearch;
+import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
+import static org.apache.datasketches.Util.ceilingPowerOf2;
 
 import java.lang.reflect.Array;
 
-import org.apache.datasketches.ResizeFactor;
 import org.apache.datasketches.SketchesStateException;
 
+
 /**
  * Computes an intersection of two or more generic tuple sketches.
- * A new instance represents the Universal Set.
- * Every update() computes an intersection with the internal set
- * and can only reduce the internal set.
+ * A new instance represents the Universal Set. Because the Universal Set
+ * cannot be realized a <i>getResult()</i> on a new instance will produce an 
error.
+ * Every update() computes an intersection with the internal state, which will 
never
+ * grow larger and may be reduced to zero.
+ *
  * @param <S> Type of Summary
  */
+@SuppressWarnings("unchecked")
 public class Intersection<S extends Summary> {
-
   private final SummarySetOperations<S> summarySetOps_;
-  private QuickSelectSketch<S> sketch_;
-  private boolean isEmpty_;
-  private long theta_;
-  private boolean isFirstCall_;
+  //private QuickSelectSketch<S> sketch_;
+  private boolean empty_;
+  private long thetaLong_;
+  private HashTables hashTables_;
+  private boolean firstCall_;
 
   /**
    * Creates new instance
    * @param summarySetOps instance of SummarySetOperations
    */
   public Intersection(final SummarySetOperations<S> summarySetOps) {
     summarySetOps_ = summarySetOps;
-    isEmpty_ = false; // universal set at the start
-    theta_ = Long.MAX_VALUE;
-    isFirstCall_ = true;
+    empty_ = false; // universal set at the start
+    thetaLong_ = Long.MAX_VALUE;
+    hashTables_ = new HashTables();
+    firstCall_ = true;
   }
 
   /**
-   * Updates the internal set by intersecting it with the given sketch
-   * @param sketchIn input sketch to intersect with the internal set
+   * Updates the internal state by intersecting it with the given sketch
+   * @param sketchIn input sketch to intersect with the internal state
    */
-  @SuppressWarnings({ "unchecked", "null" })
   public void update(final Sketch<S> sketchIn) {
-    final boolean isFirstCall = isFirstCall_;
-    isFirstCall_ = false;
+    final boolean firstCall = firstCall_;
+    firstCall_ = false;
     if (sketchIn == null) {
-      isEmpty_ = true;
-      sketch_ = null;
+      empty_ = true;
+      thetaLong_ = Long.MAX_VALUE;
+      hashTables_.clear();
       return;
     }
-    theta_ = min(theta_, sketchIn.getThetaLong());
-    isEmpty_ |= sketchIn.isEmpty();
-    if (isEmpty_ || (sketchIn.getRetainedEntries() == 0)) {
-      sketch_ = null;
+    // input sketch is not null, could be first or next call
+    final long thetaLongIn = sketchIn.getThetaLong();
+    final int countIn = sketchIn.getRetainedEntries();
+    thetaLong_ = min(thetaLong_, thetaLongIn); //Theta rule
+    // Empty rule extended in case incoming sketch does not have empty bit 
properly set
+    empty_ |= (countIn == 0) && (thetaLongIn == Long.MAX_VALUE);
+    if (countIn == 0) {
+      hashTables_.clear();
       return;
     }
-    // assumes that constructor of QuickSelectSketch bumps the requested size 
up to the nearest power of 2
-    if (isFirstCall) {
-      sketch_ = new QuickSelectSketch<>(sketchIn.getRetainedEntries(), 
ResizeFactor.X1.lg(), null);
-      final SketchIterator<S> it = sketchIn.iterator();
-      while (it.next()) {
-        final S summary = (S)it.getSummary().copy();
-        sketch_.insert(it.getKey(), summary);
-      }
-    } else {
-      if (sketch_ == null) {
+    // input sketch will have valid entries > 0
+
+    if (firstCall) {
+      final Sketch<S> firstSketch = sketchIn;
+      //Copy firstSketch data into local instance hashTables_
+      hashTables_.fromSketch(firstSketch);
+    }
+
+    //Next Call
+    else {
+      if (hashTables_.count_ == 0) {
         return;
       }
-      final int matchSize = min(sketch_.getRetainedEntries(), 
sketchIn.getRetainedEntries());
-      final long[] matchKeys = new long[matchSize];
+      final Sketch<S> nextSketch = sketchIn;
+      //Match nextSketch data with local instance data, filtering by theta
+      final int maxMatchSize = min(hashTables_.count_, 
nextSketch.getRetainedEntries());
+
+      final long[] matchKeys = new long[maxMatchSize];
       S[] matchSummaries = null;
       int matchCount = 0;
-      final SketchIterator<S> it = sketchIn.iterator();
+
+      final SketchIterator<S> it = nextSketch.iterator();
       while (it.next()) {
-        final S summary = sketch_.find(it.getKey());
-        if (summary != null) {
-          matchKeys[matchCount] = it.getKey();
-          if (matchSummaries == null) {
-            matchSummaries = (S[]) Array.newInstance(summary.getClass(), 
matchSize);
-          }
-          matchSummaries[matchCount] =
-              summarySetOps_.intersection(summary, it.getSummary());
-          matchCount++;
+        final long key = it.getKey();
+        if (key >= thetaLong_) { continue; }
+        final int index = hashSearch(hashTables_.keys_, 
hashTables_.lgTableSize_, key);
+        if (index < 0) { continue; }
+        //Copy the intersecting items from local hashTables_
+        // sequentially into local matchKeys_ and matchSummaries_
+        final S mySummary = hashTables_.summaries_[index];
+
+        if (matchSummaries == null) {
+          matchSummaries = (S[]) Array.newInstance(mySummary.getClass(), 
maxMatchSize);
         }
+        matchKeys[matchCount] = key;
+        matchSummaries[matchCount] = summarySetOps_.intersection(mySummary, 
it.getSummary());
+        matchCount++;
+      }
+      hashTables_.fromArrays(matchKeys, matchSummaries, matchCount);
+    }
+  }
+
+  /**
+   * Updates the internal set by intersecting it with the given Theta sketch
+   * @param sketchIn input Theta Sketch to intersect with the internal state.
+   * @param summary the given proxy summary for the Theta Sketch, which 
doesn't have one.
+   */
+  public void update(final org.apache.datasketches.theta.Sketch sketchIn, 
final S summary) {

Review comment:
       This is a good catch!  Thank you. Will Fix.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to