Ben-Zvi commented on a change in pull request #1408: DRILL-6453: Resolve 
deadlock when reading from build and probe sides simultaneously in HashJoin
URL: https://github.com/apache/drill/pull/1408#discussion_r206742825
 
 

 ##########
 File path: 
exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinBatch.java
 ##########
 @@ -248,32 +254,54 @@ protected void buildSchema() throws 
SchemaChangeException {
     }
   }
 
-  @Override
-  protected boolean prefetchFirstBatchFromBothSides() {
-    if (leftUpstream != IterOutcome.NONE) {
-      // We can only get data if there is data available
-      leftUpstream = sniffNonEmptyBatch(leftUpstream, LEFT_INDEX, left);
-    }
-
+  private void prefetchFirstBuildBatch() {
     if (rightUpstream != IterOutcome.NONE) {
       // We can only get data if there is data available
       rightUpstream = sniffNonEmptyBatch(rightUpstream, RIGHT_INDEX, right);
     }
 
     buildSideIsEmpty = rightUpstream == IterOutcome.NONE;
 
-    if (verifyOutcomeToSetBatchState(leftUpstream, rightUpstream)) {
+    if (rightUpstream == IterOutcome.OUT_OF_MEMORY) {
+      // We reached a termination state
+      state = BatchState.OUT_OF_MEMORY;
+    } else if (rightUpstream == IterOutcome.STOP) {
+      state = BatchState.STOP;
+    } else {
       // For build side, use aggregate i.e. average row width across batches
-      batchMemoryManager.update(LEFT_INDEX, 0);
       batchMemoryManager.update(RIGHT_INDEX, 0, true);
-
-      logger.debug("BATCH_STATS, incoming left: {}", 
batchMemoryManager.getRecordBatchSizer(LEFT_INDEX));
       logger.debug("BATCH_STATS, incoming right: {}", 
batchMemoryManager.getRecordBatchSizer(RIGHT_INDEX));
 
       // Got our first batche(s)
       state = BatchState.FIRST;
+    }
+  }
+
+  /**
+   *
+   * @return True terminate. False continue.
+   */
+  private boolean prefetchFirstProbeBatch() {
 
 Review comment:
   To reduce code duplication, we could have a generic "prefetch first" method 
that combines this method with *prefetchFirstBuildBatch()* and returns the 
upstream outcome; the caller would then set the "empty" boolean after the call.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to