This is an automated email from the ASF dual-hosted git repository.

boaz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 4c9dd8d0488555863f47c881e8ccbb33c92e64f8
Author: Ben-Zvi <[email protected]>
AuthorDate: Mon Jan 7 19:53:49 2019 -0800

    Changes following code review comments for DRILL-6880
---
 .../physical/impl/common/ChainedHashTable.java     | 46 +++++++------
 .../physical/impl/common/HashTableTemplate.java    | 79 +++++++++++-----------
 2 files changed, 65 insertions(+), 60 deletions(-)

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
index 9171ad1..baed508 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java
@@ -20,6 +20,7 @@ package org.apache.drill.exec.physical.impl.common;
 import java.io.IOException;
 import java.util.List;
 
+import com.sun.codemodel.JExpression;
 import org.apache.drill.common.expression.ErrorCollector;
 import org.apache.drill.common.expression.ErrorCollectorImpl;
 import org.apache.drill.common.expression.LogicalExpression;
@@ -129,6 +130,8 @@ public class ChainedHashTable {
   private RecordBatch incomingProbe;
   private final RecordBatch outgoing;
 
+  private enum setupWork {DO_BUILD, DO_PROBE, CHECK_BOTH_NULLS};
+
   public ChainedHashTable(HashTableConfig htConfig, FragmentContext context, 
BufferAllocator allocator,
                           RecordBatch incomingBuild, RecordBatch 
incomingProbe, RecordBatch outgoing) {
 
@@ -216,15 +219,16 @@ public class ChainedHashTable {
       i++;
     }
 
-    // Only in case of an equi-join: Generate a special method to check if 
both the new key and the existing key (in this HT bucket) are nulls
-    // (used by Hash-Join to avoid creating a long hash-table chain of null 
keys, which can lead to O(n^2) work on that chain.)
+    // Only in case of a join: Generate a special method to check if both the 
new key and the existing key (in this HT bucket) are nulls
+    // (used by Hash-Join to avoid creating a long hash-table chain of null 
keys, which can lead to useless O(n^2) work on that chain.)
+    // The logic is: Nulls match on build, and don't match on probe. Note that 
this logic covers outer joins as well.
     setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, 
bothKeysNullHtableMapping, keyExprsBuild,
-        htConfig.getComparators(), htKeyFieldIds);
+        htConfig.getComparators(), htKeyFieldIds, setupWork.CHECK_BOTH_NULLS);
     // generate code for isKeyMatch(), setValue(), getHash() and 
outputRecordKeys()
     setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, 
KeyMatchHtableMapping, keyExprsBuild,
-        htConfig.getComparators(), htKeyFieldIds);
+        htConfig.getComparators(), htKeyFieldIds, setupWork.DO_BUILD);
     setupIsKeyMatchInternal(cgInner, KeyMatchIncomingProbeMapping, 
KeyMatchHtableProbeMapping, keyExprsProbe,
-        htConfig.getComparators(), htKeyFieldIds);
+        htConfig.getComparators(), htKeyFieldIds, setupWork.DO_PROBE);
 
     setupSetValue(cgInner, keyExprsBuild, htKeyFieldIds);
     if (outgoing != null) {
@@ -235,8 +239,8 @@ public class ChainedHashTable {
     }
     setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds);
 
-    setupGetHash(cg /* use top level code generator for getHash */, 
GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild, false);
-    setupGetHash(cg /* use top level code generator for getHash */, 
GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe, true);
+    setupGetHash(cg /* use top level code generator for getHash */, 
GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild);
+    setupGetHash(cg /* use top level code generator for getHash */, 
GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe);
 
     HashTable ht = context.getImplementationClass(top);
     ht.setup(htConfig, allocator, incomingBuild.getContainer(), incomingProbe, 
outgoing, htContainerOrig, context, cgInner);
@@ -245,15 +249,19 @@ public class ChainedHashTable {
   }
 
   private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, 
MappingSet incomingMapping, MappingSet htableMapping,
-      LogicalExpression[] keyExprs, List<Comparator> comparators, 
TypedFieldId[] htKeyFieldIds) {
+      LogicalExpression[] keyExprs, List<Comparator> comparators, 
TypedFieldId[] htKeyFieldIds, setupWork work) {
+
+    boolean checkIfBothNulls = work == setupWork.CHECK_BOTH_NULLS;
 
-    boolean isProbe = incomingMapping == KeyMatchIncomingProbeMapping;
-    boolean areBothNulls = incomingMapping == bothKeysNullIncomingBuildMapping;
+    // Regular key matching may return false in the middle (i.e., some pair of 
columns did not match), and true only if all matched;
+    // but "both nulls" check returns the opposite logic (i.e., true when one 
pair of nulls is found, need check no more)
+    JExpression midPointResult = checkIfBothNulls ? JExpr.TRUE : JExpr.FALSE;
+    JExpression finalResult = checkIfBothNulls ? JExpr.FALSE : JExpr.TRUE;
 
     cg.setMappingSet(incomingMapping);
 
     if (keyExprs == null || keyExprs.length == 0 ||
-        areBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. 
for Hash-Aggr, or non-equi join
+        checkIfBothNulls && ! comparators.contains(Comparator.EQUALS)) { // 
e.g. for Hash-Aggr, or non-equi join
       cg.getEvalBlock()._return(JExpr.FALSE);
       return;
     }
@@ -269,16 +277,16 @@ public class ChainedHashTable {
 
       JConditional jc;
 
-      if ( areBothNulls || isProbe ) {
-        // codegen for the special case when both columns are null (i.e., 
return early - as equal or not)
+      if ( work != setupWork.DO_BUILD ) {  // BUILD runs this logic in a 
separate method - areBothKeysNull()
+        // codegen for the special case when both columns are null (i.e., 
return early with midPointResult)
         if (comparators.get(i) == Comparator.EQUALS
             && left.isOptional() && right.isOptional()) {
           jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)).
             cand(right.getIsSet().eq(JExpr.lit(0))));
-          jc._then()._return( areBothNulls ? JExpr.TRUE : JExpr.FALSE);
+          jc._then()._return(midPointResult);
         }
       }
-      if ( ! areBothNulls ) {
+      if ( ! checkIfBothNulls ) { // generate comparison code (at least one of 
the two columns' values is non-null)
         final LogicalExpression f = 
FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, 
context.getFunctionRegistry());
 
         HoldingContainer out = cg.addExpr(f, 
ClassGenerator.BlkCreateMode.FALSE);
@@ -286,12 +294,12 @@ public class ChainedHashTable {
         // check if two values are not equal (comparator result != 0)
         jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0)));
 
-        jc._then()._return(JExpr.FALSE);
+        jc._then()._return(midPointResult);
       }
     }
 
     // All key expressions compared equal, so return TRUE
-    cg.getEvalBlock()._return( areBothNulls ? JExpr.FALSE : JExpr.TRUE);
+    cg.getEvalBlock()._return(finalResult);
   }
 
   private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] 
keyExprs,
@@ -321,8 +329,8 @@ public class ChainedHashTable {
     }
   }
 
-  private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet 
incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs,
-                            boolean isProbe) throws SchemaChangeException {
+  private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet 
incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs)
+    throws SchemaChangeException {
 
     cg.setMappingSet(incomingMapping);
 
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
index e123896..a064449 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java
@@ -87,9 +87,6 @@ public abstract class HashTableTemplate implements HashTable {
   // current available (free) slot globally across all batch holders
   private int freeIndex = 0;
 
-  // Placeholder for the current index while probing the hash table
-  private IndexPointer currentIdxHolder;
-
   private BufferAllocator allocator;
 
   // The incoming build side record batch
@@ -205,39 +202,38 @@ public abstract class HashTableTemplate implements 
HashTable {
       setupInterior(incomingBuild, incomingProbe, outgoing, htContainer);
     }
 
-    // Check if the key at the currentIdx position in hash table matches the 
key
-    // at the incomingRowIdx. if the key does not match, update the
-    // currentIdxHolder with the index of the next link.
+    // Check if the key at the current Index position in hash table matches 
the key
+    // at the incomingRowIdx.
     private boolean isKeyMatch(int incomingRowIdx,
-        IndexPointer currentIdxHolder,
+        int currentIndex,
         boolean isProbe) throws SchemaChangeException {
-      int currentIdxWithinBatch = currentIdxHolder.value & BATCH_MASK;
-      boolean match;
+      int currentIdxWithinBatch = currentIndex & BATCH_MASK;
 
-      if (currentIdxWithinBatch >= batchHolders.get((currentIdxHolder.value 
>>> 16) & BATCH_MASK).getTargetBatchRowCount()) {
+      if (currentIdxWithinBatch >= batchHolders.get((currentIndex >>> 16) & 
BATCH_MASK).getTargetBatchRowCount()) {
         logger.debug("Batch size = {}, incomingRowIdx = {}, 
currentIdxWithinBatch = {}.",
-          batchHolders.get((currentIdxHolder.value >>> 16) & 
BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch);
+          batchHolders.get((currentIndex >>> 16) & 
BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch);
       }
-      assert (currentIdxWithinBatch < batchHolders.get((currentIdxHolder.value 
>>> 16) & BATCH_MASK).getTargetBatchRowCount());
+      assert (currentIdxWithinBatch < batchHolders.get((currentIndex >>> 16) & 
BATCH_MASK).getTargetBatchRowCount());
       assert (incomingRowIdx < HashTable.BATCH_SIZE);
 
       if (isProbe) {
-        match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch);
-      } else {
-        // in case (of a hash-join only) where both the new incoming key and 
the current are null, treat them as
-        // a match; i.e. the new would be added into the helper (but not the 
Hash-Table !), though it would never
-        // be used (not putting it into the helper would take a bigger code 
change, and some performance cost, hence
-        // not worth it).  In the past such a new null key was added into the 
Hash-Table (i.e., no match), which
-        // created long costly chains - SEE DRILL-6880)
-        if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return 
true; }
-
-        match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
+        return isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch);
       }
 
-      if (!match) {
-        currentIdxHolder.value = 
links.getAccessor().get(currentIdxWithinBatch);
-      }
-      return match;
+      // in case of a hash-join build, where both the new incoming key and the 
current key are null, treat them as
+      // a match; i.e. the new would be added into the helper (but not the 
Hash-Table !), though it would never
+      // be used (not putting it into the helper would take a bigger code 
change, and some performance cost, hence
+      // not worth it).  In the past such a new null key was added into the 
Hash-Table (i.e., no match), which
+      // created long costly chains - SEE DRILL-6880)
+      if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return 
true; }
+
+      return isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch);
+    }
+
+    // This method should only be called following a "false" return from 
isKeyMatch()
+    // It returns the next index in the hash chain (if any) so that next call 
to isKeyMatch() would compare the next link
+    private int nextLinkInHashChain(int currentIndex) {
+      return links.getAccessor().get(currentIndex & BATCH_MASK);
     }
 
     // Insert a new <key1, key2...keyN> entry coming from the incoming batch 
into the hash table
@@ -524,7 +520,6 @@ public abstract class HashTableTemplate implements 
HashTable {
       throw new IllegalStateException("Unexpected schema change", e);
     }
 
-    currentIdxHolder = new IndexPointer();
   }
 
   @Override
@@ -674,16 +669,15 @@ public abstract class HashTableTemplate implements 
HashTable {
 
     // if startIdx is non-empty, follow the hash chain links until we find a 
matching
     // key or reach the end of the chain (and remember the last link there)
-    for ( currentIdxHolder.value = startIdx;
-          currentIdxHolder.value != EMPTY_SLOT;
-          /* isKeyMatch() below also advances the currentIdxHolder to the next 
link */) {
-
+    for ( int currentIndex = startIdx;
+          currentIndex != EMPTY_SLOT;
+          currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) {
       // remember the current link, which would be the last when the next link 
is empty
-      lastEntryBatch = batchHolders.get((currentIdxHolder.value >>> 16) & 
BATCH_MASK);
-      lastEntryIdxWithinBatch = currentIdxHolder.value & BATCH_MASK;
+      lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK);
+      lastEntryIdxWithinBatch = currentIndex & BATCH_MASK;
 
-      if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIdxHolder, false)) {
-        htIdxHolder.value = currentIdxHolder.value;
+      if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, false)) {
+        htIdxHolder.value = currentIndex;
         return PutStatus.KEY_PRESENT;
       }
     }
@@ -751,12 +745,15 @@ public abstract class HashTableTemplate implements 
HashTable {
    @Override
   public int probeForKey(int incomingRowIdx, int hashCode) throws 
SchemaChangeException {
     int bucketIndex = getBucketIndex(hashCode, numBuckets());
-
-    for ( currentIdxHolder.value = startIndices.getAccessor().get(bucketIndex);
-          currentIdxHolder.value != EMPTY_SLOT; ) {
-      BatchHolder bh = batchHolders.get((currentIdxHolder.value >>> 16) & 
BATCH_MASK);
-      if (bh.isKeyMatch(incomingRowIdx, currentIdxHolder, true /* isProbe */)) 
{
-        return currentIdxHolder.value;
+     int startIdx = startIndices.getAccessor().get(bucketIndex);
+     BatchHolder lastEntryBatch = null;
+
+     for ( int currentIndex = startIdx;
+           currentIndex != EMPTY_SLOT;
+           currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) {
+      lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK);
+      if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, true /* 
isProbe */)) {
+        return currentIndex;
       }
     }
     return -1;

Reply via email to