This is an automated email from the ASF dual-hosted git repository. boaz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 4c9dd8d0488555863f47c881e8ccbb33c92e64f8 Author: Ben-Zvi <[email protected]> AuthorDate: Mon Jan 7 19:53:49 2019 -0800 Changes following code review comments for DRILL-6880 --- .../physical/impl/common/ChainedHashTable.java | 46 +++++++------ .../physical/impl/common/HashTableTemplate.java | 79 +++++++++++----------- 2 files changed, 65 insertions(+), 60 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java index 9171ad1..baed508 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java @@ -20,6 +20,7 @@ package org.apache.drill.exec.physical.impl.common; import java.io.IOException; import java.util.List; +import com.sun.codemodel.JExpression; import org.apache.drill.common.expression.ErrorCollector; import org.apache.drill.common.expression.ErrorCollectorImpl; import org.apache.drill.common.expression.LogicalExpression; @@ -129,6 +130,8 @@ public class ChainedHashTable { private RecordBatch incomingProbe; private final RecordBatch outgoing; + private enum setupWork {DO_BUILD, DO_PROBE, CHECK_BOTH_NULLS}; + public ChainedHashTable(HashTableConfig htConfig, FragmentContext context, BufferAllocator allocator, RecordBatch incomingBuild, RecordBatch incomingProbe, RecordBatch outgoing) { @@ -216,15 +219,16 @@ public class ChainedHashTable { i++; } - // Only in case of an equi-join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls - // (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to O(n^2) work on that chain.) + // Only in case of a join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls + // (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to useless O(n^2) work on that chain.) + // The logic is: Nulls match on build, and don't match on probe. Note that this logic covers outer joins as well. setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, bothKeysNullHtableMapping, keyExprsBuild, - htConfig.getComparators(), htKeyFieldIds); + htConfig.getComparators(), htKeyFieldIds, setupWork.CHECK_BOTH_NULLS); // generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys() setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild, - htConfig.getComparators(), htKeyFieldIds); + htConfig.getComparators(), htKeyFieldIds, setupWork.DO_BUILD); setupIsKeyMatchInternal(cgInner, KeyMatchIncomingProbeMapping, KeyMatchHtableProbeMapping, keyExprsProbe, - htConfig.getComparators(), htKeyFieldIds); + htConfig.getComparators(), htKeyFieldIds, setupWork.DO_PROBE); setupSetValue(cgInner, keyExprsBuild, htKeyFieldIds); if (outgoing != null) { @@ -235,8 +239,8 @@ public class ChainedHashTable { } setupOutputRecordKeys(cgInner, htKeyFieldIds, outKeyFieldIds); - setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild, false); - setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe, true); + setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingBuildMapping, incomingBuild, keyExprsBuild); + setupGetHash(cg /* use top level code generator for getHash */, GetHashIncomingProbeMapping, incomingProbe, keyExprsProbe); HashTable ht = context.getImplementationClass(top); ht.setup(htConfig, allocator, incomingBuild.getContainer(), incomingProbe, outgoing, htContainerOrig, context, cgInner); @@ -245,15 +249,19 @@ public class ChainedHashTable { } private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping, - LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds) { + LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds, setupWork work) { + + boolean checkIfBothNulls = work == setupWork.CHECK_BOTH_NULLS; - boolean isProbe = incomingMapping == KeyMatchIncomingProbeMapping; - boolean areBothNulls = incomingMapping == bothKeysNullIncomingBuildMapping; + // Regular key matching may return false in the middle (i.e., some pair of columns did not match), and true only if all matched; + // but "both nulls" check returns the opposite logic (i.e., true when one pair of nulls is found, need check no more) + JExpression midPointResult = checkIfBothNulls ? JExpr.TRUE : JExpr.FALSE; + JExpression finalResult = checkIfBothNulls ? JExpr.FALSE : JExpr.TRUE; cg.setMappingSet(incomingMapping); if (keyExprs == null || keyExprs.length == 0 || - areBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join + checkIfBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join cg.getEvalBlock()._return(JExpr.FALSE); return; } @@ -269,16 +277,16 @@ public class ChainedHashTable { JConditional jc; - if ( areBothNulls || isProbe ) { - // codegen for the special case when both columns are null (i.e., return early - as equal or not) + if ( work != setupWork.DO_BUILD ) { // BUILD runs this logic in a separate method - areBothKeysNull() + // codegen for the special case when both columns are null (i.e., return early with midPointResult) if (comparators.get(i) == Comparator.EQUALS && left.isOptional() && right.isOptional()) { jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)). cand(right.getIsSet().eq(JExpr.lit(0)))); - jc._then()._return( areBothNulls ? JExpr.TRUE : JExpr.FALSE); + jc._then()._return(midPointResult); } } - if ( ! areBothNulls ) { + if ( ! checkIfBothNulls ) { // generate comparison code (at least one of the two columns' values is non-null) final LogicalExpression f = FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry()); HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE); @@ -286,12 +294,12 @@ public class ChainedHashTable { // check if two values are not equal (comparator result != 0) jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); - jc._then()._return(JExpr.FALSE); + jc._then()._return(midPointResult); } } // All key expressions compared equal, so return TRUE - cg.getEvalBlock()._return( areBothNulls ? JExpr.FALSE : JExpr.TRUE); + cg.getEvalBlock()._return(finalResult); } private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs, @@ -321,8 +329,8 @@ public class ChainedHashTable { } } - private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs, - boolean isProbe) throws SchemaChangeException { + private void setupGetHash(ClassGenerator<HashTable> cg, MappingSet incomingMapping, VectorAccessible batch, LogicalExpression[] keyExprs) + throws SchemaChangeException { cg.setMappingSet(incomingMapping); diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java index e123896..a064449 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java @@ -87,9 +87,6 @@ public abstract class HashTableTemplate implements HashTable { // current available (free) slot globally across all batch holders private int freeIndex = 0; - // Placeholder for the current index while probing the hash table - private IndexPointer currentIdxHolder; - private BufferAllocator allocator; // The incoming build side record batch @@ -205,39 +202,38 @@ public abstract class HashTableTemplate implements HashTable { setupInterior(incomingBuild, incomingProbe, outgoing, htContainer); } - // Check if the key at the currentIdx position in hash table matches the key - // at the incomingRowIdx. if the key does not match, update the - // currentIdxHolder with the index of the next link. + // Check if the key at the current Index position in hash table matches the key + // at the incomingRowIdx. private boolean isKeyMatch(int incomingRowIdx, - IndexPointer currentIdxHolder, + int currentIndex, boolean isProbe) throws SchemaChangeException { - int currentIdxWithinBatch = currentIdxHolder.value & BATCH_MASK; - boolean match; + int currentIdxWithinBatch = currentIndex & BATCH_MASK; - if (currentIdxWithinBatch >= batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount()) { + if (currentIdxWithinBatch >= batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount()) { logger.debug("Batch size = {}, incomingRowIdx = {}, currentIdxWithinBatch = {}.", - batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch); + batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount(), incomingRowIdx, currentIdxWithinBatch); } - assert (currentIdxWithinBatch < batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK).getTargetBatchRowCount()); + assert (currentIdxWithinBatch < batchHolders.get((currentIndex >>> 16) & BATCH_MASK).getTargetBatchRowCount()); assert (incomingRowIdx < HashTable.BATCH_SIZE); if (isProbe) { - match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch); - } else { - // in case (of a hash-join only) where both the new incoming key and the current are null, treat them as - // a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never - // be used (not putting it into the helper would take a bigger code change, and some performance cost, hence - // not worth it). In the past such a new null key was added into the Hash-Table (i.e., no match), which - // created long costly chains - SEE DRILL-6880) - if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; } - - match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch); + return isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch); } - if (!match) { - currentIdxHolder.value = links.getAccessor().get(currentIdxWithinBatch); - } - return match; + // in case of a hash-join build, where both the new incoming key and the current key are null, treat them as + // a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never + // be used (not putting it into the helper would take a bigger code change, and some performance cost, hence + // not worth it). In the past such a new null key was added into the Hash-Table (i.e., no match), which + // created long costly chains - SEE DRILL-6880) + if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; } + + return isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch); + } + + // This method should only be called following a "false" return from isKeyMatch() + // It returns the next index in the hash chain (if any) so that next call to isKeyMatch() would compare the next link + private int nextLinkInHashChain(int currentIndex) { + return links.getAccessor().get(currentIndex & BATCH_MASK); } // Insert a new <key1, key2...keyN> entry coming from the incoming batch into the hash table @@ -524,7 +520,6 @@ public abstract class HashTableTemplate implements HashTable { throw new IllegalStateException("Unexpected schema change", e); } - currentIdxHolder = new IndexPointer(); } @Override @@ -674,16 +669,15 @@ public abstract class HashTableTemplate implements HashTable { // if startIdx is non-empty, follow the hash chain links until we find a matching // key or reach the end of the chain (and remember the last link there) - for ( currentIdxHolder.value = startIdx; - currentIdxHolder.value != EMPTY_SLOT; - /* isKeyMatch() below also advances the currentIdxHolder to the next link */) { - + for ( int currentIndex = startIdx; + currentIndex != EMPTY_SLOT; + currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) { // remember the current link, which would be the last when the next link is empty - lastEntryBatch = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK); - lastEntryIdxWithinBatch = currentIdxHolder.value & BATCH_MASK; + lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK); + lastEntryIdxWithinBatch = currentIndex & BATCH_MASK; - if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIdxHolder, false)) { - htIdxHolder.value = currentIdxHolder.value; + if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, false)) { + htIdxHolder.value = currentIndex; return PutStatus.KEY_PRESENT; } } @@ -751,12 +745,15 @@ public abstract class HashTableTemplate implements HashTable { @Override public int probeForKey(int incomingRowIdx, int hashCode) throws SchemaChangeException { int bucketIndex = getBucketIndex(hashCode, numBuckets()); - - for ( currentIdxHolder.value = startIndices.getAccessor().get(bucketIndex); - currentIdxHolder.value != EMPTY_SLOT; ) { - BatchHolder bh = batchHolders.get((currentIdxHolder.value >>> 16) & BATCH_MASK); - if (bh.isKeyMatch(incomingRowIdx, currentIdxHolder, true /* isProbe */)) { - return currentIdxHolder.value; + int startIdx = startIndices.getAccessor().get(bucketIndex); + BatchHolder lastEntryBatch = null; + + for ( int currentIndex = startIdx; + currentIndex != EMPTY_SLOT; + currentIndex = lastEntryBatch.nextLinkInHashChain(currentIndex)) { + lastEntryBatch = batchHolders.get((currentIndex >>> 16) & BATCH_MASK); + if (lastEntryBatch.isKeyMatch(incomingRowIdx, currentIndex, true /* isProbe */)) { + return currentIndex; } } return -1;
