This is an automated email from the ASF dual-hosted git repository. boaz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/drill.git
commit 4bc562361b5d178a15f42b46b91bd635805b1d88 Author: Ben-Zvi <[email protected]> AuthorDate: Fri Jan 4 20:14:02 2019 -0800 DRILL-6880: For Hash-Join hash-table build - treat null keys as an equal match --- .../physical/impl/common/ChainedHashTable.java | 49 +++++++++++++++------- .../physical/impl/common/HashTableTemplate.java | 13 ++++++ 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java index dcdac95..9171ad1 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/ChainedHashTable.java @@ -63,6 +63,10 @@ public class ChainedHashTable { GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalBuild" /* eval method */, null /* reset */, null /* cleanup */); + private static final GeneratorMapping BOTH_KEYS_NULL = + GeneratorMapping.create("setupInterior" /* setup method */, "areBothKeysNull" /* eval method */, + null /* reset */, null /* cleanup */); + private static final GeneratorMapping KEY_MATCH_PROBE = GeneratorMapping.create("setupInterior" /* setup method */, "isKeyMatchInternalProbe" /* eval method */, null /* reset */, null /* cleanup */); @@ -95,10 +99,14 @@ public class ChainedHashTable { private final MappingSet KeyMatchIncomingBuildMapping = new MappingSet("incomingRowIdx", null, "incomingBuild", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD); + private final MappingSet bothKeysNullIncomingBuildMapping = + new MappingSet("incomingRowIdx", null, "incomingBuild", null, SETUP_INTERIOR_CONSTANT, BOTH_KEYS_NULL); private final MappingSet KeyMatchIncomingProbeMapping = new MappingSet("incomingRowIdx", null, "incomingProbe", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE); private final MappingSet KeyMatchHtableMapping = new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_BUILD); + private final MappingSet bothKeysNullHtableMapping = + new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, BOTH_KEYS_NULL); private final MappingSet KeyMatchHtableProbeMapping = new MappingSet("htRowIdx", null, "htContainer", null, SETUP_INTERIOR_CONSTANT, KEY_MATCH_PROBE); private final MappingSet GetHashIncomingBuildMapping = @@ -208,7 +216,10 @@ public class ChainedHashTable { i++; } - + // Only in case of an equi-join: Generate a special method to check if both the new key and the existing key (in this HT bucket) are nulls + // (used by Hash-Join to avoid creating a long hash-table chain of null keys, which can lead to O(n^2) work on that chain.) + setupIsKeyMatchInternal(cgInner, bothKeysNullIncomingBuildMapping, bothKeysNullHtableMapping, keyExprsBuild, + htConfig.getComparators(), htKeyFieldIds); // generate code for isKeyMatch(), setValue(), getHash() and outputRecordKeys() setupIsKeyMatchInternal(cgInner, KeyMatchIncomingBuildMapping, KeyMatchHtableMapping, keyExprsBuild, htConfig.getComparators(), htKeyFieldIds); @@ -235,9 +246,14 @@ public class ChainedHashTable { private void setupIsKeyMatchInternal(ClassGenerator<HashTable> cg, MappingSet incomingMapping, MappingSet htableMapping, LogicalExpression[] keyExprs, List<Comparator> comparators, TypedFieldId[] htKeyFieldIds) { + + boolean isProbe = incomingMapping == KeyMatchIncomingProbeMapping; + boolean areBothNulls = incomingMapping == bothKeysNullIncomingBuildMapping; + cg.setMappingSet(incomingMapping); - if (keyExprs == null || keyExprs.length == 0) { + if (keyExprs == null || keyExprs.length == 0 || + areBothNulls && ! comparators.contains(Comparator.EQUALS)) { // e.g. for Hash-Aggr, or non-equi join cg.getEvalBlock()._return(JExpr.FALSE); return; } @@ -253,28 +269,29 @@ public class ChainedHashTable { JConditional jc; - // codegen for nullable columns if nulls are not equal - if (comparators.get(i) == Comparator.EQUALS - && left.isOptional() && right.isOptional()) { - jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)). + if ( areBothNulls || isProbe ) { + // codegen for the special case when both columns are null (i.e., return early - as equal or not) + if (comparators.get(i) == Comparator.EQUALS + && left.isOptional() && right.isOptional()) { + jc = cg.getEvalBlock()._if(left.getIsSet().eq(JExpr.lit(0)). cand(right.getIsSet().eq(JExpr.lit(0)))); - jc._then()._return(JExpr.FALSE); + jc._then()._return( areBothNulls ? JExpr.TRUE : JExpr.FALSE); + } } + if ( ! areBothNulls ) { + final LogicalExpression f = FunctionGenerationHelper.getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry()); - final LogicalExpression f = - FunctionGenerationHelper - .getOrderingComparatorNullsHigh(left, right, context.getFunctionRegistry()); - - HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE); + HoldingContainer out = cg.addExpr(f, ClassGenerator.BlkCreateMode.FALSE); - // check if two values are not equal (comparator result != 0) - jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); + // check if two values are not equal (comparator result != 0) + jc = cg.getEvalBlock()._if(out.getValue().ne(JExpr.lit(0))); - jc._then()._return(JExpr.FALSE); + jc._then()._return(JExpr.FALSE); + } } // All key expressions compared equal, so return TRUE - cg.getEvalBlock()._return(JExpr.TRUE); + cg.getEvalBlock()._return( areBothNulls ? JExpr.FALSE : JExpr.TRUE); } private void setupSetValue(ClassGenerator<HashTable> cg, LogicalExpression[] keyExprs, diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java index 25ada28..e123896 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/common/HashTableTemplate.java @@ -224,6 +224,13 @@ public abstract class HashTableTemplate implements HashTable { if (isProbe) { match = isKeyMatchInternalProbe(incomingRowIdx, currentIdxWithinBatch); } else { + // in case (of a hash-join only) where both the new incoming key and the current are null, treat them as + // a match; i.e. the new would be added into the helper (but not the Hash-Table !), though it would never + // be used (not putting it into the helper would take a bigger code change, and some performance cost, hence + // not worth it). In the past such a new null key was added into the Hash-Table (i.e., no match), which + // created long costly chains - SEE DRILL-6880) + if ( areBothKeysNull(incomingRowIdx, currentIdxWithinBatch) ) { return true; } + match = isKeyMatchInternalBuild(incomingRowIdx, currentIdxWithinBatch); } @@ -424,6 +431,12 @@ public abstract class HashTableTemplate implements HashTable { } @RuntimeOverridden + protected boolean areBothKeysNull( + @Named("incomingRowIdx") int incomingRowIdx, @Named("htRowIdx") int htRowIdx) throws SchemaChangeException { + return false; + } + + @RuntimeOverridden protected boolean isKeyMatchInternalProbe( @Named("incomingRowIdx") int incomingRowIdx, @Named("htRowIdx") int htRowIdx) throws SchemaChangeException { return false;
