This is an automated email from the ASF dual-hosted git repository. mhubail pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 780fa9e42e48bf99b525e24cf7ee524db0068d70 Author: Vijay Sarathy <[email protected]> AuthorDate: Tue Jul 23 19:00:31 2024 -0700 [ASTERIXDB-3445][COMP] Hash join picked by CBO in the presence of non-equality join predicates Ext-ref: MB-62489 Change-Id: I242700b7bd3a30ffddfc16cfb07901a35c417ba6 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18517 Tested-by: Jenkins <[email protected]> Reviewed-by: Vijay Sarathy <[email protected]> Reviewed-by: Michael Blow <[email protected]> Integration-Tests: Michael Blow <[email protected]> --- .../asterix/optimizer/rules/cbo/JoinEnum.java | 13 ++- .../asterix/optimizer/rules/cbo/JoinNode.java | 4 +- .../optimizerts/results_cbo/ch2/ch2_q7.plan | 126 +++++++++++---------- 3 files changed, 74 insertions(+), 69 deletions(-) diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java index 4fa4873c5b..479e395d60 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinEnum.java @@ -285,7 +285,7 @@ public class JoinEnum { return andExpr; } - protected ILogicalExpression getHashJoinExpr(List<Integer> newJoinConditions) { + protected ILogicalExpression getHashJoinExpr(List<Integer> newJoinConditions, boolean outerJoin) { if (newJoinConditions.size() == 0) { // this is a cartesian product return ConstantExpression.TRUE; @@ -300,15 +300,18 @@ public class JoinEnum { ScalarFunctionCallExpression andExpr = new ScalarFunctionCallExpression( BuiltinFunctions.getBuiltinFunctionInfo(AlgebricksBuiltinFunctions.AND)); - // All the join predicates need to be equality predicates for a hash join to be possible. + // at least one equality predicate needs to be present for a hash join to be possible. boolean eqPredFound = false; for (int joinNum : newJoinConditions) { // need to AND all the expressions. JoinCondition jc = joinConditions.get(joinNum); - if (jc.comparisonType != JoinCondition.comparisonOp.OP_EQ) { + if (jc.comparisonType == JoinCondition.comparisonOp.OP_EQ) { + eqPredFound = true; + } else if (outerJoin) { + // For outer joins, non-eq predicates cannot be pulled up and applied after the + // join, so a hash join will not be possible. return null; } - eqPredFound = true; andExpr.getArguments().add(new MutableObject<>(jc.joinCondition)); } // return null if no equality predicates were found @@ -1150,4 +1153,4 @@ public class JoinEnum { PhysicalOptimizationConfig physOptConfig = context.getPhysicalOptimizationConfig(); return physOptConfig.getQueryPlanShapeMode(); } -} \ No newline at end of file +} diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java index b946ba8cdc..d6ea457f85 100644 --- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java +++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/cbo/JoinNode.java @@ -1234,9 +1234,9 @@ public class JoinNode { return; } } - ILogicalExpression hashJoinExpr = joinEnum.getHashJoinExpr(newJoinConditions); - ILogicalExpression nestedLoopJoinExpr = joinEnum.getNestedLoopJoinExpr(newJoinConditions); boolean outerJoin = joinEnum.lookForOuterJoins(newJoinConditions); + ILogicalExpression hashJoinExpr = joinEnum.getHashJoinExpr(newJoinConditions, outerJoin); + ILogicalExpression nestedLoopJoinExpr = joinEnum.getNestedLoopJoinExpr(newJoinConditions); double current_card = this.cardinality; if (current_card >= Cost.MAX_CARD) { diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan index 9640d4dff4..0eaee70941 100644 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/results_cbo/ch2/ch2_q7.plan @@ -18,85 +18,87 @@ -- STREAM_PROJECT |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$303, $$326][$$277, $$300] |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- HYBRID_HASH_JOIN [$$288][$$304] |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- NESTED_LOOP |PARTITIONED| + -- HYBRID_HASH_JOIN [$$277][$$303] |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$288][$$304] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$288] |PARTITIONED| + -- HYBRID_HASH_JOIN [$$326][$$300] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$326] |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$291, $$293, $$295][$$305, $$306, $$307] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$291, $$293, $$295] |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- HYBRID_HASH_JOIN [$$275, $$276][$$310, $$311] |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$275, $$276] |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- HYBRID_HASH_JOIN [$$291, $$293, $$295][$$305, $$306, $$307] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$291, $$293, $$295] |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- HYBRID_HASH_JOIN [$$275, $$276][$$310, $$311] |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$275, $$276] |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.stock) |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$310, $$311] |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- STREAM_SELECT |PARTITIONED| + -- DATASOURCE_SCAN (test.stock) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$310, $$311] |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- STREAM_SELECT |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- UNNEST |PARTITIONED| + -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| + -- UNNEST |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.orders) |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$305, $$306, $$307] |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- DATASOURCE_SCAN (test.orders) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$305, $$306, $$307] |PARTITIONED| -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.customer) |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- HASH_PARTITION_EXCHANGE [$$304] |PARTITIONED| - -- REPLICATE |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.customer) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- HASH_PARTITION_EXCHANGE [$$300] |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.supplier) |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.nation) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- REPLICATE |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.nation) |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| - -- BROADCAST_EXCHANGE |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ASSIGN |PARTITIONED| - -- STREAM_PROJECT |PARTITIONED| - -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- DATASOURCE_SCAN (test.supplier) |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| -- ONE_TO_ONE_EXCHANGE |PARTITIONED| - -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.nation) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- REPLICATE |PARTITIONED| + -- BROADCAST_EXCHANGE |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ASSIGN |PARTITIONED| + -- STREAM_PROJECT |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- DATASOURCE_SCAN (test.nation) |PARTITIONED| + -- ONE_TO_ONE_EXCHANGE |PARTITIONED| + -- EMPTY_TUPLE_SOURCE |PARTITIONED|
