This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 13ccaaf HIVE-24087 FK side join elimination in presence of PK-FK
constraint (Vineet Garg, reviewed by Jesus Camacho Rodriguez, Zoltan Haindrich)
13ccaaf is described below
commit 13ccaaf6845422286001fa97eb99370de3f5ca42
Author: Vineet G <[email protected]>
AuthorDate: Tue Sep 1 08:15:25 2020 -0700
HIVE-24087 FK side join elimination in presence of PK-FK constraint (Vineet
Garg, reviewed by Jesus Camacho Rodriguez, Zoltan Haindrich)
---
.../hive/ql/optimizer/calcite/HiveRelOptUtil.java | 14 +-
.../calcite/rules/HiveFilterJoinRule.java | 2 +-
.../rules/HiveFilterProjectTransposeRule.java | 2 +-
.../calcite/rules/HiveJoinConstraintsRule.java | 171 ++++++---
.../calcite/rules/HiveJoinSwapConstraintsRule.java | 4 +-
.../clientpositive/join_constraints_optimization.q | 88 +++++
.../llap/join_constraints_optimization.q.out | 384 +++++++++++++++++++++
7 files changed, 611 insertions(+), 54 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 9d45c6c..2b68f00 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite;
+import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
@@ -75,6 +76,8 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -740,11 +743,16 @@ public class HiveRelOptUtil extends RelOptUtil {
}
public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join,
- boolean leftInputPotentialFK, RelMetadataQuery mq) {
+ final RelNode fkInput, final RelNode nonFkInput,
+ RelMetadataQuery mq) {
final JoinRelType joinType = join.getJoinType();
final RexNode cond = join.getCondition();
- final RelNode fkInput = leftInputPotentialFK ? join.getLeft() :
join.getRight();
- final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() :
join.getLeft();
+
+ Preconditions.checkArgument(fkInput == join.getLeft()
+ || fkInput == join.getRight(), "Invalid input: " + fkInput);
+ Preconditions.checkArgument(nonFkInput == join.getLeft()
+ || nonFkInput == join.getRight(), "Invalid input: " + nonFkInput);
+
final RewritablePKFKJoinInfo nonRewritable =
RewritablePKFKJoinInfo.of(false, null);
// TODO : Need to handle Anti join.
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
index e89523d..8f2deb5 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
@@ -66,7 +66,7 @@ public abstract class HiveFilterJoinRule extends
FilterJoinRule {
public boolean matches(RelOptRuleCall call) {
Join join = call.rel(1);
RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(
- join, true, call.getMetadataQuery());
+ join, join.getLeft(), join.getRight(), call.getMetadataQuery());
if (!joinInfo.rewritable) {
return false;
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index 0198fdb..8a80602 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -105,7 +105,7 @@ public class HiveFilterProjectTransposeRule extends
FilterProjectTransposeRule {
if (call.rels.length > 2) {
final Join joinRel = call.rel(2);
RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(
- joinRel, true, call.getMetadataQuery());
+ joinRel, joinRel.getLeft(), joinRel.getRight(),
call.getMetadataQuery());
if (!joinInfo.rewritable) {
return false;
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
index 0c533e3..47e8715 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
@@ -16,9 +16,6 @@
*/
package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
@@ -46,6 +43,10 @@ import
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFK
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
/**
* The rule can perform two different optimizations.
* 1) Removes a join if it does not alter the cardinality of one of its
inputs.
@@ -95,6 +96,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
// If it is a left outer, left will be the FK side.
// If it is a right outer, right will be the FK side.
final RelNode fkInput;
+ final RelNode nonFkInput;
final ImmutableBitSet topRefs =
RelOptUtil.InputFinder.bits(topProjExprs, null);
final ImmutableBitSet leftBits =
@@ -106,7 +108,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
boolean leftInputPotentialFK = topRefs.intersects(leftBits);
boolean rightInputPotentialFK = topRefs.intersects(rightBits);
if (leftInputPotentialFK && rightInputPotentialFK &&
- (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
+ (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
// Both inputs are referenced. Before making a decision, try to swap
// references in join condition if it is an inner join, i.e. if a join
// condition column is referenced above the join, then we can just
@@ -189,21 +191,24 @@ public class HiveJoinConstraintsRule extends RelOptRule {
switch (joinType) {
case SEMI:
case INNER:
- //case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
+ //case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
if (leftInputPotentialFK && rightInputPotentialFK) {
// Bails out as it references columns from both sides (or no columns)
// and there is nothing to transform
return;
}
fkInput = leftInputPotentialFK ? leftInput : rightInput;
+ nonFkInput = leftInputPotentialFK ? rightInput: leftInput;
mode = Mode.REMOVE;
break;
case LEFT:
fkInput = leftInput;
+ nonFkInput = rightInput;
mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE :
Mode.TRANSFORM;
break;
case RIGHT:
fkInput = rightInput;
+ nonFkInput = leftInput;
mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE :
Mode.TRANSFORM;
break;
default:
@@ -213,61 +218,133 @@ public class HiveJoinConstraintsRule extends RelOptRule {
// 2) Check whether this join can be rewritten or removed
RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(
- join, leftInput == fkInput, call.getMetadataQuery());
+ join, fkInput, nonFkInput, call.getMetadataQuery());
// 3) If it is the only condition, we can trigger the rewriting
if (r.rewritable) {
- List<RexNode> nullableNodes = r.nullableNodes;
- // If we reach here, we trigger the transform
- if (mode == Mode.REMOVE) {
- if (rightInputPotentialFK) {
- // First, if FK is the right input, we need to shift
- nullableNodes = nullableNodes.stream()
- .map(node -> RexUtil.shift(node, 0,
-leftInput.getRowType().getFieldCount()))
- .collect(Collectors.toList());
- topProjExprs = topProjExprs.stream()
- .map(node -> RexUtil.shift(node, 0,
-leftInput.getRowType().getFieldCount()))
- .collect(Collectors.toList());
- }
- // Fix nullability in references to the input node
- topProjExprs = HiveCalciteUtil.fixNullability(rexBuilder,
topProjExprs, RelOptUtil.getFieldTypeList(fkInput.getRowType()));
- // Trigger transformation
- if (nullableNodes.isEmpty()) {
- call.transformTo(call.builder()
- .push(fkInput)
- .project(topProjExprs)
- .convert(project.getRowType(), false)
- .build());
+ rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project,
r.nullableNodes);
+ } else {
+ // check if FK side could be removed instead
+ // Possibly this could be enhanced to take other join type into
consideration.
+ if (joinType != JoinRelType.INNER) {
+ return;
+ }
+
+ //first swap fk and non-fk input and see if we can rewrite them
+ RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(
+ join, nonFkInput, fkInput, call.getMetadataQuery());
+
+ if (fkRemoval.rewritable) {
+ // we have established that nonFkInput is FK, and fkInput is PK
+ // and there is no row filtering on FK side
+ // check that FK side join column is distinct (i.e. have a group by)
+ ImmutableBitSet fkSideBitSet;
+ if (nonFkInput == leftInput) {
+ fkSideBitSet = leftBits;
} else {
- RexNode newFilterCond;
- if (nullableNodes.size() == 1) {
- newFilterCond =
rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNodes.get(0));
- } else {
- List<RexNode> isNotNullConds = new ArrayList<>();
- for (RexNode nullableNode : nullableNodes) {
-
isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
nullableNode));
+ fkSideBitSet = rightBits;
+ }
+
+ ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
+ for (RexNode conj : RelOptUtil.conjunctions(cond)) {
+ if (!conj.isA(SqlKind.EQUALS)) {
+ return;
+ }
+ RexCall eq = (RexCall) conj;
+ RexNode op1 = eq.getOperands().get(0);
+ RexNode op2 = eq.getOperands().get(1);
+ if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
+ // Check references
+ int ref1 = ((RexInputRef) op1).getIndex();
+ int ref2 = ((RexInputRef) op2).getIndex();
+ int leftRef = -1;
+ int rightRef = -1;
+ if (fkSideBitSet.get(ref1)) {
+ // check that join columns are not nullable
+ if (op1.getType().isNullable()) {
+ return;
+ }
+ fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
+ } else {
+ if (op2.getType().isNullable()) {
+ return;
+ }
+ fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
}
- newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND,
isNotNullConds);
}
- call.transformTo(call.builder()
- .push(fkInput)
- .filter(newFilterCond)
- .project(topProjExprs)
- .convert(project.getRowType(), false)
- .build());
}
- } else { // Mode.TRANSFORM
- // Trigger transformation
+
+ if (!call.getMetadataQuery().areColumnsUnique(nonFkInput,
fkJoinColBuilder.build())) {
+ return;
+ }
+
+ // all conditions are met, therefore we can perform rewrite to remove
fk side
+ rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project,
fkRemoval.nullableNodes);
+
+ }
+
+ }
+ }
+
+
+ private void rewrite(final Mode mode, final RelNode inputToKeep, final
RelNode inputToRemove,
+ final Join join, List<RexNode> topProjExprs, RelOptRuleCall call, final
RelNode project, List<RexNode> nullableNodes) {
+ RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+
+ final RelNode leftInput = join.getLeft();
+ final RelNode rightInput = join.getRight();
+
+ // If we reach here, we trigger the transform
+ if (mode == Mode.REMOVE) {
+ if (inputToKeep == join.getRight()) {
+ // First, if FK is the right input, we need to shift
+ nullableNodes = nullableNodes.stream()
+ .map(node -> RexUtil.shift(node, 0,
-leftInput.getRowType().getFieldCount()))
+ .collect(Collectors.toList());
+ topProjExprs = topProjExprs.stream()
+ .map(node -> RexUtil.shift(node, 0,
-leftInput.getRowType().getFieldCount()))
+ .collect(Collectors.toList());
+ }
+ // Fix nullability in references to the input node
+ topProjExprs = HiveCalciteUtil.fixNullability(rexBuilder, topProjExprs,
RelOptUtil.getFieldTypeList(inputToKeep.getRowType()));
+ // Trigger transformation
+ if (nullableNodes.isEmpty()) {
+ call.transformTo(call.builder()
+ .push(inputToKeep)
+ .project(topProjExprs)
+ .convert(project.getRowType(), false)
+ .build());
+ } else {
+ RexNode newFilterCond;
+ if (nullableNodes.size() == 1) {
+ newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
nullableNodes.get(0));
+ } else {
+ List<RexNode> isNotNullConds = new ArrayList<>();
+ for (RexNode nullableNode : nullableNodes) {
+
isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL,
nullableNode));
+ }
+ newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND,
isNotNullConds);
+ }
call.transformTo(call.builder()
- .push(leftInput).push(rightInput)
- .join(JoinRelType.INNER, join.getCondition())
- .convert(call.rel(1).getRowType(), false) // Preserve nullability
- .project(project.getChildExps())
+ .push(inputToKeep)
+ .filter(newFilterCond)
+ .project(topProjExprs)
+ .convert(project.getRowType(), false)
.build());
}
+ } else { // Mode.TRANSFORM
+ // Trigger transformation
+ call.transformTo(call.builder()
+ .push(leftInput).push(rightInput)
+ .join(JoinRelType.INNER, join.getCondition())
+ .convert(call.rel(1).getRowType(), false) // Preserve nullability
+ .project(project.getChildExps())
+ .build());
}
}
+
+
private enum Mode {
// Removes join operator from the plan
REMOVE,
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
index dd2fcbd..419f5b5 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
@@ -78,9 +78,9 @@ public class HiveJoinSwapConstraintsRule extends RelOptRule {
// - If the bottom one is not a non-filtering column appending join,
// we cannot trigger the optimization.
RewritablePKFKJoinInfo topInfo = HiveRelOptUtil.isRewritablePKFKJoin(
- topJoin, true, call.getMetadataQuery());
+ topJoin, topJoin.getLeft(), topJoin.getRight(),
call.getMetadataQuery());
RewritablePKFKJoinInfo bottomInfo = HiveRelOptUtil.isRewritablePKFKJoin(
- bottomJoin, true, call.getMetadataQuery());
+ bottomJoin, bottomJoin.getLeft(), bottomJoin.getRight(),
call.getMetadataQuery());
if (topInfo.rewritable || !bottomInfo.rewritable) {
// Nothing to do
return;
diff --git a/ql/src/test/queries/clientpositive/join_constraints_optimization.q
b/ql/src/test/queries/clientpositive/join_constraints_optimization.q
index 5dbfe05..78159f1 100644
--- a/ql/src/test/queries/clientpositive/join_constraints_optimization.q
+++ b/ql/src/test/queries/clientpositive/join_constraints_optimization.q
@@ -134,3 +134,91 @@ EXPLAIN
SELECT `lo_linenumber`, `c_custkey`
FROM `lineorder_removal_n0`
JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
+
+-- FK-PK JOIN with FK side removal
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey;
+
+-- FK-PK JOIN with FK side removal
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey, sum(lo_discount) sm
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0;
+
+-- FK-PK JOIN with FK side removal, BUT without explicit IS NOT NULL on join
column
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ GROUP BY lo_custkey) fkSide on fkSide.lo_custkey =
customer_removal_n0.c_custkey;
+
+-- NEGATIVE for FK-PK JOIN with FK side removal, FK JOIN COL might not be
distinct
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_linenumber,lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_linenumber, lo_custkey,lo_custkey
+ ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
+
+-- FK side join col is PK as well (thus providing uniqueness without GROUP BY)
+CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ primary key (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE;
+
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1
+ WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey;
+DROP TABLE t1;
+
+-- FK side join col is UNIQUE and NOT NULL(thus providing uniqueness without
GROUP BY)
+CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ UNIQUE (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE;
+
+
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
+
+DROP TABLE t1;
\ No newline at end of file
diff --git
a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
index 9a51bd6..874445d 100644
---
a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
+++
b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
@@ -796,3 +796,387 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
ListSink
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type: string),
c_address (type: string), c_city (type: string), c_nation (type: string),
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey, sum(lo_discount) sm
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey, sum(lo_discount) sm
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type: string),
c_address (type: string), c_city (type: string), c_nation (type: string),
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ GROUP BY lo_custkey) fkSide on fkSide.lo_custkey =
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_custkey
+ FROM lineorder_removal_n0
+ GROUP BY lo_custkey) fkSide on fkSide.lo_custkey =
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type: string),
c_address (type: string), c_city (type: string), c_nation (type: string),
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_linenumber,lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_linenumber, lo_custkey,lo_custkey
+ ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN (SELECT lo_linenumber,lo_custkey
+ FROM lineorder_removal_n0
+ WHERE lo_custkey IS NOT NULL
+ GROUP BY lo_linenumber, lo_custkey,lo_custkey
+ ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: customer_removal_n0
+ Statistics: Num rows: 1 Data size: 1296 Basic stats:
COMPLETE Column stats: NONE
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type:
string), c_address (type: string), c_city (type: string), c_nation (type:
string), c_region (type: string), c_phone (type: string), c_mktsegment (type:
string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4,
_col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 1296 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 1296 Basic stats:
COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string), _col2 (type:
string), _col3 (type: string), _col4 (type: string), _col5 (type: string),
_col6 (type: string), _col7 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: lineorder_removal_n0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE
+ Group By Operator
+ keys: lo_custkey (type: bigint), lo_linenumber (type: int)
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats:
COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: int)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint),
_col1 (type: int)
+ Statistics: Num rows: 1 Data size: 12 Basic stats:
COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ Statistics: Num rows: 1 Data size: 1425 Basic stats: COMPLETE
Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1425 Basic stats:
COMPLETE Column stats: NONE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE
Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1 Data size: 12 Basic stats:
COMPLETE Column stats: NONE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ primary key (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ primary key (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1
+ WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1
+ WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey =
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type: string),
c_address (type: string), c_city (type: string), c_nation (type: string),
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ ListSink
+
+PREHOOK: query: DROP TABLE t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ UNIQUE (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE `t1`(
+ `lo_orderkey` bigint,
+ `lo_linenumber` int,
+ `lo_custkey` bigint not null disable rely,
+ `lo_partkey` bigint not null disable rely,
+ `lo_orderdate` bigint,
+ `lo_revenue` double,
+ UNIQUE (`lo_custkey`) disable rely,
+ constraint fkt1 foreign key (`lo_custkey`) references
`customer_removal_n0`(`c_custkey`) disable rely,
+ constraint fkt2 foreign key (`lo_orderdate`) references
`dates_removal_n0`(`d_datekey`) disable rely)
+ ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+ STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+ JOIN
+ (SELECT lo_custkey,lo_linenumber
+ FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: customer_removal_n0
+ Select Operator
+ expressions: c_custkey (type: bigint), c_name (type: string),
c_address (type: string), c_city (type: string), c_nation (type: string),
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5,
_col6, _col7
+ ListSink
+
+PREHOOK: query: DROP TABLE t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1