This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 13ccaaf  HIVE-24087 FK side join elimination in presence of PK-FK 
constraint (Vineet Garg, reviewed by Jesus Camacho Rodriguez, Zoltan Haindrich)
13ccaaf is described below

commit 13ccaaf6845422286001fa97eb99370de3f5ca42
Author: Vineet G <[email protected]>
AuthorDate: Tue Sep 1 08:15:25 2020 -0700

    HIVE-24087 FK side join elimination in presence of PK-FK constraint (Vineet 
Garg, reviewed by Jesus Camacho Rodriguez, Zoltan Haindrich)
---
 .../hive/ql/optimizer/calcite/HiveRelOptUtil.java  |  14 +-
 .../calcite/rules/HiveFilterJoinRule.java          |   2 +-
 .../rules/HiveFilterProjectTransposeRule.java      |   2 +-
 .../calcite/rules/HiveJoinConstraintsRule.java     | 171 ++++++---
 .../calcite/rules/HiveJoinSwapConstraintsRule.java |   4 +-
 .../clientpositive/join_constraints_optimization.q |  88 +++++
 .../llap/join_constraints_optimization.q.out       | 384 +++++++++++++++++++++
 7 files changed, 611 insertions(+), 54 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
index 9d45c6c..2b68f00 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
@@ -75,6 +76,8 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -740,11 +743,16 @@ public class HiveRelOptUtil extends RelOptUtil {
   }
 
   public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join,
-        boolean leftInputPotentialFK, RelMetadataQuery mq) {
+        final RelNode fkInput, final RelNode nonFkInput,
+        RelMetadataQuery mq) {
     final JoinRelType joinType = join.getJoinType();
     final RexNode cond = join.getCondition();
-    final RelNode fkInput = leftInputPotentialFK ? join.getLeft() : 
join.getRight();
-    final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() : 
join.getLeft();
+
+    Preconditions.checkArgument(fkInput == join.getLeft()
+        || fkInput == join.getRight(), "Invalid input: " + fkInput);
+    Preconditions.checkArgument(nonFkInput == join.getLeft()
+        || nonFkInput == join.getRight(), "Invalid input: " + nonFkInput);
+
     final RewritablePKFKJoinInfo nonRewritable = 
RewritablePKFKJoinInfo.of(false, null);
 
     // TODO : Need to handle Anti join.
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
index e89523d..8f2deb5 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterJoinRule.java
@@ -66,7 +66,7 @@ public abstract class HiveFilterJoinRule extends 
FilterJoinRule {
     public boolean matches(RelOptRuleCall call) {
       Join join = call.rel(1);
       RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(
-          join, true, call.getMetadataQuery());
+          join, join.getLeft(), join.getRight(), call.getMetadataQuery());
       if (!joinInfo.rewritable) {
         return false;
       }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index 0198fdb..8a80602 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -105,7 +105,7 @@ public class HiveFilterProjectTransposeRule extends 
FilterProjectTransposeRule {
     if (call.rels.length > 2) {
       final Join joinRel = call.rel(2);
       RewritablePKFKJoinInfo joinInfo = HiveRelOptUtil.isRewritablePKFKJoin(
-          joinRel, true, call.getMetadataQuery());
+          joinRel, joinRel.getLeft(), joinRel.getRight(), 
call.getMetadataQuery());
       if (!joinInfo.rewritable) {
         return false;
       }
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
index 0c533e3..47e8715 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java
@@ -16,9 +16,6 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
@@ -46,6 +43,10 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFK
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
 /**
  * The rule can perform two different optimizations.
 * 1) Removes a join if it does not alter the cardinality of one of its 
inputs.
@@ -95,6 +96,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
     // If it is a left outer, left will be the FK side.
     // If it is a right outer, right will be the FK side.
     final RelNode fkInput;
+    final RelNode nonFkInput;
     final ImmutableBitSet topRefs =
         RelOptUtil.InputFinder.bits(topProjExprs, null);
     final ImmutableBitSet leftBits =
@@ -106,7 +108,7 @@ public class HiveJoinConstraintsRule extends RelOptRule {
     boolean leftInputPotentialFK = topRefs.intersects(leftBits);
     boolean rightInputPotentialFK = topRefs.intersects(rightBits);
     if (leftInputPotentialFK && rightInputPotentialFK &&
-            (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
+        (joinType == JoinRelType.INNER || joinType == JoinRelType.SEMI)) {
       // Both inputs are referenced. Before making a decision, try to swap
       // references in join condition if it is an inner join, i.e. if a join
       // condition column is referenced above the join, then we can just
@@ -189,21 +191,24 @@ public class HiveJoinConstraintsRule extends RelOptRule {
     switch (joinType) {
     case SEMI:
     case INNER:
-    //case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
+      //case ANTI: //TODO:https://issues.apache.org/jira/browse/HIVE-23920
       if (leftInputPotentialFK && rightInputPotentialFK) {
         // Bails out as it references columns from both sides (or no columns)
         // and there is nothing to transform
         return;
       }
       fkInput = leftInputPotentialFK ? leftInput : rightInput;
+      nonFkInput = leftInputPotentialFK ? rightInput: leftInput;
       mode = Mode.REMOVE;
       break;
     case LEFT:
       fkInput = leftInput;
+      nonFkInput = rightInput;
       mode = leftInputPotentialFK && !rightInputPotentialFK ? Mode.REMOVE : 
Mode.TRANSFORM;
       break;
     case RIGHT:
       fkInput = rightInput;
+      nonFkInput = leftInput;
       mode = !leftInputPotentialFK && rightInputPotentialFK ? Mode.REMOVE : 
Mode.TRANSFORM;
       break;
     default:
@@ -213,61 +218,133 @@ public class HiveJoinConstraintsRule extends RelOptRule {
 
     // 2) Check whether this join can be rewritten or removed
     RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin(
-        join, leftInput == fkInput, call.getMetadataQuery());
+        join, fkInput,  nonFkInput, call.getMetadataQuery());
 
     // 3) If it is the only condition, we can trigger the rewriting
     if (r.rewritable) {
-      List<RexNode> nullableNodes = r.nullableNodes;
-      // If we reach here, we trigger the transform
-      if (mode == Mode.REMOVE) {
-        if (rightInputPotentialFK) {
-          // First, if FK is the right input, we need to shift
-          nullableNodes = nullableNodes.stream()
-              .map(node -> RexUtil.shift(node, 0, 
-leftInput.getRowType().getFieldCount()))
-              .collect(Collectors.toList());
-          topProjExprs = topProjExprs.stream()
-              .map(node -> RexUtil.shift(node, 0, 
-leftInput.getRowType().getFieldCount()))
-              .collect(Collectors.toList());
-        }
-        // Fix nullability in references to the input node
-        topProjExprs = HiveCalciteUtil.fixNullability(rexBuilder, 
topProjExprs, RelOptUtil.getFieldTypeList(fkInput.getRowType()));
-        // Trigger transformation
-        if (nullableNodes.isEmpty()) {
-          call.transformTo(call.builder()
-              .push(fkInput)
-              .project(topProjExprs)
-              .convert(project.getRowType(), false)
-              .build());
+      rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, 
r.nullableNodes);
+    } else {
+      // check if FK side could be removed instead
+      // Possibly this could be enhanced to take other join types into 
consideration.
+      if (joinType != JoinRelType.INNER) {
+        return;
+      }
+
+      //first swap fk and non-fk input and see if we can rewrite them
+      RewritablePKFKJoinInfo fkRemoval = HiveRelOptUtil.isRewritablePKFKJoin(
+          join, nonFkInput, fkInput, call.getMetadataQuery());
+
+      if (fkRemoval.rewritable) {
+        // we have established that nonFkInput is FK, and fkInput is PK
+        // and there is no row filtering on FK side
+        // check that FK side join column is distinct (i.e. have a group by)
+        ImmutableBitSet fkSideBitSet;
+        if (nonFkInput == leftInput) {
+          fkSideBitSet = leftBits;
         } else {
-          RexNode newFilterCond;
-          if (nullableNodes.size() == 1) {
-            newFilterCond = 
rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, nullableNodes.get(0));
-          } else {
-            List<RexNode> isNotNullConds = new ArrayList<>();
-            for (RexNode nullableNode : nullableNodes) {
-              
isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, 
nullableNode));
+          fkSideBitSet = rightBits;
+        }
+
+        ImmutableBitSet.Builder fkJoinColBuilder = ImmutableBitSet.builder();
+        for (RexNode conj : RelOptUtil.conjunctions(cond)) {
+          if (!conj.isA(SqlKind.EQUALS)) {
+            return;
+          }
+          RexCall eq = (RexCall) conj;
+          RexNode op1 = eq.getOperands().get(0);
+          RexNode op2 = eq.getOperands().get(1);
+          if (op1 instanceof RexInputRef && op2 instanceof RexInputRef) {
+            // Check references
+            int ref1 = ((RexInputRef) op1).getIndex();
+            int ref2 = ((RexInputRef) op2).getIndex();
+            int leftRef = -1;
+            int rightRef = -1;
+            if (fkSideBitSet.get(ref1)) {
+              // check that join columns are not nullable
+              if (op1.getType().isNullable()) {
+                return;
+              }
+              fkJoinColBuilder.set(fkSideBitSet.indexOf(ref1));
+            } else {
+              if (op2.getType().isNullable()) {
+                return;
+              }
+              fkJoinColBuilder.set(fkSideBitSet.indexOf(ref2));
             }
-            newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, 
isNotNullConds);
           }
-          call.transformTo(call.builder()
-              .push(fkInput)
-              .filter(newFilterCond)
-              .project(topProjExprs)
-              .convert(project.getRowType(), false)
-              .build());
         }
-      } else { // Mode.TRANSFORM
-        // Trigger transformation
+
+        if (!call.getMetadataQuery().areColumnsUnique(nonFkInput, 
fkJoinColBuilder.build())) {
+          return;
+        }
+
+        // all conditions are met, therefore we can perform rewrite to remove 
fk side
+        rewrite(mode, fkInput, nonFkInput, join, topProjExprs, call, project, 
fkRemoval.nullableNodes);
+
+      }
+
+    }
+  }
+
+
+  private void rewrite(final Mode mode, final RelNode inputToKeep, final 
RelNode inputToRemove,
+      final Join join, List<RexNode> topProjExprs, RelOptRuleCall call, final 
RelNode project, List<RexNode> nullableNodes) {
+    RexBuilder rexBuilder = join.getCluster().getRexBuilder();
+
+    final RelNode leftInput = join.getLeft();
+    final RelNode rightInput = join.getRight();
+
+    // If we reach here, we trigger the transform
+    if (mode == Mode.REMOVE) {
+      if (inputToKeep == join.getRight()) {
+        // First, if FK is the right input, we need to shift
+        nullableNodes = nullableNodes.stream()
+            .map(node -> RexUtil.shift(node, 0, 
-leftInput.getRowType().getFieldCount()))
+            .collect(Collectors.toList());
+        topProjExprs = topProjExprs.stream()
+            .map(node -> RexUtil.shift(node, 0, 
-leftInput.getRowType().getFieldCount()))
+            .collect(Collectors.toList());
+      }
+      // Fix nullability in references to the input node
+      topProjExprs = HiveCalciteUtil.fixNullability(rexBuilder, topProjExprs, 
RelOptUtil.getFieldTypeList(inputToKeep.getRowType()));
+      // Trigger transformation
+      if (nullableNodes.isEmpty()) {
+        call.transformTo(call.builder()
+            .push(inputToKeep)
+            .project(topProjExprs)
+            .convert(project.getRowType(), false)
+            .build());
+      } else {
+        RexNode newFilterCond;
+        if (nullableNodes.size() == 1) {
+          newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, 
nullableNodes.get(0));
+        } else {
+          List<RexNode> isNotNullConds = new ArrayList<>();
+          for (RexNode nullableNode : nullableNodes) {
+            
isNotNullConds.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, 
nullableNode));
+          }
+          newFilterCond = rexBuilder.makeCall(SqlStdOperatorTable.AND, 
isNotNullConds);
+        }
         call.transformTo(call.builder()
-            .push(leftInput).push(rightInput)
-            .join(JoinRelType.INNER, join.getCondition())
-            .convert(call.rel(1).getRowType(), false) // Preserve nullability
-            .project(project.getChildExps())
+            .push(inputToKeep)
+            .filter(newFilterCond)
+            .project(topProjExprs)
+            .convert(project.getRowType(), false)
             .build());
       }
+    } else { // Mode.TRANSFORM
+      // Trigger transformation
+      call.transformTo(call.builder()
+          .push(leftInput).push(rightInput)
+          .join(JoinRelType.INNER, join.getCondition())
+          .convert(call.rel(1).getRowType(), false) // Preserve nullability
+          .project(project.getChildExps())
+          .build());
     }
   }
 
+
+
   private enum Mode {
     // Removes join operator from the plan
     REMOVE,
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
index dd2fcbd..419f5b5 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinSwapConstraintsRule.java
@@ -78,9 +78,9 @@ public class HiveJoinSwapConstraintsRule extends RelOptRule {
     // - If the bottom one is not a non-filtering column appending join,
     // we cannot trigger the optimization.
     RewritablePKFKJoinInfo topInfo = HiveRelOptUtil.isRewritablePKFKJoin(
-        topJoin, true, call.getMetadataQuery());
+        topJoin, topJoin.getLeft(), topJoin.getRight(), 
call.getMetadataQuery());
     RewritablePKFKJoinInfo bottomInfo = HiveRelOptUtil.isRewritablePKFKJoin(
-        bottomJoin, true, call.getMetadataQuery());
+        bottomJoin, bottomJoin.getLeft(), bottomJoin.getRight(), 
call.getMetadataQuery());
     if (topInfo.rewritable || !bottomInfo.rewritable) {
       // Nothing to do
       return;
diff --git a/ql/src/test/queries/clientpositive/join_constraints_optimization.q 
b/ql/src/test/queries/clientpositive/join_constraints_optimization.q
index 5dbfe05..78159f1 100644
--- a/ql/src/test/queries/clientpositive/join_constraints_optimization.q
+++ b/ql/src/test/queries/clientpositive/join_constraints_optimization.q
@@ -134,3 +134,91 @@ EXPLAIN
 SELECT `lo_linenumber`, `c_custkey`
 FROM `lineorder_removal_n0`
 JOIN `customer_removal_n0` ON `lo_custkey` = `c_custkey`;
+
+-- FK-PK JOIN with FK side removal
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN
+    (SELECT lo_custkey
+    FROM lineorder_removal_n0
+    WHERE lo_custkey IS NOT NULL
+    GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey;
+
+-- FK-PK JOIN with FK side removal
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey, sum(lo_discount)  sm
+      FROM lineorder_removal_n0
+      WHERE lo_custkey IS NOT NULL
+      GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0;
+
+-- FK-PK JOIN with FK side removal, BUT without explicit IS NOT NULL on join 
column
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN (SELECT lo_custkey
+            FROM lineorder_removal_n0
+            GROUP BY lo_custkey) fkSide on fkSide.lo_custkey = 
customer_removal_n0.c_custkey;
+
+-- NEGATIVE for FK-PK JOIN with FK side removal, FK JOIN COL might not be 
distinct
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN (SELECT lo_linenumber,lo_custkey
+               FROM lineorder_removal_n0
+               WHERE lo_custkey IS NOT NULL
+               GROUP BY lo_linenumber, lo_custkey,lo_custkey
+             ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
+
+-- FK side join col is PK as well (thus providing uniqueness without GROUP BY)
+CREATE TABLE `t1`(
+               `lo_orderkey` bigint,
+               `lo_linenumber` int,
+               `lo_custkey` bigint not null disable rely,
+               `lo_partkey` bigint not null disable rely,
+               `lo_orderdate` bigint,
+               `lo_revenue` double,
+               primary key (`lo_custkey`) disable rely,
+               constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+               constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE;
+
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1
+      WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey;
+DROP TABLE t1;
+
+-- FK side join col is UNIQUE and NOT NULL(thus providing uniqueness without 
GROUP BY)
+CREATE TABLE `t1`(
+                     `lo_orderkey` bigint,
+                     `lo_linenumber` int,
+                     `lo_custkey` bigint not null disable rely,
+                     `lo_partkey` bigint not null disable rely,
+                     `lo_orderdate` bigint,
+                     `lo_revenue` double,
+                     UNIQUE (`lo_custkey`) disable rely,
+                     constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+                     constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE;
+
+
+EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey;
+
+DROP TABLE t1;
\ No newline at end of file
diff --git 
a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out 
b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
index 9a51bd6..874445d 100644
--- 
a/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/join_constraints_optimization.q.out
@@ -796,3 +796,387 @@ STAGE PLANS:
             outputColumnNames: _col0, _col1
             ListSink
 
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN
+    (SELECT lo_custkey
+    FROM lineorder_removal_n0
+    WHERE lo_custkey IS NOT NULL
+    GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN
+    (SELECT lo_custkey
+    FROM lineorder_removal_n0
+    WHERE lo_custkey IS NOT NULL
+    GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          Select Operator
+            expressions: c_custkey (type: bigint), c_name (type: string), 
c_address (type: string), c_city (type: string), c_nation (type: string), 
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+            ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey, sum(lo_discount)  sm
+      FROM lineorder_removal_n0
+      WHERE lo_custkey IS NOT NULL
+      GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey, sum(lo_discount)  sm
+      FROM lineorder_removal_n0
+      WHERE lo_custkey IS NOT NULL
+      GROUP BY lo_custkey) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+WHERE fkSide.sm > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          Select Operator
+            expressions: c_custkey (type: bigint), c_name (type: string), 
c_address (type: string), c_city (type: string), c_nation (type: string), 
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+            ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN (SELECT lo_custkey
+            FROM lineorder_removal_n0
+            GROUP BY lo_custkey) fkSide on fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+    JOIN (SELECT lo_custkey
+            FROM lineorder_removal_n0
+            GROUP BY lo_custkey) fkSide on fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          Select Operator
+            expressions: c_custkey (type: bigint), c_name (type: string), 
c_address (type: string), c_city (type: string), c_nation (type: string), 
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+            ListSink
+
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN (SELECT lo_linenumber,lo_custkey
+               FROM lineorder_removal_n0
+               WHERE lo_custkey IS NOT NULL
+               GROUP BY lo_linenumber, lo_custkey,lo_custkey
+             ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN (SELECT lo_linenumber,lo_custkey
+               FROM lineorder_removal_n0
+               WHERE lo_custkey IS NOT NULL
+               GROUP BY lo_linenumber, lo_custkey,lo_custkey
+             ) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@lineorder_removal_n0
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: customer_removal_n0
+                  Statistics: Num rows: 1 Data size: 1296 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: c_custkey (type: bigint), c_name (type: 
string), c_address (type: string), c_city (type: string), c_nation (type: 
string), c_region (type: string), c_phone (type: string), c_mktsegment (type: 
string)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7
+                    Statistics: Num rows: 1 Data size: 1296 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint)
+                      null sort order: z
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: bigint)
+                      Statistics: Num rows: 1 Data size: 1296 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: string), 
_col6 (type: string), _col7 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: lineorder_removal_n0
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Group By Operator
+                    keys: lo_custkey (type: bigint), lo_linenumber (type: int)
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: bigint), _col1 (type: int)
+                      null sort order: zz
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: bigint), 
_col1 (type: int)
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: bigint)
+                  1 _col0 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+                Statistics: Num rows: 1 Data size: 1425 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1425 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: bigint), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: bigint)
+                    null sort order: z
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: bigint)
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: CREATE TABLE `t1`(
+               `lo_orderkey` bigint,
+               `lo_linenumber` int,
+               `lo_custkey` bigint not null disable rely,
+               `lo_partkey` bigint not null disable rely,
+               `lo_orderdate` bigint,
+               `lo_revenue` double,
+               primary key (`lo_custkey`) disable rely,
+               constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+               constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE `t1`(
+               `lo_orderkey` bigint,
+               `lo_linenumber` int,
+               `lo_custkey` bigint not null disable rely,
+               `lo_partkey` bigint not null disable rely,
+               `lo_orderdate` bigint,
+               `lo_revenue` double,
+               primary key (`lo_custkey`) disable rely,
+               constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+               constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1
+      WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1
+      WHERE lo_custkey IS NOT NULL) fkSide ON fkSide.lo_custkey = 
customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          Select Operator
+            expressions: c_custkey (type: bigint), c_name (type: string), 
c_address (type: string), c_city (type: string), c_nation (type: string), 
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+            ListSink
+
+PREHOOK: query: DROP TABLE t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE `t1`(
+                     `lo_orderkey` bigint,
+                     `lo_linenumber` int,
+                     `lo_custkey` bigint not null disable rely,
+                     `lo_partkey` bigint not null disable rely,
+                     `lo_orderdate` bigint,
+                     `lo_revenue` double,
+                     UNIQUE (`lo_custkey`) disable rely,
+                     constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+                     constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE `t1`(
+                     `lo_orderkey` bigint,
+                     `lo_linenumber` int,
+                     `lo_custkey` bigint not null disable rely,
+                     `lo_partkey` bigint not null disable rely,
+                     `lo_orderdate` bigint,
+                     `lo_revenue` double,
+                     UNIQUE (`lo_custkey`) disable rely,
+                     constraint fkt1 foreign key (`lo_custkey`) references 
`customer_removal_n0`(`c_custkey`) disable rely,
+                     constraint fkt2 foreign key (`lo_orderdate`) references 
`dates_removal_n0`(`d_datekey`) disable rely)
+    ROW FORMAT DELIMITED
+        FIELDS TERMINATED BY '|'
+    STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_removal_n0
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT customer_removal_n0.*
+FROM customer_removal_n0
+         JOIN
+     (SELECT lo_custkey,lo_linenumber
+      FROM t1) fkSide ON fkSide.lo_custkey = customer_removal_n0.c_custkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_removal_n0
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: customer_removal_n0
+          Select Operator
+            expressions: c_custkey (type: bigint), c_name (type: string), 
c_address (type: string), c_city (type: string), c_nation (type: string), 
c_region (type: string), c_phone (type: string), c_mktsegment (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7
+            ListSink
+
+PREHOOK: query: DROP TABLE t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1

Reply via email to