This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 3f0935e HIVE-21921: Support for correlated quantified predicates (Vineet Garg,reviewed by Jesus Camacho Rodriguez) 3f0935e is described below commit 3f0935e4f2e56dc483860e74240f3f9826f74e8f Author: Vineet Garg <vg...@apache.org> AuthorDate: Wed Jul 3 10:16:59 2019 -0700 HIVE-21921: Support for correlated quantified predicates (Vineet Garg,reviewed by Jesus Camacho Rodriguez) --- .../calcite/CalciteSubqueryRuntimeException.java | 47 + .../calcite/rules/HiveSubQueryRemoveRule.java | 323 +++-- .../hadoop/hive/ql/parse/CalcitePlanner.java | 18 +- .../clientnegative/subquery_any_aggregate.q | 10 + ql/src/test/queries/clientpositive/subquery_ANY.q | 41 + .../clientnegative/subquery_any_aggregate.q.out | 37 + .../results/clientpositive/llap/subquery_ANY.q.out | 1294 ++++++++++++++++++++ 7 files changed, 1627 insertions(+), 143 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java new file mode 100644 index 0000000..a0412e3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * Exception from Subquery rewrite. + */ + +public class CalciteSubqueryRuntimeException extends RuntimeException{ + + private static final long serialVersionUID = 1L; + + public CalciteSubqueryRuntimeException() { + super(); + } + + public CalciteSubqueryRuntimeException(String message) { + super(message); + } + + public CalciteSubqueryRuntimeException(Throwable cause) { + super(cause); + } + + public CalciteSubqueryRuntimeException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 6c57474..bad49f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -55,17 +55,20 @@ import java.util.List; import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.SubqueryConf; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; /** * NOTE: this rule is replicated from Calcite's SubqueryRemoveRule * Transform that converts IN, EXISTS and scalar sub-queries into joins. 
* TODO: - * Reason this is replicated instead of using Calcite's is - * Calcite creates null literal with null type but hive needs it to be properly typed + * Reason this is replicated instead of using Calcite's is + * Calcite creates null literal with null type but hive needs it to be properly typed * * <p>Sub-queries are represented by {@link RexSubQuery} expressions. * @@ -79,52 +82,48 @@ public class HiveSubQueryRemoveRule extends RelOptRule { private HiveConf conf; public HiveSubQueryRemoveRule(HiveConf conf) { - super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE, - any()), + super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE, any()), HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter"); this.conf = conf; } - @Override - public void onMatch(RelOptRuleCall call) { + + @Override public void onMatch(RelOptRuleCall call) { final RelNode relNode = call.rel(0); final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null); // if subquery is in FILTER - if(relNode instanceof Filter) { + if (relNode instanceof Filter) { final Filter filter = call.rel(0); - final RexSubQuery e = - RexUtil.SubQueryFinder.find(filter.getCondition()); + final RexSubQuery e = RexUtil.SubQueryFinder.find(filter.getCondition()); assert e != null; final RelOptUtil.Logic logic = - LogicVisitor.find(RelOptUtil.Logic.TRUE, - ImmutableList.of(filter.getCondition()), e); + LogicVisitor.find(RelOptUtil.Logic.TRUE, ImmutableList.of(filter.getCondition()), e); builder.push(filter.getInput()); final int fieldCount = builder.peek().getRowType().getFieldCount(); - assert(filter instanceof HiveFilter); + assert (filter instanceof HiveFilter); SubqueryConf subqueryConfig = filter.getCluster().getPlanner(). 
getContext().unwrap(SubqueryConf.class); boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel); - final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, - builder, 1, fieldCount, isCorrScalarQuery); + final RexNode target = + apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, builder, 1, + fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); builder.filter(shuttle.apply(filter.getCondition())); builder.project(fields(builder, filter.getRowType().getFieldCount())); RelNode newRel = builder.build(); call.transformTo(newRel); - } else if(relNode instanceof Project) { + } else if (relNode instanceof Project) { // if subquery is in PROJECT final Project project = call.rel(0); - final RexSubQuery e = - RexUtil.SubQueryFinder.find(project.getProjects()); + final RexSubQuery e = RexUtil.SubQueryFinder.find(project.getProjects()); assert e != null; final RelOptUtil.Logic logic = - LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, - project.getProjects(), e); + LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, project.getProjects(), e); builder.push(project.getInput()); final int fieldCount = builder.peek().getRowType().getFieldCount(); @@ -132,11 +131,11 @@ public class HiveSubQueryRemoveRule extends RelOptRule { project.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class); boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel); - final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), - logic, builder, 1, fieldCount, isCorrScalarQuery); + final RexNode target = + apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, builder, 1, + fieldCount, isCorrScalarQuery); final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target); - builder.project(shuttle.apply(project.getProjects()), - project.getRowType().getFieldNames()); + 
builder.project(shuttle.apply(project.getProjects()), project.getRowType().getFieldNames()); call.transformTo(builder.build()); } } @@ -145,32 +144,31 @@ public class HiveSubQueryRemoveRule extends RelOptRule { /// if COUNT returns true since COUNT produces 0 on empty result set private boolean isAggZeroOnEmpty(RexSubQuery e) { //as this is corr scalar subquery with agg we expect one aggregate - assert(e.getKind() == SqlKind.SCALAR_QUERY); - assert(e.rel.getInputs().size() == 1); - Aggregate relAgg = (Aggregate)e.rel.getInput(0); - assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate - if(relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT) { + assert (e.getKind() == SqlKind.SCALAR_QUERY); + assert (e.rel.getInputs().size() == 1); + Aggregate relAgg = (Aggregate) e.rel.getInput(0); + assert (relAgg.getAggCallList().size() == 1); //should only have one aggregate + if (relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT) { return true; } return false; } private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) { - assert(e.getKind() == SqlKind.SCALAR_QUERY); - assert(e.rel.getInputs().size() == 1); - Aggregate relAgg = (Aggregate)e.rel.getInput(0); - assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate + assert (e.getKind() == SqlKind.SCALAR_QUERY); + assert (e.rel.getInputs().size() == 1); + Aggregate relAgg = (Aggregate) e.rel.getInput(0); + assert (relAgg.getAggCallList().size() == 1); //should only have one aggregate return relAgg.getAggCallList().get(0).getType().getSqlTypeName(); } private RexNode rewriteScalar(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet, - HiveSubQRemoveRelBuilder builder, int offset, int inputCount, - boolean isCorrScalarAgg) { + HiveSubQRemoveRelBuilder builder, int offset, int inputCount, boolean isCorrScalarAgg) { // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check // since it is 
guaranteed to produce at most one row Double maxRowCount = mq.getMaxRowCount(e.rel); - boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; - if(shouldIntroSQCountCheck) { + boolean shouldIntroSQCountCheck = maxRowCount == null || maxRowCount > 1.0; + if (shouldIntroSQCountCheck) { builder.push(e.rel); // returns single row/column builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); @@ -191,7 +189,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } offset++; } - if(isCorrScalarAgg) { + if (isCorrScalarAgg) { // Transformation : // Outer Query Left Join (inner query) on correlated predicate // and preserve rows only from left side. @@ -208,7 +206,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); RexNode literal; - if(isAggZeroOnEmpty(e)) { + if (isAggZeroOnEmpty(e)) { // since count has a return type of BIG INT we need to make a literal of type big int // relbuilder's literal doesn't allow this literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); @@ -216,7 +214,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); } operands.add((builder.isNull(builder.field(indicator))), literal); - operands.add(field(builder, 1, builder.fields().size()-2)); + operands.add(field(builder, 1, builder.fields().size() - 2)); return builder.call(SqlStdOperatorTable.CASE, operands.build()); } @@ -230,33 +228,89 @@ public class HiveSubQueryRemoveRule extends RelOptRule { private RexNode rewriteSomeAll(RexSubQuery e, Set<CorrelationId> variablesSet, HiveSubQRemoveRelBuilder builder) { final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op; - assert(op == SqlStdOperatorTable.SOME_GE - || op == SqlStdOperatorTable.SOME_LE - || op == SqlStdOperatorTable.SOME_LT - || op == SqlStdOperatorTable.SOME_GT); - builder.push(e.rel) - 
.aggregate(builder.groupKey(), - op.comparisonKind == SqlKind.GREATER_THAN - || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL - ? builder.min("m", builder.field(0)) - : builder.max("m", builder.field(0)), - builder.count(false, "c"), - builder.count(false, "d", builder.field(0))) - .as("q") - .join(JoinRelType.INNER); - return builder.call(SqlStdOperatorTable.CASE, - builder.call(SqlStdOperatorTable.EQUALS, - builder.field("q", "c"), builder.literal(0)), - builder.literal(false), - builder.call(SqlStdOperatorTable.IS_TRUE, - builder.call(RelOptUtil.op(op.comparisonKind, null), - e.operands.get(0), builder.field("q", "m"))), - builder.literal(true), - builder.call(SqlStdOperatorTable.GREATER_THAN, - builder.field("q", "c"), builder.field("q", "d")), - e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), - builder.call(RelOptUtil.op(op.comparisonKind, null), - e.operands.get(0), builder.field("q", "m"))); + + // SOME_EQ & SOME_NE should have been rewritten into IN/ NOT IN + assert (op == SqlStdOperatorTable.SOME_GE || op == SqlStdOperatorTable.SOME_LE + || op == SqlStdOperatorTable.SOME_LT || op == SqlStdOperatorTable.SOME_GT); + + if (variablesSet.isEmpty()) { + // for non-correlated case queries such as + // select e.deptno, e.deptno < some (select deptno from emp) as v + // from emp as e + // + // becomes + // + // select e.deptno, + // case + // when q.c = 0 then false // sub-query is empty + // when (e.deptno < q.m) is true then true + // when q.c > q.d then unknown // sub-query has at least one null + // else e.deptno < q.m + // end as v + // from emp as e + // cross join ( + // select max(deptno) as m, count(*) as c, count(deptno) as d + // from emp) as q + builder.push(e.rel).aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN + || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? 
builder + .min("m", builder.field(0)) : builder.max("m", builder.field(0)), + builder.count(false, "c"), builder.count(false, "d", builder.field(0))).as("q") + .join(JoinRelType.INNER); + return builder.call(SqlStdOperatorTable.CASE, + builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)), + builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder + .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0), + builder.field("q", "m"))), builder.literal(true), builder + .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"), + builder.field("q", "d")), + e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder + .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0), + builder.field("q", "m"))); + } else { + // for correlated case queries such as + // select e.deptno, e.deptno < some (select deptno from emp where emp.name = e.name) as v + // from emp as e + // + // becomes + // + // select e.deptno, + // case + // when indicator is null then false // sub-query is empty for corresponding corr value + // when q.c = 0 then false // sub-query is empty + // when (e.deptno < q.m) is true then true + // when q.c > q.d then unknown // sub-query has at least one null + // else e.deptno < q.m + // end as v + // from emp as e + // left outer join ( + // select max(deptno) as m, count(*) as c, count(deptno) as d, "alwaysTrue" as indicator + // group by name from emp) as q on e.name = q.name + subqueryRestriction(e.rel); + builder.push(e.rel); + builder.aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN + || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? 
builder + .min("m", builder.field(0)) : builder.max("m", builder.field(0)), + builder.count(false, "c"), builder.count(false, "d", builder.field(0))); + + final List<RexNode> parentQueryFields = new ArrayList<>(); + parentQueryFields.addAll(builder.fields()); + String indicator = "alwaysTrue" + e.rel.getId(); + parentQueryFields.add(builder.alias(builder.literal(true), indicator)); + builder.project(parentQueryFields).as("q"); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + return builder.call(SqlStdOperatorTable.CASE, + builder.call(SqlStdOperatorTable.IS_NULL, builder.field(indicator)), + builder.literal(false), + builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)), + builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder + .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0), + builder.field("q", "m"))), builder.literal(true), builder + .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"), + builder.field("q", "d")), + e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder + .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0), + builder.field("q", "m"))); + } } @@ -318,17 +372,16 @@ public class HiveSubQueryRemoveRule extends RelOptRule { builder.push(e.rel); final List<RexNode> fields = new ArrayList<>(); - if(e.getKind() == SqlKind.IN) { + if (e.getKind() == SqlKind.IN) { fields.addAll(builder.fields()); // Transformation: sq_count_check(count(*), true) FILTER is generated on top // of subquery which is then joined (LEFT or INNER) with outer query // This transformation is done to add run time check using sq_count_check to // throw an error if subquery is producing zero row, since with aggregate this // will produce wrong results (because we further rewrite such queries into JOIN) - if(isCorrScalarAgg) { + if (isCorrScalarAgg) { // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, 
"cnt_in")); + builder.aggregate(builder.groupKey(), builder.count(false, "cnt_in")); if (!variablesSet.isEmpty()) { builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); @@ -336,9 +389,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule { builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, - ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + SqlFunction inCountCheck = + new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, + SqlFunctionCategory.USER_DEFINED_FUNCTION); // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT // because RelFieldTrimmer ends up getting rid of Project @@ -347,7 +401,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { //true here indicates that sq_count_check is for IN/NOT IN subqueries builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), builder.literal(0))); - offset = offset + 1; + offset = offset + 1; builder.push(e.rel); } } @@ -362,12 +416,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule { logic = RelOptUtil.Logic.TRUE_FALSE; break; } - builder.aggregate(builder.groupKey(), - builder.count(false, "c"), - builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", - builder.fields())); + builder.aggregate(builder.groupKey(), builder.count(false, "c"), + builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", builder.fields())); builder.as("ct"); - if(!variablesSet.isEmpty()) { + if (!variablesSet.isEmpty()) { //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); } else { @@ -384,8 +436,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule { case TRUE: if (fields.isEmpty()) { 
builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS + || e.getKind() == SqlKind.IN)) { // avoid adding group by for correlated IN/EXISTS queries // since this is rewritting into semijoin break; @@ -393,8 +445,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule { builder.aggregate(builder.groupKey(0)); } } else { - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS + || e.getKind() == SqlKind.IN)) { // avoid adding group by for correlated IN/EXISTS queries // since this is rewritting into semijoin break; @@ -410,10 +462,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } builder.as("dt"); final List<RexNode> conditions = new ArrayList<>(); - for (Pair<RexNode, RexNode> pair - : Pair.zip(e.getOperands(), builder.fields())) { - conditions.add( - builder.equals(pair.left, RexUtil.shift(pair.right, offset))); + for (Pair<RexNode, RexNode> pair : Pair.zip(e.getOperands(), builder.fields())) { + conditions.add(builder.equals(pair.left, RexUtil.shift(pair.right, offset))); } switch (logic) { case TRUE: @@ -432,8 +482,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { switch (logic) { case TRUE_FALSE_UNKNOWN: case UNKNOWN_AS_TRUE: - operands.add( - builder.equals(builder.field("ct", "c"), builder.literal(0)), + operands.add(builder.equals(builder.field("ct", "c"), builder.literal(0)), builder.literal(false)); //now that we are using LEFT OUTER JOIN to join inner count, count(*) // with outer table, we wouldn't be able to tell if count is zero @@ -444,8 +493,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule { operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); break; } - 
operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), - builder.literal(true)); + operands + .add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), builder.literal(true)); if (!keyIsNulls.isEmpty()) { //Calcite creates null literal with Null type here but // because HIVE doesn't support null type it is appropriately typed boolean @@ -460,9 +509,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule { b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); // fall through case UNKNOWN_AS_TRUE: - operands.add( - builder.call(SqlStdOperatorTable.LESS_THAN, - builder.field("ct", "ck"), builder.field("ct", "c")), + operands.add(builder + .call(SqlStdOperatorTable.LESS_THAN, builder.field("ct", "ck"), builder.field("ct", "c")), b); break; } @@ -471,8 +519,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet, - RelOptUtil.Logic logic, - HiveSubQRemoveRelBuilder builder, int inputCount, int offset, + RelOptUtil.Logic logic, HiveSubQRemoveRelBuilder builder, int inputCount, int offset, boolean isCorrScalarAgg) { switch (e.getKind()) { case SCALAR_QUERY: @@ -487,10 +534,12 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } } - /** Returns a reference to a particular field, by offset, across several - * inputs on a {@link RelBuilder}'s stack. */ + /** + * Returns a reference to a particular field, by offset, across several + * inputs on a {@link RelBuilder}'s stack. 
+ */ private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) { - for (int inputOrdinal = 0;;) { + for (int inputOrdinal = 0; ;) { final RelNode r = builder.peek(inputCount, inputOrdinal); if (offset < r.getRowType().getFieldCount()) { return builder.field(inputCount, inputOrdinal, offset); @@ -500,8 +549,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } } - /** Returns a list of expressions that project the first {@code fieldCount} - * fields of the top input on a {@link RelBuilder}'s stack. */ + /** + * Returns a list of expressions that project the first {@code fieldCount} + * fields of the top input on a {@link RelBuilder}'s stack. + */ private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) { final List<RexNode> projects = new ArrayList<>(); for (int i = 0; i < fieldCount; i++) { @@ -510,9 +561,11 @@ public class HiveSubQueryRemoveRule extends RelOptRule { return projects; } - /** Shuttle that replaces occurrences of a given + /** + * Shuttle that replaces occurrences of a given * {@link org.apache.calcite.rex.RexSubQuery} with a replacement - * expression. */ + * expression. + */ private static class ReplaceSubQueryShuttle extends RexShuttle { private final RexSubQuery subQuery; private final RexNode replacement; @@ -531,37 +584,40 @@ public class HiveSubQueryRemoveRule extends RelOptRule { // Following HiveSubQueryFinder has been copied from RexUtil::SubQueryFinder // since there is BUG in there (CALCITE-1726). // Once CALCITE-1726 is fixed we should get rid of the following code - /** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if - * applied to an expression that contains a {@link RexSubQuery}. */ + + /** + * Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if + * applied to an expression that contains a {@link RexSubQuery}. 
+ */ public static final class HiveSubQueryFinder extends RexVisitorImpl<Void> { public static final HiveSubQueryFinder INSTANCE = new HiveSubQueryFinder(); - /** Returns whether a {@link Project} contains a sub-query. */ - public static final Predicate<RelNode> RELNODE_PREDICATE= - new Predicate<RelNode>() { - @Override - public boolean apply(RelNode relNode) { - if (relNode instanceof Project) { - Project project = (Project)relNode; - for (RexNode node : project.getProjects()) { - try { - node.accept(INSTANCE); - } catch (Util.FoundOne e) { - return true; - } - } - return false; - } else if (relNode instanceof Filter) { - try { - ((Filter)relNode).getCondition().accept(INSTANCE); - return false; - } catch (Util.FoundOne e) { - return true; - } + /** + * Returns whether a {@link Project} contains a sub-query. + */ + public static final Predicate<RelNode> RELNODE_PREDICATE = new Predicate<RelNode>() { + @Override public boolean apply(RelNode relNode) { + if (relNode instanceof Project) { + Project project = (Project) relNode; + for (RexNode node : project.getProjects()) { + try { + node.accept(INSTANCE); + } catch (Util.FoundOne e) { + return true; } + } + return false; + } else if (relNode instanceof Filter) { + try { + ((Filter) relNode).getCondition().accept(INSTANCE); return false; + } catch (Util.FoundOne e) { + return true; } - }; + } + return false; + } + }; private HiveSubQueryFinder() { super(true); @@ -592,6 +648,17 @@ public class HiveSubQueryRemoveRule extends RelOptRule { } } + public static void subqueryRestriction(RelNode relNode) { + if (relNode instanceof HiveAggregate) { + HiveAggregate aggregate = (HiveAggregate) relNode; + if (!aggregate.getAggCallList().isEmpty() && aggregate.getGroupSet().isEmpty()) { + throw new CalciteSubqueryRuntimeException( + "Subquery rewrite: Aggregate without group by is not allowed"); + } + } else if (relNode instanceof HiveProject || relNode instanceof HiveFilter) { + subqueryRestriction(relNode.getInput(0)); + } + } 
} // End SubQueryRemoveRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 7a30239..cce87b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -139,21 +139,8 @@ import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import org.apache.hadoop.hive.ql.optimizer.calcite.*; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; -import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; -import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; -import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -595,7 +582,8 @@ public class CalcitePlanner extends SemanticAnalyzer { this.ctx.setCboInfo("Plan not optimized by CBO."); } } - if( e instanceof CalciteSubquerySemanticException) { + if( e instanceof CalciteSubquerySemanticException + || e instanceof CalciteSubqueryRuntimeException) { // non-cbo path retries to execute subqueries and throws completely different exception/error // to eclipse the original error message // so avoid executing subqueries on non-cbo diff --git a/ql/src/test/queries/clientnegative/subquery_any_aggregate.q b/ql/src/test/queries/clientnegative/subquery_any_aggregate.q new file mode 100644 index 0000000..485a33c --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_any_aggregate.q @@ -0,0 +1,10 @@ +create table t(i int, j int); +insert into t values(0,1), (0,2); + +create table tt(i int, j int); +insert into tt values(0,3); + +select * from t where i > ANY (select count(i) from tt where tt.j = t.j); + +drop table t; +drop table tt; diff --git a/ql/src/test/queries/clientpositive/subquery_ANY.q b/ql/src/test/queries/clientpositive/subquery_ANY.q index 1c36edb..37dd801 100644 --- a/ql/src/test/queries/clientpositive/subquery_ANY.q +++ b/ql/src/test/queries/clientpositive/subquery_ANY.q @@ -7,6 +7,9 @@ create table tempty(i int, j int); CREATE TABLE part_null_n0 as select * from part; insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); +CREATE TABLE part_null_n1 as select * from part; +insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); + -- test all six comparison operators explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part); select count(*) from part where p_partkey = ANY (select p_partkey from part); @@ -76,5 +79,43 @@ select p_partkey, (p_partkey > ANY (select null from part_null_n0)) from part_nu select p_partkey, (p_partkey > ANY (select i from tempty)) from 
part_null_n0; +-- correlated +explain select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type); +select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type); + +-- correlated, select, with empty results, should produce false +explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part; +select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part; + +-- correlated, correlation condition matches but subquery will not produce result due to false predicate, should produce false +explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part; +select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part; + +-- correlated, subquery has match, should produce true +explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part; +select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part; + +-- correlated, subquery has match but has NULL for one row, should produce one NULL +explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part; +select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part; + +-- correlated, with an aggregate and explicit group by +explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part; +select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part; + +-- nested +explain select * from part_null_n1 where p_name IN (select p_name from part where 
part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)); +select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)); + +-- multi +explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type); + +select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type); + +DROP TABLE part_null_n1; DROP TABLE part_null_n0; DROP TABLE tempty; diff --git a/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out b/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out new file mode 100644 index 0000000..5176ed7 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out @@ -0,0 +1,37 @@ +PREHOOK: query: create table t(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values(0,1), (0,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(0,1), (0,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.i SCRIPT [] +POSTHOOK: Lineage: t.j SCRIPT [] +PREHOOK: query: create table tt(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tt +POSTHOOK: query: create table tt(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@tt +PREHOOK: query: insert into tt values(0,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tt +POSTHOOK: query: insert into tt values(0,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tt +POSTHOOK: Lineage: tt.i SCRIPT [] +POSTHOOK: Lineage: tt.j SCRIPT [] +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException: Subquery rewrite: Aggregate without group by is not allowed diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out index cb0ec4b..e77f41a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -42,6 +42,42 @@ POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION [] POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION [] POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION [] POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION [] +PREHOOK: query: CREATE TABLE part_null_n1 as select * from part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null_n1 +POSTHOOK: query: CREATE TABLE part_null_n1 as select * from part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null_n1 +POSTHOOK: Lineage: part_null_n1.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_name 
SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_retailprice SIMPLE [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n1.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_null_n1 +POSTHOOK: query: insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_null_n1 +POSTHOOK: Lineage: part_null_n1.p_brand EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_comment EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_container EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_mfgr EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_name EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT [] +POSTHOOK: Lineage: part_null_n1.p_retailprice EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_size EXPRESSION [] +POSTHOOK: Lineage: part_null_n1.p_type EXPRESSION [] PREHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -568,6 +604,1264 @@ POSTHOOK: Input: default@tempty 86428 false 90681 false NULL false +PREHOOK: query: explain select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select * from part where p_partkey > ANY (select 
p_partkey from part p where p.p_type = part.p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + filterExpr: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey), count(), count(p_partkey) + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 40 Data size: 25120 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col0 > _col9) and (_col12 is null or (_col10 = 0L)) is not true) or ((_col0 > _col9) and (_col12 is null or (_col10 = 0L)) is not true and (_col0 > _col9) is not true and (_col10 > _col11) is not true)) (type: boolean) + Statistics: Num rows: 7 Data size: 4405 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap 
+ Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: 
+ Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: pp + filterExpr: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey), count(), count(p_partkey) + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator 
Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 37 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: 
int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 false +110592 false +112398 false +121152 false +121152 false +132666 false +144293 false +146985 false +15103 false +155733 false +17273 false +17927 false +191709 false +192697 false +195606 false +33357 false +40982 false +42669 false +45261 false +48427 false +49671 false +65667 false +78486 false +85768 false +86428 false +90681 false +PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_type (type: string) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Filter Operator + predicate: ((p_partkey < 0) and p_type is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey), count(), count(p_partkey) + keys: p_type (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 27 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 false +110592 false +112398 false +121152 false +121152 false +132666 false +144293 false +146985 false +15103 false +155733 false +17273 false +17927 
false +191709 false +192697 false +195606 false +33357 false +40982 false +42669 false +45261 false +48427 false +49671 false +65667 false +78486 false +85768 false +86428 false +90681 false +PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey), count(), count(p_partkey) + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE 
+ Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 36 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) 
(type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 true +110592 true +112398 true +121152 true +121152 true +132666 true +144293 true +146985 true +15103 true +155733 true +17273 true +17927 true +191709 true +192697 true +195606 true +33357 true +40982 true +42669 true +45261 true +48427 true +49671 true +65667 true +78486 true +85768 true +86428 true +90681 true +PREHOOK: query: explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### 
A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: pp + filterExpr: p_partkey is not null (type: boolean) + Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), (3 * p_size) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1), count(), count(_col1) + keys: _col0 (type: int) + minReductionHashAggr: 0.5185185 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + 
LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6 + Statistics: Num rows: 39 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (((_col1 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col1 >= _col2) is not true) or ((_col1 >= _col2) and (_col3 is null or _col5) is not true and (_col1 >= _col2) is not true and _col6 is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: int), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +105685 false +110592 false +112398 false +121152 false +121152 false +132666 false +144293 false +146985 false +15103 false +155733 false +17273 NULL +17927 false +191709 false +192697 false +195606 false +33357 false +40982 false +42669 false +45261 false +48427 false +49671 false +65667 false +78486 false +85768 false +86428 false +90681 false +PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3250 
Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: pp + filterExpr: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_partkey) + keys: p_type (type: string), p_partkey (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 37 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 
is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col2), count(), count(_col2) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), 
_col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 false +110592 false +112398 false +121152 false +121152 false +132666 false +144293 false +146985 false +15103 false +155733 false +17273 false +17927 false +191709 false +192697 false +195606 false +33357 false +40982 false +42669 false +45261 false +48427 false +49671 false +65667 false +78486 false +85768 false +86428 false +90681 false +PREHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A 
masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_null_n1 + filterExpr: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part + filterExpr: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + Reduce 
Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: pp + filterExpr: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size), count(), count(p_size) + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string), _col4 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 40 Data size: 9520 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col2 >= _col3) and (_col6 is null or (_col4 = 0L)) is not true) or ((_col2 >= _col3) and (_col6 is null or (_col4 = 0L)) is not true and (_col2 >= _col3) is not true and (_col4 > _col5) is not true)) (type: boolean) + Statistics: Num rows: 7 Data size: 1675 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.28571427 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 
13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type + AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 
ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse 
lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_null_n1 + filterExpr: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col1 (type: string) + Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part + filterExpr: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_type is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_name (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: pp + filterExpr: p_type is not null (type: 
boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(p_size), count(), count(p_size) + keys: p_type (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col4 (type: string) + 1 _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + 
Statistics: Num rows: 26 Data size: 16430 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col5 >= _col9) and (_col12 is null or (_col10 = 0L)) is not true) or ((_col5 >= _col9) and (_col12 is null or (_col10 = 0L)) is not true and (_col5 >= _col9) is not true and (_col10 > _col11) is not true)) (type: boolean) + Statistics: Num rows: 5 Data size: 3167 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 5 Data size: 3095 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 3095 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +POSTHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type) + AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n1 +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 
LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white 
Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: DROP TABLE part_null_n1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_null_n1 +PREHOOK: Output: default@part_null_n1 +POSTHOOK: query: DROP TABLE part_null_n1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_null_n1 +POSTHOOK: Output: default@part_null_n1 PREHOOK: query: DROP TABLE part_null_n0 PREHOOK: type: DROPTABLE PREHOOK: Input: default@part_null_n0