This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
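[Editor's note] The commit below adds parser and planner support for quantified comparison predicates (ANY/SOME/ALL) over non-correlated subqueries. As a minimal sketch of what this enables, the first query is taken from the new subquery_ALL.q test added in this patch; the ANY variant is illustrative only, and the commented-out forms are the ones the new negative tests (subquery_all_equal.q, subquery_any_notequal.q) still reject:

    -- ALL: the comparison must hold for every row returned by the subquery
    select count(*) from part where p_partkey > ALL (select p_partkey from part);
    -- ANY/SOME: the comparison must hold for at least one row (illustrative variant)
    select count(*) from part where p_size < ANY (select p_size from part);
    -- still disallowed by this patch: =ALL and <>ANY
    -- select * from part where p_type = ALL (select max(p_type) from part);
    -- select * from part where p_type <> ANY (select max(p_type) from part);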
The following commit(s) were added to refs/heads/master by this push: new 86a7eb7 HIVE-13554: Support for non-correlated Quantified Comparison Predicates (Vineet Garg, reviewed by Jesus Camacho Rodriguez) 86a7eb7 is described below commit 86a7eb7730b224f038ff48286cf5d9009ba422c5 Author: Vineet Garg <vg...@apache.org> AuthorDate: Wed May 8 10:34:09 2019 -0700 HIVE-13554: Support for non-correlated Quantified Comparison Predicates (Vineet Garg, reviewed by Jesus Camacho Rodriguez) --- .../test/resources/testconfiguration.properties | 2 + .../calcite/rules/HiveSubQueryRemoveRule.java | 536 ++++++------ .../calcite/translator/RexNodeConverter.java | 176 ++-- .../hadoop/hive/ql/parse/CalcitePlanner.java | 56 +- .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 + .../hadoop/hive/ql/parse/IdentifiersParser.g | 17 + .../apache/hadoop/hive/ql/parse/QBSubQuery.java | 6 + .../apache/hadoop/hive/ql/parse/SubQueryUtils.java | 123 ++- .../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 12 + .../hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java | 19 +- .../parse/TestSQL11ReservedKeyWordsNegative.java | 2 +- .../queries/clientnegative/subquery_all_equal.q | 3 + .../queries/clientnegative/subquery_any_notequal.q | 3 + ql/src/test/queries/clientpositive/subquery_ALL.q | 75 ++ ql/src/test/queries/clientpositive/subquery_ANY.q | 80 ++ .../clientnegative/invalid_select_expression.q.out | 2 +- .../clientnegative/subquery_all_equal.q.out | 1 + .../clientnegative/subquery_any_notequal.q.out | 1 + .../results/clientnegative/subquery_in_lhs.q.out | 2 +- .../subquery_subquery_chain_exists.q.out | 2 +- .../results/clientpositive/llap/subquery_ALL.q.out | 896 +++++++++++++++++++++ .../results/clientpositive/llap/subquery_ANY.q.out | 586 ++++++++++++++ 22 files changed, 2212 insertions(+), 389 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 06a62da..3d8bae8 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -738,6 +738,8 @@ minillaplocal.query.files=\ strict_managed_tables_sysdb.q,\ strict_managed_tables1.q,\ strict_managed_tables2.q,\ + subquery_ANY.q,\ + subquery_ALL.q,\ subquery_in_having.q,\ subquery_notin.q,\ subquery_nested_subquery.q, \ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index ecd70b2..6c57474 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -36,6 +36,7 @@ import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlQuantifyOperator; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.InferTypes; @@ -162,280 +163,325 @@ public class HiveSubQueryRemoveRule extends RelOptRule { return relAgg.getAggCallList().get(0).getType().getSqlTypeName(); } - protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet, - RelOptUtil.Logic logic, - HiveSubQRemoveRelBuilder builder, int inputCount, int offset, + private RexNode rewriteScalar(RelMetadataQuery mq, RexSubQuery e, 
Set<CorrelationId> variablesSet, + HiveSubQRemoveRelBuilder builder, int offset, int inputCount, boolean isCorrScalarAgg) { - switch (e.getKind()) { - case SCALAR_QUERY: - // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check - // since it is guaranteed to produce at most one row - Double maxRowCount = mq.getMaxRowCount(e.rel); - boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; - if(shouldIntroSQCountCheck) { - builder.push(e.rel); - // returns single row/column - builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); + // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check + // since it is guaranteed to produce at most one row + Double maxRowCount = mq.getMaxRowCount(e.rel); + boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0; + if(shouldIntroSQCountCheck) { + builder.push(e.rel); + // returns single row/column + builder.aggregate(builder.groupKey(), builder.count(false, "cnt")); - SqlFunction countCheck = - new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, - InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + SqlFunction countCheck = + new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT, + InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, + SqlFunctionCategory.USER_DEFINED_FUNCTION); - //we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer - // ends up getting rid of Project since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, - builder.call(countCheck, builder.field("cnt")), builder.literal(1))); - if (!variablesSet.isEmpty()) { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } else { - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - } - offset++; - } - if(isCorrScalarAgg) { - // Transformation : - // Outer Query Left Join (inner query) on correlated predicate - // and preserve rows only from left side. 
- builder.push(e.rel); - final List<RexNode> parentQueryFields = new ArrayList<>(); - parentQueryFields.addAll(builder.fields()); - - // id is appended since there could be multiple scalar subqueries and FILTER - // is created using field name - String indicator = "alwaysTrue" + e.rel.getId(); - parentQueryFields.add(builder.alias(builder.literal(true), indicator)); - builder.project(parentQueryFields); + //we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer + // ends up getting rid of Project since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, + builder.call(countCheck, builder.field("cnt")), builder.literal(1))); + if (!variablesSet.isEmpty()) { builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - - final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); - RexNode literal; - if(isAggZeroOnEmpty(e)) { - // since count has a return type of BIG INT we need to make a literal of type big int - // relbuilder's literal doesn't allow this - literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); - } else { - literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); - } - operands.add((builder.isNull(builder.field(indicator))), literal); - operands.add(field(builder, 1, builder.fields().size()-2)); - return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } else { + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - - //Transformation is to left join for correlated predicates and inner join otherwise, - // but do a count on inner side before that to make sure it generates atmost 1 row. + offset++; + } + if(isCorrScalarAgg) { + // Transformation : + // Outer Query Left Join (inner query) on correlated predicate + // and preserve rows only from left side. builder.push(e.rel); + final List<RexNode> parentQueryFields = new ArrayList<>(); + parentQueryFields.addAll(builder.fields()); + + // id is appended since there could be multiple scalar subqueries and FILTER + // is created using field name + String indicator = "alwaysTrue" + e.rel.getId(); + parentQueryFields.add(builder.alias(builder.literal(true), indicator)); + builder.project(parentQueryFields); builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - return field(builder, inputCount, offset); - case IN: - case EXISTS: - // Most general case, where the left and right keys might have nulls, and - // caller requires 3-valued logic return. - // - // select e.deptno, e.deptno in (select deptno from emp) - // - // becomes - // - // select e.deptno, - // case - // when ct.c = 0 then false - // when dt.i is not null then true - // when e.deptno is null then null - // when ct.ck < ct.c then null - // else false - // end - // from e - // left join ( - // (select count(*) as c, count(deptno) as ck from emp) as ct - // cross join (select distinct deptno, true as i from emp)) as dt - // on e.deptno = dt.deptno - // - // If keys are not null we can remove "ct" and simplify to - // - // select e.deptno, - // case - // when dt.i is not null then true - // else false - // end - // from e - // left join (select distinct deptno, true as i from emp) as dt - // on e.deptno = dt.deptno - // - // We could further simplify to - // - // select e.deptno, - // dt.i is not null - // from e - // left join (select distinct deptno, true as i from emp) as dt - // on e.deptno = dt.deptno - // - // but have not yet. 
- // - // If the logic is TRUE we can just kill the record if the condition - // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner - // join: - // - // select e.deptno, - // true - // from e - // inner join (select distinct deptno from emp) as dt - // on e.deptno = dt.deptno - // + final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); + RexNode literal; + if(isAggZeroOnEmpty(e)) { + // since count has a return type of BIG INT we need to make a literal of type big int + // relbuilder's literal doesn't allow this + literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0)); + } else { + literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e)); + } + operands.add((builder.isNull(builder.field(indicator))), literal); + operands.add(field(builder, 1, builder.fields().size()-2)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } - builder.push(e.rel); - final List<RexNode> fields = new ArrayList<>(); - if(e.getKind() == SqlKind.IN) { - fields.addAll(builder.fields()); - // Transformation: sq_count_check(count(*), true) FILTER is generated on top - // of subquery which is then joined (LEFT or INNER) with outer query - // This transformation is done to add run time check using sq_count_check to - // throw an error if subquery is producing zero row, since with aggregate this - // will produce wrong results (because we further rewrite such queries into JOIN) - if(isCorrScalarAgg) { - // returns single row/column - builder.aggregate(builder.groupKey(), - builder.count(false, "cnt_in")); - - if (!variablesSet.isEmpty()) { - builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); - } else { - builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); - } + //Transformation is to left join for correlated predicates and inner join otherwise, + // but do a count on inner side before that to make sure it generates atmost 1 row. + builder.push(e.rel); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + return field(builder, inputCount, offset); + } - SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, - ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, - SqlFunctionCategory.USER_DEFINED_FUNCTION); + private RexNode rewriteSomeAll(RexSubQuery e, Set<CorrelationId> variablesSet, + HiveSubQRemoveRelBuilder builder) { + final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op; + assert(op == SqlStdOperatorTable.SOME_GE + || op == SqlStdOperatorTable.SOME_LE + || op == SqlStdOperatorTable.SOME_LT + || op == SqlStdOperatorTable.SOME_GT); + builder.push(e.rel) + .aggregate(builder.groupKey(), + op.comparisonKind == SqlKind.GREATER_THAN + || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL + ? 
builder.min("m", builder.field(0)) + : builder.max("m", builder.field(0)), + builder.count(false, "c"), + builder.count(false, "d", builder.field(0))) + .as("q") + .join(JoinRelType.INNER); + return builder.call(SqlStdOperatorTable.CASE, + builder.call(SqlStdOperatorTable.EQUALS, + builder.field("q", "c"), builder.literal(0)), + builder.literal(false), + builder.call(SqlStdOperatorTable.IS_TRUE, + builder.call(RelOptUtil.op(op.comparisonKind, null), + e.operands.get(0), builder.field("q", "m"))), + builder.literal(true), + builder.call(SqlStdOperatorTable.GREATER_THAN, + builder.field("q", "c"), builder.field("q", "d")), + e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), + builder.call(RelOptUtil.op(op.comparisonKind, null), + e.operands.get(0), builder.field("q", "m"))); - // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT - // because RelFieldTrimmer ends up getting rid of Project - // since it is not used further up the tree - builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, - //true here indicates that sq_count_check is for IN/NOT IN subqueries - builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), - builder.literal(0))); - offset = offset + 1; - builder.push(e.rel); - } - } + } - // First, the cross join - switch (logic) { - case TRUE_FALSE_UNKNOWN: - case UNKNOWN_AS_TRUE: - // Since EXISTS/NOT EXISTS are not affected by presence of - // null keys we do not need to generate count(*), count(c) - if (e.getKind() == SqlKind.EXISTS) { - logic = RelOptUtil.Logic.TRUE_FALSE; - break; - } + private RexNode rewriteInExists(RexSubQuery e, Set<CorrelationId> variablesSet, + RelOptUtil.Logic logic, HiveSubQRemoveRelBuilder builder, int offset, + boolean isCorrScalarAgg) { + // Most general case, where the left and right keys might have nulls, and + // caller requires 3-valued logic return. + // + // select e.deptno, e.deptno in (select deptno from emp) + // + // becomes + // + // select e.deptno, + // case + // when ct.c = 0 then false + // when dt.i is not null then true + // when e.deptno is null then null + // when ct.ck < ct.c then null + // else false + // end + // from e + // left join ( + // (select count(*) as c, count(deptno) as ck from emp) as ct + // cross join (select distinct deptno, true as i from emp)) as dt + // on e.deptno = dt.deptno + // + // If keys are not null we can remove "ct" and simplify to + // + // select e.deptno, + // case + // when dt.i is not null then true + // else false + // end + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // We could further simplify to + // + // select e.deptno, + // dt.i is not null + // from e + // left join (select distinct deptno, true as i from emp) as dt + // on e.deptno = dt.deptno + // + // but have not yet. + // + // If the logic is TRUE we can just kill the record if the condition + // evaluates to FALSE or UNKNOWN. 
Thus the query simplifies to an inner + // join: + // + // select e.deptno, + // true + // from e + // inner join (select distinct deptno from emp) as dt + // on e.deptno = dt.deptno + // + + builder.push(e.rel); + final List<RexNode> fields = new ArrayList<>(); + if(e.getKind() == SqlKind.IN) { + fields.addAll(builder.fields()); + // Transformation: sq_count_check(count(*), true) FILTER is generated on top + // of subquery which is then joined (LEFT or INNER) with outer query + // This transformation is done to add run time check using sq_count_check to + // throw an error if subquery is producing zero row, since with aggregate this + // will produce wrong results (because we further rewrite such queries into JOIN) + if(isCorrScalarAgg) { + // returns single row/column builder.aggregate(builder.groupKey(), - builder.count(false, "c"), - builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", - builder.fields())); - builder.as("ct"); - if(!variablesSet.isEmpty()) { - //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.count(false, "cnt_in")); + + if (!variablesSet.isEmpty()) { builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); } else { builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - offset += 2; + SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, + ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, + SqlFunctionCategory.USER_DEFINED_FUNCTION); + + // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT + // because RelFieldTrimmer ends up getting rid of Project + // since it is not used further up the tree + builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN, + //true here indicates that sq_count_check is for IN/NOT IN subqueries + builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)), + builder.literal(0))); + offset = offset + 1; builder.push(e.rel); - break; } + } - // Now the left join - switch (logic) { - case TRUE: - if (fields.isEmpty()) { - builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { - // avoid adding group by for correlated IN/EXISTS queries - // since this is rewritting into semijoin - break; - } else { - builder.aggregate(builder.groupKey(0)); - } - } else { - if(!variablesSet.isEmpty() - && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { - // avoid adding group by for correlated IN/EXISTS queries - // since this is rewritting into semijoin - break; - } else { - builder.aggregate(builder.groupKey(fields)); - } - } + // First, the cross join + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + // Since EXISTS/NOT EXISTS are not affected by presence of + // null keys we do not need to generate count(*), count(c) + if (e.getKind() == SqlKind.EXISTS) { + logic = RelOptUtil.Logic.TRUE_FALSE; break; - default: - fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId())); - builder.project(fields); - builder.distinct(); - } - builder.as("dt"); - final List<RexNode> conditions = new ArrayList<>(); - for (Pair<RexNode, RexNode> pair - : Pair.zip(e.getOperands(), builder.fields())) { - conditions.add( - builder.equals(pair.left, RexUtil.shift(pair.right, offset))); } - switch (logic) { - case TRUE: - builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true); - return builder.literal(true); + 
builder.aggregate(builder.groupKey(), + builder.count(false, "c"), + builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", + builder.fields())); + builder.as("ct"); + if(!variablesSet.isEmpty()) { + //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); + builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet); + } else { + builder.join(JoinRelType.INNER, builder.literal(true), variablesSet); } - builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); - final List<RexNode> keyIsNulls = new ArrayList<>(); - for (RexNode operand : e.getOperands()) { - if (operand.getType().isNullable()) { - keyIsNulls.add(builder.isNull(operand)); + offset += 2; + builder.push(e.rel); + break; + } + + // Now the left join + switch (logic) { + case TRUE: + if (fields.isEmpty()) { + builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); + if(!variablesSet.isEmpty() + && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } else { + builder.aggregate(builder.groupKey(0)); + } + } else { + if(!variablesSet.isEmpty() + && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } else { + builder.aggregate(builder.groupKey(fields)); } } - final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); - switch (logic) { - case TRUE_FALSE_UNKNOWN: - case UNKNOWN_AS_TRUE: - operands.add( - builder.equals(builder.field("ct", "c"), builder.literal(0)), - builder.literal(false)); - //now that we are using LEFT OUTER JOIN to join inner count, count(*) - // with outer table, we wouldn't be able to tell if count is zero - // for inner table since inner join with correlated values will get rid - // of all values where join cond is not true (i.e where actual inner table - // will produce zero result). To handle this case we need to check both - // count is zero or count is null - operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); - break; - } - operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), - builder.literal(true)); - if (!keyIsNulls.isEmpty()) { - //Calcite creates null literal with Null type here but - // because HIVE doesn't support null type it is appropriately typed boolean - operands.add(builder.or(keyIsNulls), - e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); - // we are creating filter here so should not be returning NULL. 
- // Not sure why Calcite return NULL - } - RexNode b = builder.literal(true); - switch (logic) { - case TRUE_FALSE_UNKNOWN: - b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); - // fall through - case UNKNOWN_AS_TRUE: - operands.add( - builder.call(SqlStdOperatorTable.LESS_THAN, - builder.field("ct", "ck"), builder.field("ct", "c")), - b); - break; + break; + default: + fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId())); + builder.project(fields); + builder.distinct(); + } + builder.as("dt"); + final List<RexNode> conditions = new ArrayList<>(); + for (Pair<RexNode, RexNode> pair + : Pair.zip(e.getOperands(), builder.fields())) { + conditions.add( + builder.equals(pair.left, RexUtil.shift(pair.right, offset))); + } + switch (logic) { + case TRUE: + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true); + return builder.literal(true); + } + builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); + + final List<RexNode> keyIsNulls = new ArrayList<>(); + for (RexNode operand : e.getOperands()) { + if (operand.getType().isNullable()) { + keyIsNulls.add(builder.isNull(operand)); } - operands.add(builder.literal(false)); - return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } + final ImmutableList.Builder<RexNode> operands = ImmutableList.builder(); + switch (logic) { + case TRUE_FALSE_UNKNOWN: + case UNKNOWN_AS_TRUE: + operands.add( + builder.equals(builder.field("ct", "c"), builder.literal(0)), + builder.literal(false)); + //now that we are using LEFT OUTER JOIN to join inner count, count(*) + // with outer table, we wouldn't be able to tell if count is zero + // for inner table since inner join with correlated values will get rid + // of all values where join cond is not true (i.e where actual inner table + // will produce zero result). To handle this case we need to check both + // count is zero or count is null + operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false)); + break; + } + operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), + builder.literal(true)); + if (!keyIsNulls.isEmpty()) { + //Calcite creates null literal with Null type here but + // because HIVE doesn't support null type it is appropriately typed boolean + operands.add(builder.or(keyIsNulls), + e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN)); + // we are creating filter here so should not be returning NULL. 
+ // Not sure why Calcite return NULL + } + RexNode b = builder.literal(true); + switch (logic) { + case TRUE_FALSE_UNKNOWN: + b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN); + // fall through + case UNKNOWN_AS_TRUE: + operands.add( + builder.call(SqlStdOperatorTable.LESS_THAN, + builder.field("ct", "ck"), builder.field("ct", "c")), + b); + break; + } + operands.add(builder.literal(false)); + return builder.call(SqlStdOperatorTable.CASE, operands.build()); + } + protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet, + RelOptUtil.Logic logic, + HiveSubQRemoveRelBuilder builder, int inputCount, int offset, + boolean isCorrScalarAgg) { + switch (e.getKind()) { + case SCALAR_QUERY: + return rewriteScalar(mq, e, variablesSet, builder, offset, inputCount, isCorrScalarAgg); + case SOME: + return rewriteSomeAll(e, variablesSet, builder); + case IN: + case EXISTS: + return rewriteInExists(e, variablesSet, logic, builder, offset, isCorrScalarAgg); default: throw new AssertionError(e.getKind()); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index 1134cf3..89fad04 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -42,6 +42,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlCastFunction; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.fun.SqlQuantifyOperator; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; @@ -67,6 +68,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter.HiveNlsString.Interpretation; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -147,10 +150,10 @@ public class RexNodeConverter { //subqueries will need outer query's row resolver public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType, - ImmutableMap<String, Integer> outerNameToPosMap, + ImmutableMap<String, Integer> outerNameToPosMap, ImmutableMap<String, Integer> nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) { this.cluster = cluster; - this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset)); + this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR, offset)); this.flattenExpr = flattenExpr; this.outerRR = outerRR; this.outerNameToPosMap = outerNameToPosMap; @@ -191,41 +194,108 @@ public class RexNodeConverter { // TODO: handle ExprNodeColumnListDesc } + private RexNode getSomeSubquery(final RelNode subqueryRel, final RexNode lhs, + final SqlQuantifyOperator quantifyOperator) { + if(quantifyOperator == SqlStdOperatorTable.SOME_EQ) { + return 
RexSubQuery.in(subqueryRel, ImmutableList.<RexNode>of(lhs)); + } else if (quantifyOperator == SqlStdOperatorTable.SOME_NE) { + RexSubQuery subQuery = RexSubQuery.in(subqueryRel, ImmutableList.<RexNode>of(lhs)); + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, subQuery); + } else { + return RexSubQuery.some(subqueryRel, ImmutableList.of(lhs), quantifyOperator); + } + } + + private void throwInvalidSubqueryError(final ASTNode comparisonOp) throws SemanticException { + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + // <>ANY and =ALL is not supported + private RexNode convertSubquerySomeAll(final ExprNodeSubQueryDesc subQueryDesc) + throws SemanticException { + assert(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME + || subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL); + + RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); + ASTNode comparisonOp = subQueryDesc.getComparisonOp(); + SqlQuantifyOperator quantifyOperator = null; + + switch (comparisonOp.getType()) { + case HiveParser.EQUAL: + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_EQ; + break; + case HiveParser.LESSTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_LT; + break; + case HiveParser.LESSTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_LE; + break; + case HiveParser.GREATERTHAN: + quantifyOperator = SqlStdOperatorTable.SOME_GT; + break; + case HiveParser.GREATERTHANOREQUALTO: + quantifyOperator = SqlStdOperatorTable.SOME_GE; + break; + case HiveParser.NOTEQUAL: + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME) { + throwInvalidSubqueryError(comparisonOp); + } + quantifyOperator = SqlStdOperatorTable.SOME_NE; + break; + default: + throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + "Invalid operator:" + comparisonOp.toString())); + } + + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + quantifyOperator = SqlStdOperatorTable.some(quantifyOperator.comparisonKind.negateNullSafe()); + } + RexNode someQuery = getSomeSubquery(subQueryDesc.getRexSubQuery(), rexNodeLhs, + quantifyOperator); + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + return cluster.getRexBuilder().makeCall(SqlStdOperatorTable.NOT, someQuery); + } + return someQuery; + } + private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws SemanticException { - if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN ) { - /* - * Check.5.h :: For In and Not In the SubQuery must implicitly or - * explicitly only contain one select item. - */ + if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN) { + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only contain one select item. 
+ */ if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); + "SubQuery can contain only 1 item in Select List.")); } //create RexNode for LHS RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs()); //create RexSubQuery node RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(), - ImmutableList.<RexNode>of(rexNodeLhs) ); + ImmutableList.<RexNode>of(rexNodeLhs)); return rexSubQuery; - } - else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) { + } else if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) { RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery()); return subQueryNode; - } - else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SCALAR){ + } else if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SCALAR){ if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) { throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "SubQuery can contain only 1 item in Select List.")); + "SubQuery can contain only 1 item in Select List.")); } //create RexSubQuery node RexNode rexSubQuery = RexSubQuery.scalar(subQueryDesc.getRexSubQuery()); return rexSubQuery; - } - - else { + } else if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.SOME + || subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.ALL) { + return convertSubquerySomeAll(subQueryDesc); + } else { throw new CalciteSubquerySemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( - "Invalid subquery: " + subQueryDesc.getType())); + "Invalid subquery: " + subQueryDesc.getType())); } } @@ -256,11 +326,11 @@ public class RexNodeConverter { boolean isNumeric = (tgtUdf instanceof GenericUDFBaseBinary && func.getTypeInfo().getCategory() == Category.PRIMITIVE && (PrimitiveGrouping.NUMERIC_GROUP == PrimitiveObjectInspectorUtils.getPrimitiveGrouping( - ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory()))); + ((PrimitiveTypeInfo) func.getTypeInfo()).getPrimitiveCategory()))); boolean isCompare = !isNumeric && tgtUdf instanceof GenericUDFBaseCompare; boolean isWhenCase = tgtUdf instanceof GenericUDFWhen || tgtUdf instanceof GenericUDFCase; boolean isTransformableTimeStamp = func.getGenericUDF() instanceof GenericUDFUnixTimeStamp && - func.getChildren().size() != 0; + func.getChildren().size() != 0; boolean isBetween = !isNumeric && tgtUdf instanceof GenericUDFBetween; boolean isIN = !isNumeric && tgtUdf instanceof GenericUDFIn; boolean isAllPrimitive = true; @@ -272,7 +342,7 @@ public class RexNodeConverter { // TODO: checking 2 children is useless, compare already does that. 
} else if (isCompare && (func.getChildren().size() == 2)) { tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) - .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); + .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); } else if (isWhenCase) { // If it is a CASE or WHEN, we need to check that children do not contain stateful functions // as they are not allowed @@ -287,15 +357,15 @@ public class RexNodeConverter { // We skip first child as is not involved (is the revert boolean) // The target type needs to account for all 3 operands tgtDT = FunctionRegistry.getCommonClassForComparison( - func.getChildren().get(1).getTypeInfo(), - FunctionRegistry.getCommonClassForComparison( - func.getChildren().get(2).getTypeInfo(), - func.getChildren().get(3).getTypeInfo())); + func.getChildren().get(1).getTypeInfo(), + FunctionRegistry.getCommonClassForComparison( + func.getChildren().get(2).getTypeInfo(), + func.getChildren().get(3).getTypeInfo())); } else if (isIN) { // We're only considering the first element of the IN list for the type assert func.getChildren().size() > 1; tgtDT = FunctionRegistry.getCommonClassForComparison(func.getChildren().get(0) - .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); + .getTypeInfo(), func.getChildren().get(1).getTypeInfo()); } for (int i =0; i < func.getChildren().size(); ++i) { @@ -361,7 +431,7 @@ public class RexNodeConverter { calciteOp = SqlStdOperatorTable.OR; } } else if (calciteOp.getKind() == SqlKind.COALESCE && - childRexNodeLst.size() > 1 ) { + childRexNodeLst.size() > 1) { // Rewrite COALESCE as a CASE // This allows to be further reduced to OR, if possible calciteOp = SqlStdOperatorTable.CASE; @@ -468,13 +538,13 @@ public class RexNodeConverter { if (FunctionRegistry.getNormalizedFunctionName(func.getFuncText()).equals("case")) { RexNode firstPred = childRexNodeLst.get(0); int length = childRexNodeLst.size() % 2 == 1 ? 
- childRexNodeLst.size() : childRexNodeLst.size() - 1; + childRexNodeLst.size() : childRexNodeLst.size() - 1; for (int i = 1; i < length; i++) { if (i % 2 == 1) { // We rewrite it newChildRexNodeLst.add( - cluster.getRexBuilder().makeCall( - SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); + cluster.getRexBuilder().makeCall( + SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); } else { newChildRexNodeLst.add(childRexNodeLst.get(i)); } @@ -489,7 +559,7 @@ public class RexNodeConverter { // Calcite always needs the else clause to be defined explicitly if (newChildRexNodeLst.size() % 2 == 0) { newChildRexNodeLst.add(cluster.getRexBuilder().makeNullLiteral( - newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType())); + newChildRexNodeLst.get(newChildRexNodeLst.size()-1).getType())); } return newChildRexNodeLst; } @@ -628,16 +698,16 @@ public class RexNodeConverter { } private List<RexNode> rewriteCoalesceChildren( - ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst) { + ExprNodeGenericFuncDesc func, List<RexNode> childRexNodeLst) { final List<RexNode> convertedChildList = Lists.newArrayList(); assert childRexNodeLst.size() > 0; final RexBuilder rexBuilder = cluster.getRexBuilder(); int i=0; - for (; i < childRexNodeLst.size()-1; ++i ) { + for (; i < childRexNodeLst.size()-1; ++i) { // WHEN child not null THEN child final RexNode child = childRexNodeLst.get(i); RexNode childCond = rexBuilder.makeCall( - SqlStdOperatorTable.IS_NOT_NULL, child); + SqlStdOperatorTable.IS_NOT_NULL, child); convertedChildList.add(childCond); convertedChildList.add(child); } @@ -655,7 +725,7 @@ public class RexNodeConverter { return true; } if (node.getChildren() != null && !node.getChildren().isEmpty() && - checkForStatefulFunctions(node.getChildren())) { + checkForStatefulFunctions(node.getChildren())) { return true; } } @@ -809,7 +879,7 @@ public class RexNodeConverter { break; case FLOAT: calciteLiteral = rexBuilder.makeApproxLiteral( - new BigDecimal(Float.toString((Float)value)), calciteDataType); + new BigDecimal(Float.toString((Float)value)), calciteDataType); break; case DOUBLE: // TODO: The best solution is to support NaN in expression reduction. 
@@ -817,7 +887,7 @@ public class RexNodeConverter { throw new CalciteSemanticException("NaN", UnsupportedFeature.Invalid_decimal); } calciteLiteral = rexBuilder.makeApproxLiteral( - new BigDecimal(Double.toString((Double)value)), calciteDataType); + new BigDecimal(Double.toString((Double)value)), calciteDataType); break; case CHAR: if (value instanceof HiveChar) { @@ -850,11 +920,11 @@ public class RexNodeConverter { // Must call makeLiteral, not makeTimestampLiteral // to have the RexBuilder.roundTime logic kick in calciteLiteral = rexBuilder.makeLiteral( - tsString, - rexBuilder.getTypeFactory().createSqlType( - SqlTypeName.TIMESTAMP, - rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), - false); + tsString, + rexBuilder.getTypeFactory().createSqlType( + SqlTypeName.TIMESTAMP, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP)), + false); break; case TIMESTAMPLOCALTZ: final TimestampString tsLocalTZString; @@ -867,28 +937,28 @@ public class RexNodeConverter { .withNanos(i.getNano()); } calciteLiteral = rexBuilder.makeTimestampWithLocalTimeZoneLiteral( - tsLocalTZString, - rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); + tsLocalTZString, + rexBuilder.getTypeFactory().getTypeSystem().getDefaultPrecision(SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE)); break; case INTERVAL_YEAR_MONTH: // Calcite year-month literal value is months as BigDecimal BigDecimal totalMonths = BigDecimal.valueOf(((HiveIntervalYearMonth) value).getTotalMonths()); calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths, - new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1,1))); + new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1))); break; case INTERVAL_DAY_TIME: // Calcite day-time interval is millis value as BigDecimal // Seconds converted to millis BigDecimal secsValueBd = BigDecimal - .valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000); + .valueOf(((HiveIntervalDayTime) value).getTotalSeconds() * 1000); // Nanos converted to millis - BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) - value).getNanos(), 6); - calciteLiteral = - rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), - new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new - SqlParserPos(1, 1))); - break; + BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveIntervalDayTime) + value).getNanos(), 6); + calciteLiteral = + rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), + new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new + SqlParserPos(1, 1))); + break; case VOID: calciteLiteral = rexBuilder.makeLiteral(null, calciteDataType, true); break; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f5c5a10..4011d99 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3192,48 +3192,6 @@ public class CalcitePlanner extends SemanticAnalyzer { return filterRel; } - private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel, - boolean forHavingClause, Set<ASTNode> corrScalarQueries) - throws SemanticException { - List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); - - ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond); - List<ASTNode> subQueries = 
SubQueryUtils.findSubQueries(clonedSearchCond); - for(int i=0; i<subQueriesInOriginalTree.size(); i++){ - //we do not care about the transformation or rewriting of AST - // which following statement does - // we only care about the restriction checks they perform. - // We plan to get rid of these restrictions later - int sqIdx = qb.incrNumSubQueryPredicates(); - ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); - - ASTNode subQueryAST = subQueries.get(i); - //SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); - ASTNode outerQueryExpr = (ASTNode) subQueryAST.getChild(2); - - if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR) { - - throw new CalciteSubquerySemanticException( - ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - outerQueryExpr, "IN/NOT IN subqueries are not allowed in LHS")); - } - - - QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, - originalSubQueryAST, ctx); - - RowResolver inputRR = relToHiveRR.get(srcRel); - - String havingInputAlias = null; - - boolean [] subqueryConfig = {false, false}; - subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, - havingInputAlias, subqueryConfig); - if(subqueryConfig[0]) { - corrScalarQueries.add(originalSubQueryAST); - } - } - } private RelNode genLateralViewPlans(ASTNode lateralView, Map<String, RelNode> aliasToRel) throws SemanticException { @@ -3392,8 +3350,6 @@ public class CalcitePlanner extends SemanticAnalyzer { Set<ASTNode> corrScalarQueriesWithAgg = new HashSet<ASTNode>(); boolean isSubQuery = false; try { - //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg); Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); stack.push(node); @@ -3402,13 +3358,11 @@ public class CalcitePlanner extends SemanticAnalyzer { switch (next.getType()) { case HiveParser.TOK_SUBQUERY_EXPR: - /* - * Restriction 2.h Subquery isnot allowed in LHS - */ - if (next.getChildren().size() == 3 && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR) { - throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION - .getMsg(next.getChild(2), "SubQuery in LHS expressions are not supported.")); - } + + //disallow subqueries which HIVE doesn't currently support + SubQueryUtils.subqueryRestrictionCheck(qb, next, srcRel, forHavingClause, + corrScalarQueriesWithAgg, ctx, this.relToHiveRR); + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); qbSQ.setInsideView(qb.isInsideView()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 608befc..3ca45fe 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -45,6 +45,7 @@ KW_TRUE : 'TRUE'; KW_FALSE : 'FALSE'; KW_UNKNOWN : 'UNKNOWN'; KW_ALL : 'ALL'; +KW_SOME : 'SOME'; KW_NONE: 'NONE'; KW_AND : 'AND'; KW_OR : 'OR'; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index f22511a..58fe0cd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -589,6 +589,23 @@ precedenceSimilarExpressionAtom[CommonTree t] | KW_LIKE KW_ALL (expr=expressionsInParenthesis[false, false]) -> ^(TOK_FUNCTION 
Identifier["likeall"] {$t} {$expr.tree}) + | + precedenceSimilarExpressionQuantifierPredicate[$t] + ; + +precedenceSimilarExpressionQuantifierPredicate[CommonTree t] + : + dropPartitionOperator quantifierType subQueryExpression + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP quantifierType dropPartitionOperator ) subQueryExpression {$t}) + ; + +quantifierType + : + KW_ANY -> KW_SOME + | + KW_SOME -> KW_SOME + | + KW_ALL -> KW_ALL ; precedenceSimilarExpressionIn[CommonTree t] diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index bf1af29..51448a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -45,6 +45,8 @@ public class QBSubQuery implements ISubQueryJoinInfo { NOT_EXISTS, IN, NOT_IN, + SOME, + ALL, SCALAR; public static SubQueryType get(ASTNode opNode) throws SemanticException { @@ -71,6 +73,10 @@ public class QBSubQuery implements ISubQueryJoinInfo { return IN; case HiveParser.TOK_SUBQUERY_OP_NOTIN: return NOT_IN; + case HiveParser.KW_SOME: + return SOME; + case HiveParser.KW_ALL: + return ALL; default: throw new SemanticException(SemanticAnalyzer.generateErrorMessage(opNode, "Operator not supported in SubQuery use.")); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index 099157f..b3df1ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -22,10 +22,13 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Deque; import java.util.List; +import java.util.LinkedHashMap; import java.util.Map; +import java.util.Set; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.apache.calcite.rel.RelNode; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -101,6 +104,67 @@ public class SubQueryUtils { return node; } + static public void subqueryRestrictionCheck(QB qb, ASTNode subqueryExprNode, RelNode srcRel, + boolean forHavingClause, Set<ASTNode> corrScalarQueries, Context ctx, + LinkedHashMap<RelNode, RowResolver> relToHiveRR) + throws SemanticException { + + assert(subqueryExprNode.getType() == HiveParser.TOK_SUBQUERY_EXPR); + + /* + * Restriction : Subquery is not allowed in LHS + */ + if (subqueryExprNode.getChildren().size() == 3 + && subqueryExprNode.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION + .getMsg(subqueryExprNode.getChild(2), "SubQuery on left hand side is not supported.")); + } + + // avoid subquery restrictions for SOME/ALL for now + if(subqueryExprNode.getChild(0).getChildCount() > 1 + && (subqueryExprNode.getChild(0).getChild(1).getType() == HiveParser.KW_SOME + || subqueryExprNode.getChild(0).getChild(1).getType() == HiveParser.KW_ALL)) { + return; + } + + // TOK_SUBQUERY_EXPR + // 0. TOK_SUBQUERY_OP + // 0. TYPE: IN/SOME/EXISTS + // 1. Comparion op: >, < etc + // 1. TOK_QUERY: Subquery + // 2. LHS expr + //final ASTNode clonedSubExprAST = (ASTNode) SubQueryUtils.adaptor.dupTree(subqueryExprNode); + //we do not care about the transformation or rewriting of AST + // which following statement does + // we only care about the restriction checks they perform. 
+ // We plan to get rid of these restrictions later + int sqIdx = qb.incrNumSubQueryPredicates(); + ASTNode outerQueryExpr = (ASTNode) subqueryExprNode.getChild(2); + + if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR) { + throw new CalciteSubquerySemanticException( + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + outerQueryExpr, "IN/EXISTS/SOME/ALL subqueries are not allowed in LHS")); + } + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subqueryExprNode, + subqueryExprNode, ctx); + + RowResolver inputRR = relToHiveRR.get(srcRel); + + String havingInputAlias = null; + + boolean [] subqueryConfig = {false, false}; + subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, + havingInputAlias, subqueryConfig); + + if(subqueryConfig[0]) { + corrScalarQueries.add(subqueryExprNode); + } + //} + } + + /* * Check that SubQuery is a top level conjuncts. @@ -215,14 +279,14 @@ public class SubQueryUtils { ASTNode next = stack.pop(); switch(next.getType()) { - case HiveParser.TOK_SUBQUERY_EXPR: - subQueries.add(next); - break; - default: - int childCount = next.getChildCount(); - for(int i = childCount - 1; i >= 0; i--) { - stack.push((ASTNode) next.getChild(i)); - } + case HiveParser.TOK_SUBQUERY_EXPR: + subQueries.add(next); + break; + default: + int childCount = next.getChildCount(); + for(int i = childCount - 1; i >= 0; i--) { + stack.push((ASTNode) next.getChild(i)); + } } } } @@ -236,19 +300,10 @@ public class SubQueryUtils { ASTNode sq = (ASTNode) sqAST.getChild(1); ASTNode outerQueryExpr = (ASTNode) sqAST.getChild(2); - /* - * Restriction.8.m :: We allow only 1 SubQuery expression per Query. - */ - if (outerQueryExpr != null && outerQueryExpr.getType() == HiveParser.TOK_SUBQUERY_EXPR ) { - - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - originalSQAST.getChild(1), "Only 1 SubQuery expression is supported.")); - } - - return new QBSubQuery(outerQueryId, sqIdx, sq, outerQueryExpr, - buildSQOperator(sqOp), - originalSQAST, - ctx); + return new QBSubQuery(outerQueryId, sqIdx, sq, outerQueryExpr, + buildSQOperator(sqOp), + originalSQAST, + ctx); } static SubQueryTypeDef buildSQOperator(ASTNode astSQOp) throws SemanticException { @@ -280,8 +335,8 @@ public class SubQueryUtils { .getText()); GenericUDAFResolver udafResolver = FunctionRegistry.getGenericUDAFResolver(functionName); if (udafResolver != null) { - // we need to know if it is COUNT since this is specialized for IN/NOT IN - // corr subqueries. + // we need to know if it is COUNT since this is specialized for IN/NOT IN + // corr subqueries. if(udafResolver instanceof GenericUDAFCount) { return 2; } @@ -315,7 +370,7 @@ public class SubQueryUtils { .getText().toLowerCase()); alias = (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(1).getText().toLowerCase()) : - alias; + alias; aliases.add(alias); } else { ASTNode left = (ASTNode) joinNode.getChild(0); @@ -663,13 +718,13 @@ public class SubQueryUtils { static void checkForSubqueries(ASTNode node, boolean disallow) throws SemanticException { // allow NOT but throw an error for rest if(node.getType() == HiveParser.TOK_SUBQUERY_EXPR - && disallow) { + && disallow) { throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - "Invalid subquery. Subquery in UDAF is not allowed.")); + "Invalid subquery. 
Subquery in UDAF is not allowed.")); } if (node.getType() == HiveParser.TOK_FUNCTION - || node.getType() == HiveParser.TOK_FUNCTIONDI - || node.getType() == HiveParser.TOK_FUNCTIONSTAR) { + || node.getType() == HiveParser.TOK_FUNCTIONDI + || node.getType() == HiveParser.TOK_FUNCTIONSTAR) { if (node.getChild(0).getType() == HiveParser.Identifier) { String functionName = SemanticAnalyzer.unescapeIdentifier(node.getChild(0).getText()); GenericUDAFResolver udafResolver = FunctionRegistry.getGenericUDAFResolver(functionName); @@ -679,7 +734,7 @@ public class SubQueryUtils { } } for(int i=0; i<node.getChildCount(); i++) { - checkForSubqueries((ASTNode)node.getChild(i), disallow); + checkForSubqueries((ASTNode)node.getChild(i), disallow); } } /* @@ -687,14 +742,14 @@ public class SubQueryUtils { * it is top level expression, else it throws an error */ public static void checkForTopLevelSubqueries(ASTNode selExprList) throws SemanticException{ - // should be either SELECT or SELECT DISTINCT + // should be either SELECT or SELECT DISTINCT assert(selExprList.getType() == HiveParser.TOK_SELECT - || selExprList.getType() == HiveParser.TOK_SELECTDI); + || selExprList.getType() == HiveParser.TOK_SELECTDI); for(int i=0; i<selExprList.getChildCount(); i++) { ASTNode selExpr = (ASTNode)selExprList.getChild(i); // could get either query hint or select expr assert(selExpr.getType() == HiveParser.TOK_SELEXPR - || selExpr.getType() == HiveParser.QUERY_HINT); + || selExpr.getType() == HiveParser.QUERY_HINT); if(selExpr.getType() == HiveParser.QUERY_HINT) { // skip query hints @@ -702,10 +757,10 @@ public class SubQueryUtils { } if(selExpr.getChildCount() == 1 - && selExpr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + && selExpr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_EXPR) { if(selExprList.getType() == HiveParser.TOK_SELECTDI) { throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - "Invalid subquery. Subquery with DISTINCT clause is not supported!")); + "Invalid subquery. 
Subquery with DISTINCT clause is not supported!")); } continue; //we are good since subquery is top level expression diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index dff108a..7e804e3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -1754,6 +1754,8 @@ public class TypeCheckProcFactory { || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN); boolean isEXISTS = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS); + boolean isSOME = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_SOME); + boolean isALL = (subqueryOp.getChildCount() > 0) && (subqueryOp.getChild(0).getType() == HiveParser.KW_ALL); boolean isScalar = subqueryOp.getChildCount() == 0 ; // subqueryToRelNode might be null if subquery expression anywhere other than @@ -1791,6 +1793,16 @@ public class TypeCheckProcFactory { TypeInfo subExprType = TypeConverter.convert(subqueryRel.getRowType().getFieldList().get(0).getType()); return new ExprNodeSubQueryDesc(subExprType, subqueryRel, ExprNodeSubQueryDesc.SubqueryType.SCALAR); + } else if(isSOME) { + assert(nodeOutputs[2] != null); + ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, + ExprNodeSubQueryDesc.SubqueryType.SOME, lhs, (ASTNode)subqueryOp.getChild(1) ); + } else if(isALL) { + assert(nodeOutputs[2] != null); + ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2]; + return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryRel, + ExprNodeSubQueryDesc.SubqueryType.ALL, lhs, (ASTNode)subqueryOp.getChild(1)); } /* diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java index dbb1cd7..cd80da8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.calcite.rel.RelNode; @@ -35,7 +36,9 @@ public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable { public static enum SubqueryType{ IN, EXISTS, - SCALAR + SCALAR, + SOME, + ALL }; /** @@ -44,12 +47,14 @@ public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable { private RelNode rexSubQuery; private ExprNodeDesc subQueryLhs; private SubqueryType type; + private ASTNode comparisonOp; public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type) { super(typeInfo); this.rexSubQuery = subQuery; this.subQueryLhs = null; this.type = type; + this.comparisonOp = null; } public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type, ExprNodeDesc lhs) { @@ -57,9 +62,19 @@ public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable { this.rexSubQuery = subQuery; this.subQueryLhs = lhs; this.type = type; - + this.comparisonOp = null; + } + public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, + SubqueryType type, ExprNodeDesc lhs, ASTNode comparisonOp) { + 
super(typeInfo); + this.rexSubQuery = subQuery; + this.subQueryLhs = lhs; + this.type = type; + this.comparisonOp = comparisonOp; } + public ASTNode getComparisonOp() {return this.comparisonOp; } + public SubqueryType getType() { return type; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index 57579ce..0d39999 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -89,7 +89,7 @@ public class TestSQL11ReservedKeyWordsNegative { { "RLIKE" }, { "ROLLUP" }, { "ROW" }, { "ROWS" }, { "SET" }, { "SMALLINT" }, { "TABLE" }, { "TIME" }, { "TIMESTAMP" }, { "TO" }, { "TRIGGER" }, { "TRUE" }, { "TRUNCATE" }, { "UNION" }, { "UNIQUE" }, { "UPDATE" }, { "USER" }, { "USING" }, - { "VALUES" }, { "WITH" } }); + { "VALUES" }, { "WITH" }, { "SOME" }, { "ANY" }, { "ALL" } }); } private String keyword; diff --git a/ql/src/test/queries/clientnegative/subquery_all_equal.q b/ql/src/test/queries/clientnegative/subquery_all_equal.q new file mode 100644 index 0000000..7e3a360 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_all_equal.q @@ -0,0 +1,3 @@ +--! qt:dataset:part +-- =ALL is not allowed +explain select * from part where p_type = ALL(select max(p_type) from part); \ No newline at end of file diff --git a/ql/src/test/queries/clientnegative/subquery_any_notequal.q b/ql/src/test/queries/clientnegative/subquery_any_notequal.q new file mode 100644 index 0000000..26d96ac --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_any_notequal.q @@ -0,0 +1,3 @@ +--! qt:dataset:part +-- <>ANY is not allowed +explain select * from part where p_type <> ANY(select max(p_type) from part); \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/subquery_ALL.q b/ql/src/test/queries/clientpositive/subquery_ALL.q new file mode 100644 index 0000000..f794f37 --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_ALL.q @@ -0,0 +1,75 @@ +--! 
qt:dataset:part +-- SORT_QUERY_RESULTS + +--empty table +create table tempty(i int, j int); + +CREATE TABLE part_null_n0 as select * from part; +insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); + +-- test all six comparison operators +--explain cbo select count(*) from part where p_partkey = ALL (select p_partkey from part); +--select count(*) from part where p_partkey = ALL (select p_partkey from part); + +explain select count(*) from part where p_partkey <> ALL (select p_partkey from part); +select count(*) from part where p_partkey <> ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part); +select count(*) from part where p_partkey > ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part); +select count(*) from part where p_partkey < ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part); +select count(*) from part where p_partkey >= ALL (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey <= ALL (select p_partkey from part); +select count(*) from part where p_partkey <= ALL (select p_partkey from part); + +-- ALL with aggregate in subquery +explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey); +select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey); + +select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey); + +--empty row produces true with ALL +select count(*) from part where p_partkey <> ALL(select i from tempty); + +-- true + null, should produce zero results +select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true); + +-- false + null -> false, therefore should produce results +select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false); + +-- all null -> null +select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null; + +-- false, should produce zero result +select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0); + +-- ALL in having +explain cbo select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey); +select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey); + +-- multiple +explain select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size); +select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size); + +--nested +explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) ; +select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) ; + +-- subquery in SELECT + +select p_partkey, (p_partkey >= ALL (select p_partkey from part_null_n0)) from part_null_n0; + +select p_partkey, (p_partkey > ALL (select null from part_null_n0)) from part_null_n0; + +select p_partkey, (p_partkey > ALL (select i from tempty)) from part_null_n0; + +DROP TABLE 
part_null_n0; +DROP TABLE tempty; diff --git a/ql/src/test/queries/clientpositive/subquery_ANY.q b/ql/src/test/queries/clientpositive/subquery_ANY.q new file mode 100644 index 0000000..1c36edb --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_ANY.q @@ -0,0 +1,80 @@ +--! qt:dataset:part +-- SORT_QUERY_RESULTS + +--empty table +create table tempty(i int, j int); + +CREATE TABLE part_null_n0 as select * from part; +insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL); + +-- test all six comparison operators +explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part); +select count(*) from part where p_partkey = ANY (select p_partkey from part); + +--explain cbo select count(*) from part where p_partkey <> ANY (select p_partkey from part); +--select count(*) from part where p_partkey <> ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey > ANY (select p_partkey from part); +select count(*) from part where p_partkey > ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part); +select count(*) from part where p_partkey < ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part); +select count(*) from part where p_partkey >= ANY (select p_partkey from part); + +explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part); +select count(*) from part where p_partkey <= ANY (select p_partkey from part); + +-- SOME is same as ANY +explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) from part); +select count(*) from part where p_partkey = SOME(select min(p_partkey) from part); + +-- ANY with aggregate in subquery +explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey); +select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey); + +select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey); + +--empty row produces false with ANY +select count(*) from part where p_partkey = ANY(select i from tempty); + +-- true + null, should produce results +select count(*) from part where p_partkey = ANY (select p_partkey from part_null_n0); + +-- false + null -> null +select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null; + +-- all null -> null +select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null; + +-- false, should produce zero result +select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0); + +-- ANY in having +explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey); +select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey); + +-- multiple +explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size); +select count(*) from part where p_partkey >= ANY (select p_partkey from part) + AND p_size = ANY (select p_size from part group by p_size); + +--nested +explain cbo select count(*) from part where p_partkey + >= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) ; +select count(*) from part where p_partkey + >= ANY (select p_partkey 
from part where p_size >= ANY(select p_size from part_null_n0 group by p_size)) ; + +-- subquery in SELECT +select p_partkey, (p_partkey > ANY (select p_partkey from part)) from part; + +select p_partkey, (p_partkey > ANY (select p_partkey from part_null_n0)) from part_null_n0; + +select p_partkey, (p_partkey > ANY (select null from part_null_n0)) from part_null_n0; + +select p_partkey, (p_partkey > ANY (select i from tempty)) from part_null_n0; + +DROP TABLE part_null_n0; +DROP TABLE tempty; diff --git a/ql/src/test/results/clientnegative/invalid_select_expression.q.out b/ql/src/test/results/clientnegative/invalid_select_expression.q.out index 63a682a..8781ffc 100644 --- a/ql/src/test/results/clientnegative/invalid_select_expression.q.out +++ b/ql/src/test/results/clientnegative/invalid_select_expression.q.out @@ -1 +1 @@ -FAILED: ParseException line 1:32 cannot recognize input near '.' 'foo' '<EOF>' in expression specification +FAILED: ParseException line 1:32 cannot recognize input near '>' '.' 'foo' in expression specification diff --git a/ql/src/test/results/clientnegative/subquery_all_equal.q.out b/ql/src/test/results/clientnegative/subquery_all_equal.q.out new file mode 100644 index 0000000..47a0ad3 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_all_equal.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:= diff --git a/ql/src/test/results/clientnegative/subquery_any_notequal.q.out b/ql/src/test/results/clientnegative/subquery_any_notequal.q.out new file mode 100644 index 0000000..500a8c2 --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_any_notequal.q.out @@ -0,0 +1 @@ +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Invalid SubQuery expression Invalid operator:<> diff --git a/ql/src/test/results/clientnegative/subquery_in_lhs.q.out b/ql/src/test/results/clientnegative/subquery_in_lhs.q.out index 8cfdf55..ec9f00a 100644 --- a/ql/src/test/results/clientnegative/subquery_in_lhs.q.out +++ b/ql/src/test/results/clientnegative/subquery_in_lhs.q.out @@ -1 +1 @@ -FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 0:-1 Unsupported SubQuery Expression 'p_size': IN/NOT IN subqueries are not allowed in LHS +FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 0:-1 Unsupported SubQuery Expression 'p_size': SubQuery on left hand side is not supported. diff --git a/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out index 9b957e6..07b93f3 100644 --- a/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out +++ b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10249]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 5:7 Unsupported SubQuery Expression 'key': IN/NOT IN subqueries are not allowed in LHS +FAILED: SemanticException [Error 10249]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 5:7 Unsupported SubQuery Expression 'key': SubQuery on left hand side is not supported. 
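Note on the semantics exercised by the qfiles above (an illustrative sketch only, not part of the committed patch): a quantified comparison with = ANY behaves like IN and one with <> ALL behaves like NOT IN, an ALL predicate over an empty subquery result is vacuously true while an ANY predicate over an empty result is false, and NULLs in the subquery column propagate under three-valued logic, as the test comments indicate. The queries below reuse the part and tempty tables created in the qfiles and restate results that appear in the .q.out files that follow:

-- Illustrative sketch only; reuses tables from the qfiles above, not part of this patch.
-- = ANY is the existential quantifier (same result as IN): every part key matches itself.
select count(*) from part where p_partkey = ANY (select p_partkey from part);   -- 26
-- <> ALL is the universal quantifier (same result as NOT IN): no key differs from every key.
select count(*) from part where p_partkey <> ALL (select p_partkey from part);  -- 0
-- ALL over an empty subquery result is true; ANY over an empty result is false.
select count(*) from part where p_partkey <> ALL (select i from tempty);        -- 26
select count(*) from part where p_partkey = ANY (select i from tempty);         -- 0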
diff --git a/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out new file mode 100644 index 0000000..f0a9db0 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_ALL.q.out @@ -0,0 +1,896 @@ +PREHOOK: query: create table tempty(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null_n0 as select * from part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: CREATE TABLE part_null_n0 as select * from part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_retailprice SIMPLE [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_comment EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_container EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_mfgr EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_name EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION [] +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain select count(*) from part where p_partkey <> ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from part where p_partkey <> ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
+STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: p_partkey + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(p_partkey) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 39 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 39 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 39 Data size: 836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 39 Data size: 836 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col2 >= _col1) or (_col1 = 0L) or _col4 is not null or _col0 is null) and (_col0 is not null or (_col1 = 0L) or _col4 is not null) and (_col4 is null or (_col1 = 0L))) (type: boolean) + Statistics: Num rows: 39 Data size: 836 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 39 Data size: 836 Basic stats: COMPLETE Column stats: COMPLETE + Group By 
Operator + aggregations: count() + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <> ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <> ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey > ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<=($0, $1)), $2), OR($3, $2, <=($0, $1)), OR(>($0, $1), $2, <=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], 
<==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey < ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>=($0, $1)), $2), OR($3, $2, >=($0, $1)), OR(<($0, $1), $2, >=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey < ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey < ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($0, $1)), $2), OR($3, $2, <($0, $1)), OR(>=($0, $1), $2, <($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey <= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey <= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>($0, $1)), $2), OR($3, $2, >($0, $1)), OR(<=($0, $1), $2, >($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <= ALL (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <= ALL (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(>=($0, $1)), $2), OR($3, $2, >=($0, $1)), OR(<($0, $1), $2, >=($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[max($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_size < ALL (select max(p_size) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here 
#### +POSTHOOK: query: select count(*) from part where p_size < ALL (select max(null) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey <> ALL(select i from tempty) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey <> ALL(select i from tempty) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0 UNION select null from part group by true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where ((p_partkey <> ALL (select p_partkey from part_null_n0)) == false) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey > ALL (select max(p_partkey) from part_null_n0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +0 +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain cbo select count(*) from part having count(*) > ALL (select 
count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveProject($f0=[$4]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<=($4, $0)), $1), OR($2, $1, <=($4, $0)), OR(>($4, $0), $1, <=($4, $0), $3))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($1)], c=[COUNT()], d=[COUNT($1)]) + HiveProject(p_partkey=[$0], $f1=[$1]) + HiveAggregate(group=[{0}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{}], agg#0=[count()]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part having count(*) > ALL (select count(*) from part group by p_partkey) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: explain select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_size <> ALL (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Reducer 11 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: part + Statistics: Num 
rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: p_partkey + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(p_partkey), count(), count(p_partkey) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_size (type: int) + outputColumnNames: p_size + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 11 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 42 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 42 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: boolean) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col7, _col8 + residual filter predicates: {((_col0 < _col4) is not true or (_col5 = 0L))} {((_col5 <= _col6) or (_col5 = 0L) or (_col0 < _col4))} {((_col0 >= _col4) or (_col5 = 0L) or (_col0 < _col4) or (_col5 > _col6))} + 
Statistics: Num rows: 42 Data size: 1916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col7 (type: bigint), _col8 (type: bigint), _col3 (type: boolean) + outputColumnNames: _col1, _col5, _col6, _col8 + Statistics: Num rows: 42 Data size: 1916 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((_col6 >= _col5) or (_col5 = 0L) or _col8 is not null or _col1 is null) and (_col1 is not null or (_col5 = 0L) or _col8 is not null) and (_col8 is null or (_col5 = 0L))) (type: boolean) + Statistics: Num rows: 42 Data size: 1916 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 42 Data size: 1916 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(), count(_col0) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + 
Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey >= ALL (select p_partkey from part) + AND p_partkey <> ALL (select p_size from part group by p_size) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +1 +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($0, $1)), $2), OR($3, $2, <($0, $1)), OR(>=($0, $1), $2, <($0, $1), $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveJoin(condition=[AND(OR(IS NOT TRUE(<($1, $2)), $3), OR($4, $3, <($1, $2)), OR(>=($1, $2), $3, <($1, $2), $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveProject(m=[$0], ==[=($1, 0)], <==[<=($1, $2)], >=[>($1, $2)]) + HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveTableScan(table=[[default, part_null_n0]], table:alias=[part_null_n0]) + +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +PREHOOK: 
type: QUERY +PREHOOK: Input: default@part +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from part where p_partkey + >= ALL (select p_partkey from part where p_size >= ALL(select p_size from part_null_n0 group by p_size)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +26 +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select p_partkey, (p_partkey >= ALL (select p_partkey from part_null_n0)) from part_null_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey >= ALL (select p_partkey from part_null_n0)) from part_null_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +105685 false +110592 false +112398 false +121152 false +121152 false +132666 false +144293 false +146985 false +15103 false +155733 false +17273 false +17927 false +191709 false +192697 false +195606 NULL +33357 false +40982 false +42669 false +45261 false +48427 false +49671 false +65667 false +78486 false +85768 false +86428 false +90681 false +NULL NULL +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select p_partkey, (p_partkey > ALL (select null from part_null_n0)) from part_null_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey > ALL (select null from part_null_n0)) from part_null_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_null_n0 +#### A masked pattern was here #### +105685 NULL +110592 NULL +112398 NULL +121152 NULL +121152 NULL +132666 NULL +144293 NULL +146985 NULL +15103 NULL +155733 NULL +17273 NULL +17927 NULL +191709 NULL +192697 NULL +195606 NULL +33357 NULL +40982 NULL +42669 NULL +45261 NULL +48427 NULL +49671 NULL +65667 NULL +78486 NULL +85768 NULL +86428 NULL +90681 NULL +NULL NULL +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select p_partkey, (p_partkey > ALL (select i from tempty)) from part_null_n0 +PREHOOK: type: QUERY +PREHOOK: Input: default@part_null_n0 +PREHOOK: Input: default@tempty +#### A masked pattern was here #### +POSTHOOK: query: select p_partkey, (p_partkey > ALL (select i from tempty)) from part_null_n0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_null_n0 +POSTHOOK: Input: default@tempty +#### A masked pattern was here #### +105685 true +110592 true +112398 true +121152 true +121152 true +132666 true +144293 true +146985 true +15103 true +155733 true +17273 true +17927 true +191709 true +192697 true +195606 true +33357 true +40982 true +42669 true +45261 true +48427 true +49671 true +65667 true +78486 true +85768 true +86428 true +90681 true +NULL true +PREHOOK: query: DROP TABLE part_null_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_null_n0 +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: DROP TABLE part_null_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_null_n0 +POSTHOOK: Output: default@part_null_n0 +PREHOOK: query: DROP TABLE tempty +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tempty +PREHOOK: Output: default@tempty +POSTHOOK: query: DROP TABLE tempty +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tempty 
+POSTHOOK: Output: default@tempty diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out new file mode 100644 index 0000000..cb0ec4b --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -0,0 +1,586 @@ +PREHOOK: query: create table tempty(i int, j int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(i int, j int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null_n0 as select * from part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@part +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: CREATE TABLE part_null_n0 as select * from part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@part +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_retailprice SIMPLE [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ] +POSTHOOK: Lineage: part_null_n0.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ] +PREHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@part_null_n0 +POSTHOOK: query: insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@part_null_n0 +POSTHOOK: Lineage: part_null_n0.p_brand EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_comment EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_container EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_mfgr EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_name EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION [] +POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION [] +PREHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +CBO PLAN: +HiveAggregate(group=[{}], agg#0=[count()]) + HiveSemiJoin(condition=[=($0, $1)], joinType=[inner]) + HiveProject(p_partkey=[$0]) + 
HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(p_partkey=[$0])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+PREHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey > ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey > ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(>($0, $1), $2), AND(>($0, $1), $2, IS NOT TRUE(>($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey > ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey > ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+25
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey < ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(<($0, $1), $2), AND(<($0, $1), $2, IS NOT TRUE(<($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey < ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey < ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+25
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(>=($0, $1), $2), AND(>=($0, $1), $2, IS NOT TRUE(>=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey <= ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(<=($0, $1), $2), AND(<=($0, $1), $2, IS NOT TRUE(<=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MAX($0)], c=[COUNT()], d=[COUNT($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey <= ANY (select p_partkey from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey <= ANY (select p_partkey from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+26
+PREHOOK: query: explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey = SOME(select min(p_partkey) from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject($f0=[$0])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveProject($f0=[$0])
+          HiveAggregate(group=[{}], agg#0=[min($0)])
+            HiveTableScan(table=[[default, part]], table:alias=[part])
+
+PREHOOK: query: select count(*) from part where p_partkey = SOME(select min(p_partkey) from part)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey = SOME(select min(p_partkey) from part)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+1
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(<($0, $1), $2), AND(<($0, $1), $2, IS NOT TRUE(<($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_size=[$5])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MAX($1)], c=[COUNT()], d=[COUNT($1)])
+        HiveProject(p_partkey=[$0], $f1=[$1])
+          HiveAggregate(group=[{0}], agg#0=[max($5)])
+            HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_size < ANY (select max(p_size) from part group by p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+25
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_size < ANY (select max(null) from part group by p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+0
+PREHOOK: query: select count(*) from part where p_partkey = ANY(select i from tempty)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@tempty
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey = ANY(select i from tempty)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@tempty
+#### A masked pattern was here ####
+0
+PREHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part_null_n0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey = ANY (select p_partkey from part_null_n0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where (p_size= ANY (select p_partkey from part_null_n0)) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey > ANY (select max(p_partkey) from part_null_n0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+0
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject($f0=[$3])
+  HiveJoin(condition=[OR(AND(>($3, $0), $1), AND(>($3, $0), $1, IS NOT TRUE(>($3, $0)), $2))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+        HiveProject(p_partkey=[$0], $f1=[$1])
+          HiveAggregate(group=[{0}], agg#0=[count()])
+            HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject($f0=[$0])
+      HiveAggregate(group=[{}], agg#0=[count()])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part having count(*) > ANY (select count(*) from part group by p_partkey)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+AND p_size = ANY (select p_size from part group by p_size)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+AND p_size = ANY (select p_size from part group by p_size)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[AND(=($1, $2), OR(AND(>=($0, $3), <>($4, 0)), AND(>=($0, $3), <>($4, 0), IS NOT TRUE(>=($0, $3)), <=($4, $5))))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0], p_size=[$5])
+      HiveFilter(condition=[IS NOT NULL($5)])
+        HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+      HiveProject(p_size=[$0])
+        HiveAggregate(group=[{5}])
+          HiveFilter(condition=[IS NOT NULL($5)])
+            HiveTableScan(table=[[default, part]], table:alias=[part])
+      HiveProject(m=[$0], c=[$1], d=[$2])
+        HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+          HiveTableScan(table=[[default, part]], table:alias=[part])
+
+Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+AND p_size = ANY (select p_size from part group by p_size)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part)
+AND p_size = ANY (select p_size from part group by p_size)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
+Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo select count(*) from part where p_partkey
+>= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select count(*) from part where p_partkey
+>= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+CBO PLAN:
+HiveAggregate(group=[{}], agg#0=[count()])
+  HiveJoin(condition=[OR(AND(>=($0, $1), $2), AND(>=($0, $1), $2, IS NOT TRUE(>=($0, $1)), $3))], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(p_partkey=[$0])
+      HiveTableScan(table=[[default, part]], table:alias=[part])
+    HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+      HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+        HiveJoin(condition=[OR(AND(>=($1, $2), $3), AND(>=($1, $2), $3, IS NOT TRUE(>=($1, $2)), $4))], joinType=[inner], algorithm=[none], cost=[not available])
+          HiveProject(p_partkey=[$0], p_size=[$5])
+            HiveTableScan(table=[[default, part]], table:alias=[part])
+          HiveProject(m=[$0], <>=[<>($1, 0)], <==[<=($1, $2)])
+            HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+              HiveProject(p_size=[$0])
+                HiveAggregate(group=[{5}])
+                  HiveTableScan(table=[[default, part_null_n0]], table:alias=[part_null_n0])
+
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product
+Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select count(*) from part where p_partkey
+>= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from part where p_partkey
+>= ANY (select p_partkey from part where p_size >= ANY(select p_size from part_null_n0 group by p_size))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+26
+Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select p_partkey, (p_partkey > ANY (select p_partkey from part)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey > ANY (select p_partkey from part)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 true
+110592 true
+112398 true
+121152 true
+121152 true
+132666 true
+144293 true
+146985 true
+15103 false
+155733 true
+17273 true
+17927 true
+191709 true
+192697 true
+195606 true
+33357 true
+40982 true
+42669 true
+45261 true
+48427 true
+49671 true
+65667 true
+78486 true
+85768 true
+86428 true
+90681 true
+Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select p_partkey, (p_partkey > ANY (select p_partkey from part_null_n0)) from part_null_n0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey > ANY (select p_partkey from part_null_n0)) from part_null_n0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+105685 true
+110592 true
+112398 true
+121152 true
+121152 true
+132666 true
+144293 true
+146985 true
+15103 NULL
+155733 true
+17273 true
+17927 true
+191709 true
+192697 true
+195606 true
+33357 true
+40982 true
+42669 true
+45261 true
+48427 true
+49671 true
+65667 true
+78486 true
+85768 true
+86428 true
+90681 true
+NULL NULL
+Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select p_partkey, (p_partkey > ANY (select null from part_null_n0)) from part_null_n0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey > ANY (select null from part_null_n0)) from part_null_n0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null_n0
+#### A masked pattern was here ####
+105685 NULL
+110592 NULL
+112398 NULL
+121152 NULL
+121152 NULL
+132666 NULL
+144293 NULL
+146985 NULL
+15103 NULL
+155733 NULL
+17273 NULL
+17927 NULL
+191709 NULL
+192697 NULL
+195606 NULL
+33357 NULL
+40982 NULL
+42669 NULL
+45261 NULL
+48427 NULL
+49671 NULL
+65667 NULL
+78486 NULL
+85768 NULL
+86428 NULL
+90681 NULL
+NULL NULL
+Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select p_partkey, (p_partkey > ANY (select i from tempty)) from part_null_n0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_null_n0
+PREHOOK: Input: default@tempty
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey > ANY (select i from tempty)) from part_null_n0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_null_n0
+POSTHOOK: Input: default@tempty
+#### A masked pattern was here ####
+105685 false
+110592 false
+112398 false
+121152 false
+121152 false
+132666 false
+144293 false
+146985 false
+15103 false
+155733 false
+17273 false
+17927 false
+191709 false
+192697 false
+195606 false
+33357 false
+40982 false
+42669 false
+45261 false
+48427 false
+49671 false
+65667 false
+78486 false
+85768 false
+86428 false
+90681 false
+NULL false
+PREHOOK: query: DROP TABLE part_null_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part_null_n0
+PREHOOK: Output: default@part_null_n0
+POSTHOOK: query: DROP TABLE part_null_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part_null_n0
+POSTHOOK: Output: default@part_null_n0
+PREHOOK: query: DROP TABLE tempty
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tempty
+PREHOOK: Output: default@tempty
+POSTHOOK: query: DROP TABLE tempty
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tempty
+POSTHOOK: Output: default@tempty
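
The MIN/MAX plus COUNT aggregates visible in the CBO plans above are the value-based expansion of a non-correlated quantified comparison. As a rough HiveQL illustration (a sketch of the semantics only, not the rule's literal output): assuming the subquery returns at least one row and its column contains no NULLs, a predicate such as p_partkey > ANY (subquery) reduces to a comparison against the subquery minimum; the additional COUNT()/COUNT(col) outputs in the plans appear to exist to return FALSE for an empty subquery and NULL when the comparison cannot be decided.

    -- Sketch only: under the stated assumptions this is equivalent to
    --   p_partkey > ANY (select p_partkey from part)
    -- and, like the q.out result above, returns 25 rows out of 26.
    select count(*)
    from part
    where p_partkey > (select min(p_partkey) from part);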