Repository: hive Updated Branches: refs/heads/master 6baeac7d6 -> ef6bf21a2
HIVE-18725: Improve error handling for subqueries if there is wrong column reference (Igor Kryvenko, reviewed by Vineet Garg) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ef6bf21a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ef6bf21a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ef6bf21a Branch: refs/heads/master Commit: ef6bf21a27c7192d6c782f0e13e180e8e5463a3b Parents: 6baeac7 Author: Igor Kryvenko <kryvenko7i...@gmail.com> Authored: Wed Sep 5 17:12:25 2018 -0700 Committer: Vineet Garg <vg...@apache.org> Committed: Wed Sep 5 17:14:30 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 87 ++++++++++---------- .../subquery_non_exisiting_column.q | 5 ++ .../clientnegative/subquery_corr_from.q.out | 2 +- .../subquery_corr_grandparent.q.out | 2 +- .../clientnegative/subquery_corr_select.q.out | 2 +- .../subquery_non_exisiting_column.q.out | 1 + .../subquery_scalar_multi_columns.q.out | 2 +- 7 files changed, 54 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index df40a28..39f27b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -3269,59 +3269,60 @@ public class CalcitePlanner extends SemanticAnalyzer { } private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause, - Map<ASTNode, RelNode> subQueryToRelNode) throws SemanticException { + Map<ASTNode, RelNode> subQueryToRelNode) throws 
CalciteSubquerySemanticException { Set<ASTNode> corrScalarQueriesWithAgg = new HashSet<ASTNode>(); - Set<ASTNode> scalarQueriesWithAggNoWinNoGby= new HashSet<ASTNode>(); - //disallow subqueries which HIVE doesn't currently support - subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg, - scalarQueriesWithAggNoWinNoGby); - Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); - stack.push(node); - + Set<ASTNode> scalarQueriesWithAggNoWinNoGby = new HashSet<ASTNode>(); boolean isSubQuery = false; + try { + //disallow subqueries which HIVE doesn't currently support + subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg, + scalarQueriesWithAggNoWinNoGby); + Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); + stack.push(node); - while (!stack.isEmpty()) { - ASTNode next = stack.pop(); + while (!stack.isEmpty()) { + ASTNode next = stack.pop(); - switch(next.getType()) { - case HiveParser.TOK_SUBQUERY_EXPR: + switch (next.getType()) { + case HiveParser.TOK_SUBQUERY_EXPR: /* * Restriction 2.h Subquery isnot allowed in LHS */ - if(next.getChildren().size() == 3 - && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR){ - throw new CalciteSemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - next.getChild(2), - "SubQuery in LHS expressions are not supported.")); - } - String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); - QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); - Phase1Ctx ctx1 = initPhase1Ctx(); - doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null); - getMetaData(qbSQ); - this.subqueryId++; - RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, - relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel)); - subQueryToRelNode.put(next, subQueryRelNode); - //keep track of subqueries which are scalar, correlated and contains aggregate - // subquery expression. 
This will later be special cased in Subquery remove rule - // for correlated scalar queries with aggregate we have take care of the case where - // inner aggregate happens on empty result - if(corrScalarQueriesWithAgg.contains(next)) { - corrScalarRexSQWithAgg.add(subQueryRelNode); - } - if(scalarQueriesWithAggNoWinNoGby.contains(next)) { - scalarAggNoGbyNoWin.add(subQueryRelNode); - } - isSubQuery = true; - break; - default: - int childCount = next.getChildCount(); - for(int i = childCount - 1; i >= 0; i--) { - stack.push((ASTNode) next.getChild(i)); + if (next.getChildren().size() == 3 && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION + .getMsg(next.getChild(2), "SubQuery in LHS expressions are not supported.")); + } + String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates(); + QB qbSQ = new QB(qb.getId(), sbQueryAlias, true); + Phase1Ctx ctx1 = initPhase1Ctx(); + doPhase1((ASTNode) next.getChild(1), qbSQ, ctx1, null); + getMetaData(qbSQ); + this.subqueryId++; + RelNode subQueryRelNode = + genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel), relToHiveRR.get(srcRel)); + subQueryToRelNode.put(next, subQueryRelNode); + //keep track of subqueries which are scalar, correlated and contains aggregate + // subquery expression. 
This will later be special cased in Subquery remove rule + // for correlated scalar queries with aggregate we have take care of the case where + // inner aggregate happens on empty result + if (corrScalarQueriesWithAgg.contains(next)) { + corrScalarRexSQWithAgg.add(subQueryRelNode); + } + if (scalarQueriesWithAggNoWinNoGby.contains(next)) { + scalarAggNoGbyNoWin.add(subQueryRelNode); + } + isSubQuery = true; + break; + default: + int childCount = next.getChildCount(); + for (int i = childCount - 1; i >= 0; i--) { + stack.push((ASTNode) next.getChild(i)); + } } } + } catch (SemanticException e) { + throw new CalciteSubquerySemanticException(e.getMessage()); } return isSubQuery; } http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/queries/clientnegative/subquery_non_exisiting_column.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/subquery_non_exisiting_column.q b/ql/src/test/queries/clientnegative/subquery_non_exisiting_column.q new file mode 100644 index 0000000..5a4861a --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_non_exisiting_column.q @@ -0,0 +1,5 @@ +--! qt:dataset:srcpart +--! 
qt:dataset:part
+
+explain select * from srcpart where srcpart.key IN ( select p_type from part p where p.p_type = srcpart.non_exisiting_column)
+and srcpart.key IN (select 4);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/results/clientnegative/subquery_corr_from.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_from.q.out b/ql/src/test/results/clientnegative/subquery_corr_from.q.out
index 3af1a8a..f47dd47 100644
--- a/ql/src/test/results/clientnegative/subquery_corr_from.q.out
+++ b/ql/src/test/results/clientnegative/subquery_corr_from.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10004]: Line 4:113 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
+FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:113 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
index 6f06fc7..504c70d 100644
--- a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
+++ b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10004]: Line 5:95 Invalid table alias or column reference 'x': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
+FAILED: SemanticException [Error 
10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 5:95 Invalid table alias or column reference 'x': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/results/clientnegative/subquery_corr_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_select.q.out b/ql/src/test/results/clientnegative/subquery_corr_select.q.out
index d95b939..d04b04e 100644
--- a/ql/src/test/results/clientnegative/subquery_corr_select.q.out
+++ b/ql/src/test/results/clientnegative/subquery_corr_select.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10004]: Line 3:54 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
+FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 3:54 Invalid table alias or column reference 'po': (possible column names are: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment)
http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/results/clientnegative/subquery_non_exisiting_column.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_non_exisiting_column.q.out b/ql/src/test/results/clientnegative/subquery_non_exisiting_column.q.out
new file mode 100644
index 0000000..3635af0
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_non_exisiting_column.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10002]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:104 Invalid column reference 'non_exisiting_column'
http://git-wip-us.apache.org/repos/asf/hive/blob/ef6bf21a/ql/src/test/results/clientnegative/subquery_scalar_multi_columns.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_scalar_multi_columns.q.out b/ql/src/test/results/clientnegative/subquery_scalar_multi_columns.q.out
index ee1e44e..cfa6d97 100644
--- a/ql/src/test/results/clientnegative/subquery_scalar_multi_columns.q.out
+++ b/ql/src/test/results/clientnegative/subquery_scalar_multi_columns.q.out
@@ -1,3 +1,3 @@
-FAILED: SemanticException Line 2:67 Invalid SubQuery expression 'p_type' in definition of SubQuery sq_1 [
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 2:67 Invalid SubQuery expression 'p_type' in definition of SubQuery sq_1 [
 (select p_size, p_type from part)
 ] used as sq_1 at Line 0:-1: SubQuery can contain only 1 item in Select List.