This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new df0a1429d4a HIVE-27761: Compilation of nested CTEs throws SemanticException (Soumyakanti Das, reviewed by Krisztian Kasa)
df0a1429d4a is described below
commit df0a1429d4a67bc180cf1e15ff10432f6723bb1c
Author: Soumyakanti Das <[email protected]>
AuthorDate: Thu Oct 5 05:41:03 2023 -0700
HIVE-27761: Compilation of nested CTEs throws SemanticException (Soumyakanti Das, reviewed by Krisztian Kasa)
---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 77 ++++++++++------
.../clientnegative/nested_ctes_ambiguous_table.q | 13 +++
.../clientnegative/nested_ctes_outside_scope.q | 10 ++
ql/src/test/queries/clientpositive/nested_ctes.q | 38 ++++++++
.../nested_ctes_ambiguous_table.q.out | 1 +
.../clientnegative/nested_ctes_outside_scope.q.out | 1 +
.../results/clientpositive/llap/nested_ctes.q.out | 102 +++++++++++++++++++++
7 files changed, 216 insertions(+), 26 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 13a1491b373..30c24ddc746 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -640,14 +640,25 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
return returnVal;
}
+ private void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String
alias, ASTNode tabColNames,
+ Map<String, CTEClause> aliasToCTEs) throws
SemanticException {
+ doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames, aliasToCTEs);
+ }
+
private void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String
alias, ASTNode tabColNames)
throws SemanticException {
- doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames);
+ doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames, aliasToCTEs);
}
@SuppressWarnings("nls")
void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias,
boolean insideView, ASTNode tabColNames)
throws SemanticException {
+ doPhase1QBExpr(ast, qbexpr, id, alias, insideView, tabColNames,
this.aliasToCTEs);
+ }
+
+ @SuppressWarnings("nls")
+ void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias,
boolean insideView, ASTNode tabColNames,
+ Map<String, CTEClause> aliasToCTEs) throws
SemanticException {
assert (ast.getToken() != null);
if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
@@ -655,7 +666,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
qb.setInsideView(insideView);
Phase1Ctx ctx_1 = initPhase1Ctx();
qb.getParseInfo().setColAliases(tabColNames);
- doPhase1(ast, qb, ctx_1, null);
+ doPhase1(ast, qb, ctx_1, null, aliasToCTEs);
qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
qbexpr.setQB(qb);
@@ -686,14 +697,14 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
assert (ast.getChild(0) != null);
QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id,
- alias + SUBQUERY_TAG_1, insideView, tabColNames);
+ alias + SUBQUERY_TAG_1, insideView, tabColNames, aliasToCTEs);
qbexpr.setQBExpr1(qbexpr1);
// query 2
assert (ast.getChild(1) != null);
QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id,
- alias + SUBQUERY_TAG_2, insideView, tabColNames);
+ alias + SUBQUERY_TAG_2, insideView, tabColNames, aliasToCTEs);
qbexpr.setQBExpr2(qbexpr2);
}
}
@@ -1301,7 +1312,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
* Phase1: hold onto any CTE definitions in aliasToCTE.
* CTE definitions are global to the Query.
*/
- private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
+ private void processCTE(QB qb, ASTNode ctes, Map<String, CTEClause>
aliasToCTEs) throws SemanticException {
int numCTEs = ctes.getChildCount();
@@ -1334,7 +1345,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
* they appear in when adding them to the <code>aliasToCTEs</code> map.
*
*/
- private CTEClause findCTEFromName(QB qb, String cteName) {
+ private CTEClause findCTEFromName(QB qb, String cteName, Map<String,
CTEClause> aliasToCTEs) {
StringBuilder qId = new StringBuilder();
if (qb.getId() != null) {
qId.append(qb.getId());
@@ -1367,7 +1378,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
throws SemanticException {
cteAlias = cteAlias == null ? cteName : cteAlias;
- CTEClause cte = findCTEFromName(qb, cteName);
+ CTEClause cte = findCTEFromName(qb, cteName, aliasToCTEs);
ASTNode cteQryNode = cte.cteNode;
QBExpr cteQBExpr = new QBExpr(cteAlias);
doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias,
cte.withColList);
@@ -1677,6 +1688,12 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
return alias;
}
+ @SuppressWarnings({"fallthrough", "nls"})
+ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext
plannerCtx)
+ throws SemanticException {
+ return doPhase1(ast, qb, ctx_1, plannerCtx, this.aliasToCTEs);
+ }
+
/**
* Phase 1: (including, but not limited to):
*
@@ -1694,7 +1711,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
* @throws SemanticException
*/
@SuppressWarnings({"fallthrough", "nls"})
- boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext
plannerCtx)
+ boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext
plannerCtx, Map<String, CTEClause> aliasToCTEs)
throws SemanticException {
boolean phase1Result = true;
@@ -2018,7 +2035,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
break;
case HiveParser.TOK_CTE:
- processCTE(qb, ast);
+ processCTE(qb, ast, aliasToCTEs);
break;
case HiveParser.QUERY_HINT:
processQueryHint(ast, qbp, 0);
@@ -2033,7 +2050,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
int child_count = ast.getChildCount();
for (int child_pos = 0; child_pos < child_count && phase1Result;
++child_pos) {
// Recurse
- phase1Result = doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1,
plannerCtx);
+ phase1Result = doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1,
plannerCtx, aliasToCTEs);
}
}
return phase1Result;
@@ -2156,14 +2173,15 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
return targetColNames;
}
- private void getMaterializationMetadata(QB qb) throws SemanticException {
+ private Map<String, CTEClause> getMaterializationMetadata(QB qb) throws
SemanticException {
if (qb.isCTAS()) {
- return;
+ return null;
}
+ Map<String, CTEClause> materializationAliasToCTEs = new
HashMap<>(this.aliasToCTEs);
try {
- gatherCTEReferences(qb, rootClause);
+ gatherCTEReferences(qb, rootClause, materializationAliasToCTEs);
int threshold = HiveConf.getIntVar(conf,
HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
- for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
+ for (CTEClause cte :
Sets.newHashSet(materializationAliasToCTEs.values())) {
if (threshold >= 0 && cte.reference >= threshold) {
cte.materialize = !HiveConf.getBoolVar(conf,
ConfVars.HIVE_CTE_MATERIALIZE_FULL_AGGREGATE_ONLY)
|| cte.qbExpr.getQB().getParseInfo().isFullyAggregate();
@@ -2176,24 +2194,27 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
}
throw new SemanticException(e.getMessage(), e);
}
+ return materializationAliasToCTEs;
}
- private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws
HiveException {
+ private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent,
+ Map<String, CTEClause>
materializationAliasToCTEs) throws HiveException {
if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
- gatherCTEReferences(qbexpr.getQB(), parent);
+ gatherCTEReferences(qbexpr.getQB(), parent, materializationAliasToCTEs);
} else {
- gatherCTEReferences(qbexpr.getQBExpr1(), parent);
- gatherCTEReferences(qbexpr.getQBExpr2(), parent);
+ gatherCTEReferences(qbexpr.getQBExpr1(), parent,
materializationAliasToCTEs);
+ gatherCTEReferences(qbexpr.getQBExpr2(), parent,
materializationAliasToCTEs);
}
}
// TODO: check view references, too
- private void gatherCTEReferences(QB qb, CTEClause current) throws
HiveException {
+ private void gatherCTEReferences(QB qb, CTEClause current,
+ Map<String, CTEClause>
materializationAliasToCTEs) throws HiveException {
for (String alias : qb.getTabAliases()) {
String tabName = qb.getTabNameForAlias(alias);
String cteName = tabName.toLowerCase();
- CTEClause cte = findCTEFromName(qb, cteName);
+ CTEClause cte = findCTEFromName(qb, cteName, materializationAliasToCTEs);
if (cte != null) {
if (ctesExpanded.contains(cteName)) {
throw new SemanticException("Recursive cte " + cteName +
@@ -2206,18 +2227,18 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
continue;
}
cte.qbExpr = new QBExpr(cteName);
- doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName,
cte.withColList);
+ doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName,
cte.withColList, materializationAliasToCTEs);
ctesExpanded.add(cteName);
- gatherCTEReferences(cte.qbExpr, cte);
+ gatherCTEReferences(cte.qbExpr, cte, materializationAliasToCTEs);
ctesExpanded.remove(ctesExpanded.size() - 1);
}
}
for (String alias : qb.getSubqAliases()) {
- gatherCTEReferences(qb.getSubqForAlias(alias), current);
+ gatherCTEReferences(qb.getSubqForAlias(alias), current,
materializationAliasToCTEs);
}
for (String alias : qb.getSubqExprAliases()) {
- gatherCTEReferences(qb.getSubqExprForAlias(alias), current);
+ gatherCTEReferences(qb.getSubqExprForAlias(alias), current,
materializationAliasToCTEs);
}
}
@@ -2227,10 +2248,14 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
private void getMetaData(QB qb, boolean enableMaterialization) throws
SemanticException {
try {
+ Map<String, CTEClause> materializationAliasToCTEs = null;
if (enableMaterialization) {
- getMaterializationMetadata(qb);
+ materializationAliasToCTEs = getMaterializationMetadata(qb);
}
getMetaData(qb, null);
+ if (materializationAliasToCTEs != null &&
!materializationAliasToCTEs.isEmpty()) {
+ this.aliasToCTEs.putAll(materializationAliasToCTEs);
+ }
} catch (HiveException e) {
if (e instanceof SemanticException) {
throw (SemanticException)e;
@@ -2287,7 +2312,7 @@ public class SemanticAnalyzer extends
BaseSemanticAnalyzer {
Table materializedTab = ctx.getMaterializedTable(cteName);
if (materializedTab == null) {
// we first look for this alias from CTE, and then from catalog.
- CTEClause cte = findCTEFromName(qb, cteName);
+ CTEClause cte = findCTEFromName(qb, cteName, aliasToCTEs);
if (cte != null) {
if (!cte.materialize) {
addCTEAsSubQuery(qb, cteName, alias);
diff --git a/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q
b/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q
new file mode 100644
index 00000000000..73bdef40ba3
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q
@@ -0,0 +1,13 @@
+
+with
+ test1 as (
+ with
+ t1 as (
+ select 1 as a
+ ),
+ t1 as (
+ select 1 as b
+ )
+ select 1
+ )
+select * from test1;
diff --git a/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q
b/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q
new file mode 100644
index 00000000000..5f146b924de
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q
@@ -0,0 +1,10 @@
+
+with
+ test1 as (
+ with
+ t1 as (
+ select 1 as a
+ )
+ select 1 as b
+ )
+select * from test1 join t1 on a=b;
diff --git a/ql/src/test/queries/clientpositive/nested_ctes.q
b/ql/src/test/queries/clientpositive/nested_ctes.q
new file mode 100644
index 00000000000..e68adb8420b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/nested_ctes.q
@@ -0,0 +1,38 @@
+
+with
+ test1 as (
+ with t1 as (select 1)
+ select 1
+ )
+select * from test1;
+with
+ test2 as (
+ with
+ test2 as (
+ select 1 as a
+ )
+ select 1
+ )
+select * from test2;
+
+with
+ test3 as (
+ with
+ t1 as (
+ select 1 as a
+ ),
+ t2 as (
+ select 1 as b
+ )
+ select * from t1 join t2 on a=b
+ )
+select * from test3;
+
+create table table_1(a int, b int);
+
+with
+ test4 as (
+ with t1 as (select * from table_1)
+ select * from t1
+ )
+select * from test4;
diff --git
a/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out
b/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out
new file mode 100644
index 00000000000..ebd260f3b4a
--- /dev/null
+++ b/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10008]: Line 8:12 Ambiguous table alias 't1'
diff --git a/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out
b/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out
new file mode 100644
index 00000000000..5954fe5d692
--- /dev/null
+++ b/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10001]: Line 10:25 Table not found 't1'
diff --git a/ql/src/test/results/clientpositive/llap/nested_ctes.q.out
b/ql/src/test/results/clientpositive/llap/nested_ctes.q.out
new file mode 100644
index 00000000000..0b771699b5b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/nested_ctes.q.out
@@ -0,0 +1,102 @@
+PREHOOK: query: with
+ test1 as (
+ with t1 as (select 1)
+ select 1
+ )
+select * from test1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+ test1 as (
+ with t1 as (select 1)
+ select 1
+ )
+select * from test1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: with
+ test2 as (
+ with
+ test2 as (
+ select 1 as a
+ )
+ select 1
+ )
+select * from test2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+ test2 as (
+ with
+ test2 as (
+ select 1 as a
+ )
+ select 1
+ )
+select * from test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 2' is a cross product
+PREHOOK: query: with
+ test3 as (
+ with
+ t1 as (
+ select 1 as a
+ ),
+ t2 as (
+ select 1 as b
+ )
+ select * from t1 join t2 on a=b
+ )
+select * from test3
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+ test3 as (
+ with
+ t1 as (
+ select 1 as a
+ ),
+ t2 as (
+ select 1 as b
+ )
+ select * from t1 join t2 on a=b
+ )
+select * from test3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1 1
+PREHOOK: query: create table table_1(a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_1
+POSTHOOK: query: create table table_1(a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_1
+PREHOOK: query: with
+ test4 as (
+ with t1 as (select * from table_1)
+ select * from t1
+ )
+select * from test4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_1
+#### A masked pattern was here ####
+POSTHOOK: query: with
+ test4 as (
+ with t1 as (select * from table_1)
+ select * from t1
+ )
+select * from test4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_1
+#### A masked pattern was here ####