This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new df0a1429d4a HIVE-27761: Compilation of nested CTEs throws 
SemanticException (Soumyakanti Das, reviewed by Krisztian Kasa)
df0a1429d4a is described below

commit df0a1429d4a67bc180cf1e15ff10432f6723bb1c
Author: Soumyakanti Das <[email protected]>
AuthorDate: Thu Oct 5 05:41:03 2023 -0700

    HIVE-27761: Compilation of nested CTEs throws SemanticException 
(Soumyakanti Das, reviewed by Krisztian Kasa)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  77 ++++++++++------
 .../clientnegative/nested_ctes_ambiguous_table.q   |  13 +++
 .../clientnegative/nested_ctes_outside_scope.q     |  10 ++
 ql/src/test/queries/clientpositive/nested_ctes.q   |  38 ++++++++
 .../nested_ctes_ambiguous_table.q.out              |   1 +
 .../clientnegative/nested_ctes_outside_scope.q.out |   1 +
 .../results/clientpositive/llap/nested_ctes.q.out  | 102 +++++++++++++++++++++
 7 files changed, 216 insertions(+), 26 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 13a1491b373..30c24ddc746 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -640,14 +640,25 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     return returnVal;
   }
 
+  private void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String 
alias, ASTNode tabColNames,
+                              Map<String, CTEClause> aliasToCTEs) throws 
SemanticException {
+    doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames, aliasToCTEs);
+  }
+
   private void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String 
alias, ASTNode tabColNames)
       throws SemanticException {
-    doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames);
+    doPhase1QBExpr(ast, qbexpr, id, alias, false, tabColNames, aliasToCTEs);
   }
 
   @SuppressWarnings("nls")
   void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, 
boolean insideView, ASTNode tabColNames)
       throws SemanticException {
+    doPhase1QBExpr(ast, qbexpr, id, alias, insideView, tabColNames, 
this.aliasToCTEs);
+  }
+
+  @SuppressWarnings("nls")
+  void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias, 
boolean insideView, ASTNode tabColNames,
+                      Map<String, CTEClause> aliasToCTEs) throws 
SemanticException {
 
     assert (ast.getToken() != null);
     if (ast.getToken().getType() == HiveParser.TOK_QUERY) {
@@ -655,7 +666,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       qb.setInsideView(insideView);
       Phase1Ctx ctx_1 = initPhase1Ctx();
       qb.getParseInfo().setColAliases(tabColNames);
-      doPhase1(ast, qb, ctx_1, null);
+      doPhase1(ast, qb, ctx_1, null, aliasToCTEs);
 
       qbexpr.setOpcode(QBExpr.Opcode.NULLOP);
       qbexpr.setQB(qb);
@@ -686,14 +697,14 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       assert (ast.getChild(0) != null);
       QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
       doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id,
-          alias + SUBQUERY_TAG_1, insideView, tabColNames);
+          alias + SUBQUERY_TAG_1, insideView, tabColNames, aliasToCTEs);
       qbexpr.setQBExpr1(qbexpr1);
 
       // query 2
       assert (ast.getChild(1) != null);
       QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
       doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id,
-          alias + SUBQUERY_TAG_2, insideView, tabColNames);
+          alias + SUBQUERY_TAG_2, insideView, tabColNames, aliasToCTEs);
       qbexpr.setQBExpr2(qbexpr2);
     }
   }
@@ -1301,7 +1312,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
    * Phase1: hold onto any CTE definitions in aliasToCTE.
    * CTE definitions are global to the Query.
    */
-  private void processCTE(QB qb, ASTNode ctes) throws SemanticException {
+  private void processCTE(QB qb, ASTNode ctes, Map<String, CTEClause> 
aliasToCTEs) throws SemanticException {
 
     int numCTEs = ctes.getChildCount();
 
@@ -1334,7 +1345,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
    * they appear in when adding them to the <code>aliasToCTEs</code> map.
    *
    */
-  private CTEClause findCTEFromName(QB qb, String cteName) {
+  private CTEClause findCTEFromName(QB qb, String cteName, Map<String, 
CTEClause> aliasToCTEs) {
     StringBuilder qId = new StringBuilder();
     if (qb.getId() != null) {
       qId.append(qb.getId());
@@ -1367,7 +1378,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   private void addCTEAsSubQuery(QB qb, String cteName, String cteAlias)
       throws SemanticException {
     cteAlias = cteAlias == null ? cteName : cteAlias;
-    CTEClause cte = findCTEFromName(qb, cteName);
+    CTEClause cte = findCTEFromName(qb, cteName, aliasToCTEs);
     ASTNode cteQryNode = cte.cteNode;
     QBExpr cteQBExpr = new QBExpr(cteAlias);
     doPhase1QBExpr(cteQryNode, cteQBExpr, qb.getId(), cteAlias, 
cte.withColList);
@@ -1677,6 +1688,12 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     return alias;
   }
 
+  @SuppressWarnings({"fallthrough", "nls"})
+  boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext 
plannerCtx)
+      throws SemanticException {
+    return doPhase1(ast, qb, ctx_1, plannerCtx, this.aliasToCTEs);
+  }
+
   /**
    * Phase 1: (including, but not limited to):
    *
@@ -1694,7 +1711,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
    * @throws SemanticException
    */
   @SuppressWarnings({"fallthrough", "nls"})
-  boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext 
plannerCtx)
+  boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext 
plannerCtx, Map<String, CTEClause> aliasToCTEs)
       throws SemanticException {
 
     boolean phase1Result = true;
@@ -2018,7 +2035,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
         qb.getParseInfo().getDestToLateralView().put(ctx_1.dest, ast);
         break;
       case HiveParser.TOK_CTE:
-        processCTE(qb, ast);
+        processCTE(qb, ast, aliasToCTEs);
         break;
       case HiveParser.QUERY_HINT:
           processQueryHint(ast, qbp, 0);
@@ -2033,7 +2050,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       int child_count = ast.getChildCount();
       for (int child_pos = 0; child_pos < child_count && phase1Result; 
++child_pos) {
         // Recurse
-        phase1Result = doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, 
plannerCtx);
+        phase1Result = doPhase1((ASTNode) ast.getChild(child_pos), qb, ctx_1, 
plannerCtx, aliasToCTEs);
       }
     }
     return phase1Result;
@@ -2156,14 +2173,15 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     return targetColNames;
   }
 
-  private void getMaterializationMetadata(QB qb) throws SemanticException {
+  private Map<String, CTEClause> getMaterializationMetadata(QB qb) throws 
SemanticException {
     if (qb.isCTAS()) {
-      return;
+      return null;
     }
+    Map<String, CTEClause> materializationAliasToCTEs = new 
HashMap<>(this.aliasToCTEs);
     try {
-      gatherCTEReferences(qb, rootClause);
+      gatherCTEReferences(qb, rootClause, materializationAliasToCTEs);
       int threshold = HiveConf.getIntVar(conf, 
HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD);
-      for (CTEClause cte : Sets.newHashSet(aliasToCTEs.values())) {
+      for (CTEClause cte : 
Sets.newHashSet(materializationAliasToCTEs.values())) {
         if (threshold >= 0 && cte.reference >= threshold) {
           cte.materialize = !HiveConf.getBoolVar(conf, 
ConfVars.HIVE_CTE_MATERIALIZE_FULL_AGGREGATE_ONLY)
               || cte.qbExpr.getQB().getParseInfo().isFullyAggregate();
@@ -2176,24 +2194,27 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       }
       throw new SemanticException(e.getMessage(), e);
     }
+    return materializationAliasToCTEs;
   }
 
-  private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent) throws 
HiveException {
+  private void gatherCTEReferences(QBExpr qbexpr, CTEClause parent,
+                                   Map<String, CTEClause> 
materializationAliasToCTEs) throws HiveException {
     if (qbexpr.getOpcode() == QBExpr.Opcode.NULLOP) {
-      gatherCTEReferences(qbexpr.getQB(), parent);
+      gatherCTEReferences(qbexpr.getQB(), parent, materializationAliasToCTEs);
     } else {
-      gatherCTEReferences(qbexpr.getQBExpr1(), parent);
-      gatherCTEReferences(qbexpr.getQBExpr2(), parent);
+      gatherCTEReferences(qbexpr.getQBExpr1(), parent, 
materializationAliasToCTEs);
+      gatherCTEReferences(qbexpr.getQBExpr2(), parent, 
materializationAliasToCTEs);
     }
   }
 
   // TODO: check view references, too
-  private void gatherCTEReferences(QB qb, CTEClause current) throws 
HiveException {
+  private void gatherCTEReferences(QB qb, CTEClause current,
+                                   Map<String, CTEClause> 
materializationAliasToCTEs) throws HiveException {
     for (String alias : qb.getTabAliases()) {
       String tabName = qb.getTabNameForAlias(alias);
       String cteName = tabName.toLowerCase();
 
-      CTEClause cte = findCTEFromName(qb, cteName);
+      CTEClause cte = findCTEFromName(qb, cteName, materializationAliasToCTEs);
       if (cte != null) {
         if (ctesExpanded.contains(cteName)) {
           throw new SemanticException("Recursive cte " + cteName +
@@ -2206,18 +2227,18 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
           continue;
         }
         cte.qbExpr = new QBExpr(cteName);
-        doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName, 
cte.withColList);
+        doPhase1QBExpr(cte.cteNode, cte.qbExpr, qb.getId(), cteName, 
cte.withColList, materializationAliasToCTEs);
 
         ctesExpanded.add(cteName);
-        gatherCTEReferences(cte.qbExpr, cte);
+        gatherCTEReferences(cte.qbExpr, cte, materializationAliasToCTEs);
         ctesExpanded.remove(ctesExpanded.size() - 1);
       }
     }
     for (String alias : qb.getSubqAliases()) {
-      gatherCTEReferences(qb.getSubqForAlias(alias), current);
+      gatherCTEReferences(qb.getSubqForAlias(alias), current, 
materializationAliasToCTEs);
     }
     for (String alias : qb.getSubqExprAliases()) {
-      gatherCTEReferences(qb.getSubqExprForAlias(alias), current);
+      gatherCTEReferences(qb.getSubqExprForAlias(alias), current, 
materializationAliasToCTEs);
     }
   }
 
@@ -2227,10 +2248,14 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 
   private void getMetaData(QB qb, boolean enableMaterialization) throws 
SemanticException {
     try {
+      Map<String, CTEClause> materializationAliasToCTEs = null;
       if (enableMaterialization) {
-        getMaterializationMetadata(qb);
+        materializationAliasToCTEs = getMaterializationMetadata(qb);
       }
       getMetaData(qb, null);
+      if (materializationAliasToCTEs != null && 
!materializationAliasToCTEs.isEmpty()) {
+        this.aliasToCTEs.putAll(materializationAliasToCTEs);
+      }
     } catch (HiveException e) {
       if (e instanceof SemanticException) {
         throw (SemanticException)e;
@@ -2287,7 +2312,7 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
         Table materializedTab = ctx.getMaterializedTable(cteName);
         if (materializedTab == null) {
           // we first look for this alias from CTE, and then from catalog.
-          CTEClause cte = findCTEFromName(qb, cteName);
+          CTEClause cte = findCTEFromName(qb, cteName, aliasToCTEs);
           if (cte != null) {
             if (!cte.materialize) {
               addCTEAsSubQuery(qb, cteName, alias);
diff --git a/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q 
b/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q
new file mode 100644
index 00000000000..73bdef40ba3
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/nested_ctes_ambiguous_table.q
@@ -0,0 +1,13 @@
+
+with
+    test1 as (
+        with
+            t1 as (
+                select 1 as a
+            ),
+            t1 as (
+                select 1 as b
+            )
+        select 1
+    )
+select * from test1;
diff --git a/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q 
b/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q
new file mode 100644
index 00000000000..5f146b924de
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/nested_ctes_outside_scope.q
@@ -0,0 +1,10 @@
+
+with
+    test1 as (
+        with
+            t1 as (
+                select 1 as a
+            )
+        select 1 as b
+    )
+select * from test1 join t1 on a=b;
diff --git a/ql/src/test/queries/clientpositive/nested_ctes.q 
b/ql/src/test/queries/clientpositive/nested_ctes.q
new file mode 100644
index 00000000000..e68adb8420b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/nested_ctes.q
@@ -0,0 +1,38 @@
+
+with
+    test1 as (
+        with t1 as (select 1)
+        select 1
+    )
+select * from test1;
+with
+    test2 as (
+        with
+            test2 as (
+                select 1 as a
+            )
+        select 1
+    )
+select * from test2;
+
+with
+    test3 as (
+        with
+            t1 as (
+                select 1 as a
+            ),
+            t2 as (
+                select 1 as b
+            )
+        select * from t1 join t2 on a=b
+    )
+select * from test3;
+
+create table table_1(a int, b int);
+
+with
+    test4 as (
+        with t1 as (select * from table_1)
+        select * from t1
+    )
+select * from test4;
diff --git 
a/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out 
b/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out
new file mode 100644
index 00000000000..ebd260f3b4a
--- /dev/null
+++ b/ql/src/test/results/clientnegative/nested_ctes_ambiguous_table.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10008]: Line 8:12 Ambiguous table alias 't1'
diff --git a/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out 
b/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out
new file mode 100644
index 00000000000..5954fe5d692
--- /dev/null
+++ b/ql/src/test/results/clientnegative/nested_ctes_outside_scope.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10001]: Line 10:25 Table not found 't1'
diff --git a/ql/src/test/results/clientpositive/llap/nested_ctes.q.out 
b/ql/src/test/results/clientpositive/llap/nested_ctes.q.out
new file mode 100644
index 00000000000..0b771699b5b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/nested_ctes.q.out
@@ -0,0 +1,102 @@
+PREHOOK: query: with
+    test1 as (
+        with t1 as (select 1)
+        select 1
+    )
+select * from test1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+    test1 as (
+        with t1 as (select 1)
+        select 1
+    )
+select * from test1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+PREHOOK: query: with
+    test2 as (
+        with
+            test2 as (
+                select 1 as a
+            )
+        select 1
+    )
+select * from test2
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+    test2 as (
+        with
+            test2 as (
+                select 1 as a
+            )
+        select 1
+    )
+select * from test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+PREHOOK: query: with
+    test3 as (
+        with
+            t1 as (
+                select 1 as a
+            ),
+            t2 as (
+                select 1 as b
+            )
+        select * from t1 join t2 on a=b
+    )
+select * from test3
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: with
+    test3 as (
+        with
+            t1 as (
+                select 1 as a
+            ),
+            t2 as (
+                select 1 as b
+            )
+        select * from t1 join t2 on a=b
+    )
+select * from test3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+1      1
+PREHOOK: query: create table table_1(a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_1
+POSTHOOK: query: create table table_1(a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_1
+PREHOOK: query: with
+    test4 as (
+        with t1 as (select * from table_1)
+        select * from t1
+    )
+select * from test4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_1
+#### A masked pattern was here ####
+POSTHOOK: query: with
+    test4 as (
+        with t1 as (select * from table_1)
+        select * from t1
+    )
+select * from test4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_1
+#### A masked pattern was here ####

Reply via email to