This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new c68e7ecb9d2 HIVE-29249: RuntimeException in PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
c68e7ecb9d2 is described below

commit c68e7ecb9d2a6aae6ac4492fc22de72ea106960b
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Mon Nov 17 14:35:10 2025 +0100

    HIVE-29249: RuntimeException in PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
    
    The problem occurs when we need to introduce a derived table over the
    left input of the join but that input has already been modified (newL)
    by the shuttle. Looking up the modified input among the join's children
    fails and raises an exception. Since we know which input needs to be
    replaced, we can avoid the lookup mechanism in
    `introduceDerivedTable(RelNode, RelNode)` and build the new join
    operator directly via copy.
    
    To avoid unnecessary object creation bubbling further up through the
    HiveRelShuttle mechanism, we perform the copy *only* when one of the
    inputs is modified.
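    
    A minimal sketch of the resulting control flow in the shuttle's visit
    method (rewriteChild and selfJoinDetected are illustrative names, not
    identifiers from the patch):
    
        // Rewrite both inputs first; wrap the already-rewritten left input
        // directly instead of searching for it among the join's children.
        RelNode newL = rewriteChild(join.getLeft());
        RelNode newR = rewriteChild(join.getRight());
        if (selfJoinDetected) {
          newL = introduceDerivedTable(newL);
        }
        // Copy the join only when an input actually changed, so unchanged
        // nodes do not bubble new objects further up the HiveRelShuttle.
        if (newL == join.getLeft() && newR == join.getRight()) {
          return join;
        }
        return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));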
---
 .../calcite/translator/PlanModifierForASTConv.java | 12 ++++++----
 .../cbo_self_join_ambiguous_alias_cte.q            |  7 +++++-
 .../llap/cbo_self_join_ambiguous_alias_cte.q.out   | 26 ++++++++++++++++++++++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index f1dd9dd6e0c..1875538e089 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) {
         // No self-join detected, return the join as is
         aliases.addAll(lf.aliases);
         aliases.addAll(rf.aliases);
+      } else {
+        // Self-join detected, introduce a derived table for the left side
+        aliases.addAll(rf.aliases);
+        newL = introduceDerivedTable(newL);
+      }
+      if (newL == join.getLeft() && newR == join.getRight()) {
+        return join;
+      } else {
         return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));
       }
-      // Self-join detected, introduce a derived table for the left side
-      aliases.addAll(rf.aliases);
-      introduceDerivedTable(newL, join);
-      return join;
     }
 
     @Override
diff --git a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
index 3293a409ed2..4195ec443b5 100644
--- a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
+++ b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
@@ -3,4 +3,9 @@ create table t1 (key int, value int);
 explain cbo
 with cte as
 (select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
-select * from cte a join cte b join cte c 
+select * from cte a join cte b join cte c;
+
+explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
index 8ddab9b785c..bad3855cb5d 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
@@ -32,3 +32,29 @@ HiveProject(a.key=[$0], a.value=[$1], a.block__offset__inside__file=[$2], a.inpu
         HiveTableScan(table=[[default, t1]], table:alias=[t1])
     HiveTableScan(table=[[default, t1]], table:alias=[t1])
 
+Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a.key=[$0], a.value=[$1], a.block__offset__inside__file=[$2], a.input__file__name=[$3], a.row__id=[$4], a.row__is__deleted=[$5], b.key=[$6], b.value=[$7], c.key=[$8], c.value=[$9], c.block__offset__inside__file=[$10], c.input__file__name=[$11], c.row__id=[$12], c.row__is__deleted=[$13])
+  HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7])
+      HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+        HiveTableScan(table=[[default, t1]], table:alias=[t1])
+        HiveProject(key=[$0], value=[$1])
+          HiveTableScan(table=[[default, t1]], table:alias=[b])
+    HiveTableScan(table=[[default, t1]], table:alias=[t1])
+