This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch branch-4.2
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-4.2 by this push:
     new f2a8098cf55 HIVE-29249: RuntimeException in PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
f2a8098cf55 is described below

commit f2a8098cf551b697ca1afe74ee0e747f458ab9f1
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Mon Nov 17 14:35:10 2025 +0100

    HIVE-29249: RuntimeException in PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
    
    The problem occurs when we need to introduce a derived table over the left input of the join but the input has already been modified (newL) by the shuttle.
    Looking for the modified input in the join's children fails and raises an exception. Since we know which input needs to be replaced, we can avoid the lookup mechanism in `introduceDerivedTable(RelNode, RelNode)` and directly build the new join operator via copy.
    
    To avoid unnecessary object creation bubbling further up via the HiveRelShuttle mechanism, we perform the copy *only* when one of the inputs is modified.
    
    (cherry picked from commit c68e7ecb9d2a6aae6ac4492fc22de72ea106960b)
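
For readers following the fix without the surrounding code, the failure mode can be illustrated with a minimal, self-contained sketch in plain Java below. The class, the helper method, and the string-based "plan nodes" are invented for illustration and are not the actual Hive/Calcite types: the old path searched the join's children for an input that the shuttle had already replaced, so the lookup found nothing and threw, whereas the new path simply rebuilds the join with the known inputs via copy.

    import java.util.Arrays;
    import java.util.List;

    // Simplified stand-in for the failing lookup; not the actual Hive/Calcite code.
    public class DerivedTableLookupSketch {

      // Old-style replacement: find the child's position in the parent's inputs,
      // then swap it. Fails when the node we hold is not among those inputs.
      static int findChildPosition(List<String> parentInputs, String child) {
        int pos = parentInputs.indexOf(child);
        if (pos == -1) {
          // Corresponds to the RuntimeException reported in HIVE-29249.
          throw new RuntimeException("child not found among the join's inputs");
        }
        return pos;
      }

      public static void main(String[] args) {
        // The join's original inputs.
        List<String> joinInputs = Arrays.asList("scan(t1) as a", "scan(t1) as c");

        // The shuttle has already wrapped the left input in a derived table,
        // so the node we now hold is a new object the join has never seen.
        String newLeft = "derivedTable(scan(t1) as a)";

        try {
          findChildPosition(joinInputs, newLeft); // old approach: lookup fails
        } catch (RuntimeException e) {
          System.out.println("lookup-based replacement failed: " + e.getMessage());
        }

        // New approach: the left position is already known, so rebuild the join
        // with the new inputs directly (join.copy(...) in the actual patch).
        List<String> rebuiltInputs = Arrays.asList(newLeft, joinInputs.get(1));
        System.out.println("rebuilt join inputs: " + rebuiltInputs);
      }
    }

In the actual patch the copy is additionally guarded by `newL == join.getLeft() && newR == join.getRight()`, so an unchanged join is returned as-is and no new objects bubble up through the HiveRelShuttle.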
---
 .../calcite/translator/PlanModifierForASTConv.java | 12 ++++++----
 .../cbo_self_join_ambiguous_alias_cte.q            |  7 +++++-
 .../llap/cbo_self_join_ambiguous_alias_cte.q.out   | 26 ++++++++++++++++++++++
 3 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index f1dd9dd6e0c..1875538e089 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) {
         // No self-join detected, return the join as is
         aliases.addAll(lf.aliases);
         aliases.addAll(rf.aliases);
+      } else {
+        // Self-join detected, introduce a derived table for the left side
+        aliases.addAll(rf.aliases);
+        newL = introduceDerivedTable(newL);
+      }
+      if (newL == join.getLeft() && newR == join.getRight()) {
+        return join;
+      } else {
         return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));
       }
-      // Self-join detected, introduce a derived table for the left side
-      aliases.addAll(rf.aliases);
-      introduceDerivedTable(newL, join);
-      return join;
     }
 
     @Override
diff --git a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
index 3293a409ed2..4195ec443b5 100644
--- a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
+++ b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
@@ -3,4 +3,9 @@ create table t1 (key int, value int);
 explain cbo
 with cte as
 (select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
-select * from cte a join cte b join cte c 
+select * from cte a join cte b join cte c;
+
+explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
index 8ddab9b785c..bad3855cb5d 100644
--- a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
+++ b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
@@ -32,3 +32,29 @@ HiveProject(a.key=[$0], a.value=[$1], a.block__offset__inside__file=[$2], a.inpu
         HiveTableScan(table=[[default, t1]], table:alias=[t1])
     HiveTableScan(table=[[default, t1]], table:alias=[t1])
 
+Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer 3' is a cross product
+PREHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a.key=[$0], a.value=[$1], a.block__offset__inside__file=[$2], a.input__file__name=[$3], a.row__id=[$4], a.row__is__deleted=[$5], b.key=[$6], b.value=[$7], c.key=[$8], c.value=[$9], c.block__offset__inside__file=[$10], c.input__file__name=[$11], c.row__id=[$12], c.row__is__deleted=[$13])
+  HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7])
+      HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+        HiveTableScan(table=[[default, t1]], table:alias=[t1])
+        HiveProject(key=[$0], value=[$1])
+          HiveTableScan(table=[[default, t1]], table:alias=[b])
+    HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
