This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new c68e7ecb9d2 HIVE-29249: RuntimeException in
PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
c68e7ecb9d2 is described below
commit c68e7ecb9d2a6aae6ac4492fc22de72ea106960b
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Mon Nov 17 14:35:10 2025 +0100
HIVE-29249: RuntimeException in
PlanModifierForASTConv.introduceDerivedTable for queries with self joins (#6117)
The problem occurs when we need to introduce a derived table over the left
input of the join but the input is already modified (newL) by the shuttle.
Looking for the modified input in the join's children fails and raises an
exception. Since we know which input needs to be replaced we can avoid the
lookup mechanism in
`introduceDerivedTable(RelNode, RelNode)` and build directly the new join
operator via copy.
To avoid unnecessary object creation bubling further up via the
HiveRelShuttle mechansim we perform the copy *only* when one of the inputs is
modified.
---
.../calcite/translator/PlanModifierForASTConv.java | 12 ++++++----
.../cbo_self_join_ambiguous_alias_cte.q | 7 +++++-
.../llap/cbo_self_join_ambiguous_alias_cte.q.out | 26 ++++++++++++++++++++++
3 files changed, 40 insertions(+), 5 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index f1dd9dd6e0c..1875538e089 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) {
// No self-join detected, return the join as is
aliases.addAll(lf.aliases);
aliases.addAll(rf.aliases);
+ } else {
+ // Self-join detected, introduce a derived table for the left side
+ aliases.addAll(rf.aliases);
+ newL = introduceDerivedTable(newL);
+ }
+ if (newL == join.getLeft() && newR == join.getRight()) {
+ return join;
+ } else {
return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));
}
- // Self-join detected, introduce a derived table for the left side
- aliases.addAll(rf.aliases);
- introduceDerivedTable(newL, join);
- return join;
}
@Override
diff --git
a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
index 3293a409ed2..4195ec443b5 100644
--- a/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
+++ b/ql/src/test/queries/clientpositive/cbo_self_join_ambiguous_alias_cte.q
@@ -3,4 +3,9 @@ create table t1 (key int, value int);
explain cbo
with cte as
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID,
ROW__IS__DELETED from t1)
-select * from cte a join cte b join cte c
+select * from cte a join cte b join cte c;
+
+explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID,
ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c;
diff --git
a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
index 8ddab9b785c..bad3855cb5d 100644
---
a/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
+++
b/ql/src/test/results/clientpositive/llap/cbo_self_join_ambiguous_alias_cte.q.out
@@ -32,3 +32,29 @@ HiveProject(a.key=[$0], a.value=[$1],
a.block__offset__inside__file=[$2], a.inpu
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
+Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer
2' is a cross product
+Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer
3' is a cross product
+PREHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID,
ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+with cte as
+(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID,
ROW__IS__DELETED from t1)
+select * from cte a join t1 b join cte c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(a.key=[$0], a.value=[$1], a.block__offset__inside__file=[$2],
a.input__file__name=[$3], a.row__id=[$4], a.row__is__deleted=[$5], b.key=[$6],
b.value=[$7], c.key=[$8], c.value=[$9], c.block__offset__inside__file=[$10],
c.input__file__name=[$11], c.row__id=[$12], c.row__is__deleted=[$13])
+ HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not
available])
+ HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2],
INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6],
value0=[$7])
+ HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not
available])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(key=[$0], value=[$1])
+ HiveTableScan(table=[[default, t1]], table:alias=[b])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+