This is an automated email from the ASF dual-hosted git repository.
deniskuzZ pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 99b61cc9f44 HIVE-29616: Fix incorrect column lineage when multiple
subqueries use identical table aliases (#6485)
99b61cc9f44 is described below
commit 99b61cc9f442856f3205ec2e4b6a463fe4e82a79
Author: ljq-dmr <[email protected]>
AuthorDate: Mon Jun 1 14:01:11 2026 +0800
HIVE-29616: Fix incorrect column lineage when multiple subqueries use
identical table aliases (#6485)
---
.../hive/ql/optimizer/lineage/ExprProcFactory.java | 17 ++++++---
ql/src/test/queries/clientpositive/lineage8.q | 19 ++++++++++
.../results/clientpositive/llap/lineage8.q.out | 41 ++++++++++++++++++++++
3 files changed, 73 insertions(+), 4 deletions(-)
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
index 8ba2c51e850..dea8f4a3b6c 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
@@ -183,13 +183,12 @@ public static SemanticNodeProcessor getColumnProcessor() {
return new ColumnExprProcessor();
}
- private static boolean findSourceColumn(
+ private static boolean findSourceColumn(Operator<? extends OperatorDesc>
inpOp,
LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
for (Map.Entry<String, TableScanOperator> topOpMap:
lctx.getParseCtx().getTopOps().entrySet()) {
TableScanOperator tableScanOp = topOpMap.getValue();
Table tbl = tableScanOp.getConf().getTableMetadata();
- if (tbl.getTableName().equals(tabAlias)
- || tabAlias.equals(tableScanOp.getConf().getAlias())) {
+ if (isMatchingTableScan(inpOp, tabAlias, tableScanOp, tbl)) {
for (FieldSchema column: tbl.getCols()) {
if (column.getName().equals(alias)) {
TableAliasInfo table = new TableAliasInfo();
@@ -208,6 +207,16 @@ private static boolean findSourceColumn(
return false;
}
+ private static boolean isMatchingTableScan(Operator<? extends OperatorDesc>
inpOp, String tabAlias,
+ TableScanOperator tableScanOp, Table tbl) {
+ boolean operatorIdMatches =
inpOp.getOperatorId().equals(tableScanOp.getOperatorId());
+
+ boolean tableNameMatches = tbl.getTableName().equals(tabAlias);
+ boolean aliasMatches = tabAlias.equals(tableScanOp.getConf().getAlias());
+
+ return operatorIdMatches && (tableNameMatches || aliasMatches);
+ }
+
/**
* Get the expression string of an expression node.
*/
@@ -241,7 +250,7 @@ public static String getExprString(RowSchema rs,
ExprNodeDesc expr,
}
if (tabAlias != null && tabAlias.length() > 0
&& !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
- if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) &&
dep != null) {
+ if (cond != null && !findSourceColumn(inpOp, lctx, cond, tabAlias,
alias) && dep != null) {
cond.getBaseCols().addAll(dep.getBaseCols());
}
return tabAlias + "." + alias;
diff --git a/ql/src/test/queries/clientpositive/lineage8.q
b/ql/src/test/queries/clientpositive/lineage8.q
new file mode 100644
index 00000000000..959376eaf41
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/lineage8.q
@@ -0,0 +1,19 @@
+set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger;
+
+create table table_1 (id1 int, id2 int);
+create table table_2 (id1 int, id2 int);
+
+create table table_3 as
+select id1 from table_1 t1 where t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2;
+
+create table table_4 as
+select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where
t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2;
+
+create table table_5 as
+select t.id1 from
+(select id1 from table_1 t1 where t1.id2 = 1) t
+join table_2 t1 on t.id1 = t1.id2;
diff --git a/ql/src/test/results/clientpositive/llap/lineage8.q.out
b/ql/src/test/results/clientpositive/llap/lineage8.q.out
new file mode 100644
index 00000000000..6f8334018da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/lineage8.q.out
@@ -0,0 +1,41 @@
+PREHOOK: query: create table table_1 (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_1
+PREHOOK: query: create table table_2 (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_2
+PREHOOK: query: create table table_3 as
+select id1 from table_1 t1 where t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_3
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"24a0f860f60a1b7d5f350fd8eb164a37","queryText":"create
table table_3 as\nselect id1 from table_1 t1 where t1.id2 = 1\nunion
all\nselect id1 from table_2 t1 where t1.id2 =
2","edges":[{"sources":[1,2],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"(t1.id2
=
1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2
= 2)","edgeType":"PREDICATE"}],"vertices": [...]
+PREHOOK: query: create table table_4 as
+select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where
t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_4
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"761d0cf34076cec77766bf7af8f1cbe9","queryText":"create
table table_4 as\nselect id1 from (select id1,id2 from table_1 t1 where t1.id1
= 3 ) t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 =
2","edges":[{"sources":[1],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"expression":"((t1.id1
= 3) and (t1.id2 =
1))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"e [...]
+PREHOOK: query: create table table_5 as
+select t.id1 from
+(select id1 from table_1 t1 where t1.id2 = 1) t
+join table_2 t1 on t.id1 = t1.id2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_5
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"615bb67f6ff2dd50695bffd14c296677","queryText":"create
table table_5 as\nselect t.id1 from\n(select id1 from table_1 t1 where t1.id2
= 1) t\njoin table_2 t1 on t.id1 =
t1.id2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,1],"targets":[0],"expression":"((t1.id2
= 1) and t1.id1 is not
null)","edgeType":"PREDICATE"},{"sources":[1,3],"targets":[0],"expression":"(t1.id1
= t1.id2)","edgeType":"PREDICATE [...]