This is an automated email from the ASF dual-hosted git repository.

deniskuzZ pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 99b61cc9f44 HIVE-29616: Fix incorrect column lineage when multiple 
subqueries with identical table aliases (#6485)
99b61cc9f44 is described below

commit 99b61cc9f442856f3205ec2e4b6a463fe4e82a79
Author: ljq-dmr <[email protected]>
AuthorDate: Mon Jun 1 14:01:11 2026 +0800

    HIVE-29616: Fix incorrect column lineage when multiple subqueries with 
identical table aliases (#6485)
---
 .../hive/ql/optimizer/lineage/ExprProcFactory.java | 17 ++++++---
 ql/src/test/queries/clientpositive/lineage8.q      | 19 ++++++++++
 .../results/clientpositive/llap/lineage8.q.out     | 41 ++++++++++++++++++++++
 3 files changed, 73 insertions(+), 4 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
index 8ba2c51e850..dea8f4a3b6c 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java
@@ -183,13 +183,12 @@ public static SemanticNodeProcessor getColumnProcessor() {
     return new ColumnExprProcessor();
   }
 
-  private static boolean findSourceColumn(
+  private static boolean findSourceColumn(Operator<? extends OperatorDesc> 
inpOp,
       LineageCtx lctx, Predicate cond, String tabAlias, String alias) {
     for (Map.Entry<String, TableScanOperator> topOpMap: 
lctx.getParseCtx().getTopOps().entrySet()) {
       TableScanOperator tableScanOp = topOpMap.getValue();
       Table tbl = tableScanOp.getConf().getTableMetadata();
-      if (tbl.getTableName().equals(tabAlias)
-          || tabAlias.equals(tableScanOp.getConf().getAlias())) {
+      if (isMatchingTableScan(inpOp, tabAlias, tableScanOp, tbl)) {
         for (FieldSchema column: tbl.getCols()) {
           if (column.getName().equals(alias)) {
             TableAliasInfo table = new TableAliasInfo();
@@ -208,6 +207,16 @@ private static boolean findSourceColumn(
     return false;
   }
 
+  private static boolean isMatchingTableScan(Operator<? extends OperatorDesc> 
inpOp, String tabAlias,
+      TableScanOperator tableScanOp, Table tbl) {
+    boolean operatorIdMatches = 
inpOp.getOperatorId().equals(tableScanOp.getOperatorId());
+
+    boolean tableNameMatches = tbl.getTableName().equals(tabAlias);
+    boolean aliasMatches = tabAlias.equals(tableScanOp.getConf().getAlias());
+
+    return operatorIdMatches && (tableNameMatches || aliasMatches);
+  }
+
   /**
    * Get the expression string of an expression node.
    */
@@ -241,7 +250,7 @@ public static String getExprString(RowSchema rs, 
ExprNodeDesc expr,
       }
       if (tabAlias != null && tabAlias.length() > 0
           && !tabAlias.startsWith("_") && !tabAlias.startsWith("$")) {
-        if (cond != null && !findSourceColumn(lctx, cond, tabAlias, alias) && 
dep != null) {
+        if (cond != null && !findSourceColumn(inpOp, lctx, cond, tabAlias, 
alias) && dep != null) {
           cond.getBaseCols().addAll(dep.getBaseCols());
         }
         return tabAlias + "." + alias;
diff --git a/ql/src/test/queries/clientpositive/lineage8.q 
b/ql/src/test/queries/clientpositive/lineage8.q
new file mode 100644
index 00000000000..959376eaf41
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/lineage8.q
@@ -0,0 +1,19 @@
+set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.LineageLogger;
+
+create table table_1 (id1 int, id2 int);
+create table table_2 (id1 int, id2 int);
+
+create table table_3 as
+select id1 from table_1 t1 where t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2;
+
+create table table_4 as
+select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where 
t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2;
+
+create table table_5 as
+select t.id1 from
+(select id1 from table_1 t1 where t1.id2 = 1) t
+join table_2 t1 on t.id1 = t1.id2;
diff --git a/ql/src/test/results/clientpositive/llap/lineage8.q.out 
b/ql/src/test/results/clientpositive/llap/lineage8.q.out
new file mode 100644
index 00000000000..6f8334018da
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/lineage8.q.out
@@ -0,0 +1,41 @@
+PREHOOK: query: create table table_1 (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_1
+PREHOOK: query: create table table_2 (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_2
+PREHOOK: query: create table table_3 as
+select id1 from table_1 t1 where t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_3
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"24a0f860f60a1b7d5f350fd8eb164a37","queryText":"create
 table table_3 as\nselect id1 from table_1 t1 where t1.id2 = 1\nunion 
all\nselect id1 from table_2 t1 where t1.id2 = 
2","edges":[{"sources":[1,2],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"(t1.id2
 = 
1)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"(t1.id2 
= 2)","edgeType":"PREDICATE"}],"vertices": [...]
+PREHOOK: query: create table table_4 as
+select id1 from (select id1,id2 from table_1 t1 where t1.id1 = 3 ) t1 where 
t1.id2 = 1
+union all
+select id1 from table_2 t1 where t1.id2 = 2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_4
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"761d0cf34076cec77766bf7af8f1cbe9","queryText":"create
 table table_4 as\nselect id1 from (select id1,id2 from table_1 t1 where t1.id1 
= 3 ) t1 where t1.id2 = 1\nunion all\nselect id1 from table_2 t1 where t1.id2 = 
2","edges":[{"sources":[1],"targets":[0],"expression":"id1","edgeType":"PROJECTION"},{"sources":[2,3],"targets":[0],"expression":"((t1.id1
 = 3) and (t1.id2 = 
1))","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"e [...]
+PREHOOK: query: create table table_5 as
+select t.id1 from
+(select id1 from table_1 t1 where t1.id2 = 1) t
+join table_2 t1 on t.id1 = t1.id2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@table_1
+PREHOOK: Input: default@table_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_5
+Result schema has 1 fields, but we don't get as many dependencies
+{"version":"1.0","engine":"tez","database":"default","hash":"615bb67f6ff2dd50695bffd14c296677","queryText":"create
 table table_5 as\nselect t.id1 from\n(select id1 from table_1 t1 where t1.id2 
= 1) t\njoin table_2 t1 on t.id1 = 
t1.id2","edges":[{"sources":[1],"targets":[0],"edgeType":"PROJECTION"},{"sources":[2,1],"targets":[0],"expression":"((t1.id2
 = 1) and t1.id1 is not 
null)","edgeType":"PREDICATE"},{"sources":[1,3],"targets":[0],"expression":"(t1.id1
 = t1.id2)","edgeType":"PREDICATE [...]

Reply via email to