This is an automated email from the ASF dual-hosted git repository.

soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 0433dbba6c0 HIVE-29634: Fix semijoin RHS column registration for multiple columns from same alias (#6516)
0433dbba6c0 is described below

commit 0433dbba6c0e1d4fdfbe1bccca6701d6eaf9c77c
Author: Tanishq Chugh <[email protected]>
AuthorDate: Wed Jun 3 22:18:44 2026 +0530

    HIVE-29634: Fix semijoin RHS column registration for multiple columns from same alias (#6516)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     | 21 +++++++++----
 .../queries/clientpositive/join_common_rhs_alias.q |  8 +++++
 .../llap/join_common_rhs_alias.q.out               | 35 ++++++++++++++++++++++
 .../results/clientpositive/llap/semijoin6.q.out    | 10 +++----
 4 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b23871278c5..7781ded7bfa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3065,14 +3065,23 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
             && condn.getToken().getType() == HiveParser.DOT) {
           // get the semijoin rhs table name and field name
           fields1 = new ArrayList<String>();
-          int rhssize = rightAliases.size();
+          List<String> scopedLeftAliases = new ArrayList<>();
+          List<String> scopedRightAliases = new ArrayList<>();
+
           parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
-              leftAliases, rightAliases, null, aliasToOpInfo);
-          String rhsAlias = null;
+              scopedLeftAliases, scopedRightAliases, null, aliasToOpInfo);
 
-          if (rightAliases.size() > rhssize) { // the new table is rhs table
-            rhsAlias = rightAliases.get(rightAliases.size() - 1);
-          }
+          String rhsAlias = scopedRightAliases.isEmpty() ? null : 
scopedRightAliases.get(0);
+          scopedLeftAliases.forEach(alias -> {
+            if (!leftAliases.contains(alias)) {
+              leftAliases.add(alias);
+            }
+          });
+          scopedRightAliases.forEach(alias -> {
+            if (!rightAliases.contains(alias)) {
+              rightAliases.add(alias);
+            }
+          });
 
           parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
               leftAliases, rightAliases, fields1, aliasToOpInfo);
diff --git a/ql/src/test/queries/clientpositive/join_common_rhs_alias.q b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q
new file mode 100644
index 00000000000..b691f6c8803
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q
@@ -0,0 +1,8 @@
+CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING);
+
+INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a");
+
+SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL;
diff --git a/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out
new file mode 100644
index 00000000000..1ce5e828621
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out
@@ -0,0 +1,35 @@
+PREHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab
+POSTHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab
+PREHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), 
("c", "c" , "a")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab
+POSTHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), 
("c", "c" , "a")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab
+POSTHOOK: Lineage: tab.c1 SCRIPT []
+POSTHOOK: Lineage: tab.c2 SCRIPT []
+POSTHOOK: Lineage: tab.c3 SCRIPT []
+PREHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+#### A masked pattern was here ####
+a      a       aa
+b      b       ba
diff --git a/ql/src/test/results/clientpositive/llap/semijoin6.q.out b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
index 0c7e9d4f441..327b12ec050 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin6.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
@@ -777,20 +777,20 @@ STAGE PLANS:
                     Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: a (type: int), b (type: int)
-                      outputColumnNames: _col0, _col2
+                      outputColumnNames: _col0, _col3
                       Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Group By Operator
-                        keys: _col0 (type: int), _col2 (type: int), _col2 
(type: int)
+                        keys: _col0 (type: int), _col3 (type: int), _col3 
(type: int), _col3 (type: int)
                         minReductionHashAggr: 0.4
                         mode: hash
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 5 Data size: 60 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 5 Data size: 80 Basic stats: 
COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: int)
                           null sort order: z
                           sort order: +
                           Map-reduce partition columns: _col0 (type: int)
-                          Statistics: Num rows: 5 Data size: 60 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 5 Data size: 80 Basic stats: 
COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs

Reply via email to