This is an automated email from the ASF dual-hosted git repository.
soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0433dbba6c0 HIVE-29634: Fix semijoin RHS column registration for multiple columns from same alias (#6516)
0433dbba6c0 is described below
commit 0433dbba6c0e1d4fdfbe1bccca6701d6eaf9c77c
Author: Tanishq Chugh <[email protected]>
AuthorDate: Wed Jun 3 22:18:44 2026 +0530
HIVE-29634: Fix semijoin RHS column registration for multiple columns from same alias (#6516)
---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 21 +++++++++----
.../queries/clientpositive/join_common_rhs_alias.q | 8 +++++
.../llap/join_common_rhs_alias.q.out | 35 ++++++++++++++++++++++
.../results/clientpositive/llap/semijoin6.q.out | 10 +++----
4 files changed, 63 insertions(+), 11 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index b23871278c5..7781ded7bfa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3065,14 +3065,23 @@ void parseJoinCondPopulateAlias(QBJoinTree joinTree, ASTNode condn,
&& condn.getToken().getType() == HiveParser.DOT) {
// get the semijoin rhs table name and field name
fields1 = new ArrayList<String>();
- int rhssize = rightAliases.size();
+ List<String> scopedLeftAliases = new ArrayList<>();
+ List<String> scopedRightAliases = new ArrayList<>();
+
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(0),
- leftAliases, rightAliases, null, aliasToOpInfo);
- String rhsAlias = null;
+ scopedLeftAliases, scopedRightAliases, null, aliasToOpInfo);
- if (rightAliases.size() > rhssize) { // the new table is rhs table
- rhsAlias = rightAliases.get(rightAliases.size() - 1);
- }
+ String rhsAlias = scopedRightAliases.isEmpty() ? null : scopedRightAliases.get(0);
+ scopedLeftAliases.forEach(alias -> {
+ if (!leftAliases.contains(alias)) {
+ leftAliases.add(alias);
+ }
+ });
+ scopedRightAliases.forEach(alias -> {
+ if (!rightAliases.contains(alias)) {
+ rightAliases.add(alias);
+ }
+ });
parseJoinCondPopulateAlias(joinTree, (ASTNode) condn.getChild(1),
leftAliases, rightAliases, fields1, aliasToOpInfo);
diff --git a/ql/src/test/queries/clientpositive/join_common_rhs_alias.q b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q
new file mode 100644
index 00000000000..b691f6c8803
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_common_rhs_alias.q
@@ -0,0 +1,8 @@
+CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING);
+
+INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a");
+
+SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL;
diff --git a/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out
new file mode 100644
index 00000000000..1ce5e828621
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_common_rhs_alias.q.out
@@ -0,0 +1,35 @@
+PREHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab
+POSTHOOK: query: CREATE TABLE tab (c1 STRING, c2 STRING, c3 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab
+PREHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab
+POSTHOOK: query: INSERT INTO tab VALUES("a", "a", "aa"), ("b", "b", "ba"), ("c", "c" , "a")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab
+POSTHOOK: Lineage: tab.c1 SCRIPT []
+POSTHOOK: Lineage: tab.c2 SCRIPT []
+POSTHOOK: Lineage: tab.c3 SCRIPT []
+PREHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.* FROM tab t1 LEFT OUTER JOIN tab t2
+ON t1.c1 == t2.c1
+AND CONCAT ( t1.c2 , 'a') = CONCAT ( t2.c2 , t2.c3 )
+WHERE t2.c1 IS NULL
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab
+#### A masked pattern was here ####
+a a aa
+b b ba
diff --git a/ql/src/test/results/clientpositive/llap/semijoin6.q.out b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
index 0c7e9d4f441..327b12ec050 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin6.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
@@ -777,20 +777,20 @@ STAGE PLANS:
Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), b (type: int)
- outputColumnNames: _col0, _col2
+ outputColumnNames: _col0, _col3
Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- keys: _col0 (type: int), _col2 (type: int), _col2 (type: int)
+ keys: _col0 (type: int), _col3 (type: int), _col3 (type: int), _col3 (type: int)
minReductionHashAggr: 0.4
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs