zengxl created HIVE-27655:
-----------------------------

             Summary: When a table is joined more than 2 times, the join result 
is null
                 Key: HIVE-27655
                 URL: https://issues.apache.org/jira/browse/HIVE-27655
             Project: Hive
          Issue Type: Bug
    Affects Versions: 3.1.2
            Reporter: zengxl
            Assignee: Ayush Saxena


*1.set up parameters*

close mapjoin and  set the number of reduce to more than 2

 
{code:java}
set hive.auto.convert.join=false;
set mapreduce.job.reduces=2; {code}
 

 

*2.Prepare data*
 * {*}{color:#FF0000}Note{color}{*}: The table storage format does not affect 
the test

{code:java}
create table test_parquet(acc_nbr string,user_area_1 string,user_area_2 
string,user_area_3 string,user_area_4 string) partitioned by (p_date_no string) 
stored as parquet;
create table join_test_parquet(ms_area_id string,area_name string) stored as 
textfile;
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_1','1','2','3','4');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_2','5','6','7','8');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_3','9','10','11','12');
insert into table join_test_parquet values 
('1','area_name_1'),('2','area_name_2'),('3','area_name_3'),('4','area_name_4');
insert into table join_test_parquet values 
('5','area_name_5'),('6','area_name_6'),('7','area_name_7'),('8','area_name_8');
insert into table join_test_parquet values 
('9','area_name_9'),('10','area_name_10'),('11','area_name_11'),('12','area_name_12');
 {code}
{*}Test SQL {color:#FF0000}case1 {color}as follows{*}{*}{*}

 
{code:java}

{code}
*create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2,
b3.area_name as a3,
b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821';* 

*{color:#FF0000}incorrect{color}* *reuslt:*

 
{code:java}
hive> select * from test_join_result ;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL    NULL    
NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL    NULL    
area_name_8
acc_nbr_3       9       10      11      12      area_name_9     NULL    NULL    
area_name_12
Time taken: 0.435 seconds, Fetched: 3 row(s) {code}
*{color:#FF0000}correct{color}* {*}reuslt:{*}{*}{*}
{code:java}
 hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10    
area_name_11    area_name_12
acc_nbr_1       1       2       3       4       area_name_1     area_name_2     
area_name_3     area_name_4
acc_nbr_2       5       6       7       8       area_name_5     area_name_6     
area_name_7     area_name_8
Time taken: 0.276 seconds, Fetched: 3 row(s){code}
{*}{*}{*}Test SQL {color:#FF0000}case2{color} as follows{*}{*}{*}

 
{code:java}
create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2
--b3.area_name as a3,
--b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
--LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
--LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821'; {code}
*{color:#FF0000}incorrect {color}result*

 

 
{code:java}
 hive> select * from test_join_result;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL
acc_nbr_3       9       10      11      12      area_name_9     NULL {code}
*correct reuslt*
{code:java}
hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10
acc_nbr_1       1       2       3       4       area_name_1     area_name_2
acc_nbr_2       5       6       7       8       area_name_5     area_name_6 
{code}
 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to