[ 
https://issues.apache.org/jira/browse/HIVE-27655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

zengxl updated HIVE-27655:
--------------------------
    Summary: When a table is joined more than 1 times, the join result is null  
(was: When a table is joined more than 2 times, the join result is null)

> When a table is joined more than 1 times, the join result is null
> -----------------------------------------------------------------
>
>                 Key: HIVE-27655
>                 URL: https://issues.apache.org/jira/browse/HIVE-27655
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.2
>            Reporter: zengxl
>            Assignee: Ayush Saxena
>            Priority: Major
>
> *1.set up parameters*
> close mapjoin and  set the number of reduce to more than 2
> {code:java}
> set hive.auto.convert.join=false;
> set mapreduce.job.reduces=2; {code}
> *2.Prepare data*
>  * {*}{color:#ff0000}Note{color}{*}: The table storage format does not affect 
> the test
> {code:java}
> create table test_parquet(acc_nbr string,user_area_1 string,user_area_2 
> string,user_area_3 string,user_area_4 string) partitioned by (p_date_no 
> string) stored as parquet;
> create table join_test_parquet(ms_area_id string,area_name string) stored as 
> textfile;
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_1','1','2','3','4');
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_2','5','6','7','8');
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_3','9','10','11','12');
> insert into table join_test_parquet values 
> ('1','area_name_1'),('2','area_name_2'),('3','area_name_3'),('4','area_name_4');
> insert into table join_test_parquet values 
> ('5','area_name_5'),('6','area_name_6'),('7','area_name_7'),('8','area_name_8');
> insert into table join_test_parquet values 
> ('9','area_name_9'),('10','area_name_10'),('11','area_name_11'),('12','area_name_12');
>  {code}
> *Test SQL {color:#ff0000}case1 {color}as follows*
> {code:java}
> create table test_join_result 
> as 
> select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
> b1.area_name as a1,
> b2.area_name as a2,
> b3.area_name as a3,
> b4.area_name as a4
> from test_parquet a
> LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
> LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
> LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
> LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
> where a.p_date_no = '20230821';{code}
> *{color:#ff0000}incorrect{color}* *reuslt:*
> {code:java}
> hive> select * from test_join_result ;
> OK
> acc_nbr_1       1       2       3       4       area_name_1     NULL    NULL  
>   NULL
> acc_nbr_2       5       6       7       8       area_name_5     NULL    NULL  
>   area_name_8
> acc_nbr_3       9       10      11      12      area_name_9     NULL    NULL  
>   area_name_12
> Time taken: 0.435 seconds, Fetched: 3 row(s) {code}
> *{color:#ff0000}correct{color}* *reuslt:*
> {code:java}
>  hive> select * from  test_join_result ;
> OK
> acc_nbr_3       9       10      11      12      area_name_9     area_name_10  
>   area_name_11    area_name_12
> acc_nbr_1       1       2       3       4       area_name_1     area_name_2   
>   area_name_3     area_name_4
> acc_nbr_2       5       6       7       8       area_name_5     area_name_6   
>   area_name_7     area_name_8
> Time taken: 0.276 seconds, Fetched: 3 row(s){code}
> *Test SQL {color:#ff0000}case2{color} as follows*
> {code:java}
> create table test_join_result 
> as 
> select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
> b1.area_name as a1,
> b2.area_name as a2
> --b3.area_name as a3,
> --b4.area_name as a4
> from test_parquet a
> LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
> LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
> --LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
> --LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
> where a.p_date_no = '20230821'; {code}
> *{color:#ff0000}incorrect {color}result*
> {code:java}
>  hive> select * from test_join_result;
> OK
> acc_nbr_1       1       2       3       4       area_name_1     NULL
> acc_nbr_2       5       6       7       8       area_name_5     NULL
> acc_nbr_3       9       10      11      12      area_name_9     NULL {code}
> *correct reuslt*
> {code:java}
> hive> select * from  test_join_result ;
> OK
> acc_nbr_3       9       10      11      12      area_name_9     area_name_10
> acc_nbr_1       1       2       3       4       area_name_1     area_name_2
> acc_nbr_2       5       6       7       8       area_name_5     area_name_6 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to