[ 
https://issues.apache.org/jira/browse/HIVE-27655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

zengxl updated HIVE-27655:
--------------------------
    Description: 
*1.set up parameters*

close mapjoin and  set the number of reduce to more than 2
{code:java}
set hive.auto.convert.join=false;
set mapreduce.job.reduces=2; {code}
*2.Prepare data*
 * {*}{color:#ff0000}Note{color}{*}: The table storage format does not affect 
the test

{code:java}
create table test_parquet(acc_nbr string,user_area_1 string,user_area_2 
string,user_area_3 string,user_area_4 string) partitioned by (p_date_no string) 
stored as parquet;
create table join_test_parquet(ms_area_id string,area_name string) stored as 
textfile;
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_1','1','2','3','4');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_2','5','6','7','8');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_3','9','10','11','12');
insert into table join_test_parquet values 
('1','area_name_1'),('2','area_name_2'),('3','area_name_3'),('4','area_name_4');
insert into table join_test_parquet values 
('5','area_name_5'),('6','area_name_6'),('7','area_name_7'),('8','area_name_8');
insert into table join_test_parquet values 
('9','area_name_9'),('10','area_name_10'),('11','area_name_11'),('12','area_name_12');
 {code}
*Test SQL {color:#ff0000}case1 {color}as follows*
{code:java}
create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2,
b3.area_name as a3,
b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821';{code}
*{color:#ff0000}incorrect{color}* *reuslt:*
{code:java}
hive> select * from test_join_result ;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL    NULL    
NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL    NULL    
area_name_8
acc_nbr_3       9       10      11      12      area_name_9     NULL    NULL    
area_name_12
Time taken: 0.435 seconds, Fetched: 3 row(s) {code}
*{color:#ff0000}correct{color}* *reuslt:*
{code:java}
 hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10    
area_name_11    area_name_12
acc_nbr_1       1       2       3       4       area_name_1     area_name_2     
area_name_3     area_name_4
acc_nbr_2       5       6       7       8       area_name_5     area_name_6     
area_name_7     area_name_8
Time taken: 0.276 seconds, Fetched: 3 row(s){code}
*Test SQL {color:#ff0000}case2{color} as follows*
{code:java}
create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2
--b3.area_name as a3,
--b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
--LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
--LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821'; {code}
*{color:#ff0000}incorrect {color}result*
{code:java}
 hive> select * from test_join_result;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL
acc_nbr_3       9       10      11      12      area_name_9     NULL {code}
*correct reuslt*
{code:java}
hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10
acc_nbr_1       1       2       3       4       area_name_1     area_name_2
acc_nbr_2       5       6       7       8       area_name_5     area_name_6 
{code}
 

  was:
*1.set up parameters*

close mapjoin and  set the number of reduce to more than 2

 
{code:java}
set hive.auto.convert.join=false;
set mapreduce.job.reduces=2; {code}
 

 

*2.Prepare data*
 * {*}{color:#FF0000}Note{color}{*}: The table storage format does not affect 
the test

{code:java}
create table test_parquet(acc_nbr string,user_area_1 string,user_area_2 
string,user_area_3 string,user_area_4 string) partitioned by (p_date_no string) 
stored as parquet;
create table join_test_parquet(ms_area_id string,area_name string) stored as 
textfile;
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_1','1','2','3','4');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_2','5','6','7','8');
insert into table test_parquet partition (p_date_no='20230821') values 
('acc_nbr_3','9','10','11','12');
insert into table join_test_parquet values 
('1','area_name_1'),('2','area_name_2'),('3','area_name_3'),('4','area_name_4');
insert into table join_test_parquet values 
('5','area_name_5'),('6','area_name_6'),('7','area_name_7'),('8','area_name_8');
insert into table join_test_parquet values 
('9','area_name_9'),('10','area_name_10'),('11','area_name_11'),('12','area_name_12');
 {code}
{*}Test SQL {color:#FF0000}case1 {color}as follows{*}{*}{*}

 
{code:java}

{code}
*create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2,
b3.area_name as a3,
b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821';* 

*{color:#FF0000}incorrect{color}* *reuslt:*

 
{code:java}
hive> select * from test_join_result ;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL    NULL    
NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL    NULL    
area_name_8
acc_nbr_3       9       10      11      12      area_name_9     NULL    NULL    
area_name_12
Time taken: 0.435 seconds, Fetched: 3 row(s) {code}
*{color:#FF0000}correct{color}* {*}reuslt:{*}{*}{*}
{code:java}
 hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10    
area_name_11    area_name_12
acc_nbr_1       1       2       3       4       area_name_1     area_name_2     
area_name_3     area_name_4
acc_nbr_2       5       6       7       8       area_name_5     area_name_6     
area_name_7     area_name_8
Time taken: 0.276 seconds, Fetched: 3 row(s){code}
{*}{*}{*}Test SQL {color:#FF0000}case2{color} as follows{*}{*}{*}

 
{code:java}
create table test_join_result 
as 
select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
b1.area_name as a1,
b2.area_name as a2
--b3.area_name as a3,
--b4.area_name as a4
from test_parquet a
LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
--LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
--LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
where a.p_date_no = '20230821'; {code}
*{color:#FF0000}incorrect {color}result*

 

 
{code:java}
 hive> select * from test_join_result;
OK
acc_nbr_1       1       2       3       4       area_name_1     NULL
acc_nbr_2       5       6       7       8       area_name_5     NULL
acc_nbr_3       9       10      11      12      area_name_9     NULL {code}
*correct reuslt*
{code:java}
hive> select * from  test_join_result ;
OK
acc_nbr_3       9       10      11      12      area_name_9     area_name_10
acc_nbr_1       1       2       3       4       area_name_1     area_name_2
acc_nbr_2       5       6       7       8       area_name_5     area_name_6 
{code}
 


> When a table is joined more than 2 times, the join result is null
> -----------------------------------------------------------------
>
>                 Key: HIVE-27655
>                 URL: https://issues.apache.org/jira/browse/HIVE-27655
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 3.1.2
>            Reporter: zengxl
>            Assignee: Ayush Saxena
>            Priority: Major
>
> *1.set up parameters*
> close mapjoin and  set the number of reduce to more than 2
> {code:java}
> set hive.auto.convert.join=false;
> set mapreduce.job.reduces=2; {code}
> *2.Prepare data*
>  * {*}{color:#ff0000}Note{color}{*}: The table storage format does not affect 
> the test
> {code:java}
> create table test_parquet(acc_nbr string,user_area_1 string,user_area_2 
> string,user_area_3 string,user_area_4 string) partitioned by (p_date_no 
> string) stored as parquet;
> create table join_test_parquet(ms_area_id string,area_name string) stored as 
> textfile;
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_1','1','2','3','4');
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_2','5','6','7','8');
> insert into table test_parquet partition (p_date_no='20230821') values 
> ('acc_nbr_3','9','10','11','12');
> insert into table join_test_parquet values 
> ('1','area_name_1'),('2','area_name_2'),('3','area_name_3'),('4','area_name_4');
> insert into table join_test_parquet values 
> ('5','area_name_5'),('6','area_name_6'),('7','area_name_7'),('8','area_name_8');
> insert into table join_test_parquet values 
> ('9','area_name_9'),('10','area_name_10'),('11','area_name_11'),('12','area_name_12');
>  {code}
> *Test SQL {color:#ff0000}case1 {color}as follows*
> {code:java}
> create table test_join_result 
> as 
> select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
> b1.area_name as a1,
> b2.area_name as a2,
> b3.area_name as a3,
> b4.area_name as a4
> from test_parquet a
> LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
> LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
> LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
> LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
> where a.p_date_no = '20230821';{code}
> *{color:#ff0000}incorrect{color}* *reuslt:*
> {code:java}
> hive> select * from test_join_result ;
> OK
> acc_nbr_1       1       2       3       4       area_name_1     NULL    NULL  
>   NULL
> acc_nbr_2       5       6       7       8       area_name_5     NULL    NULL  
>   area_name_8
> acc_nbr_3       9       10      11      12      area_name_9     NULL    NULL  
>   area_name_12
> Time taken: 0.435 seconds, Fetched: 3 row(s) {code}
> *{color:#ff0000}correct{color}* *reuslt:*
> {code:java}
>  hive> select * from  test_join_result ;
> OK
> acc_nbr_3       9       10      11      12      area_name_9     area_name_10  
>   area_name_11    area_name_12
> acc_nbr_1       1       2       3       4       area_name_1     area_name_2   
>   area_name_3     area_name_4
> acc_nbr_2       5       6       7       8       area_name_5     area_name_6   
>   area_name_7     area_name_8
> Time taken: 0.276 seconds, Fetched: 3 row(s){code}
> *Test SQL {color:#ff0000}case2{color} as follows*
> {code:java}
> create table test_join_result 
> as 
> select  a.acc_nbr,user_area_1,user_area_2,user_area_3,user_area_4,
> b1.area_name as a1,
> b2.area_name as a2
> --b3.area_name as a3,
> --b4.area_name as a4
> from test_parquet a
> LEFT join join_test_parquet b1 on a.user_area_1 = b1.ms_area_id
> LEFT join join_test_parquet b2 on a.user_area_2 = b2.ms_area_id
> --LEFT join join_test_parquet b3 on a.user_area_3 = b3.ms_area_id
> --LEFT join join_test_parquet b4 on a.user_area_4 = b4.ms_area_id
> where a.p_date_no = '20230821'; {code}
> *{color:#ff0000}incorrect {color}result*
> {code:java}
>  hive> select * from test_join_result;
> OK
> acc_nbr_1       1       2       3       4       area_name_1     NULL
> acc_nbr_2       5       6       7       8       area_name_5     NULL
> acc_nbr_3       9       10      11      12      area_name_9     NULL {code}
> *correct reuslt*
> {code:java}
> hive> select * from  test_join_result ;
> OK
> acc_nbr_3       9       10      11      12      area_name_9     area_name_10
> acc_nbr_1       1       2       3       4       area_name_1     area_name_2
> acc_nbr_2       5       6       7       8       area_name_5     area_name_6 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to