zhangqidong created HIVE-23809:
----------------------------------

             Summary: Data loss occurs when using the Tez engine to join tables with 
different bucketing_version values
                 Key: HIVE-23809
                 URL: https://issues.apache.org/jira/browse/HIVE-23809
             Project: Hive
          Issue Type: Bug
          Components: Hive, Tez
    Affects Versions: 3.1.0
            Reporter: zhangqidong


*Test case:*
create table tb_a (a int, b string,c string);
create table tb_b (a int, b string,c string);
insert into tb_a values 
(11,'a','aa'),(22,'b','bb'),(33,'c','cc'),(44,'d','dd'),(5,'e','ee'),(6,'f','ff'),(7,'g','gg');
insert into tb_b values 
(11,'a','aa'),(22,'b','bb'),(33,'c','cc'),(44,'d','dd'),(5,'e','ee'),(6,'f','ff'),(7,'g','gg');
alter table tb_a set tblproperties ("bucketing_version"='1');
alter table tb_b set tblproperties ("bucketing_version"='2');
*Hive SQL:*
*set hive.auto.convert.join=false;*
*set mapred.reduce.tasks=2;*
select ta.a as a_a, tb.b as b_b from tb_a ta join tb_b tb on(ta.a=tb.a);


set hive.execution.engine=mr;
+------+------+
| a_a | b_b |
+------+------+
| 5 | e |
| 6 | f |
| 7 | g |
| 11 | a |
| 22 | b |
| 33 | c |
| 44 | d |
+------+------+


set hive.execution.engine=tez;
+------+------+
| a_a | b_b |
+------+------+
| 6 | f |
| 5 | e |
| 11 | a |
| 33 | c |
+------+------+

 
 
 
 
 
 



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to