zhangqidong created HIVE-23809:
----------------------------------

             Summary: Data loss occurs when using tez engine to join different bucketing_version tables
                 Key: HIVE-23809
                 URL: https://issues.apache.org/jira/browse/HIVE-23809
             Project: Hive
          Issue Type: Bug
          Components: Hive, Tez
    Affects Versions: 3.1.0
            Reporter: zhangqidong
*Test case:*

create table tb_a (a int, b string,c string);
create table tb_b (a int, b string,c string);
insert into tb_a values (11,'a','aa'),(22,'b','bb'),(33,'c','cc'),(44,'d','dd'),(5,'e','ee'),(6,'f','ff'),(7,'g','gg');
insert into tb_b values (11,'a','aa'),(22,'b','bb'),(33,'c','cc'),(44,'d','dd'),(5,'e','ee'),(6,'f','ff'),(7,'g','gg');
alter table tb_a set tblproperties ("bucketing_version"='1');
alter table tb_b set tblproperties ("bucketing_version"='2');

*Hive SQL:*

*set hive.auto.convert.join=false;*
*set mapred.reduce.tasks=2;*
select ta.a as a_a, tb.b as b_b from tb_a ta join tb_b tb on(ta.a=tb.a);

set hive.execution.engine=mr;
+------+------+
| a_a  | b_b  |
+------+------+
| 5    | e    |
| 6    | f    |
| 7    | g    |
| 11   | a    |
| 22   | b    |
| 33   | c    |
| 44   | d    |
+------+------+

set hive.execution.engine=tez;
+------+------+
| a_a  | b_b  |
+------+------+
| 6    | f    |
| 5    | e    |
| 11   | a    |
| 33   | c    |
+------+------+

Expected: both engines return all 7 rows. Actual: with the Tez engine only 4 rows are returned; the rows with a = 7, 22 and 44 are missing.

--
This message was sent by Atlassian Jira
(v8.3.4#803005)