[jira] [Updated] (HIVE-22227) Tez bucket pruning produces wrong result with shared work optimization

Vineet Garg (Jira) Fri, 20 Sep 2019 15:12:55 -0700


     [ 
https://issues.apache.org/jira/browse/HIVE-22227?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Vineet Garg updated HIVE-22227:
-------------------------------
    Description: 
*Reproducer*
{code:sql}
set hive.tez.bucket.pruning=true;
set hive.optimize.shared.work=true;

CREATE TABLE srcbucket_mapjoin_n16(key int, value string) partitioned by (ds 
string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
CREATE TABLE tab_part_n10 (key int, value string) PARTITIONED BY(ds STRING) 
CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORCFILE;
CREATE TABLE srcbucket_mapjoin_part_n17 (key int, value string) partitioned by 
(ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;

load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE 
srcbucket_mapjoin_n16 partition(ds='2008-04-08');
load data local inpath '.$HIVE_SRC/data/files/bmj1/000001_0' INTO TABLE 
srcbucket_mapjoin_n16 partition(ds='2008-04-08');

load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE 
srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
load data local inpath '$HIVE_SRC/data/files/bmj/000001_0' INTO TABLE 
srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
load data local inpath '$HIVE_SRC/data/files/bmj/000002_0' INTO TABLE 
srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');


set hive.optimize.bucketingsorting=false;
insert overwrite table tab_part_n10 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_part_n17;

CREATE TABLE tab_n9(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED 
BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE;
insert overwrite table tab_n9 partition (ds='2008-04-08')
select key,value from srcbucket_mapjoin_n16;

select * from
(select * from tab_n9 where tab_n9.key = 0)a
join
(select * from tab_part_n10 where tab_part_n10.key = 98)b full outer join 
tab_part_n10 c on a.key = b.key and b.key = c.key
order by 1,2,3,4,5,6,7,8,9;
{code}

> Tez bucket pruning produces wrong result with shared work optimization
> ----------------------------------------------------------------------
>
>                 Key: HIVE-22227
>                 URL: https://issues.apache.org/jira/browse/HIVE-22227
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>    Affects Versions: 4.0.0
>            Reporter: Vineet Garg
>            Assignee: Vineet Garg
>            Priority: Major
>
> *Reproducer*
> {code:sql}
> set hive.tez.bucket.pruning=true;
> set hive.optimize.shared.work=true;
> CREATE TABLE srcbucket_mapjoin_n16(key int, value string) partitioned by (ds 
> string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
> CREATE TABLE tab_part_n10 (key int, value string) PARTITIONED BY(ds STRING) 
> CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS ORCFILE;
> CREATE TABLE srcbucket_mapjoin_part_n17 (key int, value string) partitioned 
> by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
> load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE 
> srcbucket_mapjoin_n16 partition(ds='2008-04-08');
> load data local inpath '.$HIVE_SRC/data/files/bmj1/000001_0' INTO TABLE 
> srcbucket_mapjoin_n16 partition(ds='2008-04-08');
> load data local inpath '$HIVE_SRC/data/files/bmj/000000_0' INTO TABLE 
> srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
> load data local inpath '$HIVE_SRC/data/files/bmj/000001_0' INTO TABLE 
> srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
> load data local inpath '$HIVE_SRC/data/files/bmj/000002_0' INTO TABLE 
> srcbucket_mapjoin_part_n17 partition(ds='2008-04-08');
> set hive.optimize.bucketingsorting=false;
> insert overwrite table tab_part_n10 partition (ds='2008-04-08')
> select key,value from srcbucket_mapjoin_part_n17;
> CREATE TABLE tab_n9(key int, value string) PARTITIONED BY(ds STRING) 
> CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE;
> insert overwrite table tab_n9 partition (ds='2008-04-08')
> select key,value from srcbucket_mapjoin_n16;
> select * from
> (select * from tab_n9 where tab_n9.key = 0)a
> join
> (select * from tab_part_n10 where tab_part_n10.key = 98)b full outer join 
> tab_part_n10 c on a.key = b.key and b.key = c.key
> order by 1,2,3,4,5,6,7,8,9;
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

[jira] [Updated] (HIVE-22227) Tez bucket pruning produces wrong result with shared work optimization

Reply via email to