http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q new file mode 100644 index 0000000..8b83ef6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q @@ -0,0 +1,180 @@ +set hive.support.sql11.reserved.keywords=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.spark.dynamic.partition.pruning=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; + +-- SORT_QUERY_RESULTS + +select distinct ds from srcpart; +select distinct hr from srcpart; + +EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds; +create table srcpart_date as select ds as ds, ds as date from srcpart group by ds; +create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr; +create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr; +create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr; + +-- single column, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = '2008-04-08'; + +-- multiple sources, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; + +-- multiple columns single source +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = '2008-04-08' and hr = 11; + +-- empty set +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = 'I DONT EXIST'; + +-- expressions +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where cast(hr as string) = 11; + + +-- parent is reduce tasks +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'; +select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- non-equi join +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); +select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); + +-- old style join syntax +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; +select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; + +-- left join +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; + +-- full outer +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; + +-- with static pruning +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart.hr = 13; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart.hr = 13; + +-- union + subquery +EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask = true; +set hive.auto.convert.join.noconditionaltask.size = 10000000; + +-- single column, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- multiple sources, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; + +-- multiple columns single source +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart where ds = '2008-04-08' and hr = 11; + +-- empty set +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; +-- Disabled until TEZ-1486 is fixed +-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'; + +-- expressions +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart where hr = 11; + +-- parent is reduce tasks +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'; +select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- left join +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; + +-- full outer +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'; + +-- with static pruning +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.date = '2008-04-08' and srcpart.hr = 13; +-- Disabled until TEZ-1486 is fixed +-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +-- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13; + +-- union + subquery +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); + + +drop table srcpart_orc; +drop table srcpart_date; +drop table srcpart_hour; +drop table srcpart_date_hour; +drop table srcpart_double_hour;
http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q new file mode 100644 index 0000000..734f187 --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_2.q @@ -0,0 +1,118 @@ +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.spark.dynamic.partition.pruning=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask = true; +set hive.auto.convert.join.noconditionaltask.size = 10000000; + +-- SORT_QUERY_RESULTS + +create table dim_shops (id int, label string) row format delimited fields terminated by ',' stored as textfile; +load data local inpath '../../data/files/dim_shops.txt' into table dim_shops; + +create table agg_01 (amount decimal) partitioned by (dim_shops_id int) row format delimited fields terminated by ',' stored as textfile; +alter table agg_01 add partition (dim_shops_id = 1); +alter table agg_01 add partition (dim_shops_id = 2); +alter table agg_01 add partition (dim_shops_id = 3); + +load data local inpath '../../data/files/agg_01-p1.txt' into table agg_01 partition (dim_shops_id=1); +load data local inpath '../../data/files/agg_01-p2.txt' into table agg_01 partition (dim_shops_id=2); +load data local inpath '../../data/files/agg_01-p3.txt' into table agg_01 partition (dim_shops_id=3); + +analyze table dim_shops compute statistics; +analyze table agg_01 partition (dim_shops_id) compute statistics; + +select * from dim_shops; +select * from agg_01; + +EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +set hive.spark.dynamic.partition.pruning.max.data.size=1; + +EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +EXPLAIN SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id; + +SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id; + +EXPLAIN SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1; + +SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1; + +set hive.spark.dynamic.partition.pruning.max.data.size=1000000; + +EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + + +EXPLAIN +SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo' +UNION ALL +SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar'; + +SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo' +UNION ALL +SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar'; http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q new file mode 100644 index 0000000..293fcfc --- /dev/null +++ b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q @@ -0,0 +1,192 @@ +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.spark.dynamic.partition.pruning=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.vectorized.execution.enabled=true; + + +select distinct ds from srcpart; +select distinct hr from srcpart; + +EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; +create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds; +create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr; +create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; +create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr; + +-- single column, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = '2008-04-08'; + +-- multiple sources, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; + +-- multiple columns single source +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = '2008-04-08' and hr = 11; + +-- empty set +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where ds = 'I DONT EXIST'; + +-- expressions +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=false; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +set hive.spark.dynamic.partition.pruning=true; +select count(*) from srcpart where cast(hr as string) = 11; + + +-- parent is reduce tasks +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- non-equi join +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); +select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); + +-- old style join syntax +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; +select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; + +-- left join +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; + +-- full outer +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; + +-- with static pruning +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; + +-- union + subquery +EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask = true; +set hive.auto.convert.join.noconditionaltask.size = 10000000; + +-- single column, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- multiple sources, single key +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; +select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; + +-- multiple columns single source +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +select count(*) from srcpart where ds = '2008-04-08' and hr = 11; + +-- empty set +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +-- Disabled until TEZ-1486 is fixed +-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; + +-- expressions +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +select count(*) from srcpart where hr = 11; + +-- parent is reduce tasks +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +select count(*) from srcpart where ds = '2008-04-08'; + +-- left join +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; + +-- full outer +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; + +-- with static pruning +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; +-- Disabled until TEZ-1486 is fixed +-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +-- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; + +-- union + subquery +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); + + +-- different file format +create table srcpart_orc (key int, value string) partitioned by (ds string, hr int) stored as orc; + + +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.exec.max.dynamic.partitions=1000; + +insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; +EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11; + +drop table srcpart_orc; +drop table srcpart_date; +drop table srcpart_hour; +drop table srcpart_date_hour; +drop table srcpart_double_hour; http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/bucket2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/bucket2.q.out b/ql/src/test/results/clientpositive/spark/bucket2.q.out index 89c3b4c..8bb53d5 100644 --- a/ql/src/test/results/clientpositive/spark/bucket2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -203,14 +203,11 @@ STAGE PLANS: Processor Tree: TableScan alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from bucket2_1 tablesample (bucket 1 out of 2) s http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/bucket3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/bucket3.q.out b/ql/src/test/results/clientpositive/spark/bucket3.q.out index 2fc4855..b25ea05 100644 --- a/ql/src/test/results/clientpositive/spark/bucket3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -226,14 +226,11 @@ STAGE PLANS: Processor Tree: TableScan alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), '1' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from bucket3_1 tablesample (bucket 1 out of 2) s where ds = '1' http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/bucket4.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out b/ql/src/test/results/clientpositive/spark/bucket4.q.out index 44e0f9f..2ad59da 100644 --- a/ql/src/test/results/clientpositive/spark/bucket4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -202,14 +202,11 @@ STAGE PLANS: Processor Tree: TableScan alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from bucket4_1 tablesample (bucket 1 out of 2) s http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/column_access_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out index 7879ef1..5803093 100644 --- a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -92,11 +92,9 @@ STAGE PLANS: Processor Tree: TableScan alias: t1 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT key FROM (SELECT key, val FROM T1) subq1 @@ -124,11 +122,9 @@ STAGE PLANS: Processor Tree: TableScan alias: t1 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT k FROM (SELECT key as k, val as v FROM T1) subq1 http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out index c6f9039..a34a399 100644 --- a/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_partition_metadataonly.q.out @@ -16,11 +16,9 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '2008-04-08' (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select ds from srcpart where hr=11 and ds='2008-04-08' http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out index e38ccf8..603e6bb 100644 --- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out +++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.java1.7.out @@ -461,16 +461,13 @@ STAGE PLANS: Processor Tree: TableScan alias: list_bucketing_static_part - Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: ((key = '484') and (value = 'val_484')) (type: boolean) - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '484' (type: string), 'val_484' (type: string), '2008-04-08' (type: string), '11' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 2406 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select * from list_bucketing_static_part where ds = '2008-04-08' and hr = '11' and key = '484' and value = 'val_484' http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index a324abc..506d265 100644 --- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -43,16 +43,13 @@ STAGE PLANS: Processor Tree: TableScan alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: false (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: select key from src where false http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/pcr.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out index efadb1d..fb08f10 100644 --- a/ql/src/test/results/clientpositive/spark/pcr.q.out +++ b/ql/src/test/results/clientpositive/spark/pcr.q.out @@ -5461,11 +5461,9 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key,value from srcpart where hr = cast(11 as double) @@ -5482,11 +5480,9 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: explain select key,value from srcpart where cast(hr as double) = 11 @@ -5503,10 +5499,8 @@ STAGE PLANS: Processor Tree: TableScan alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/sample3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/sample3.q.out b/ql/src/test/results/clientpositive/spark/sample3.q.out index 2fe6b0d..35a4352 100644 --- a/ql/src/test/results/clientpositive/spark/sample3.q.out +++ b/ql/src/test/results/clientpositive/spark/sample3.q.out @@ -22,14 +22,11 @@ STAGE PLANS: Processor Tree: TableScan alias: s - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((hash(key) & 2147483647) % 5) = 0) (type: boolean) - Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT s.key http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/sample9.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/sample9.q.out b/ql/src/test/results/clientpositive/spark/sample9.q.out index c9823f7..1a84bd6 100644 --- a/ql/src/test/results/clientpositive/spark/sample9.q.out +++ b/ql/src/test/results/clientpositive/spark/sample9.q.out @@ -53,17 +53,14 @@ STAGE PLANS: Processor Tree: TableScan alias: a - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean) sampleDesc: BUCKET 1 OUT OF 2 - Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT s.* http://git-wip-us.apache.org/repos/asf/hive/blob/42216997/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out index c94cc5b..4d912ca 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out @@ -1912,17 +1912,14 @@ STAGE PLANS: Processor Tree: TableScan alias: test_table1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: (((hash(key) & 2147483647) % 16) = 1) (type: boolean) sampleDesc: BUCKET 2 OUT OF 16 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: EXPLAIN EXTENDED SELECT * FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) @@ -2004,17 +2001,14 @@ STAGE PLANS: Processor Tree: TableScan alias: test_table3 - Statistics: Num rows: 1028 Data size: 10968 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: true predicate: (((hash(key) & 2147483647) % 16) = 1) (type: boolean) sampleDesc: BUCKET 2 OUT OF 16 - Statistics: Num rows: 514 Data size: 5484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 514 Data size: 5484 Basic stats: COMPLETE Column stats: NONE ListSink PREHOOK: query: SELECT * FROM test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16)
