http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q index 2d3788d..d2ded71 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q @@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; select distinct ds from srcpart; select distinct hr from srcpart; -EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; +EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr; create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr; -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; @@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = 'I DONT EXIST'; -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where cast(hr as string) = 11; -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; +EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); set hive.auto.convert.join=true; @@ -119,60 +120,60 @@ set hive.auto.convert.join.noconditionaltask = true; set hive.auto.convert.join.noconditionaltask.size = 10000000; -- single column, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; -- expressions -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart where hr = 11; set hive.stats.fetch.column.stats=false; -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; set hive.stats.fetch.column.stats=true; -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); @@ -185,7 +186,7 @@ set hive.vectorized.execution.enabled=false; set hive.exec.max.dynamic.partitions=1000; insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; -EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11;
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q index e1eefff..2aa4d02 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction.q @@ -16,27 +16,27 @@ create table dsrv_big stored as orc as select key as key_str, cast(key as int) a create table dsrv_small stored as orc as select distinct key as key_str, cast(key as int) as key_int, value from src where key < 100; -- single key (int) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int); -- single key (string) -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); -- keys are different type -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_str); -- multiple tables -EXPLAIN select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; select count(*) from dsrv_big a, dsrv_small b, dsrv_small c where a.key_int = b.key_int and a.key_int = c.key_int; -- multiple keys -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); select count(*) from dsrv_big a join dsrv_small b on (a.key_str = b.key_str and a.key_int = b.key_int); -- small table result is empty -EXPLAIN select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); +EXPLAIN VECTORIZATION EXPRESSION select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); select count(*) from dsrv_big a join dsrv_small b on (a.key_int = b.key_int) where b.value in ('nonexistent1', 'nonexistent2'); drop table dsrv_big; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q index 6500d41..138c133 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q @@ -4,10 +4,11 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q index 137acbc..d259547 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q @@ -15,7 +15,7 @@ create temporary table y (b int) stored as orc; insert into x values(1); insert into y values(1); -explain +explain vectorization expression select count(1) from x, y where a = b; select count(1) from x, y where a = b; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_math_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_math_funcs.q b/ql/src/test/queries/clientpositive/vectorized_math_funcs.q index b01c468..6a10770 100644 --- a/ql/src/test/queries/clientpositive/vectorized_math_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_math_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain +explain vectorization expression select cdouble ,Round(cdouble, 2) http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q index 4332898..5b07c9f 100644 --- a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q @@ -4,9 +4,10 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; +explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_parquet.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet.q b/ql/src/test/queries/clientpositive/vectorized_parquet.q index da138e0..e6ebdaa 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -21,7 +21,7 @@ insert overwrite table alltypes_parquet SET hive.vectorized.execution.enabled=true; -explain select * +explain vectorization select * from alltypes_parquet where cint = 528534767 limit 10; @@ -30,7 +30,7 @@ select * where cint = 528534767 limit 10; -explain select ctinyint, +explain vectorization select ctinyint, max(cint), min(csmallint), count(cstring1), http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_parquet_types.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 297c5af..68761b6 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -48,19 +48,19 @@ SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, unhex(cbinary), cdecimal FROM parquet_types_staging; -- select -explain +explain vectorization expression SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; -explain +explain vectorization expression SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; -explain +explain vectorization expression SELECT ctinyint, MAX(cint), MIN(csmallint), http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_ptf.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_ptf.q b/ql/src/test/queries/clientpositive/vectorized_ptf.q index db2dbe1..232aa11 100644 --- a/ql/src/test/queries/clientpositive/vectorized_ptf.q +++ b/ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,4 +1,5 @@ SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -42,7 +43,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -63,7 +64,7 @@ from noop(on part_orc -- 2. testJoinWithNoop -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -80,7 +81,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain extended +explain vectorization extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -93,7 +94,7 @@ order by p_name); -- 4. testPTFAlias -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -114,7 +115,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -137,7 +138,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -162,7 +163,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain extended +explain vectorization extended select abc.* from noop(on part_orc partition by p_mfgr @@ -177,7 +178,7 @@ order by p_name -- 8. testJoinRight -explain extended +explain vectorization extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -192,7 +193,7 @@ order by p_name -- 9. testNoopWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -207,7 +208,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -226,7 +227,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -247,7 +248,7 @@ order by p_name) -- 12. testFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -268,7 +269,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain extended +explain vectorization extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -295,7 +296,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain extended +explain vectorization extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -322,7 +323,7 @@ order by p_name -- 15. testDistinctInSelectWithPTF -explain extended +explain vectorization extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -341,7 +342,7 @@ round(sum(p_retailprice),2) as s from part_orc group by p_mfgr, p_brand; -explain extended +explain vectorization extended select p_mfgr, p_brand, s, round(sum(s) over w1,2) as s1 from noop(on mfgr_price_view @@ -375,7 +376,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain extended +explain vectorization extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -412,7 +413,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -447,7 +448,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -482,7 +483,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -514,7 +515,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -549,7 +550,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -582,7 +583,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain extended +explain vectorization extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q b/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q index f57d062..9227de0 100644 --- a/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -2,10 +2,11 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=false; +set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_string_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_string_funcs.q b/ql/src/test/queries/clientpositive/vectorized_string_funcs.q index d04a3c3..ee95c0b 100644 --- a/ql/src/test/queries/clientpositive/vectorized_string_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_string_funcs.q @@ -1,9 +1,10 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; -- Test string functions in vectorized mode to verify end-to-end functionality. -explain +explain vectorization select substr(cstring1, 1, 2) ,substr(cstring1, 2) http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_timestamp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp.q b/ql/src/test/queries/clientpositive/vectorized_timestamp.q index 2784b7a..ceee2ee 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -6,23 +6,23 @@ CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SET hive.vectorized.execution.enabled = true; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN +EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q b/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index aaf85fc..afbc18a 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.fetch.task.conversion=none; -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. @@ -23,7 +24,7 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -49,7 +50,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -75,7 +76,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -103,7 +104,7 @@ FROM alltypesorc_string ORDER BY c1; -- Wrong format. Should all be NULL. -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -129,7 +130,7 @@ SELECT FROM alltypesorc_wrong ORDER BY c1; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), @@ -144,7 +145,7 @@ SELECT FROM alltypesorc_string; -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; @@ -152,7 +153,7 @@ SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; -EXPLAIN SELECT +EXPLAIN VECTORIZATION EXPRESSION SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q b/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q index 15964c9..e6e6d5d 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q @@ -1,8 +1,9 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled = true; SET hive.int.timestamp.conversion.in.seconds=false; +set hive.fetch.task.conversion=none; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) @@ -40,7 +41,7 @@ where cbigint % 250 = 0; SET hive.int.timestamp.conversion.in.seconds=true; -explain +explain vectorization expression select -- to timestamp cast (ctinyint as timestamp) http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/acid_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization.q.out index b9b4ce4..6171ce4 100644 --- a/ql/src/test/results/clientpositive/llap/acid_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_vectorization.q.out @@ -58,3 +58,63 @@ POSTHOOK: Input: default@acid_vectorized -1070883071 0ruyd6Y50JpdGRf6HqD -1070551679 iUR3Q -1069736047 k17Am8uPHWk02cEf1jet +PREHOOK: query: CREATE TABLE acid_fast_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: CREATE TABLE acid_fast_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true', 'transactional_properties'='default') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: insert into table acid_fast_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: insert into table acid_fast_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_fast_vectorized +POSTHOOK: Lineage: acid_fast_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_fast_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_fast_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: insert into table acid_fast_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@acid_fast_vectorized +POSTHOOK: Lineage: acid_fast_vectorized.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_fast_vectorized.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: update acid_fast_vectorized set b = 'foo' where b = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: update acid_fast_vectorized set b = 'foo' where b = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: delete from acid_fast_vectorized where b = 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +PREHOOK: Output: default@acid_fast_vectorized +POSTHOOK: query: delete from acid_fast_vectorized where b = 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +POSTHOOK: Output: default@acid_fast_vectorized +PREHOOK: query: select a, b from acid_fast_vectorized order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_fast_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select a, b from acid_fast_vectorized order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_fast_vectorized +#### A masked pattern was here #### +-1073279343 oj1YrV5Wa +-1073051226 A34p7oRr2WvUJNf +-1072910839 0iqrc5 +-1072081801 dPkN74F7 +-1072076362 2uLyD28144vklju213J1mr +-1071480828 aw724t8c5558x2xneC624 +-1071363017 Anj0oF +-1070883071 0ruyd6Y50JpdGRf6HqD +-1070551679 iUR3Q +-1069736047 k17Am8uPHWk02cEf1jet http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out b/ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out index 5b1669e..1d3ac6e 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_ppd_decimal.q.out @@ -18,13 +18,11 @@ POSTHOOK: Lineage: newtypestbl.c EXPRESSION [] POSTHOOK: Lineage: newtypestbl.d EXPRESSION [] POSTHOOK: Lineage: newtypestbl.da EXPRESSION [] POSTHOOK: Lineage: newtypestbl.v EXPRESSION [] -PREHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) -select * from newtypestbl where d=0.22 +PREHOOK: query: select * from newtypestbl where d=0.22 PREHOOK: type: QUERY PREHOOK: Input: default@newtypestbl #### A masked pattern was here #### -POSTHOOK: query: -- decimal data types (EQUAL, NOT_EQUAL, LESS_THAN, LESS_THAN_EQUALS, IN, BETWEEN tests) -select * from newtypestbl where d=0.22 +POSTHOOK: query: select * from newtypestbl where d=0.22 POSTHOOK: type: QUERY POSTHOOK: Input: default@newtypestbl #### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/pcs.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/pcs.q.out b/ql/src/test/results/clientpositive/llap/pcs.q.out index b3844ee..21c2652 100644 --- a/ql/src/test/results/clientpositive/llap/pcs.q.out +++ b/ql/src/test/results/clientpositive/llap/pcs.q.out @@ -130,6 +130,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -151,6 +152,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -175,6 +177,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -196,6 +199,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -283,6 +287,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -304,6 +309,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -326,6 +332,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -347,6 +354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -406,6 +414,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -427,6 +436,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -449,6 +459,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -470,6 +481,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -570,6 +582,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -591,6 +604,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -615,6 +629,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -636,6 +651,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -683,6 +699,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -704,6 +721,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -728,6 +746,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -749,6 +768,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -848,6 +868,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -869,6 +890,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -891,6 +913,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -912,6 +935,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1209,6 +1233,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1230,6 +1255,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1306,6 +1332,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1327,6 +1354,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1349,6 +1377,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1370,6 +1399,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1416,6 +1446,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1437,6 +1468,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1459,6 +1491,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1480,6 +1513,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1502,6 +1536,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1523,6 +1558,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1586,6 +1622,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1607,6 +1644,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1629,6 +1667,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1650,6 +1689,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1672,6 +1712,7 @@ STAGE PLANS: properties: COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string @@ -1693,6 +1734,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 + column.name.delimiter , columns key,value columns.comments columns.types int:string