http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q index d2ded71..2d3788d 100644 --- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q +++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q @@ -7,34 +7,33 @@ set hive.tez.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=none; select distinct ds from srcpart; select distinct hr from srcpart; -EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; +EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds; create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr; create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr; create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr; -- single column, single key -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; @@ -42,77 +41,77 @@ set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where ds = 'I DONT EXIST'; -- expressions -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=false; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where hr = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11; set hive.tez.dynamic.partition.pruning=true; select count(*) from srcpart where cast(hr as string) = 11; -- parent is reduce tasks -EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- non-equi join -EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr); -- old style join syntax -EXPLAIN VECTORIZATION select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr; -- left join -EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN VECTORIZATION select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); -EXPLAIN VECTORIZATION select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select ds from (select distinct(ds) as ds from srcpart union all select distinct(ds) as ds from srcpart) s where s.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); set hive.auto.convert.join=true; @@ -120,60 +119,60 @@ set hive.auto.convert.join.noconditionaltask = true; set hive.auto.convert.join.noconditionaltask.size = 10000000; -- single column, single key -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; -- multiple sources, single key -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11; select count(*) from srcpart where hr = 11 and ds = '2008-04-08'; -- multiple columns single source -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11; select count(*) from srcpart where ds = '2008-04-08' and hr = 11; -- empty set -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'; -- expressions -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; +EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart_double_hour.hr) where srcpart_double_hour.hour = 11; select count(*) from srcpart where hr = 11; set hive.stats.fetch.column.stats=false; -- parent is reduce tasks -EXPLAIN VECTORIZATION select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; select count(*) from srcpart where ds = '2008-04-08'; set hive.stats.fetch.column.stats=true; -- left join -EXPLAIN VECTORIZATION select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -EXPLAIN VECTORIZATION select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- full outer -EXPLAIN VECTORIZATION select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'; -- with static pruning -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11; -EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) +EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13; -- union + subquery -EXPLAIN VECTORIZATION select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); +EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart); @@ -186,7 +185,7 @@ set hive.vectorized.execution.enabled=false; set hive.exec.max.dynamic.partitions=1000; insert into table srcpart_orc partition (ds, hr) select key, value, ds, hr from srcpart; -EXPLAIN VECTORIZATION select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); +EXPLAIN select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart_orc join srcpart_date_hour on (srcpart_orc.ds = srcpart_date_hour.ds and srcpart_orc.hr = srcpart_date_hour.hr) where srcpart_date_hour.hour = 11 and (srcpart_date_hour.`date` = '2008-04-08' or srcpart_date_hour.`date` = '2008-04-09'); select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11;
http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q index 138c133..6500d41 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin.q @@ -4,11 +4,10 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; -set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint; http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q index d259547..137acbc 100644 --- a/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q +++ b/ql/src/test/queries/clientpositive/vectorized_mapjoin2.q @@ -15,7 +15,7 @@ create temporary table y (b int) stored as orc; insert into x values(1); insert into y values(1); -explain vectorization expression +explain select count(1) from x, y where a = b; select count(1) from x, y where a = b; http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_math_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_math_funcs.q b/ql/src/test/queries/clientpositive/vectorized_math_funcs.q index 6875909..d79fcce 100644 --- a/ql/src/test/queries/clientpositive/vectorized_math_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_math_funcs.q @@ -1,10 +1,9 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; -set hive.fetch.task.conversion=none; -- Test math functions in vectorized mode to verify they run correctly end-to-end. -explain vectorization expression +explain select cdouble ,Round(cdouble, 2) http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q index 5b07c9f..4332898 100644 --- a/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q @@ -4,10 +4,9 @@ SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=true; SET hive.auto.convert.join.noconditionaltask=true; SET hive.auto.convert.join.noconditionaltask.size=1000000000; -set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -explain vectorization select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; +explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint; http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_parquet.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet.q b/ql/src/test/queries/clientpositive/vectorized_parquet.q index e6ebdaa..da138e0 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -21,7 +21,7 @@ insert overwrite table alltypes_parquet SET hive.vectorized.execution.enabled=true; -explain vectorization select * +explain select * from alltypes_parquet where cint = 528534767 limit 10; @@ -30,7 +30,7 @@ select * where cint = 528534767 limit 10; -explain vectorization select ctinyint, +explain select ctinyint, max(cint), min(csmallint), count(cstring1), http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 68761b6..297c5af 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -48,19 +48,19 @@ SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, unhex(cbinary), cdecimal FROM parquet_types_staging; -- select -explain vectorization expression +explain SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, hex(cbinary), cdecimal FROM parquet_types; -explain vectorization expression +explain SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; -explain vectorization expression +explain SELECT ctinyint, MAX(cint), MIN(csmallint), http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_ptf.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_ptf.q b/ql/src/test/queries/clientpositive/vectorized_ptf.q index e648320..64082e9 100644 --- a/ql/src/test/queries/clientpositive/vectorized_ptf.q +++ b/ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,5 +1,4 @@ SET hive.vectorized.execution.enabled=true; -set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS @@ -43,7 +42,7 @@ insert into table part_orc select * from part_staging; --1. test1 -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -64,7 +63,7 @@ from noop(on part_orc -- 2. testJoinWithNoop -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by p_name) as deltaSz from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = p2.p_partkey) j @@ -81,7 +80,7 @@ sort by j.p_name) -- 3. testOnlyPTF -explain vectorization extended +explain extended select p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -94,7 +93,7 @@ order by p_name); -- 4. testPTFAlias -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -115,7 +114,7 @@ from noop(on part_orc -- 5. testPTFAndWhereWithWindowing -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -138,7 +137,7 @@ from noop(on part_orc -- 6. testSWQAndPTFAndGBy -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -163,7 +162,7 @@ group by p_mfgr, p_name, p_size -- 7. testJoin -explain vectorization extended +explain extended select abc.* from noop(on part_orc partition by p_mfgr @@ -178,7 +177,7 @@ order by p_name -- 8. testJoinRight -explain vectorization extended +explain extended select abc.* from part_orc p1 join noop(on part_orc partition by p_mfgr @@ -193,7 +192,7 @@ order by p_name -- 9. testNoopWithMap -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name, p_size desc) as r from noopwithmap(on part_orc @@ -208,7 +207,7 @@ order by p_name, p_size desc); -- 10. testNoopWithMapWithWindowing -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -227,7 +226,7 @@ from noopwithmap(on part_orc -- 11. testHavingWithWindowingPTFNoGBY -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -248,7 +247,7 @@ order by p_name) -- 12. testFunctionChain -explain vectorization extended +explain extended select p_mfgr, p_name, p_size, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -269,7 +268,7 @@ order by p_mfgr, p_name -- 13. testPTFAndWindowingInSubQ -explain vectorization extended +explain extended select p_mfgr, p_name, sub1.cd, sub1.s1 from (select p_mfgr, p_name, @@ -296,7 +295,7 @@ window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 -- 14. testPTFJoinWithWindowingWithCount -explain vectorization extended +explain extended select abc.p_mfgr, abc.p_name, rank() over (distribute by abc.p_mfgr sort by abc.p_name) as r, dense_rank() over (distribute by abc.p_mfgr sort by abc.p_name) as dr, @@ -323,7 +322,7 @@ order by p_name -- 15. testDistinctInSelectWithPTF -explain vectorization extended +explain extended select DISTINCT p_mfgr, p_name, p_size from noop(on part_orc partition by p_mfgr @@ -342,7 +341,7 @@ sum(p_retailprice) as s from part_orc group by p_mfgr, p_brand; -explain vectorization extended +explain extended select p_mfgr, p_brand, s, sum(s) over w1 as s1 from noop(on mfgr_price_view @@ -376,7 +375,7 @@ dr INT, cud DOUBLE, fv1 INT); -explain vectorization extended +explain extended from noop(on part_orc partition by p_mfgr order by p_name) @@ -413,7 +412,7 @@ select * from part_5; -- 18. testMulti2OperatorsFunctionChainWithMap -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -448,7 +447,7 @@ from noop(on -- 19. testMulti3OperatorsFunctionChain -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -483,7 +482,7 @@ from noop(on -- 20. testMultiOperatorChainWithNoWindowing -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, @@ -515,7 +514,7 @@ from noop(on -- 21. testMultiOperatorChainEndsWithNoopMap -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name) as dr, @@ -550,7 +549,7 @@ from noopwithmap(on -- 22. testMultiOperatorChainWithDiffPartitionForWindow1 -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as r, dense_rank() over (partition by p_mfgr,p_name order by p_mfgr,p_name) as dr, @@ -583,7 +582,7 @@ from noop(on -- 23. testMultiOperatorChainWithDiffPartitionForWindow2 -explain vectorization extended +explain extended select p_mfgr, p_name, rank() over (partition by p_mfgr order by p_name) as r, dense_rank() over (partition by p_mfgr order by p_name) as dr, http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q b/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q index 9227de0..f57d062 100644 --- a/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q +++ b/ql/src/test/queries/clientpositive/vectorized_shufflejoin.q @@ -2,11 +2,10 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.auto.convert.join=false; -set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) +EXPLAIN SELECT COUNT(t1.cint) AS CNT, MAX(t2.cint) , MIN(t1.cint), AVG(t1.cint+t2.cint) FROM alltypesorc t1 JOIN alltypesorc t2 ON t1.cint = t2.cint order by CNT; http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_string_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_string_funcs.q b/ql/src/test/queries/clientpositive/vectorized_string_funcs.q index ee95c0b..d04a3c3 100644 --- a/ql/src/test/queries/clientpositive/vectorized_string_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_string_funcs.q @@ -1,10 +1,9 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled = true; -set hive.fetch.task.conversion=none; -- Test string functions in vectorized mode to verify end-to-end functionality. -explain vectorization +explain select substr(cstring1, 1, 2) ,substr(cstring1, 2) http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_timestamp.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp.q b/ql/src/test/queries/clientpositive/vectorized_timestamp.q index ceee2ee..2784b7a 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -6,23 +6,23 @@ CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SET hive.vectorized.execution.enabled = true; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q b/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index afbc18a..aaf85fc 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,6 +1,5 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; -set hive.fetch.task.conversion=none; -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. -- Turning on vectorization has been temporarily moved after filling the test table -- due to bug HIVE-8197. @@ -24,7 +23,7 @@ INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '2000:01:01 00-00-00' FROM alltypesorc LIMIT 1; INSERT INTO TABLE alltypesorc_wrong SELECT '0000-00-00 99:99:99' FROM alltypesorc LIMIT 1; -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT to_unix_timestamp(ctimestamp1) AS c1, year(ctimestamp1), month(ctimestamp1), @@ -50,7 +49,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -76,7 +75,7 @@ SELECT FROM alltypesorc_string ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1, year(ctimestamp1) = year(stimestamp1), month(ctimestamp1) = month(stimestamp1), @@ -104,7 +103,7 @@ FROM alltypesorc_string ORDER BY c1; -- Wrong format. Should all be NULL. -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), month(stimestamp1), @@ -130,7 +129,7 @@ SELECT FROM alltypesorc_wrong ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), @@ -145,7 +144,7 @@ SELECT FROM alltypesorc_string; -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; @@ -153,7 +152,7 @@ SELECT round(sum(ctimestamp1), 3) FROM alltypesorc_string; -EXPLAIN VECTORIZATION EXPRESSION SELECT +EXPLAIN SELECT round(avg(ctimestamp1), 0), variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19, http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q b/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q index e6e6d5d..15964c9 100644 --- a/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q +++ b/ql/src/test/queries/clientpositive/vectorized_timestamp_ints_casts.q @@ -1,9 +1,8 @@ set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled = true; SET hive.int.timestamp.conversion.in.seconds=false; -set hive.fetch.task.conversion=none; -explain vectorization expression +explain select -- to timestamp cast (ctinyint as timestamp) @@ -41,7 +40,7 @@ where cbigint % 250 = 0; SET hive.int.timestamp.conversion.in.seconds=true; -explain vectorization expression +explain select -- to timestamp cast (ctinyint as timestamp) http://git-wip-us.apache.org/repos/asf/hive/blob/aed21d0b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out index 5b74e0b..d8032d8 100644 --- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out +++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part.q.out @@ -83,73 +83,25 @@ POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).b SIMPLE [(valu POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,a,b from part_add_int_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,a,b from part_add_int_permute_select POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] +Plan optimized by CBO. -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_add_int_permute_select - Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4] - Select Operator - expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 4, 1, 2] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2] - dataColumns: insert_num:int, a:int, b:string, c:int - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=2 width=4) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=2 width=101) + default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_permute_select @@ -250,73 +202,25 @@ POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).c EXPRES POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,a,b from part_add_int_string_permute_select PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,a,b from part_add_int_string_permute_select POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] +Plan optimized by CBO. -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_add_int_string_permute_select - Statistics: Num rows: 2 Data size: 290 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Select Operator - expressions: insert_num (type: int), part (type: int), a (type: int), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 5, 1, 2] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 5 - includeColumns: [0, 1, 2] - dataColumns: insert_num:int, a:int, b:string, c:int, d:string - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=2 width=4) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=2 width=145) + default@part_add_int_string_permute_select,part_add_int_string_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"] PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right select insert_num,part,a,b from part_add_int_string_permute_select @@ -479,73 +383,25 @@ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c2 SIMPLE [ POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).c3 SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:double1, type:double, comment:null), ] POSTHOOK: Lineage: part_change_string_group_double PARTITION(part=1).insert_num SIMPLE [(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, type:int, comment:null), ] insert_num double1 double1 double1 _c4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,b from part_change_string_group_double POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +Plan optimized by CBO. -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_string_group_double - Statistics: Num rows: 5 Data size: 2130 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: double), c2 (type: double), c3 (type: double), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 5, 1, 2, 3, 4] - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 5 - includeColumns: [0, 1, 2, 3, 4] - dataColumns: insert_num:int, c1:double, c2:double, c3:double, b:string - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=5 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_0] (rows=5 width=426) + default@part_change_string_group_double,part_change_string_group_double,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_string_group_double PREHOOK: type: QUERY @@ -645,73 +501,25 @@ POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION( POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_date_group_string_group_date_timestamp PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] +Plan optimized by CBO. -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_date_group_string_group_date_timestamp - Statistics: Num rows: 6 Data size: 3521 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: char(50)), c3 (type: char(15)), c4 (type: varchar(50)), c5 (type: varchar(15)), c6 (type: string), c7 (type: char(50)), c8 (type: char(15)), c9 (type: varchar(50)), c10 (type: varchar(15)), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - dataColumns: insert_num:int, c1:string, c2:char(50), c3:char(15), c4:varchar(50), c5:varchar(15), c6:string, c7:char(50), c8:char(15), c9:varchar(50), c10:varchar(15), b:string - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + TableScan [TS_0] (rows=6 width=586) + default@part_change_date_group_string_group_date_timestamp,part_change_date_group_string_group_date_timestamp,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_date_group_string_group_date_timestamp PREHOOK: type: QUERY @@ -888,73 +696,25 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_grou POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_multi_ints_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 _col20 _col21 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +Plan optimized by CBO. -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_numeric_group_string_group_multi_ints_string_group - Statistics: Num rows: 6 Data size: 2903 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: string), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(50)), c8 (type: char(50)), c9 (type: char(5)), c10 (type: char(5)), c11 (type: char(5)), c12 (type: char(5)), c13 (type: varchar(50)), c14 (type: varchar(50)), c15 (type: varchar(50)), c16 (type: varchar(50)), c17 (type: varchar(5)), c18 (type: varchar(5)), c19 (type: varchar(5)), c20 (type: varchar(5)), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 22, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 22 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:string, c5:char(50), c6:char(50), c7:char(50), c8:char(50), c9:char(5), c10:char(5), c11:char(5), c12:char(5), c13:varchar(50), c14:varchar(50), c15:varchar(50), c16:varchar(50), c17:varchar(5), c18:varchar(5), c19:varchar(5), c20:varchar(5), b:string - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22"] + TableScan [TS_0] (rows=6 width=483) + default@part_change_numeric_group_string_group_multi_ints_string_group,part_change_numeric_group_string_group_multi_ints_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b from part_change_numeric_group_string_group_multi_ints_string_group PREHOOK: type: QUERY @@ -1113,73 +873,25 @@ POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_numeric_group_string_group_floating_string_group PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_numeric_group_string_group_floating_string_group - Statistics: Num rows: 6 Data size: 4540 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: string), c2 (type: string), c3 (type: string), c4 (type: char(50)), c5 (type: char(50)), c6 (type: char(50)), c7 (type: char(7)), c8 (type: char(7)), c9 (type: char(7)), c10 (type: varchar(50)), c11 (type: varchar(50)), c12 (type: varchar(50)), c13 (type: varchar(7)), c14 (type: varchar(7)), c15 (type: varchar(7)), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 17, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 17 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] - dataColumns: insert_num:int, c1:string, c2:string, c3:string, c4:char(50), c5:char(50), c6:char(50), c7:char(7), c8:char(7), c9:char(7), c10:varchar(50), c11:varchar(50), c12:varchar(50), c13:varchar(7), c14:varchar(7), c15:varchar(7), b:string - partitionColumnCount: 1 - partitionColumns: part:int +Plan optimized by CBO. - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] + TableScan [TS_0] (rows=6 width=756) + default@part_change_numeric_group_string_group_floating_string_group,part_change_numeric_group_string_group_floating_string_group,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b from part_change_numeric_group_string_group_floating_string_group PREHOOK: type: QUERY @@ -1326,73 +1038,25 @@ POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1 POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_string_group_string_group_string PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_string_group_string_group_string - Statistics: Num rows: 6 Data size: 6682 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: char(50)), c2 (type: char(9)), c3 (type: varchar(50)), c4 (type: char(9)), c5 (type: varchar(50)), c6 (type: varchar(9)), c7 (type: string), c8 (type: char(50)), c9 (type: char(9)), c10 (type: string), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - dataColumns: insert_num:int, c1:char(50), c2:char(9), c3:varchar(50), c4:char(9), c5:varchar(50), c6:varchar(9), c7:string, c8:char(50), c9:char(9), c10:string, b:string - partitionColumnCount: 1 - partitionColumns: part:int +Plan optimized by CBO. - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + TableScan [TS_0] (rows=6 width=1113) + default@part_change_string_group_string_group_string,part_change_string_group_string_group_string,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from part_change_string_group_string_group_string PREHOOK: type: QUERY @@ -1573,73 +1237,25 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint P POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).c9 EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col10, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_tinyint_to_bigint PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 _col16 _col17 _col18 _col19 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] +Plan optimized by CBO. -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_lower_to_higher_numeric_group_tinyint_to_bigint - Statistics: Num rows: 6 Data size: 1419 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: smallint), c2 (type: int), c3 (type: bigint), c4 (type: decimal(38,18)), c5 (type: float), c6 (type: double), c7 (type: int), c8 (type: bigint), c9 (type: decimal(38,18)), c10 (type: float), c11 (type: double), c12 (type: bigint), c13 (type: decimal(38,18)), c14 (type: float), c15 (type: double), c16 (type: decimal(38,18)), c17 (type: float), c18 (type: double), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 20, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 20 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - dataColumns: insert_num:int, c1:smallint, c2:int, c3:bigint, c4:decimal(38,18), c5:float, c6:double, c7:int, c8:bigint, c9:decimal(38,18), c10:float, c11:double, c12:bigint, c13:decimal(38,18), c14:float, c15:double, c16:decimal(38,18), c17:float, c18:double, b:string - partitionColumnCount: 1 - partitionColumns: part:int - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] + TableScan [TS_0] (rows=6 width=236) + default@part_change_lower_to_higher_numeric_group_tinyint_to_bigint,part_change_lower_to_higher_numeric_group_tinyint_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b from part_change_lower_to_higher_numeric_group_tinyint_to_bigint PREHOOK: type: QUERY @@ -1750,73 +1366,25 @@ POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PA POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).c3 EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col4, type:string, comment:), ] POSTHOOK: Lineage: part_change_lower_to_higher_numeric_group_decimal_to_float PARTITION(part=1).insert_num EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] _col0 _col1 _col2 _col3 _col4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float POSTHOOK: type: QUERY Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: part_change_lower_to_higher_numeric_group_decimal_to_float - Statistics: Num rows: 6 Data size: 1523 Basic stats: COMPLETE Column stats: PARTIAL - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Select Operator - expressions: insert_num (type: int), part (type: int), c1 (type: float), c2 (type: double), c3 (type: double), b (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 5, 1, 2, 3, 4] - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized, llap - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 5 - includeColumns: [0, 1, 2, 3, 4] - dataColumns: insert_num:int, c1:float, c2:double, c3:double, b:string - partitionColumnCount: 1 - partitionColumns: part:int +Plan optimized by CBO. - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized, llap + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=6 width=4) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_0] (rows=6 width=253) + default@part_change_lower_to_higher_numeric_group_decimal_to_float,part_change_lower_to_higher_numeric_group_decimal_to_float,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","b"] PREHOOK: query: select insert_num,part,c1,c2,c3,b from part_change_lower_to_higher_numeric_group_decimal_to_float PREHOOK: type: QUERY