date:20161013

[16/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out 
b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 9eeb0d6..26fa9d9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc
 2010-10-29 12
 2010-10-30 11
 2010-10-31 8
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -280,46 +284,102 @@ STAGE PLANS:
 TableScan
   alias: flights_tiny_orc
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: origin_city_name (type: string), 
dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), 
arr_delay (type: float), fl_num (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col5 (type: int), _col2 (type: date)
   sort order: ++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: timestamp), _col4 (type: float)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), 
VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 1, 4, 5, 0]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Limit
   Number of rows: 25
+  Limit Vectorization:
+  className: VectorLimitOperator
+  native: true
   Statistics: Num rows: 25 Data size: 7200

[24/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index bb6916b..9e591b8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -22,7 +22,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -30,6 +30,10 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -49,40 +53,93 @@ STAGE PLANS:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string), value (type: string)
   outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className:

[43/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q 
b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
index 8ed041b..11df12e 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int);
 
 insert into table part_add_int_permute_select partition(part=1) VALUES (2, 
, 'new', );
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c 
int, d string);
 
 insert into table part_add_int_string_permute_select partition(part=1) VALUES 
(2, , 'new', , '');
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns 
(insert_num int, c1
 
 insert into table part_change_string_group_double partition(part=1) SELECT 
insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE 
insert_num = 111;
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
 
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
@@ -117,7 +117,7 @@ alter table 
part_change_date_group_string_group_date_timestamp replace columns(i
 
 insert into table part_change_date_group_string_group_date_timestamp 
partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
@@ -165,7 +165,7 @@ insert into table 
part_change_numeric_group_string_group_multi_ints_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler',
 'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
@@ -208,7 +208,7 @@ insert into table 
part_change_numeric_group_string_group_floating_string_group p
  'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
  'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
@@ -250,7 +250,7 @@ insert into table 
part_change_string_group_string_group_string partition(part=1)
   'filler', 'filler', 'filler',
   'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
@@ -300,7 +300,7 @@ insert into table 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa
 1234.5678, 9876.543, 789.321,
'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
@@ -331,7 +331,7 @@ alter table 
part_change_lower_to_higher_numeric_group_decimal_to_float replace c
 
 insert into table part_change_lower_to_higher_numeric_group_decimal_to_float 
partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from 
part_change_lower_to_higher_numeric_group_decimal_to_float;
 
 select

[34/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 6b59497..739d0e1 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:cint, t
 POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, 
type:int, comment:null), ]
 tint_txt.rnum  tint_txt.cint
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -93,9 +97,16 @@ STAGE PLANS:
 TableScan
   alias: tint
   Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: rnum (type: int), cint (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 5 Data size: 36 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -103,6 +114,11 @@ STAGE PLANS:
   keys:
 0 
 1 
+  Map Join Vectorization:
+  className: VectorMapJoinOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid 
Hash Join IS true, Small table vectorizes IS true
+  nativeConditionsNotMet: Not empty key IS false
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 1 Map 2
@@ -110,9 +126,17 @@ STAGE PLANS:
   Select Operator
 expressions: _col0 (type: int), _col2 (type: int), 
_col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND 
_col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 2, 1, 3, 5]
+selectExpressions: VectorUDFAdaptor(CASE WHEN 
(_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: 
VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String
 Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -120,21 +144,49 @@ STAGE PLANS:
   serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet:

[55/57] [abbrv] hive git commit: HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)

2016-10-13 Thread sershe

HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)

Conflicts:
ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bd78d660
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bd78d660
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bd78d660

Branch: refs/heads/hive-14535
Commit: bd78d6605c6c57f723045b47bab06fccc5053047
Parents: b9e8157 5353161
Author: Sergey Shelukhin 
Authored: Thu Oct 13 17:12:24 2016 -0700
Committer: Sergey Shelukhin 
Committed: Thu Oct 13 17:12:24 2016 -0700

--
 .gitignore  | 1 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |12 +-
 .../org/apache/hive/common/util/DateUtils.java  |20 +
 data/files/identity_udf.jar |   Bin 710 -> 0 bytes
 itests/hive-blobstore/README|25 +
 itests/hive-blobstore/pom.xml   |   355 +
 .../hadoop/hive/cli/TestBlobstoreCliDriver.java |64 +
 .../cli/TestBlobstoreNegativeCliDriver.java |64 +
 .../clientnegative/select_dropped_table.q   | 4 +
 .../test/queries/clientpositive/insert_into.q   | 4 +
 .../test/resources/blobstore-conf.xml.template  |22 +
 .../src/test/resources/hive-site.xml|   271 +
 .../test/resources/testconfiguration.properties | 2 +
 .../src/test/resources/tez-site.xml | 6 +
 .../clientnegative/select_dropped_table.q.out   |21 +
 .../results/clientpositive/insert_into.q.out|35 +
 .../minikdc/JdbcWithMiniKdcSQLAuthTest.java | 2 +
 .../org/apache/hive/jdbc/miniHS2/MiniHS2.java   |32 +-
 .../org/apache/hive/jdbc/TestJdbcDriver2.java   |   171 +-
 .../apache/hive/jdbc/TestJdbcWithMiniHS2.java   |  1005 +-
 .../apache/hive/jdbc/TestJdbcWithMiniMr.java|   344 -
 itests/pom.xml  | 1 +
 .../test/resources/testconfiguration.properties |80 +-
 .../control/AbstractCoreBlobstoreCliDriver.java |   167 +
 .../hadoop/hive/cli/control/CliConfigs.java |40 +
 .../cli/control/CoreBlobstoreCliDriver.java |29 +
 .../control/CoreBlobstoreNegativeCliDriver.java |29 +
 .../org/apache/hadoop/hive/ql/QTestUtil.java|18 +-
 .../ColumnArithmeticColumn.txt  | 7 +-
 .../ColumnArithmeticColumnDecimal.txt   | 5 +
 .../ColumnArithmeticColumnWithConvert.txt   |   173 -
 .../ColumnArithmeticScalar.txt  | 5 +
 .../ColumnArithmeticScalarDecimal.txt   | 5 +
 .../ColumnArithmeticScalarWithConvert.txt   |   150 -
 .../ExpressionTemplates/ColumnCompareColumn.txt | 5 +
 .../ExpressionTemplates/ColumnCompareScalar.txt | 5 +
 .../ExpressionTemplates/ColumnDivideColumn.txt  | 5 +
 .../ColumnDivideColumnDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnDivideScalar.txt  | 5 +
 .../ColumnDivideScalarDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnUnaryFunc.txt | 5 +
 .../ExpressionTemplates/ColumnUnaryMinus.txt| 5 +
 ...eColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...eColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 .../DateColumnArithmeticTimestampColumn.txt | 5 +
 .../DateColumnArithmeticTimestampScalar.txt | 5 +
 ...eScalarArithmeticIntervalYearMonthColumn.txt | 5 +
 .../DateScalarArithmeticTimestampColumn.txt | 5 +
 .../DecimalColumnUnaryFunc.txt  | 5 +
 .../ExpressionTemplates/FilterColumnBetween.txt | 7 +-
 .../FilterColumnCompareColumn.txt   | 9 +-
 .../FilterColumnCompareScalar.txt   | 9 +-
 .../FilterDecimalColumnBetween.txt  | 5 +
 .../FilterDecimalColumnCompareDecimalColumn.txt | 5 +
 .../FilterDecimalColumnCompareDecimalScalar.txt | 5 +
 .../FilterDecimalScalarCompareDecimalColumn.txt | 5 +
 ...erLongDoubleColumnCompareTimestampColumn.txt | 5 +
 ...erLongDoubleScalarCompareTimestampColumn.txt | 5 +
 .../FilterScalarCompareColumn.txt   | 9 +-
 .../FilterStringColumnBetween.txt   | 9 +-
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 8 +
 .../FilterTimestampColumnBetween.txt| 5 +
 ...erTimestampColumnCompareLongDoubleColumn.txt | 5 +
 ...erTimestampColumnCompareLongDoubleScalar.txt | 5 +
 ...terTimestampColumnCompareTimestampColumn.txt | 5 +
 ...terTimestampColumnCompareTimestampScalar.txt | 5 +

[21/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out 
b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..1fde0a9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
@@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -65,6 +69,10 @@ STAGE PLANS:
   value expressions: _col2 (type: int)
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 2 
 Map Operator Tree:
 TableScan
@@ -89,6 +97,10 @@ STAGE PLANS:
   Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 3 
 Map Operator Tree:
 TableScan
@@ -109,10 +121,27 @@ STAGE PLANS:
 Statistics: Num rows: 50 Data size: 200 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 4 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0
@@ -123,6 +152,10 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
+  Map Join Vectorization:
+  className: VectorMapJoinInnerLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
   outputColumnNames: _col2, _col4
   input vertices:
 0 Map 1
@@ -130,9 +163,16 @@ STAGE PLANS:
   Select Operator
 expressions: _col4 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+

[46/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 3a179a3..6167f48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
 
 import java.io.Serializable;
+import java.lang.annotation.Annotation;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -33,6 +34,7 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Pattern;
+import org.apache.commons.lang.ArrayUtils;
 
 import org.apache.calcite.util.Pair;
 import org.apache.commons.lang.ArrayUtils;
@@ -43,6 +45,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -62,7 +66,11 @@ import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOpe
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
@@ -73,6 +81,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -91,18 +100,36 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc;
+import org.apache.hadoop.hive.ql.plan.VectorizationCondition;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorLimitDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import

[23/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
index 16603c7..c21da5f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
@@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5  
-PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, 
c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, 
c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index b9ffa34..25066be 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain 
+PREHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -7,7 +7,7 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -16,6 +16,10 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,6 +72,14 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
@@ -84,6 +96,14 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -100,8 +120,23 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true

[32/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out 
b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
index c7897f7..2789664 100644
--- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
@@ -8,12 +8,16 @@ LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -21,53 +25,62 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
 Tez
- A masked pattern was here 
   Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
   Vertices:
 Map 1 
 Map Operator Tree:
-TableScan
-  alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 1045942 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Filter Operator
-predicate: cdouble is null (type: boolean)
-Statistics: Num rows: 3114 Data size: 265164 Basic stats: 
COMPLETE Column stats: COMPLETE
-Select Operator
-  expressions: cstring1 (type: string), cint (type: int), 
cfloat (type: float), csmallint (type: smallint), 
COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string)
-  outputColumnNames: _col1, _col2, _col3, _col4, _col5
-  Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-key expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string)
-sort order: +
-Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-TopN Hash Memory Usage: 0.1
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [6, 2, 4, 1, 16]
+  selectExpressions: VectorCoalesce(columns [12, 6, 
13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, 
CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 
14:String, CastLongToString(col 1) -> 15:String) -> 16:string
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+

[29/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index ff658d7..9a09b89 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -1,6 +1,6 @@
 Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
@@ -8,65 +8,199 @@ where not key in
 order by key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
 (select key from src)
 order by key
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Reducer 2 vectorized, llap
-  File Output Operator [FS_36]
-Select Operator [SEL_35] (rows=1 width=178)
-  Output:["_col0","_col1"]
-<-Map 1 [SIMPLE_EDGE] llap
-  SHUFFLE [RS_21]
-Select Operator [SEL_20] (rows=1 width=178)
-  Output:["_col0","_col1"]
-  Filter Operator [FIL_19] (rows=1 width=265)
-predicate:_col3 is null
-Map Join Operator [MAPJOIN_28] (rows=1219 width=265)
-  Conds:MAPJOIN_27._col0=RS_17._col0(Left 
Outer),Output:["_col0","_col1","_col3"]
-<-Map 5 [BROADCAST_EDGE] llap
-  BROADCAST [RS_17]
-PartitionCols:_col0
-Select Operator [SEL_12] (rows=500 width=87)
-  Output:["_col0"]
-  TableScan [TS_11] (rows=500 width=87)
-
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-<-Map Join Operator [MAPJOIN_27] (rows=500 width=178)
-Conds:(Inner),Output:["_col0","_col1"]
-  <-Reducer 4 [BROADCAST_EDGE] vectorized, llap
-BROADCAST [RS_34]
-  Select Operator [SEL_33] (rows=1 width=8)
-Filter Operator [FIL_32] (rows=1 width=8)
-  predicate:(_col0 = 0)
-  Group By Operator [GBY_31] (rows=1 width=8)
-
Output:["_col0"],aggregations:["count(VALUE._col0)"]
-  <-Map 3 [SIMPLE_EDGE] llap
-SHUFFLE [RS_6]
-  Group By Operator [GBY_5] (rows=1 width=8)
-Output:["_col0"],aggregations:["count()"]
-Select Operator [SEL_4] (rows=1 width=87)
-  Filter Operator [FIL_25] (rows=1 width=87)
-predicate:key is null
-TableScan [TS_2] (rows=500 width=87)
-  
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-  <-Select Operator [SEL_1] (rows=500 width=178)
-  Output:["_col0","_col1"]
-  TableScan [TS_0] (rows=500 width=178)
-
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: src
+  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 
+1 
+

[19/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
index b926ab4b..aa8ed4a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
@@ -1,31 +1,82 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, nvl(cdouble, 
100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, 
nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: alltypesorc
+  Statistics: Num rows: 12288 Data size: 73400 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+  Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
+predicate: cdouble is null (type: boolean)
+Statistics: Num rows: 3114 Data size: 18608 Basic stats: 
COMPLETE Column stats: COMPLETE
+Select Operator
+  expressions: null (type: double), 100.0 (type: double)
+  outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [12, 13]
+  selectExpressions: ConstantVectorExpression(val 
null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double
+  Statistics: Num rows: 3114 Data size: 24920 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Limit
+Number of rows: 10
+Limit Vectorization:
+className: VectorLimitOperator
+native: true
+Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+
   Stage: Stage-0
 Fetch Operator
   limit: 10
   Processor Tree:
-TableScan
-  alias: alltypesorc
-  Filter Operator
-predicate: cdouble is null (type: boolean)
-Select Operator
-  expressions: null (type: double), 100.0 (type: double)
-  outputColumnNames: _col0, _col1
-  Limit
-Number of rows: 10
-ListSink
+ListSink
 
 PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
@@ -51,30 +102,76 @@ NULL   100.0
 NULL   100.0
 NULL   100.0
 NULL   100.0
-PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cfloat, nvl(cfloat, 
1) as n
 FROM

[33/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out 
b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index a510e38..ce05391 100644
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -155,6 +159,12 @@ STAGE PLANS:
   value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Map 3 
 Map Operator Tree:
 TableScan
@@ -175,16 +185,38 @@ STAGE PLANS:
 value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 
(type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: 
timestamp), _col9 (type: decimal(4,2))
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 aggregations: sum(VALUE._col0)
+Group By Vectorization:
+aggregators: VectorUDAFSumLong(col 0) -> bigint
+className: VectorGroupByOperator
+vectorOutput: true
+native: false
+projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -208,16 +240,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
  A masked pattern was here 
 -27832781952
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN

[48/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index 2162f17..3e4a195 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -131,4 +131,10 @@ public abstract class IfExprTimestampScalarColumnBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 707f574..5273131 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -117,4 +117,10 @@ public abstract class IfExprTimestampScalarScalarBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index f19551e..2f6e7b9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -107,6 +107,11 @@ public class IsNotNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 3169bae..583ab7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -105,6 +105,11 @@ public class IsNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 VectorExpressionDescriptor.Builder b = new 
VectorExpressionDescriptor.Builder();
 b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index 33f50e0..6fa9779 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -174,6 +174,11 @@ public class LongColDivideLongColumn extends 
VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

[18/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index 5729237..fbd294e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULLNULL-850295959  -1887561756 NULL
NULLWMIgGA734hA4KQj2vD3fI6gX82220d  NULL
 NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
 NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
 NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -244,112 +244,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: c
-  Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int), cstring1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 40 Data size: 3560 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Map Join Operator
-condition map:
- Left Outer Join0 to 1
-keys:
-  0 _col1 (type: string)
-  1 _col0 (type: string)
-input vertices:
-  1 Map 4
-Statistics: Num rows: 80 Data size: 640 Basic stats: 
COMPLETE Column stats: COMPLETE
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-value expressions: _col0 (type: bigint)
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: cd
-  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE 
Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int)
-outputColumnNames: _col0
-Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 4 
-Map Operator Tree:

[22/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
index 69911f5..f3ffee8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
@@ -132,91 +132,17 @@ POSTHOOK: query: select * from t4
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t4
  A masked pattern was here 
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PREHOOK: query: explain vectorization only summary
 
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: a
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Semi Join 0 to 1
-  keys:
-0 key (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col0, _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: int), _col1 (type: 
string)
-sort order: ++
-Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: b
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: key (type: int)
-  outputColumnNames: _col0
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-keys: _col0 (type: int)
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Reducer 2 
-Execution mode: llap
-Reduce Operator Tree:
-  Select Operator
-expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
 
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
+select * from t1 a left semi join t2 b on

[30/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
index 882e83d..5d28d22 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
@@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
   round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8)
 FROM decimal_tbl_1_orc ORDER BY d
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -75,26 +79,61 @@ STAGE PLANS:
 TableScan
   alias: decimal_tbl_1_orc
   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Select Operator
 expressions: round(dec) (type: decimal(21,0)), round(dec, 
0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) 
(type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) 
(type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) 
(type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) 
(type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) 
(type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13]
+selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 
1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) 
-> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 
0, decimalPlaces 3) -> 5:decimal(24,3), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 
6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -3) -> 8:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 
9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -6) -> 11:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7)
  -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -8) -> 13:decimal(21,0)
 Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: decimal(21,0))
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: decimal(21,0)), _col2 
(type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: 
decimal(24,3)), _col5 (type:

[54/57] [abbrv] hive git commit: HIVE-14835: Improve ptest2 build time (Prasanth Jayachandran reviewed by Sergio Pena)

2016-10-13 Thread sershe

HIVE-14835: Improve ptest2 build time (Prasanth Jayachandran reviewed by Sergio 
Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/53531618
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/53531618
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/53531618

Branch: refs/heads/hive-14535
Commit: 535316187f9451f11ac1cfbe7d6d66f61f2ee6d8
Parents: 0995719
Author: Prasanth Jayachandran 
Authored: Thu Oct 13 14:40:09 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Thu Oct 13 14:40:09 2016 -0700

--
 testutils/ptest2/src/main/resources/source-prep.vm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/53531618/testutils/ptest2/src/main/resources/source-prep.vm
--
diff --git a/testutils/ptest2/src/main/resources/source-prep.vm 
b/testutils/ptest2/src/main/resources/source-prep.vm
index 67e6a95..0fc22be 100644
--- a/testutils/ptest2/src/main/resources/source-prep.vm
+++ b/testutils/ptest2/src/main/resources/source-prep.vm
@@ -102,11 +102,11 @@ cd $workingDir/
 fi
   done
 #end
-mvn -B clean install -DskipTests -Dmaven.repo.local=$workingDir/maven 
$mavenArgs $mavenBuildArgs
+mvn -B clean install -DskipTests -T 4 -q 
-Dmaven.repo.local=$workingDir/maven $mavenArgs $mavenBuildArgs
 if [[ -d "itests" ]]
 then
   cd itests
-  mvn -B clean install -DskipTests -Dmaven.repo.local=$workingDir/maven 
$mavenArgs $mavenBuildArgs
+  mvn -B clean install -DskipTests -T 4 -q 
-Dmaven.repo.local=$workingDir/maven $mavenArgs $mavenBuildArgs
 fi
   elif [[ "${buildTool}" == "ant" ]]
   then

[04/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out 
b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
index 511bd79..ef19bad 100644
--- a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -53,18 +57,45 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
+Spark Hash Table Sink Vectorization:
+className: VectorSparkHashTableSinkOperator
+native: true
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 1
+includeColumns: [0]
+dataColumns: c:int
+partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -77,12 +108,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (a > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: a (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -90,6 +132,14 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)

[44/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index ebe613e..78b2e8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.TableSample;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde.serdeConstants;
 
 
@@ -396,4 +397,29 @@ public class TableScanDesc extends AbstractOperatorDesc {
 return opProps;
   }
 
+  public class TableScanOperatorExplainVectorization extends 
OperatorExplainVectorization {
+
+private final TableScanDesc tableScanDesc;
+private final VectorTableScanDesc vectorTableScanDesc;
+
+public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, 
VectorDesc vectorDesc) {
+  // Native vectorization supported.
+  super(vectorDesc, true);
+  this.tableScanDesc = tableScanDesc;
+  vectorTableScanDesc = (VectorTableScanDesc) vectorDesc;
+}
+
+@Explain(vectorization = Vectorization.EXPRESSION, displayName = 
"projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+public String getProjectedOutputColumns() {
+  return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns());
+}
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan 
Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public TableScanOperatorExplainVectorization getTableScanVectorization() {
+if (vectorDesc == null) {
+  return null;
+}
+return new TableScanOperatorExplainVectorization(this, vectorDesc);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
index 7a70e6b..a037ea3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 
 /**
  * TezWork. This class encapsulates all the work objects that can be executed
@@ -49,7 +49,8 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
  *
  */
 @SuppressWarnings("serial")
-@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+vectorization = Vectorization.SUMMARY_PATH)
 public class TezWork extends AbstractOperatorDesc {
 
   public enum VertexType {
@@ -107,7 +108,8 @@ public class TezWork extends AbstractOperatorDesc {
   /**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
-  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED })
+  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
   public Map getWorkMap() {
 Map result = new LinkedHashMap();
 for (BaseWork w: getAllWork()) {
@@ -306,7 +308,8 @@ public class TezWork extends AbstractOperatorDesc {
 }
   }
 
-  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
   public Map getDependencyMap() {
 Map result = new LinkedHashMap();
 for (Map.Entry entry: 
invertedWorkGraph.entrySet()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
new file mode 100644
index 000..2e11321
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
@@ -0,0 +1,35 @@

[40/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q 
b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 2d3788d..d2ded71 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;
 
-EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from 
srcpart group by ds;
+EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds;
 create table srcpart_date stored as orc as select ds as ds, ds as `date` from 
srcpart group by ds;
 create table srcpart_hour stored as orc as select hr as hr, hr as hour from 
srcpart group by hr;
 create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, 
hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as 
hour from srcpart group by hr;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
@@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where

[06/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index fbb43c4..7d722d0 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION 
[(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,27 +36,65 @@ STAGE PLANS:
 TableScan
   alias: decimal_date_test
   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterLongColumnInList(col 3, 
values [-67, -171]) -> boolean
 predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: 
boolean)
 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: cdate (type: date)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [3]
   Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: date)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions:

[11/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 6c6c6d6..14606ed 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@vsmb_bucket_txt
 POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
 POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, 
comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -117,33 +121,71 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
   Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,

[08/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 996b893..423fdbf 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE 
[(part_staging)part_staging.FieldSchem
 POSTHOOK: Lineage: part_orc.p_type SIMPLE 
[(part_staging)part_staging.FieldSchema(name:p_type, type:string, 
comment:null), ]
 PREHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -132,7 +132,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -142,6 +142,10 @@ from noop(on part_orc
   order by p_name
   )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,6 +176,14 @@ STAGE PLANS:
 auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -224,6 +236,11 @@ STAGE PLANS:
 Reducer 2 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -255,6 +272,11 @@ STAGE PLANS:
 Reducer 3 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -377,7 +399,7 @@ Manufacturer#5  almond aquamarine dodger light 
gainsboro46  4   4   6208.18
 Manufacturer#5 almond azure blanched chiffon midnight  23  5   5   
7672.66
 PREHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
@@ -386,13 +408,17 @@ sort by j.p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
 distribute by j.p_mfgr
 sort by j.p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -428,6 +454,14 @@ STAGE PLANS:
   auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true

[02/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index bd9b852..eb61044 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,105 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE 
STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
 #### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-Spark
-#### A masked pattern was here ####
-  Vertices:
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: st
-  Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: s
-  Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 6663 Data size: 2229 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: bigint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-Reducer 2 
-Execution mode: vectorized
-Reduce Operator Tree:
-  Group By Operator
-aggregations: count(VALUE._col0)
-mode: mergepartial
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -184,111 +100,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK:

[52/57] [abbrv] hive git commit: HIVE-14373: Add integration tests for hive on S3 (Thomas Poepping and Abdullah Yousufi, reviewed by Sergio Pena and Illya Yalovyy)

2016-10-13 Thread sershe

HIVE-14373: Add integration tests for hive on S3 (Thomas Poepping and Abdullah 
Yousufi, reviewed by Sergio Pena and Illya Yalovyy)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/527f21b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/527f21b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/527f21b1

Branch: refs/heads/hive-14535
Commit: 527f21b1750f54b69519e63088755463550e301c
Parents: f923db0
Author: Sergio Pena 
Authored: Thu Oct 13 10:23:19 2016 -0500
Committer: Sergio Pena 
Committed: Thu Oct 13 10:25:03 2016 -0500

--
 .gitignore  |   1 +
 itests/hive-blobstore/README|  25 ++
 itests/hive-blobstore/pom.xml   | 355 +++
 .../hadoop/hive/cli/TestBlobstoreCliDriver.java |  64 
 .../cli/TestBlobstoreNegativeCliDriver.java |  64 
 .../clientnegative/select_dropped_table.q   |   4 +
 .../test/queries/clientpositive/insert_into.q   |   4 +
 .../test/resources/blobstore-conf.xml.template  |  22 ++
 .../src/test/resources/hive-site.xml| 271 ++
 .../test/resources/testconfiguration.properties |   2 +
 .../src/test/resources/tez-site.xml |   6 +
 .../clientnegative/select_dropped_table.q.out   |  21 ++
 .../results/clientpositive/insert_into.q.out|  35 ++
 itests/pom.xml  |   1 +
 .../control/AbstractCoreBlobstoreCliDriver.java | 167 +
 .../hadoop/hive/cli/control/CliConfigs.java |  40 +++
 .../cli/control/CoreBlobstoreCliDriver.java |  29 ++
 .../control/CoreBlobstoreNegativeCliDriver.java |  29 ++
 .../org/apache/hadoop/hive/ql/QTestUtil.java|  18 +-
 19 files changed, 1155 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/.gitignore
--
diff --git a/.gitignore b/.gitignore
index 4d341a0..47c59da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ hcatalog/webhcat/java-client/target
 hcatalog/storage-handlers/hbase/target
 hcatalog/webhcat/svr/target
 conf/hive-default.xml.template
+itests/hive-blobstore/src/test/resources/blobstore-conf.xml
 .DS_Store

http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/itests/hive-blobstore/README
--
diff --git a/itests/hive-blobstore/README b/itests/hive-blobstore/README
new file mode 100644
index 0000000..dbd42d5
--- /dev/null
+++ b/itests/hive-blobstore/README
@@ -0,0 +1,25 @@
+The folder structure details are:
+
+ * ./src/test/queries  - contains the queries to be tested on s3
+ * ./src/test/results  - contains the expected hive console output for 
the queries
+ * ./target/qfile-results  - Hive console output goes here
+ * ../../data/conf/blobstore/  - contains hive-site.xml
+
+To run blobstore integration tests:
+
+ 1. Create blobstore-conf.xml in ./src/test/resources/ with the blobstore 
credentials and test blobstore path (see blobstore-conf.xml.template).
+
+ 2. Run following command:
+   mvn test -Dtest=TestBlobstore[Negative]CliDriver
+
+To run a single integration test:
+
+ 2. Example command:
+   mvn test -Dtest=TestBlobstoreCliDriver -Dqfile=insert_into.q
+
+To run the tests on Tez:
+
+ 2. Example command:
+   mvn test -Dtest=TestBlobstoreCliDriver -Dqfile=insert_into.q 
-Dhive.execution.engine=tez -Dclustermode=tez_local
+
+Use ${hiveconf:test.blobstore.path.unique} in .q test files to access the 
blobstore path.

http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/itests/hive-blobstore/pom.xml
--
diff --git a/itests/hive-blobstore/pom.xml b/itests/hive-blobstore/pom.xml
new file mode 100644
index 0000000..a62885f
--- /dev/null
+++ b/itests/hive-blobstore/pom.xml
@@ -0,0 +1,355 @@
+
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  4.0.0
+
+  
+org.apache.hive
+hive-it
+2.2.0-SNAPSHOT
+../pom.xml
+  
+
+  hive-blobstore
+  jar
+  Hive Integration - Blobstore Tests
+
+  
+../..
+
+
+false
+
+false
+${hadoop.version}
+-mkdir -p
+  
+
+  
+
+  tests-off
+  
+
+  src/test/resources/blobstore-conf.xml
+
+  
+  
+true
+  
+
+
+  tests-on
+  
+
+  src/test/resources/blobstore-conf.xml
+
+  
+  
+false
+  
+
+  
+
+  
+
+
+  org.apache.hive
+  hive-ant
+  ${project.version}
+

[37/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
index b137894..85f858b 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
@@ -87,25 +87,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE 
[(values__tmp__table__1
 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=5 width=20)
-  Output:["_col0","_col1","_col2"]
-  TableScan [TS_0] (rows=5 width=20)
-
default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+#### A masked pattern was here ####
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: table_add_int_permute_select
+  Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
+  Select Operator
+expressions: insert_num (type: int), a (type: int), b 
(type: string)
+outputColumnNames: _col0, _col1, _col2
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2]
+Statistics: Num rows: 5 Data size: 101 Basic stats: 
COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 5 Data size: 101 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 4
+includeColumns: [0, 1, 2]
+dataColumns: insert_num:int, a:int, b:string, c:int
+partitionColumnCount: 0
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,a,b from table_add_int_permute_select
@@ -212,25 +259,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c 
EXPRESSION [(values__tm
 POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3   _col4
-PREHOOK: query: explain

[51/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed 
by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f923db0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f923db0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f923db0b

Branch: refs/heads/hive-14535
Commit: f923db0b53acf8b7912d3f98a23deca509c9c6fb
Parents: f9843ac
Author: Matt McCline 
Authored: Thu Oct 13 03:49:52 2016 -0700
Committer: Matt McCline 
Committed: Thu Oct 13 03:49:52 2016 -0700

--
 .../org/apache/hive/common/util/DateUtils.java  |20 +
 .../ColumnArithmeticColumn.txt  | 7 +-
 .../ColumnArithmeticColumnDecimal.txt   | 5 +
 .../ColumnArithmeticColumnWithConvert.txt   |   173 -
 .../ColumnArithmeticScalar.txt  | 5 +
 .../ColumnArithmeticScalarDecimal.txt   | 5 +
 .../ColumnArithmeticScalarWithConvert.txt   |   150 -
 .../ExpressionTemplates/ColumnCompareColumn.txt | 5 +
 .../ExpressionTemplates/ColumnCompareScalar.txt | 5 +
 .../ExpressionTemplates/ColumnDivideColumn.txt  | 5 +
 .../ColumnDivideColumnDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnDivideScalar.txt  | 5 +
 .../ColumnDivideScalarDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnUnaryFunc.txt | 5 +
 .../ExpressionTemplates/ColumnUnaryMinus.txt| 5 +
 ...eColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...eColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 .../DateColumnArithmeticTimestampColumn.txt | 5 +
 .../DateColumnArithmeticTimestampScalar.txt | 5 +
 ...eScalarArithmeticIntervalYearMonthColumn.txt | 5 +
 .../DateScalarArithmeticTimestampColumn.txt | 5 +
 .../DecimalColumnUnaryFunc.txt  | 5 +
 .../ExpressionTemplates/FilterColumnBetween.txt | 7 +-
 .../FilterColumnCompareColumn.txt   | 9 +-
 .../FilterColumnCompareScalar.txt   | 9 +-
 .../FilterDecimalColumnBetween.txt  | 5 +
 .../FilterDecimalColumnCompareDecimalColumn.txt | 5 +
 .../FilterDecimalColumnCompareDecimalScalar.txt | 5 +
 .../FilterDecimalScalarCompareDecimalColumn.txt | 5 +
 ...erLongDoubleColumnCompareTimestampColumn.txt | 5 +
 ...erLongDoubleScalarCompareTimestampColumn.txt | 5 +
 .../FilterScalarCompareColumn.txt   | 9 +-
 .../FilterStringColumnBetween.txt   | 9 +-
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 8 +
 .../FilterTimestampColumnBetween.txt| 5 +
 ...erTimestampColumnCompareLongDoubleColumn.txt | 5 +
 ...erTimestampColumnCompareLongDoubleScalar.txt | 5 +
 ...terTimestampColumnCompareTimestampColumn.txt | 5 +
 ...terTimestampColumnCompareTimestampScalar.txt | 5 +
 ...erTimestampScalarCompareLongDoubleColumn.txt | 5 +
 ...terTimestampScalarCompareTimestampColumn.txt | 5 +
 .../FilterTruncStringColumnBetween.txt  |10 +-
 .../ExpressionTemplates/IfExprColumnScalar.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarColumn.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarScalar.txt  | 5 +
 ...ervalYearMonthColumnArithmeticDateColumn.txt | 5 +
 ...ervalYearMonthColumnArithmeticDateScalar.txt | 5 +
 ...YearMonthColumnArithmeticTimestampColumn.txt | 5 +
 ...YearMonthColumnArithmeticTimestampScalar.txt | 5 +
 ...ervalYearMonthScalarArithmeticDateColumn.txt | 5 +
 ...YearMonthScalarArithmeticTimestampColumn.txt | 5 +
 .../LongDoubleColumnCompareTimestampColumn.txt  | 5 +
 .../LongDoubleColumnCompareTimestampScalar.txt  | 4 +
 .../LongDoubleScalarCompareTimestampColumn.txt  | 5 +
 .../ScalarArithmeticColumn.txt  | 5 +
 .../ScalarArithmeticColumnDecimal.txt   | 5 +
 .../ScalarArithmeticColumnWithConvert.txt   |   163 -
 .../ExpressionTemplates/ScalarCompareColumn.txt | 5 +
 .../ExpressionTemplates/ScalarDivideColumn.txt  | 5 +
 .../ScalarDivideColumnDecimal.txt   | 5 +
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 6 +
 ...tringGroupColumnCompareTruncStringScalar.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 7 +
 .../TimestampColumnArithmeticDateColumn.txt | 5 +
 .../TimestampColumnArithmeticDateScalar.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 ...TimestampColumnArithmeticTimestampColumn.txt | 5 +
 ...TimestampColumnArithmeticTimestampScalar.txt | 5 +

[15/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
index 4c252c7..0bab7bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE 
[(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,25 +132,59 @@ STAGE PLANS:
 TableScan
   alias: vectortab2korc
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12]
   Select Operator
 expressions: b (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [3]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: bigint)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2000 Data size: 918712 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out

[53/57] [abbrv] hive git commit: HIVE-14929: Adding JDBC test for query cancellation scenario (Deepak Jaiswal via Jason Dere)

2016-10-13 Thread sershe

HIVE-14929: Adding JDBC test for query cancellation scenario (Deepak Jaiswal 
via Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0995719d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0995719d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0995719d

Branch: refs/heads/hive-14535
Commit: 0995719d06e7fedb75e53c5db9f195767c259ec9
Parents: 527f21b
Author: Jason Dere 
Authored: Thu Oct 13 14:23:08 2016 -0700
Committer: Jason Dere 
Committed: Thu Oct 13 14:23:08 2016 -0700

--
 .../org/apache/hive/jdbc/TestJdbcDriver2.java   | 97 
 1 file changed, 97 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0995719d/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index 689eab3..b7362fb 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -2249,6 +2249,47 @@ public class TestJdbcDriver2 {
   }
 
   /**
+   *  Tests for query cancellation
+   */
+
+  @Test
+  public void testCancelQueryNotRun() throws Exception {
+try (final Statement stmt = con.createStatement()){
+  System.out.println("Cancel the Statement without running query ...");
+  stmt.cancel();
+  System.out.println("Executing query: ");
+  stmt.executeQuery(" show databases");
+}
+  }
+
+  @Test
+  public void testCancelQueryFinished() throws Exception {
+try (final Statement stmt = con.createStatement()){
+  System.out.println("Executing query: ");
+  stmt.executeQuery(" show databases");
+  System.out.println("Cancel the Statement after running query ...");
+  stmt.cancel();
+}
+  }
+
+  @Test
+  public void testCancelQueryErrored() throws Exception {
+final Statement stmt = con.createStatement();
+try {
+  System.out.println("Executing query: ");
+  stmt.executeQuery("list dbs");
+  fail("Expecting SQLException");
+} catch (SQLException e) {
+  // No-op
+}
+
+// Cancel the query
+System.out.println("Cancel the Statement ...");
+stmt.cancel();
+stmt.close();
+  }
+
+  /**
* Test the cancellation of a query that is running.
* We spawn 2 threads - one running the query and
* the other attempting to cancel.
@@ -2303,6 +2344,62 @@ public class TestJdbcDriver2 {
   }
 
   @Test
+  public void testQueryCancelTwice() throws Exception {
+String udfName = SleepMsUDF.class.getName();
+Statement stmt1 = con.createStatement();
+stmt1.execute("create temporary function sleepMsUDF as '" + udfName + "'");
+stmt1.close();
+final Statement stmt = con.createStatement();
+// Thread executing the query
+Thread tExecute = new Thread(new Runnable() {
+  @Override
+  public void run() {
+try {
+  System.out.println("Executing query: ");
+  // The test table has 500 rows, so total query time should be ~ 
500*500ms
+  stmt.executeQuery("select sleepMsUDF(t1.under_col, 1) as u0, 
t1.under_col as u1, " +
+  "t2.under_col as u2 from " + tableName +  " t1 join " + 
tableName +
+  " t2 on t1.under_col = t2.under_col");
+  fail("Expecting SQLException");
+} catch (SQLException e) {
+  // This thread should throw an exception
+  assertNotNull(e);
+  System.out.println(e.toString());
+}
+  }
+});
+// Thread cancelling the query
+Thread tCancel = new Thread(new Runnable() {
+  @Override
+  public void run() {
+// 1st Cancel
+try {
+  // Sleep for 100ms
+  Thread.sleep(100);
+  System.out.println("Cancelling query: ");
+  stmt.cancel();
+} catch (Exception e) {
+  // No-op
+}
+// 2nd cancel
+try {
+  // Sleep for 5ms and cancel again
+  Thread.sleep(5);
+  System.out.println("Cancelling query again: ");
+  stmt.cancel();
+} catch (Exception e) {
+  // No-op
+}
+  }
+});
+tExecute.start();
+tCancel.start();
+tExecute.join();
+tCancel.join();
+stmt.close();
+  }
+
+  @Test
   public void testQueryTimeout() throws Exception {
 String udfName = SleepMsUDF.class.getName();
 Statement stmt1 = con.createStatement();

[35/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out 
b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
index 735e4f4..8e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
@@ -43,48 +43,110 @@ POSTHOOK: Output: default@tbl2
 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
 PREHOOK: query: -- The join is being performed as part of sub-query. It should 
be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is being performed as part of sub-query. It 
should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Reducer 2 vectorized, llap
-  File Output Operator [FS_22]
-Group By Operator [GBY_21] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count(VALUE._col0)"]
-<-Map 1 [SIMPLE_EDGE] llap
-  SHUFFLE [RS_11]
-Group By Operator [GBY_10] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count()"]
-  Merge Join Operator [MERGEJOIN_19] (rows=11 width=93)
-Conds:SEL_2._col0=SEL_5._col0(Inner)
-  <-Select Operator [SEL_5] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_18] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_3] (rows=10 width=93)
-  default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-  <-Select Operator [SEL_2] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_17] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_0] (rows=10 width=93)
-  default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+#### A masked pattern was here ####
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Merge Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
+

[13/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 8cf503f..f0d2a50 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -35,7 +35,7 @@ LIMIT 40
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, 
c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -86,15 +90,34 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2028982 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprOrExpr(children: 
FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) 
-> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> 
boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 
11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: 
CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
 predicate: (((cfloat < 3569) and (10.175 >= cdouble) and 
(cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and 
(UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 
9763215.5639))) (type: boolean)
 Statistics: Num rows: 5461 Data size: 901772 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)
   outputColumnNames: cboolean1, ctinyint, ctimestamp1, 
cfloat, cstring1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [10, 0, 8, 4, 6]
   Statistics: Num rows: 5461 Data size: 901772 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: max(ctinyint), sum(cfloat), 
stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+Group By Vectorization:
+aggregators: VectorUDAFMaxLong(col 0) -> tinyint, 
VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> 
struct, VectorUDAFStdPopLong(col 0) -> 
struct, VectorUDAFMaxDouble(col 4) -> 
float, VectorUDAFMinLong(col 0) -> tinyint
+className: VectorGroupByOperator
+vectorOutput: false
+keyExpressions: col 10, col 0, col 8, col 4, col 6
+native: false
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFStdPopDouble(col 4) -> 
struct output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> 
struct output type STRUCT requires 
PRIMITIVE IS false
 keys: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)

[17/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
index 9369661..9a95606 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
@@ -126,113 +126,15 @@ POSTHOOK: Output: default@TJOIN4
 POSTHOOK: Lineage: tjoin4.c1 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin4.c2 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin4.rnum SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: tjoin1
-  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-expressions: rnum (type: int), c1 (type: int)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 3 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col1 (type: int)
-1 _col1 (type: int)
-  outputColumnNames: _col0, _col2, _col3
-  input vertices:
-1 Map 2
-  Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col2 (type: int), 
_col3 (type: int)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col2 (type: int)
-1 _col1 (type: int)
-  outputColumnNames: _col0, _col1, _col3
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 4 Data size: 449 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: int), 
_col3 (type: int)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 2 
-Map Operator Tree:
-TableScan
-  alias: tjoin2
-  Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-expressions: rnum (type: int), c1 (type: int)
-outputColumnNames: _col0, _col1
-

[56/57] [abbrv] hive git commit: HIVE-14640 : handle hive.merge.*files in select queries (Sergey Shelukhin)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/eacf9f9b/ql/src/test/results/clientpositive/llap/mm_all.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out 
b/ql/src/test/results/clientpositive/llap/mm_all.q.out
index b70ae3c..f8001c2 100644
--- a/ql/src/test/results/clientpositive/llap/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out
@@ -153,12 +153,12 @@ POSTHOOK: Input: default@part_mm@key_mm=456
 10 456
 10 455
 10 455
-97 456
 97 455
+97 456
 97 455
 98 455
-98 455
 98 456
+98 455
 PREHOOK: query: drop table part_mm
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@part_mm
@@ -735,37 +735,195 @@ POSTHOOK: query: drop table skew_dp_union_mm
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@skew_dp_union_mm
 POSTHOOK: Output: default@skew_dp_union_mm
-PREHOOK: query: -- future
+PREHOOK: query: create table merge0_mm (id int) stored as orc 
tblproperties('hivecommit'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: create table merge0_mm (id int) stored as orc 
tblproperties('hivecommit'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@merge0_mm
+PREHOOK: query: insert into table merge0_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: insert into table merge0_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Output: default@merge0_mm
+POSTHOOK: Lineage: merge0_mm.id SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge0_mm
+ A masked pattern was here 
+POSTHOOK: query: select * from merge0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge0_mm
+ A masked pattern was here 
+98
+97
+0
+10
+PREHOOK: query: insert into table merge0_mm select key from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: insert into table merge0_mm select key from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Output: default@merge0_mm
+POSTHOOK: Lineage: merge0_mm.id SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from merge0_mm
+PREHOOK: type: QUERY
+PREHOOK: Input: default@merge0_mm
+ A masked pattern was here 
+POSTHOOK: query: select * from merge0_mm
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@merge0_mm
+ A masked pattern was here 
+98
+97
+0
+10
+98
+97
+0
+10
+PREHOOK: query: drop table merge0_mm
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@merge0_mm
+PREHOOK: Output: default@merge0_mm
+POSTHOOK: query: drop table merge0_mm
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@merge0_mm
+POSTHOOK: Output: default@merge0_mm
+PREHOOK: query: create table merge1_mm (id int) partitioned by (key int) 
stored as orc tblproperties('hivecommit'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: create table merge1_mm (id int) partitioned by (key int) 
stored as orc tblproperties('hivecommit'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@merge1_mm
+PREHOOK: query: insert into table merge1_mm partition (key) select key, key 
from intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@intermediate
+PREHOOK: Input: default@intermediate@p=455
+PREHOOK: Input: default@intermediate@p=456
+PREHOOK: Output: default@merge1_mm
+POSTHOOK: query: insert into table merge1_mm partition (key) select key, key 
from intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@intermediate
+POSTHOOK: Input: default@intermediate@p=455
+POSTHOOK: Input: default@intermediate@p=456
+POSTHOOK: Output: default@merge1_mm@key=0
+POSTHOOK: Output: default@merge1_mm@key=10
+POSTHOOK: Output: default@merge1_mm@key=97
+POSTHOOK: Output: default@merge1_mm@key=98
+POSTHOOK: Lineage: merge1_mm PARTITION(key=0).id SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=10).id SIMPLE 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: merge1_mm PARTITION(key=97).id SIMPLE

[49/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
index 4f5ba9a..061e396 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one 
VectorizedRowBatch to
@@ -35,9 +36,9 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
 // Order on sourceColumn.
-vectorColumnMapping.add(sourceColumn, outputColumn, typeName);
+vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo);
   }
 
   @Override
@@ -47,7 +48,7 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
 // Ordered columns are the source columns.
 sourceColumns = mapping.getOrderedColumns();
 outputColumns = mapping.getValueColumns();
-typeNames = mapping.getTypeNames();
+typeInfos = mapping.getTypeInfos();
 
 // Not needed anymore.
 vectorColumnMapping = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c8e0284..911aeb0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -262,8 +262,7 @@ public class VectorCopyRow {
 for (int i = 0; i < count; i++) {
   int inputColumn = columnMapping.getInputColumns()[i];
   int outputColumn = columnMapping.getOutputColumns()[i];
-  String typeName = columnMapping.getTypeNames()[i].toLowerCase();
-  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+  TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
   Type columnVectorType = 
VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
   CopyRow copyRowByValue = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 261246b..bfe22b0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -50,9 +51,8 @@ public class VectorFilterOperator extends FilterOperator {
   public VectorFilterOperator(CompilationOpContext ctx,
   VectorizationContext vContext, OperatorDesc conf) throws HiveException {
 this(ctx);
-ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate();
-conditionEvaluator = vContext.getVectorExpression(oldExpression, 
VectorExpressionDescriptor.Mode.FILTER);
 this.conf = (FilterDesc) conf;
+conditionEvaluator = ((VectorFilterDesc) 
this.conf.getVectorDesc()).getPredicateExpression();
   }
 
   /** Kryo ctor. */

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 2605203..fef7c2a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import

[57/57] [abbrv] hive git commit: HIVE-14640 : handle hive.merge.*files in select queries (Sergey Shelukhin)

2016-10-13 Thread sershe

HIVE-14640 : handle hive.merge.*files in select queries (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eacf9f9b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eacf9f9b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eacf9f9b

Branch: refs/heads/hive-14535
Commit: eacf9f9b6d7405b68def88ffc5fd755222375efc
Parents: bd78d66
Author: Sergey Shelukhin 
Authored: Thu Oct 13 17:18:46 2016 -0700
Committer: Sergey Shelukhin 
Committed: Thu Oct 13 17:18:46 2016 -0700

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   2 +-
 .../hive/ql/exec/AbstractFileMergeOperator.java | 181 +--
 .../hadoop/hive/ql/exec/FileSinkOperator.java   | 187 ++-
 .../apache/hadoop/hive/ql/exec/MoveTask.java|   8 +-
 .../hive/ql/exec/OrcFileMergeOperator.java  |  11 +-
 .../hive/ql/exec/RCFileMergeOperator.java   |   3 +-
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 225 --
 .../rcfile/truncate/ColumnTruncateMapper.java   |   1 +
 .../apache/hadoop/hive/ql/metadata/Hive.java|   1 +
 .../hive/ql/optimizer/GenMapRedUtils.java   | 214 +
 .../hive/ql/parse/DDLSemanticAnalyzer.java  |   9 +-
 .../hadoop/hive/ql/parse/GenTezUtils.java   |   4 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   4 +-
 .../ql/plan/ConditionalResolverMergeFiles.java  |  17 +-
 .../hadoop/hive/ql/plan/FileMergeDesc.java  |   9 +
 .../hadoop/hive/ql/plan/FileSinkDesc.java   |  14 +-
 .../apache/hadoop/hive/ql/plan/MoveWork.java|  10 +-
 ql/src/test/queries/clientpositive/mm_all.q |  57 +++--
 ql/src/test/queries/clientpositive/mm_current.q |  40 +---
 .../results/clientpositive/llap/mm_all.q.out| 232 +++
 .../clientpositive/llap/mm_current.q.out| 165 +
 21 files changed, 758 insertions(+), 636 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/eacf9f9b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index c89142c..6201c04 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3122,7 +3122,7 @@ public class HiveConf extends Configuration {
 
 HIVE_METASTORE_MM_HEARTBEAT_TIMEOUT("hive.metastore.mm.heartbeat.timeout", 
"1800s",
 new TimeValidator(TimeUnit.SECONDS),
-"MM write ID times out after this long if a heartbeat is not send. 
Currently disabled."), // TODO# heartbeating not implemented
+"MM write ID times out after this long if a heartbeat is not send. 
Currently disabled."),
 
 HIVE_METASTORE_MM_ABSOLUTE_TIMEOUT("hive.metastore.mm.absolute.timeout", 
"7d",
 new TimeValidator(TimeUnit.SECONDS),

http://git-wip-us.apache.org/repos/asf/hive/blob/eacf9f9b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index 40c784b..dedbb78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -34,6 +34,8 @@ import org.apache.hadoop.mapred.JobConf;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Lists;
+
 /**
  * Fast file merge operator for ORC and RCfile. This is an abstract class which
  * does not process any rows. Refer {@link 
org.apache.hadoop.hive.ql.exec.OrcFileMergeOperator}
@@ -47,20 +49,21 @@ public abstract class AbstractFileMergeOperator
 
   protected JobConf jc;
   protected FileSystem fs;
-  protected boolean autoDelete;
-  protected boolean exception;
-  protected Path outPath;
-  protected Path finalPath;
-  protected Path dpPath;
-  protected Path tmpPath;
-  protected Path taskTmpPath;
-  protected int listBucketingDepth;
-  protected boolean hasDynamicPartitions;
-  protected boolean isListBucketingAlterTableConcatenate;
-  protected boolean tmpPathFixedConcatenate;
-  protected boolean tmpPathFixed;
-  protected Set incompatFileSet;
-  protected transient DynamicPartitionCtx dpCtx;
+  private boolean autoDelete;
+  private Path outPath; // The output path used by the subclasses.
+  private Path finalPath; // Used as a final destination; same as outPath for 
MM tables.
+  private Path dpPath;
+  private Path tmpPath; // Only stored to update based on the original in

[38/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
index 437770d..1d4163c 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
@@ -87,25 +87,73 @@ POSTHOOK: Lineage: part_add_int_permute_select 
PARTITION(part=1).b SIMPLE [(valu
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num 
EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=2 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=2 width=16)
-
default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_add_int_permute_select
+  Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE 
Column stats: PARTIAL
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4]
+  Select Operator
+expressions: insert_num (type: int), part (type: int), a 
(type: int), b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 4, 1, 2]
+Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 4
+includeColumns: [0, 1, 2]
+dataColumns: insert_num:int, a:int, b:string, c:int
+partitionColumnCount: 1
+partitionColumns: part:int
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,part,a,b from part_add_int_permute_select
@@ -206,25 +254,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).c EXPRES
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d 
SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).insert_num EXPRESSION

[31/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 3d67664..9e185c6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales 
PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost 
SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, 
type:decimal(7,2), comment:null), ]
 PREHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
 POSTHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1257,11 +1261,24 @@ STAGE PLANS:
 TableScan
   alias: web_sales
   Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33]
   Select Operator
 expressions: ws_order_number (type: int)
 outputColumnNames: ws_order_number
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [16]
 Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
+  Group By Vectorization:
+  className: VectorGroupByOperator
+  vectorOutput: true
+  keyExpressions: col 16
+  native: false
+  projectedOutputColumns: []
   keys: ws_order_number (type: int)
   mode: hash
   outputColumnNames: _col0
@@ -1270,36 +1287,88 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 2000 Data size: 352 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)

[26/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
index 23a977e..61702bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,26 +131,61 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_2
   Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: str1 (type: string), (CAST( str1 AS INTERVAL 
YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( 
str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS 
INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 
AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR 
TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MON
 TH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), 
(CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL 
YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 
>= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) 
(type: boolean)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+selectExpressions: LongColEqualLongColumn(col 6, col 
7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, 
LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 
2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 
7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 
7)(children:

[45/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
new file mode 100644
index 000..e0a6198
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Why a node did not vectorize.
+ *
+ */
+public class VectorizerReason  {
+
+  private static long serialVersionUID = 1L;
+
+  public static enum VectorizerNodeIssue {
+NONE,
+NODE_ISSUE,
+OPERATOR_ISSUE,
+EXPRESSION_ISSUE
+  }
+
+  private final VectorizerNodeIssue vectorizerNodeIssue;
+
+  private final Operator<? extends OperatorDesc> operator;
+
+  private final String expressionTitle;
+
+  private final String issue;
+
+  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
+  Operator<? extends OperatorDesc> operator, String expressionTitle, 
String issue) {
+this.vectorizerNodeIssue = vectorizerNodeIssue;
+this.operator = operator;
+this.expressionTitle = expressionTitle;
+this.issue = issue;
+  }
+
+  public static VectorizerReason createNodeIssue(String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.NODE_ISSUE,
+null,
+null,
+issue);
+  }
+
+  public static VectorizerReason createOperatorIssue(Operator<? extends OperatorDesc> operator,
+  String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.OPERATOR_ISSUE,
+operator,
+null,
+issue);
+  }
+
+  public static VectorizerReason createExpressionIssue(Operator<? extends OperatorDesc> operator,
+  String expressionTitle, String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.EXPRESSION_ISSUE,
+operator,
+expressionTitle,
+issue);
+  }
+
+  @Override
+  public VectorizerReason clone() {
+return new VectorizerReason(vectorizerNodeIssue, operator, 
expressionTitle, issue);
+  }
+
+  public VectorizerNodeIssue getVectorizerNodeIssue() {
+return vectorizerNodeIssue;
+  }
+
+  public Operator<? extends OperatorDesc> getOperator() {
+return operator;
+  }
+
+  public String getExpressionTitle() {
+return expressionTitle;
+  }
+
+  public String getIssue() {
+return issue;
+  }
+
+  @Override
+  public String toString() {
+String reason;
+switch (vectorizerNodeIssue) {
+case NODE_ISSUE:
+  reason = (issue == null ? "unknown" : issue);
+  break;
+case OPERATOR_ISSUE:
+  reason = (operator == null ? "Unknown" : operator.getType()) + " 
operator: " +
+   (issue == null ? "unknown" : issue);
+  break;
+case EXPRESSION_ISSUE:
+  reason = expressionTitle + " expression for " +
+  (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+  (issue == null ? "unknown" : issue);
+  break;
+default:
+  reason = "Unknown " + vectorizerNodeIssue;
+}
+return reason;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 4a8ff15..1f118dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -27,12 +27,27 @@ import org.apache.hadoop.fs.Path;
  */
 
 public class ExplainConfiguration {
+
+  public enum VectorizationDetailLevel {
+
+SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1);
+
+public final int rank;
+VectorizationDetailLevel(int rank) {
+  this.rank = rank;
+}
+  };
+
   private boolean extended = false;
   private boolean

[41/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_outer_join6.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join6.q 
b/ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..b39e8ed 100644
--- a/ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ b/ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -3,6 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
@@ -28,14 +29,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM 
TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q 
b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..b825fb3 100644
--- a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 create table inventory_txt
 (
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from 
inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from 
inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0;
 
 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as 
fifthcol from inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1;
 
 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select 
* from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk 
other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2a;
 
 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b 
partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from 
inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change 
inv_quantity_on_hand other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2b;
 
 -- Verify we do not vectorize when a partition column type is different.
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select 
* from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk 
inv_warehouse_sk bigint;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q 
b/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..ee22c01 100644
--- a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++

[47/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index c288731..77b44fb 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.commons.lang.ArrayUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -63,6 +65,8 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class is common operator class for native vectorized map join.
  *
@@ -72,7 +76,43 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  */
 public abstract class VectorMapJoinCommonOperator extends MapJoinOperator 
implements VectorizationContextRegion {
   private static final long serialVersionUID = 1L;
-  private static final Logger LOG = 
LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName());
+
+  
//
+
+  private static final String CLASS_NAME = 
VectorMapJoinCommonOperator.class.getName();
+private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected abstract String getLoggingPrefix();
+
+  // For debug tracing: information about the map or reduce task, operator, 
operator class, etc.
+  protected transient String loggingPrefix;
+
+  protected String getLoggingPrefix(String className) {
+if (loggingPrefix == null) {
+  initLoggingPrefix(className);
+}
+return loggingPrefix;
+  }
+
+  protected void initLoggingPrefix(String className) {
+if (hconf == null) {
+  // Constructor time...
+  loggingPrefix = className;
+} else {
+  // Determine the name of our map or reduce task for debug tracing.
+  BaseWork work = Utilities.getMapWork(hconf);
+  if (work == null) {
+work = Utilities.getReduceWork(hconf);
+  }
+  loggingPrefix = className + " " + work.getName() + " " + getOperatorId();
+}
+  }
+
+  
//
+
+  protected VectorMapJoinDesc vectorDesc;
+
+  protected VectorMapJoinInfo vectorMapJoinInfo;
 
   // Whether this operator is an outer join.
   protected boolean isOuterJoin;
@@ -88,10 +128,10 @@ public abstract class VectorMapJoinCommonOperator extends 
MapJoinOperator implem
   // a mixture of input big table columns and new scratch columns.
   protected VectorizationContext vOutContext;
 
-  // The output column projection of the vectorized row batch.  And, the type 
names of the output
+  // The output column projection of the vectorized row batch.  And, the type 
infos of the output
   // columns.
   protected int[] outputProjection;
-  protected String[] outputTypeNames;
+  protected TypeInfo[] outputTypeInfos;
 
   // These are the vectorized batch expressions for filtering, key 
expressions, and value
   // expressions.
@@ -101,15 +141,17 @@ public abstract class VectorMapJoinCommonOperator extends 
MapJoinOperator implem
 
   // This is map of which vectorized row batch columns are the big table key 
columns.  Since
   // we may have key expressions that produce new scratch columns, we need a 
mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[] bigTableKeyColumnMap;
-  protected ArrayList<String> bigTableKeyTypeNames;
+  protected String[] bigTableKeyColumnNames;
+  protected TypeInfo[] bigTableKeyTypeInfos;
 
   // Similarly, this is map of which vectorized row batch columns are the big 
table value columns.
   // Since we may have value expressions that produce new scratch columns, we 
need a mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[]

[50/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
index a72b882..9114932 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
@@ -477,6 +477,11 @@ public class <ClassName> extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
index 8b1c366..b56d451 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -152,4 +154,9 @@ public abstract class <ClassName> extends VectorExpression {
 this.value = value;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", val " + new String(value, 
StandardCharsets.UTF_8);
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
index 930069c..4fb5035 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -155,4 +157,10 @@ public abstract class <ClassName> extends VectorExpression 
{
   public void setValue(byte[] value) {
 this.value = value;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + 
colNum;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
index 4298d79..7863b16 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -154,6 +154,11 @@ public class <ClassName> extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", left " + leftValue.toString() + ", right " + 
rightValue.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt

[10/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 76c8404..c2e1dfd 100644
--- 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 11
 12
-PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -74,8 +78,19 @@ STAGE PLANS:
 Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 keys: KEY._col0 (type: string)
@@ -199,11 +214,15 @@ POSTHOOK: Output: default@srcpart_double_hour
 POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -234,6 +253,10 @@ STAGE PLANS:
   Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 4 
 Map Operator Tree:
 TableScan
@@ -269,6 +292,14 @@ STAGE PLANS:
 Target Vertex: Map 1
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: llap
 Reduce Operator Tree:
@@ -290,6 +321,13 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Reducer 3 
 Execution mode: vectorized, llap
+

[01/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

Repository: hive
Updated Branches:
  refs/heads/hive-14535 b9e815722 -> eacf9f9b6


http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 22fe7cd..b297a7d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -1,7 +1,7 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
MAX(ctinyint),
COUNT(ctinyint),
@@ -12,7 +12,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
MAX(ctinyint),
COUNT(ctinyint),
@@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1,
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -37,42 +41,100 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Select Operator
 expressions: ctinyint (type: tinyint)
 outputColumnNames: ctinyint
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0]
 Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: min(ctinyint), max(ctinyint), 
count(ctinyint), count()
+  Group By Vectorization:
+  aggregators: VectorUDAFMinLong(col 0) -> tinyint, 
VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+  className: VectorGroupByOperator
+  vectorOutput: true
+  native: false
+  projectedOutputColumns: [0, 1, 2, 3]
   mode: hash
   outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 sort order: 
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+Group By Vectorization:
+aggregators: VectorUDAFMinLong(col 0) -> tinyint,

[09/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 1bab6f7..a7c0d10 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -1,15 +1,19 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -28,12 +32,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 2) -> 
boolean
 predicate: cint is not null (type: boolean)
 Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2]
   Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
   Map Join Operator
 condition map:
@@ -41,6 +56,10 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
+Map Join Vectorization:
+className: VectorMapJoinInnerBigOnlyLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
 outputColumnNames: _col0, _col1
 input vertices:
   1 Map 3
@@ -48,9 +67,21 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), 
(_col0 + _col1) (type: int)
   outputColumnNames: _col0, _col1, _col2
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2, 2, 12]
+  selectExpressions: LongColAddLongColumn(col 2, 
col 2) -> 12:long
   Statistics: Num rows: 19518 Data size: 156144 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: count(_col0), max(_col1), 
min(_col0), avg(_col2)
+Group By Vectorization:
+aggregators: VectorUDAFCount(col 2) -> bigint, 
VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, 
VectorUDAFAvgLong(col 12) -> struct
+className: VectorGroupByOperator
+vectorOutput: false
+native: false
+projectedOutputColumns: [0, 1, 2, 3]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 12) -> struct output type

[07/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index ceaac4f..636463b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00-12-31 23:59:59.9   3652060 
23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 4092911..ae59b06 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -73,7 +73,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -116,26 +120,61 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc_string
   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: to_unix_timestamp(ctimestamp1) (type: 
bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), 
day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), 
minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]

[27/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index d8003ba..e7d1963 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
 PREHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -47,13 +47,17 @@ select
 from vector_interval_1 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
   interval '1 2:3:4' day to second, interval_day_time(str2)
 from vector_interval_1 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,26 +75,62 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_1
   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Select Operator
 expressions: str1 (type: string), CAST( str1 AS INTERVAL 
YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO 
SECOND) (type: interval_day_time)
 outputColumnNames: _col0, _col2, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 4, 5]
+selectExpressions: CastStringToIntervalYearMonth(col 
2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 
5:interval_day_time
 Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col2 (type: interval_year_month), 
_col4 (type: interval_day_time)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: 
interval_year_month), VALUE._col0 (type: interval_year_month), 1 
02:03:04.0 (type: interval_day_time), VALUE._col1 (type: 
interval_day_time)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 3, 1, 4, 2]
+selectExpressions: ConstantVectorExpression(val 14) -> 
3:long,

[05/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out 
b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
index dbaf14d..79638c1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -189,10 +193,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
  A masked pattern was here 
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -209,29 +217,66 @@ STAGE PLANS:
 TableScan
   alias: over1korc
   Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
   Select Operator
 expressions: t (type: tinyint), si (type: smallint), i 
(type: int), b (type: bigint), f (type: float), d (type: double), bo (type: 
boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin 
(type: binary)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int)
   sort order: +++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 1049 Data size: 311170 Basic 
stats: COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col3 (type: bigint), _col4 (type: 
float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), 
_col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet:

[39/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
index 0a01b8c..1511298 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
@@ -149,25 +149,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).b SIMPL
 POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 
SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, 
type:struct,
 comment:null), ]
 complex_struct1_c_txt.insert_num   complex_struct1_c_txt.s1
complex_struct1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 llap
-  File Output Operator [FS_2]
-Select Operator [SEL_1] (rows=6 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=6 width=789)
-
default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_change_various_various_struct1
+  Statistics: Num rows: 6 Data size: 4734 Basic stats: 
COMPLETE Column stats: PARTIAL
+  Select Operator
+expressions: insert_num (type: int), part (type: int), s1 
(type: 
struct),
 b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Select expression for SELECT operator: 
Data type 
struct
 of Column[s1] not supported
+vectorized: false
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: select insert_num,part,s1,b from 
part_change_various_various_struct1
 PREHOOK: type: QUERY
@@ -413,25 +443,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).b SIMPLE [
 POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 
SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, 
type:struct,
 comment:null), ]

[36/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
index 85116e7..964ce95 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
@@ -282,25 +282,73 @@ POSTHOOK: Lineage: 
part_change_various_various_boolean_to_bigint PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).c9 SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, 
type:boolean, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 insert_num boolean1boolean1boolean1boolean1
boolean1boolean1boolean1boolean1boolean1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   int1int1int1int1int1int1
int1int1int1int1int1bigint1 bigint1 bigint1 bigint1 bigint1 
bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=10 width=4)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"]
-  TableScan [TS_0] (rows=10 width=475)
-
default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_change_various_various_boolean_to_bigint
+  Statistics: Num rows: 10 Data size: 4759 Basic stats: 
COMPLETE Column stats: PARTIAL
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 
50, 51, 52, 53, 54, 55]
+  Select Operator
+expressions: insert_num (type: int), part (type: int), c1 
(type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 
(type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type:

[25/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
index 13a8b35..ab7a103 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
@@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION 
[(unique_timestamps)
 POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE 
[(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, 
comment:null), ]
 tsval  tsval
 PREHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -49,7 +49,7 @@ from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -62,6 +62,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -79,26 +83,61 @@ STAGE PLANS:
 TableScan
   alias: interval_arithmetic_1
   Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: dateval (type: date), (dateval - 2-2) (type: 
date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + 
-2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7]
+selectExpressions: 
DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, 
DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, 
DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, 
DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, 
IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, 
IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long
 Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: date)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: date), _col2 (type: 
date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: 
date)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 
(type: date),

[20/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
index 06e30d8..cf90430 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
@@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1
 POSTHOOK: Lineage: myinput1.key SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: myinput1.value SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ]
 PREHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 PREHOOK: type: QUERY
 POSTHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,12 +75,20 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Map Join Operator
 condition map:
  Inner Join 0 to 1
 keys:
   0 key (type: int)
   1 value (type: int)
+Map Join Vectorization:
+className: VectorMapJoinOperator
+native: false
+nativeConditionsMet: hive.execution.engine tez IN 
[tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS 
true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash 
Join IS true, Small table vectorizes IS true
+nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false
 nullSafes: [true]
 outputColumnNames: _col0, _col1, _col5, _col6
 input vertices:
@@ -85,9 +97,16 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), _col5 
(type: int), _col6 (type: int)
   outputColumnNames: _col0, _col1, _col2, _col3
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -95,19 +114,42 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 2 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Reduce Output Operator
 key expressions: value (type: int)
 sort order: +
 Map-reduce partition columns: value (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native:

[14/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out 
b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index edb67f1..911a962 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0  val_0
 10 val_10
 100val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -84,8 +88,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -148,16 +167,20 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -187,8 +210,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -254,12 +292,16 @@ create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -279,36 +321,81 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats:

[42/57] [abbrv] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread sershe

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_interval_1.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_1.q 
b/ql/src/test/queries/clientpositive/vector_interval_1.q
index 8fefe41..f4f0024 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_1.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_1;
 create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 
string) stored as orc;
@@ -13,7 +12,7 @@ insert into vector_interval_1
   select null, null, null, null from src limit 1;
 
 -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -28,7 +27,7 @@ from vector_interval_1 order by str1;
 
 
 -- interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -49,7 +48,7 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt;
 
-explain
+explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -72,7 +71,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -107,7 +106,7 @@ from vector_interval_1 order by dt;
 
 
 -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -142,7 +141,7 @@ from vector_interval_1 order by ts;
 
 
 -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -159,7 +158,7 @@ from vector_interval_1 order by ts;
 
 
 -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -176,7 +175,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_interval_2.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_2.q 
b/ql/src/test/queries/clientpositive/vector_interval_2.q
index 5afb511..0b78a4b 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_2.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_2;
 create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 
string, str3 string, str4 string) stored as orc;
@@ -14,7 +14,7 @@ insert into vector_interval_2
 
 -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -77,7 +77,7 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -128,7 +128,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -191,7 +191,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -244,7 +244,7 @@ from vector_interval_2 order by str3;
 
 
 -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -293,7 +293,7 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -342,7 +342,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -381,7 +381,7 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to

hive git commit: HIVE-14835: Improve ptest2 build time (Prasanth Jayachandran reviewed by Sergio Pena)

2016-10-13 Thread prasanthj

Repository: hive
Updated Branches:
  refs/heads/master 0995719d0 -> 535316187


HIVE-14835: Improve ptest2 build time (Prasanth Jayachandran reviewed by Sergio 
Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/53531618
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/53531618
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/53531618

Branch: refs/heads/master
Commit: 535316187f9451f11ac1cfbe7d6d66f61f2ee6d8
Parents: 0995719
Author: Prasanth Jayachandran 
Authored: Thu Oct 13 14:40:09 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Thu Oct 13 14:40:09 2016 -0700

--
 testutils/ptest2/src/main/resources/source-prep.vm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/53531618/testutils/ptest2/src/main/resources/source-prep.vm
--
diff --git a/testutils/ptest2/src/main/resources/source-prep.vm 
b/testutils/ptest2/src/main/resources/source-prep.vm
index 67e6a95..0fc22be 100644
--- a/testutils/ptest2/src/main/resources/source-prep.vm
+++ b/testutils/ptest2/src/main/resources/source-prep.vm
@@ -102,11 +102,11 @@ cd $workingDir/
 fi
   done
 #end
-mvn -B clean install -DskipTests -Dmaven.repo.local=$workingDir/maven 
$mavenArgs $mavenBuildArgs
+mvn -B clean install -DskipTests -T 4 -q 
-Dmaven.repo.local=$workingDir/maven $mavenArgs $mavenBuildArgs
 if [[ -d "itests" ]]
 then
   cd itests
-  mvn -B clean install -DskipTests -Dmaven.repo.local=$workingDir/maven 
$mavenArgs $mavenBuildArgs
+  mvn -B clean install -DskipTests -T 4 -q 
-Dmaven.repo.local=$workingDir/maven $mavenArgs $mavenBuildArgs
 fi
   elif [[ "${buildTool}" == "ant" ]]
   then

hive git commit: HIVE-14929: Adding JDBC test for query cancellation scenario (Deepak Jaiswal via Jason Dere)

2016-10-13 Thread jdere

Repository: hive
Updated Branches:
  refs/heads/master 527f21b17 -> 0995719d0


HIVE-14929: Adding JDBC test for query cancellation scenario (Deepak Jaiswal 
via Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0995719d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0995719d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0995719d

Branch: refs/heads/master
Commit: 0995719d06e7fedb75e53c5db9f195767c259ec9
Parents: 527f21b
Author: Jason Dere 
Authored: Thu Oct 13 14:23:08 2016 -0700
Committer: Jason Dere 
Committed: Thu Oct 13 14:23:08 2016 -0700

--
 .../org/apache/hive/jdbc/TestJdbcDriver2.java   | 97 
 1 file changed, 97 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/0995719d/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java 
b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
index 689eab3..b7362fb 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java
@@ -2249,6 +2249,47 @@ public class TestJdbcDriver2 {
   }
 
   /**
+   *  Tests for query cancellation
+   */
+
+  @Test
+  public void testCancelQueryNotRun() throws Exception {
+try (final Statement stmt = con.createStatement()){
+  System.out.println("Cancel the Statement without running query ...");
+  stmt.cancel();
+  System.out.println("Executing query: ");
+  stmt.executeQuery(" show databases");
+}
+  }
+
+  @Test
+  public void testCancelQueryFinished() throws Exception {
+try (final Statement stmt = con.createStatement()){
+  System.out.println("Executing query: ");
+  stmt.executeQuery(" show databases");
+  System.out.println("Cancel the Statement after running query ...");
+  stmt.cancel();
+}
+  }
+
+  @Test
+  public void testCancelQueryErrored() throws Exception {
+final Statement stmt = con.createStatement();
+try {
+  System.out.println("Executing query: ");
+  stmt.executeQuery("list dbs");
+  fail("Expecting SQLException");
+} catch (SQLException e) {
+  // No-op
+}
+
+// Cancel the query
+System.out.println("Cancel the Statement ...");
+stmt.cancel();
+stmt.close();
+  }
+
+  /**
* Test the cancellation of a query that is running.
* We spawn 2 threads - one running the query and
* the other attempting to cancel.
@@ -2303,6 +2344,62 @@ public class TestJdbcDriver2 {
   }
 
   @Test
+  public void testQueryCancelTwice() throws Exception {
+String udfName = SleepMsUDF.class.getName();
+Statement stmt1 = con.createStatement();
+stmt1.execute("create temporary function sleepMsUDF as '" + udfName + "'");
+stmt1.close();
+final Statement stmt = con.createStatement();
+// Thread executing the query
+Thread tExecute = new Thread(new Runnable() {
+  @Override
+  public void run() {
+try {
+  System.out.println("Executing query: ");
+  // The test table has 500 rows, so total query time should be ~ 
500*500ms
+  stmt.executeQuery("select sleepMsUDF(t1.under_col, 1) as u0, 
t1.under_col as u1, " +
+  "t2.under_col as u2 from " + tableName +  " t1 join " + 
tableName +
+  " t2 on t1.under_col = t2.under_col");
+  fail("Expecting SQLException");
+} catch (SQLException e) {
+  // This thread should throw an exception
+  assertNotNull(e);
+  System.out.println(e.toString());
+}
+  }
+});
+// Thread cancelling the query
+Thread tCancel = new Thread(new Runnable() {
+  @Override
+  public void run() {
+// 1st Cancel
+try {
+  // Sleep for 100ms
+  Thread.sleep(100);
+  System.out.println("Cancelling query: ");
+  stmt.cancel();
+} catch (Exception e) {
+  // No-op
+}
+// 2nd cancel
+try {
+  // Sleep for 5ms and cancel again
+  Thread.sleep(5);
+  System.out.println("Cancelling query again: ");
+  stmt.cancel();
+} catch (Exception e) {
+  // No-op
+}
+  }
+});
+tExecute.start();
+tCancel.start();
+tExecute.join();
+tCancel.join();
+stmt.close();
+  }
+
+  @Test
   public void testQueryTimeout() throws Exception {
 String udfName = SleepMsUDF.class.getName();
 Statement stmt1 = con.createStatement();

hive git commit: HIVE-14373: Add integration tests for hive on S3 (Thomas Poepping and Abdullah Yousufi, reviewed by Sergio Pena and Illya Yalovyy)

2016-10-13 Thread spena

Repository: hive
Updated Branches:
  refs/heads/master f923db0b5 -> 527f21b17


HIVE-14373: Add integration tests for hive on S3 (Thomas Poepping and Abdullah 
Yousufi, reviewed by Sergio Pena and Illya Yalovyy)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/527f21b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/527f21b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/527f21b1

Branch: refs/heads/master
Commit: 527f21b1750f54b69519e63088755463550e301c
Parents: f923db0
Author: Sergio Pena 
Authored: Thu Oct 13 10:23:19 2016 -0500
Committer: Sergio Pena 
Committed: Thu Oct 13 10:25:03 2016 -0500

--
 .gitignore  |   1 +
 itests/hive-blobstore/README|  25 ++
 itests/hive-blobstore/pom.xml   | 355 +++
 .../hadoop/hive/cli/TestBlobstoreCliDriver.java |  64 
 .../cli/TestBlobstoreNegativeCliDriver.java |  64 
 .../clientnegative/select_dropped_table.q   |   4 +
 .../test/queries/clientpositive/insert_into.q   |   4 +
 .../test/resources/blobstore-conf.xml.template  |  22 ++
 .../src/test/resources/hive-site.xml| 271 ++
 .../test/resources/testconfiguration.properties |   2 +
 .../src/test/resources/tez-site.xml |   6 +
 .../clientnegative/select_dropped_table.q.out   |  21 ++
 .../results/clientpositive/insert_into.q.out|  35 ++
 itests/pom.xml  |   1 +
 .../control/AbstractCoreBlobstoreCliDriver.java | 167 +
 .../hadoop/hive/cli/control/CliConfigs.java |  40 +++
 .../cli/control/CoreBlobstoreCliDriver.java |  29 ++
 .../control/CoreBlobstoreNegativeCliDriver.java |  29 ++
 .../org/apache/hadoop/hive/ql/QTestUtil.java|  18 +-
 19 files changed, 1155 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/.gitignore
--
diff --git a/.gitignore b/.gitignore
index 4d341a0..47c59da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ hcatalog/webhcat/java-client/target
 hcatalog/storage-handlers/hbase/target
 hcatalog/webhcat/svr/target
 conf/hive-default.xml.template
+itests/hive-blobstore/src/test/resources/blobstore-conf.xml
 .DS_Store

http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/itests/hive-blobstore/README
--
diff --git a/itests/hive-blobstore/README b/itests/hive-blobstore/README
new file mode 100644
index 0000000..dbd42d5
--- /dev/null
+++ b/itests/hive-blobstore/README
@@ -0,0 +1,25 @@
+The folder structure details are:
+
+ * ./src/test/queries  - contains the queries to be tested on s3
+ * ./src/test/results  - contains the expected hive console output for 
the queries
+ * ./target/qfile-results  - Hive console output goes here
+ * ../../data/conf/blobstore/  - contains hive-site.xml
+
+To run blobstore integration tests:
+
+ 1. Create blobstore-conf.xml in ./src/test/resources/ with the blobstore 
credentials and test blobstore path (see blobstore-conf.xml.template).
+
+ 2. Run following command:
+   mvn test -Dtest=TestBlobstore[Negative]CliDriver
+
+To run a single integration test:
+
+ 2. Example command:
+   mvn test -Dtest=TestBlobstoreCliDriver -Dqfile=insert_into.q
+
+To run the tests on Tez:
+
+ 2. Example command:
+   mvn test -Dtest=TestBlobstoreCliDriver -Dqfile=insert_into.q 
-Dhive.execution.engine=tez -Dclustermode=tez_local
+
+Use ${hiveconf:test.blobstore.path.unique} in .q test files to access the 
blobstore path.

http://git-wip-us.apache.org/repos/asf/hive/blob/527f21b1/itests/hive-blobstore/pom.xml
--
diff --git a/itests/hive-blobstore/pom.xml b/itests/hive-blobstore/pom.xml
new file mode 100644
index 0000000..a62885f
--- /dev/null
+++ b/itests/hive-blobstore/pom.xml
@@ -0,0 +1,355 @@
+
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  
+org.apache.hive
+hive-it
+2.2.0-SNAPSHOT
+../pom.xml
+  
+
+  hive-blobstore
+  jar
+  Hive Integration - Blobstore Tests
+
+  
+../..
+
+
+false
+
+false
+${hadoop.version}
+-mkdir -p
+  
+
+  
+
+  tests-off
+  
+
+  src/test/resources/blobstore-conf.xml
+
+  
+  
+true
+  
+
+
+  tests-on
+  
+
+  src/test/resources/blobstore-conf.xml
+
+  
+  
+false
+  
+
+  
+
+  
+

[48/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index 2162f17..3e4a195 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -131,4 +131,10 @@ public abstract class IfExprTimestampScalarColumnBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 707f574..5273131 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -117,4 +117,10 @@ public abstract class IfExprTimestampScalarScalarBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index f19551e..2f6e7b9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -107,6 +107,11 @@ public class IsNotNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 3169bae..583ab7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -105,6 +105,11 @@ public class IsNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 VectorExpressionDescriptor.Builder b = new 
VectorExpressionDescriptor.Builder();
 b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index 33f50e0..6fa9779 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -174,6 +174,11 @@ public class LongColDivideLongColumn extends 
VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

[50/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
index a72b882..9114932 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
@@ -477,6 +477,11 @@ public class <ClassName> extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
index 8b1c366..b56d451 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -152,4 +154,9 @@ public abstract class <ClassName> extends VectorExpression {
 this.value = value;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", val " + new String(value, 
StandardCharsets.UTF_8);
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
index 930069c..4fb5035 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -155,4 +157,10 @@ public abstract class <ClassName> extends VectorExpression 
{
   public void setValue(byte[] value) {
 this.value = value;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + 
colNum;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
index 4298d79..7863b16 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -154,6 +154,11 @@ public class <ClassName> extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", left " + leftValue.toString() + ", right " + 
rightValue.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt

[02/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index bd9b852..eb61044 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,105 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE 
STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
 #### A masked pattern was here ####
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-Spark
-#### A masked pattern was here ####
-  Vertices:
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: st
-  Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: s
-  Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 6663 Data size: 2229 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: bigint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-Reducer 2 
-Execution mode: vectorized
-Reduce Operator Tree:
-  Group By Operator
-aggregations: count(VALUE._col0)
-mode: mergepartial
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -184,111 +100,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
 #### A masked pattern was here ####
 6876
-PREHOOK: query: explain
+PREHOOK:

[39/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
index 0a01b8c..1511298 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
@@ -149,25 +149,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).b SIMPL
 POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 
SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, 
type:struct,
 comment:null), ]
 complex_struct1_c_txt.insert_num   complex_struct1_c_txt.s1
complex_struct1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 llap
-  File Output Operator [FS_2]
-Select Operator [SEL_1] (rows=6 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=6 width=789)
-
default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_change_various_various_struct1
+  Statistics: Num rows: 6 Data size: 4734 Basic stats: 
COMPLETE Column stats: PARTIAL
+  Select Operator
+expressions: insert_num (type: int), part (type: int), s1 
(type: 
struct),
 b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Select expression for SELECT operator: 
Data type 
struct
 of Column[s1] not supported
+vectorized: false
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: select insert_num,part,s1,b from 
part_change_various_various_struct1
 PREHOOK: type: QUERY
@@ -413,25 +443,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).b SIMPLE [
 POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 
SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, 
type:struct,
 comment:null), ]

[42/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_interval_1.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_1.q 
b/ql/src/test/queries/clientpositive/vector_interval_1.q
index 8fefe41..f4f0024 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_1.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_1;
 create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 
string) stored as orc;
@@ -13,7 +12,7 @@ insert into vector_interval_1
   select null, null, null, null from src limit 1;
 
 -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -28,7 +27,7 @@ from vector_interval_1 order by str1;
 
 
 -- interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -49,7 +48,7 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt;
 
-explain
+explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -72,7 +71,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -107,7 +106,7 @@ from vector_interval_1 order by dt;
 
 
 -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -142,7 +141,7 @@ from vector_interval_1 order by ts;
 
 
 -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -159,7 +158,7 @@ from vector_interval_1 order by ts;
 
 
 -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -176,7 +175,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_interval_2.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_2.q 
b/ql/src/test/queries/clientpositive/vector_interval_2.q
index 5afb511..0b78a4b 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_2.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_2;
 create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 
string, str3 string, str4 string) stored as orc;
@@ -14,7 +14,7 @@ insert into vector_interval_2
 
 -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -77,7 +77,7 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -128,7 +128,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -191,7 +191,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -244,7 +244,7 @@ from vector_interval_2 order by str3;
 
 
 -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -293,7 +293,7 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -342,7 +342,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -381,7 +381,7 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to

[31/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 3d67664..9e185c6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales 
PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost 
SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, 
type:decimal(7,2), comment:null), ]
 PREHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
 POSTHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1257,11 +1261,24 @@ STAGE PLANS:
 TableScan
   alias: web_sales
   Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33]
   Select Operator
 expressions: ws_order_number (type: int)
 outputColumnNames: ws_order_number
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [16]
 Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
+  Group By Vectorization:
+  className: VectorGroupByOperator
+  vectorOutput: true
+  keyExpressions: col 16
+  native: false
+  projectedOutputColumns: []
   keys: ws_order_number (type: int)
   mode: hash
   outputColumnNames: _col0
@@ -1270,36 +1287,88 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 2000 Data size: 352 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)

[06/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index fbb43c4..7d722d0 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION 
[(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,27 +36,65 @@ STAGE PLANS:
 TableScan
   alias: decimal_date_test
   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterLongColumnInList(col 3, 
values [-67, -171]) -> boolean
 predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: 
boolean)
 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: cdate (type: date)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [3]
   Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: date)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions:

[51/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed 
by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f923db0b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f923db0b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f923db0b

Branch: refs/heads/master
Commit: f923db0b53acf8b7912d3f98a23deca509c9c6fb
Parents: f9843ac
Author: Matt McCline 
Authored: Thu Oct 13 03:49:52 2016 -0700
Committer: Matt McCline 
Committed: Thu Oct 13 03:49:52 2016 -0700

--
 .../org/apache/hive/common/util/DateUtils.java  |20 +
 .../ColumnArithmeticColumn.txt  | 7 +-
 .../ColumnArithmeticColumnDecimal.txt   | 5 +
 .../ColumnArithmeticColumnWithConvert.txt   |   173 -
 .../ColumnArithmeticScalar.txt  | 5 +
 .../ColumnArithmeticScalarDecimal.txt   | 5 +
 .../ColumnArithmeticScalarWithConvert.txt   |   150 -
 .../ExpressionTemplates/ColumnCompareColumn.txt | 5 +
 .../ExpressionTemplates/ColumnCompareScalar.txt | 5 +
 .../ExpressionTemplates/ColumnDivideColumn.txt  | 5 +
 .../ColumnDivideColumnDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnDivideScalar.txt  | 5 +
 .../ColumnDivideScalarDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnUnaryFunc.txt | 5 +
 .../ExpressionTemplates/ColumnUnaryMinus.txt| 5 +
 ...eColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...eColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 .../DateColumnArithmeticTimestampColumn.txt | 5 +
 .../DateColumnArithmeticTimestampScalar.txt | 5 +
 ...eScalarArithmeticIntervalYearMonthColumn.txt | 5 +
 .../DateScalarArithmeticTimestampColumn.txt | 5 +
 .../DecimalColumnUnaryFunc.txt  | 5 +
 .../ExpressionTemplates/FilterColumnBetween.txt | 7 +-
 .../FilterColumnCompareColumn.txt   | 9 +-
 .../FilterColumnCompareScalar.txt   | 9 +-
 .../FilterDecimalColumnBetween.txt  | 5 +
 .../FilterDecimalColumnCompareDecimalColumn.txt | 5 +
 .../FilterDecimalColumnCompareDecimalScalar.txt | 5 +
 .../FilterDecimalScalarCompareDecimalColumn.txt | 5 +
 ...erLongDoubleColumnCompareTimestampColumn.txt | 5 +
 ...erLongDoubleScalarCompareTimestampColumn.txt | 5 +
 .../FilterScalarCompareColumn.txt   | 9 +-
 .../FilterStringColumnBetween.txt   | 9 +-
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 8 +
 .../FilterTimestampColumnBetween.txt| 5 +
 ...erTimestampColumnCompareLongDoubleColumn.txt | 5 +
 ...erTimestampColumnCompareLongDoubleScalar.txt | 5 +
 ...terTimestampColumnCompareTimestampColumn.txt | 5 +
 ...terTimestampColumnCompareTimestampScalar.txt | 5 +
 ...erTimestampScalarCompareLongDoubleColumn.txt | 5 +
 ...terTimestampScalarCompareTimestampColumn.txt | 5 +
 .../FilterTruncStringColumnBetween.txt  |10 +-
 .../ExpressionTemplates/IfExprColumnScalar.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarColumn.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarScalar.txt  | 5 +
 ...ervalYearMonthColumnArithmeticDateColumn.txt | 5 +
 ...ervalYearMonthColumnArithmeticDateScalar.txt | 5 +
 ...YearMonthColumnArithmeticTimestampColumn.txt | 5 +
 ...YearMonthColumnArithmeticTimestampScalar.txt | 5 +
 ...ervalYearMonthScalarArithmeticDateColumn.txt | 5 +
 ...YearMonthScalarArithmeticTimestampColumn.txt | 5 +
 .../LongDoubleColumnCompareTimestampColumn.txt  | 5 +
 .../LongDoubleColumnCompareTimestampScalar.txt  | 4 +
 .../LongDoubleScalarCompareTimestampColumn.txt  | 5 +
 .../ScalarArithmeticColumn.txt  | 5 +
 .../ScalarArithmeticColumnDecimal.txt   | 5 +
 .../ScalarArithmeticColumnWithConvert.txt   |   163 -
 .../ExpressionTemplates/ScalarCompareColumn.txt | 5 +
 .../ExpressionTemplates/ScalarDivideColumn.txt  | 5 +
 .../ScalarDivideColumnDecimal.txt   | 5 +
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 6 +
 ...tringGroupColumnCompareTruncStringScalar.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 7 +
 .../TimestampColumnArithmeticDateColumn.txt | 5 +
 .../TimestampColumnArithmeticDateScalar.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 ...TimestampColumnArithmeticTimestampColumn.txt | 5 +
 ...TimestampColumnArithmeticTimestampScalar.txt | 5 +

[35/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out 
b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
index 735e4f4..8e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
@@ -43,48 +43,110 @@ POSTHOOK: Output: default@tbl2
 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
 PREHOOK: query: -- The join is being performed as part of sub-query. It should 
be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is being performed as part of sub-query. It 
should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Reducer 2 vectorized, llap
-  File Output Operator [FS_22]
-Group By Operator [GBY_21] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count(VALUE._col0)"]
-<-Map 1 [SIMPLE_EDGE] llap
-  SHUFFLE [RS_11]
-Group By Operator [GBY_10] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count()"]
-  Merge Join Operator [MERGEJOIN_19] (rows=11 width=93)
-Conds:SEL_2._col0=SEL_5._col0(Inner)
-  <-Select Operator [SEL_5] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_18] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_3] (rows=10 width=93)
-  default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-  <-Select Operator [SEL_2] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_17] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_0] (rows=10 width=93)
-  default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Merge Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
+

[40/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q 
b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 2d3788d..d2ded71 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;
 
-EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from 
srcpart group by ds;
+EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds;
 create table srcpart_date stored as orc as select ds as ds, ds as `date` from 
srcpart group by ds;
 create table srcpart_hour stored as orc as select hr as hr, hr as hour from 
srcpart group by hr;
 create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, 
hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as 
hour from srcpart group by hr;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
@@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where

[05/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out 
b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
index dbaf14d..79638c1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -189,10 +193,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
 #### A masked pattern was here ####
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -209,29 +217,66 @@ STAGE PLANS:
 TableScan
   alias: over1korc
   Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
   Select Operator
 expressions: t (type: tinyint), si (type: smallint), i 
(type: int), b (type: bigint), f (type: float), d (type: double), bo (type: 
boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin 
(type: binary)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int)
   sort order: +++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 1049 Data size: 311170 Basic 
stats: COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col3 (type: bigint), _col4 (type: 
float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), 
_col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet:

[23/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
index 16603c7..c21da5f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
@@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5  
-PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, 
c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, 
c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index b9ffa34..25066be 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain 
+PREHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -7,7 +7,7 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -16,6 +16,10 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,6 +72,14 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
@@ -84,6 +96,14 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -100,8 +120,23 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true

[24/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index bb6916b..9e591b8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -22,7 +22,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -30,6 +30,10 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -49,40 +53,93 @@ STAGE PLANS:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string), value (type: string)
   outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className:

[13/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 8cf503f..f0d2a50 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -35,7 +35,7 @@ LIMIT 40
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, 
c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -86,15 +90,34 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2028982 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprOrExpr(children: 
FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) 
-> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> 
boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 
11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: 
CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
 predicate: (((cfloat < 3569) and (10.175 >= cdouble) and 
(cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and 
(UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 
9763215.5639))) (type: boolean)
 Statistics: Num rows: 5461 Data size: 901772 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)
   outputColumnNames: cboolean1, ctinyint, ctimestamp1, 
cfloat, cstring1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [10, 0, 8, 4, 6]
   Statistics: Num rows: 5461 Data size: 901772 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: max(ctinyint), sum(cfloat), 
stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+Group By Vectorization:
+aggregators: VectorUDAFMaxLong(col 0) -> tinyint, 
VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopLong(col 0) -> 
struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxDouble(col 4) -> 
float, VectorUDAFMinLong(col 0) -> tinyint
+className: VectorGroupByOperator
+vectorOutput: false
+keyExpressions: col 10, col 0, col 8, col 4, col 6
+native: false
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFStdPopDouble(col 4) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false
 keys: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)

[15/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
index 4c252c7..0bab7bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE 
[(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,25 +132,59 @@ STAGE PLANS:
 TableScan
   alias: vectortab2korc
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12]
   Select Operator
 expressions: b (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [3]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: bigint)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2000 Data size: 918712 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out

[03/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
index b311c49..d1319b8 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
@@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2
 4  FOUR
 NULL   
 NULL   
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -83,15 +87,38 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 550 Basic stats: 
COMPLETE Column stats: NONE
 Spark HashTable Sink Operator
+  Spark Hash Table Sink Vectorization:
+  className: VectorSparkHashTableSinkOperator
+  native: true
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 2
+includeColumns: [0, 1]
+dataColumns: c:int, v2:string
+partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -104,9 +131,16 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 544 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -114,18 +148,45 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
+  Map Join Vectorization:
+  bigTableKeyColumns: [1]
+  bigTableOuterKeyMapping: 1 -> 2
+  bigTableRetainedColumns: [0, 1, 2]
+  bigTableValueColumns: [0, 1]
+  className: VectorMapJoinOuterLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+  projectedOutputColumns: [0, 1, 2, 3]
+  smallTableMapping: [3]
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:

[44/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index ebe613e..78b2e8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.TableSample;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde.serdeConstants;
 
 
@@ -396,4 +397,29 @@ public class TableScanDesc extends AbstractOperatorDesc {
 return opProps;
   }
 
+  public class TableScanOperatorExplainVectorization extends 
OperatorExplainVectorization {
+
+private final TableScanDesc tableScanDesc;
+private final VectorTableScanDesc vectorTableScanDesc;
+
+public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, 
VectorDesc vectorDesc) {
+  // Native vectorization supported.
+  super(vectorDesc, true);
+  this.tableScanDesc = tableScanDesc;
+  vectorTableScanDesc = (VectorTableScanDesc) vectorDesc;
+}
+
+@Explain(vectorization = Vectorization.EXPRESSION, displayName = 
"projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+public String getProjectedOutputColumns() {
+  return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns());
+}
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan 
Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public TableScanOperatorExplainVectorization getTableScanVectorization() {
+if (vectorDesc == null) {
+  return null;
+}
+return new TableScanOperatorExplainVectorization(this, vectorDesc);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
index 7a70e6b..a037ea3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 
 /**
  * TezWork. This class encapsulates all the work objects that can be executed
@@ -49,7 +49,8 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
  *
  */
 @SuppressWarnings("serial")
-@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+vectorization = Vectorization.SUMMARY_PATH)
 public class TezWork extends AbstractOperatorDesc {
 
   public enum VertexType {
@@ -107,7 +108,8 @@ public class TezWork extends AbstractOperatorDesc {
   /**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
-  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED })
+  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
   public Map<String, BaseWork> getWorkMap() {
 Map<String, BaseWork> result = new LinkedHashMap<String, BaseWork>();
 for (BaseWork w: getAllWork()) {
@@ -306,7 +308,8 @@ public class TezWork extends AbstractOperatorDesc {
 }
   }
 
-  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
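   public Map<String, List<Dependency>> getDependencyMap() {
 Map<String, List<Dependency>> result = new LinkedHashMap<String, List<Dependency>>();
 for (Map.Entry<BaseWork, List<BaseWork>> entry: 
invertedWorkGraph.entrySet()) {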

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
new file mode 100644
index 0000000..2e11321
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
@@ -0,0 +1,35 @@

[11/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 6c6c6d6..14606ed 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@vsmb_bucket_txt
 POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
 POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, 
comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -117,33 +121,71 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
   Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true,

[30/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
index 882e83d..5d28d22 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
@@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
   round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8)
 FROM decimal_tbl_1_orc ORDER BY d
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -75,26 +79,61 @@ STAGE PLANS:
 TableScan
   alias: decimal_tbl_1_orc
   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Select Operator
 expressions: round(dec) (type: decimal(21,0)), round(dec, 
0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) 
(type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) 
(type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) 
(type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) 
(type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) 
(type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13]
+selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 
1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) 
-> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 
0, decimalPlaces 3) -> 5:decimal(24,3), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 
6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -3) -> 8:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 
9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -6) -> 11:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7)
  -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -8) -> 13:decimal(21,0)
 Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: decimal(21,0))
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: decimal(21,0)), _col2 
(type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: 
decimal(24,3)), _col5 (type:

[04/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out 
b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
index 511bd79..ef19bad 100644
--- a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -53,18 +57,45 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
+Spark Hash Table Sink Vectorization:
+className: VectorSparkHashTableSinkOperator
+native: true
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 1
+includeColumns: [0]
+dataColumns: c:int
+partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -77,12 +108,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (a > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: a (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -90,6 +132,14 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)

[16/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out 
b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 9eeb0d6..26fa9d9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc
 2010-10-29 12
 2010-10-30 11
 2010-10-31 8
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -280,46 +284,102 @@ STAGE PLANS:
 TableScan
   alias: flights_tiny_orc
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: origin_city_name (type: string), 
dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), 
arr_delay (type: float), fl_num (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col5 (type: int), _col2 (type: date)
   sort order: ++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: timestamp), _col4 (type: float)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), 
VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 1, 4, 5, 0]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Limit
   Number of rows: 25
+  Limit Vectorization:
+  className: VectorLimitOperator
+  native: true
   Statistics: Num rows: 25 Data size: 7200

[43/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q 
b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
index 8ed041b..11df12e 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int);
 
 insert into table part_add_int_permute_select partition(part=1) VALUES (2, 
, 'new', );
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c 
int, d string);
 
 insert into table part_add_int_string_permute_select partition(part=1) VALUES 
(2, , 'new', , '');
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns 
(insert_num int, c1
 
 insert into table part_change_string_group_double partition(part=1) SELECT 
insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE 
insert_num = 111;
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
 
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
@@ -117,7 +117,7 @@ alter table 
part_change_date_group_string_group_date_timestamp replace columns(i
 
 insert into table part_change_date_group_string_group_date_timestamp 
partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
@@ -165,7 +165,7 @@ insert into table 
part_change_numeric_group_string_group_multi_ints_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler',
 'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
@@ -208,7 +208,7 @@ insert into table 
part_change_numeric_group_string_group_floating_string_group p
  'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
  'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
@@ -250,7 +250,7 @@ insert into table 
part_change_string_group_string_group_string partition(part=1)
   'filler', 'filler', 'filler',
   'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
@@ -300,7 +300,7 @@ insert into table 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa
 1234.5678, 9876.543, 789.321,
'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
@@ -331,7 +331,7 @@ alter table 
part_change_lower_to_higher_numeric_group_decimal_to_float replace c
 
 insert into table part_change_lower_to_higher_numeric_group_decimal_to_float 
partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from 
part_change_lower_to_higher_numeric_group_decimal_to_float;
 
 select

[34/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 6b59497..739d0e1 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:cint, t
 POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, 
type:int, comment:null), ]
 tint_txt.rnum  tint_txt.cint
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -93,9 +97,16 @@ STAGE PLANS:
 TableScan
   alias: tint
   Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: rnum (type: int), cint (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 5 Data size: 36 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -103,6 +114,11 @@ STAGE PLANS:
   keys:
 0 
 1 
+  Map Join Vectorization:
+  className: VectorMapJoinOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid 
Hash Join IS true, Small table vectorizes IS true
+  nativeConditionsNotMet: Not empty key IS false
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 1 Map 2
@@ -110,9 +126,17 @@ STAGE PLANS:
   Select Operator
 expressions: _col0 (type: int), _col2 (type: int), 
_col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND 
_col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 2, 1, 3, 5]
+selectExpressions: VectorUDFAdaptor(CASE WHEN 
(_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: 
VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String
 Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -120,21 +144,49 @@ STAGE PLANS:
   serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet:

[08/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 996b893..423fdbf 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE 
[(part_staging)part_staging.FieldSchem
 POSTHOOK: Lineage: part_orc.p_type SIMPLE 
[(part_staging)part_staging.FieldSchema(name:p_type, type:string, 
comment:null), ]
 PREHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -132,7 +132,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -142,6 +142,10 @@ from noop(on part_orc
   order by p_name
   )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,6 +176,14 @@ STAGE PLANS:
 auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Path -> Alias:
  #### A masked pattern was here ####
 Path -> Partition:
@@ -224,6 +236,11 @@ STAGE PLANS:
 Reducer 2 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -255,6 +272,11 @@ STAGE PLANS:
 Reducer 3 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -377,7 +399,7 @@ Manufacturer#5  almond aquamarine dodger light 
gainsboro46  4   4   6208.18
 Manufacturer#5 almond azure blanched chiffon midnight  23  5   5   
7672.66
 PREHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
@@ -386,13 +408,17 @@ sort by j.p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
 distribute by j.p_mfgr
 sort by j.p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -428,6 +454,14 @@ STAGE PLANS:
   auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true

[45/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
new file mode 100644
index 0000000..e0a6198
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Why a node did not vectorize.
+ *
+ */
+public class VectorizerReason  {
+
+  private static long serialVersionUID = 1L;
+
+  public static enum VectorizerNodeIssue {
+NONE,
+NODE_ISSUE,
+OPERATOR_ISSUE,
+EXPRESSION_ISSUE
+  }
+
+  private final VectorizerNodeIssue vectorizerNodeIssue;
+
+  private final Operator operator;
+
+  private final String expressionTitle;
+
+  private final String issue;
+
+  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
+  Operator operator, String expressionTitle, String issue) {
+this.vectorizerNodeIssue = vectorizerNodeIssue;
+this.operator = operator;
+this.expressionTitle = expressionTitle;
+this.issue = issue;
+  }
+
+  public static VectorizerReason createNodeIssue(String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.NODE_ISSUE,
+null,
+null,
+issue);
+  }
+
+  public static VectorizerReason createOperatorIssue(Operator operator,
+  String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.OPERATOR_ISSUE,
+operator,
+null,
+issue);
+  }
+
+  public static VectorizerReason createExpressionIssue(Operator operator,
+  String expressionTitle, String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.EXPRESSION_ISSUE,
+operator,
+expressionTitle,
+issue);
+  }
+
+  @Override
+  public VectorizerReason clone() {
+return new VectorizerReason(vectorizerNodeIssue, operator, expressionTitle, issue);
+  }
+
+  public VectorizerNodeIssue getVectorizerNodeIssue() {
+return vectorizerNodeIssue;
+  }
+
+  public Operator getOperator() {
+return operator;
+  }
+
+  public String getExpressionTitle() {
+return expressionTitle;
+  }
+
+  public String getIssue() {
+return issue;
+  }
+
+  @Override
+  public String toString() {
+String reason;
+switch (vectorizerNodeIssue) {
+case NODE_ISSUE:
+  reason = (issue == null ? "unknown" : issue);
+  break;
+case OPERATOR_ISSUE:
+  reason = (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+   (issue == null ? "unknown" : issue);
+  break;
+case EXPRESSION_ISSUE:
+  reason = expressionTitle + " expression for " +
+  (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+  (issue == null ? "unknown" : issue);
+  break;
+default:
+  reason = "Unknown " + vectorizerNodeIssue;
+}
+return reason;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 4a8ff15..1f118dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -27,12 +27,27 @@ import org.apache.hadoop.fs.Path;
  */
 
 public class ExplainConfiguration {
+
+  public enum VectorizationDetailLevel {
+
+SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1);
+
+public final int rank;
+VectorizationDetailLevel(int rank) {
+  this.rank = rank;
+}
+  };
+
   private boolean extended = false;
   private boolean

[41/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_outer_join6.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join6.q 
b/ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..b39e8ed 100644
--- a/ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ b/ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -3,6 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
@@ -28,14 +29,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM 
TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q 
b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..b825fb3 100644
--- a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 create table inventory_txt
 (
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from 
inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from 
inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0;
 
 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as 
fifthcol from inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1;
 
 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select * from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk 
other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2a;
 
 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from 
inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change 
inv_quantity_on_hand other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2b;
 
 -- Verify we do not vectorize when a partition column type is different.
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select * from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk 
inv_warehouse_sk bigint;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q b/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..ee22c01 100644
--- a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++

[28/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index ca07200..d9e701a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -55,12 +59,23 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -68,6 +83,13 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
+Map Join Vectorization:
+bigTableKeyColumns: [0]
+bigTableRetainedColumns: [0]
+className: VectorMapJoinInnerBigOnlyLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+projectedOutputColumns: [0]
 outputColumnNames: _col1
 input vertices:
   1 Map 2
@@ -75,9 +97,16 @@ STAGE PLANS:
 Select Operator
   expressions: _col1 (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -85,25 +114,66 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+

[09/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 1bab6f7..a7c0d10 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -1,15 +1,19 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -28,12 +32,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 2) -> 
boolean
 predicate: cint is not null (type: boolean)
 Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2]
   Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
   Map Join Operator
 condition map:
@@ -41,6 +56,10 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
+Map Join Vectorization:
+className: VectorMapJoinInnerBigOnlyLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
 outputColumnNames: _col0, _col1
 input vertices:
   1 Map 3
@@ -48,9 +67,21 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), 
(_col0 + _col1) (type: int)
   outputColumnNames: _col0, _col1, _col2
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2, 2, 12]
+  selectExpressions: LongColAddLongColumn(col 2, 
col 2) -> 12:long
   Statistics: Num rows: 19518 Data size: 156144 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: count(_col0), max(_col1), 
min(_col0), avg(_col2)
+Group By Vectorization:
+aggregators: VectorUDAFCount(col 2) -> bigint, 
VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, 
VectorUDAFAvgLong(col 12) -> struct
+className: VectorGroupByOperator
+vectorOutput: false
+native: false
+projectedOutputColumns: [0, 1, 2, 3]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 12) -> struct output type

[18/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index 5729237..fbd294e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULLNULL-850295959  -1887561756 NULL
NULLWMIgGA734hA4KQj2vD3fI6gX82220d  NULL
 NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
 NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
 NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -244,112 +244,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: c
-  Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int), cstring1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 40 Data size: 3560 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Map Join Operator
-condition map:
- Left Outer Join0 to 1
-keys:
-  0 _col1 (type: string)
-  1 _col0 (type: string)
-input vertices:
-  1 Map 4
-Statistics: Num rows: 80 Data size: 640 Basic stats: 
COMPLETE Column stats: COMPLETE
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-value expressions: _col0 (type: bigint)
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: cd
-  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE 
Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int)
-outputColumnNames: _col0
-Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 4 
-Map Operator Tree:

[22/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
index 69911f5..f3ffee8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
@@ -132,91 +132,17 @@ POSTHOOK: query: select * from t4
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t4
  A masked pattern was here 
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PREHOOK: query: explain vectorization only summary
 
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: a
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Semi Join 0 to 1
-  keys:
-0 key (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col0, _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: int), _col1 (type: 
string)
-sort order: ++
-Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: b
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: key (type: int)
-  outputColumnNames: _col0
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-keys: _col0 (type: int)
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Reducer 2 
-Execution mode: llap
-Reduce Operator Tree:
-  Select Operator
-expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
 
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
+select * from t1 a left semi join t2 b on

[21/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..1fde0a9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
@@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -65,6 +69,10 @@ STAGE PLANS:
   value expressions: _col2 (type: int)
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 2 
 Map Operator Tree:
 TableScan
@@ -89,6 +97,10 @@ STAGE PLANS:
   Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 3 
 Map Operator Tree:
 TableScan
@@ -109,10 +121,27 @@ STAGE PLANS:
 Statistics: Num rows: 50 Data size: 200 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 4 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0
@@ -123,6 +152,10 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
+  Map Join Vectorization:
+  className: VectorMapJoinInnerLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
   outputColumnNames: _col2, _col4
   input vertices:
 0 Map 1
@@ -130,9 +163,16 @@ STAGE PLANS:
   Select Operator
 expressions: _col4 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+

[32/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
index c7897f7..2789664 100644
--- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
@@ -8,12 +8,16 @@ LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -21,53 +25,62 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
 Tez
- A masked pattern was here 
   Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
   Vertices:
 Map 1 
 Map Operator Tree:
-TableScan
-  alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 1045942 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Filter Operator
-predicate: cdouble is null (type: boolean)
-Statistics: Num rows: 3114 Data size: 265164 Basic stats: 
COMPLETE Column stats: COMPLETE
-Select Operator
-  expressions: cstring1 (type: string), cint (type: int), 
cfloat (type: float), csmallint (type: smallint), 
COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string)
-  outputColumnNames: _col1, _col2, _col3, _col4, _col5
-  Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-key expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string)
-sort order: +
-Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-TopN Hash Memory Usage: 0.1
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [6, 2, 4, 1, 16]
+  selectExpressions: VectorCoalesce(columns [12, 6, 
13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, 
CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 
14:String, CastLongToString(col 1) -> 15:String) -> 16:string
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+

[10/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 76c8404..c2e1dfd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
  A masked pattern was here 
 11
 12
-PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -74,8 +78,19 @@ STAGE PLANS:
 Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 keys: KEY._col0 (type: string)
@@ -199,11 +214,15 @@ POSTHOOK: Output: default@srcpart_double_hour
 POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -234,6 +253,10 @@ STAGE PLANS:
   Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 4 
 Map Operator Tree:
 TableScan
@@ -269,6 +292,14 @@ STAGE PLANS:
 Target Vertex: Map 1
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: llap
 Reduce Operator Tree:
@@ -290,6 +321,13 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Reducer 3 
 Execution mode: vectorized, llap
+

[46/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 3a179a3..6167f48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
 
 import java.io.Serializable;
+import java.lang.annotation.Annotation;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -33,6 +34,7 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Pattern;
+import org.apache.commons.lang.ArrayUtils;
 
 import org.apache.calcite.util.Pair;
 import org.apache.commons.lang.ArrayUtils;
@@ -43,6 +45,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -62,7 +66,11 @@ import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOpe
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
@@ -73,6 +81,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -91,18 +100,36 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc;
+import org.apache.hadoop.hive.ql.plan.VectorizationCondition;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorLimitDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import

[07/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index ceaac4f..636463b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00-12-31 23:59:59.9   3652060 
23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 4092911..ae59b06 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -73,7 +73,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -116,26 +120,61 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc_string
   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: to_unix_timestamp(ctimestamp1) (type: 
bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), 
day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), 
minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]

[12/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index a14d515..d6c405e 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -134,6 +136,10 @@ WHERE  ((762 = cbigint)
 AND ((79.553 != cint)
  AND (cboolean2 != cboolean1)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -151,15 +157,33 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2601650 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprOrExpr(children: 
FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 
4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, 
FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: 
CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, 
FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val 
-1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, 
FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, 
FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: 
CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, 
FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) 
 -> boolean) -> boolean
 predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < 
cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> 
UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= 
-1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and 
(cboolean2 <> cboolean1))) (type: boolean)
 Statistics: Num rows: 5466 Data size: 1157380 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int), cdouble (type: double), 
csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
   outputColumnNames: cint, cdouble, csmallint, cfloat, 
ctinyint
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2, 5, 1, 4, 0]
   Statistics: Num rows: 5466 Data size: 1157380 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
+Group By Vectorization:
+aggregators: VectorUDAFAvgLong(col 2) -> 
struct, VectorUDAFSumDouble(col 5) -> double, 
VectorUDAFStdPopLong(col 2) -> struct, 
VectorUDAFStdSampLong(col 1) -> 
struct, VectorUDAFVarSampLong(col 2) 
->

[47/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index c288731..77b44fb 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.commons.lang.ArrayUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -63,6 +65,8 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class is common operator class for native vectorized map join.
  *
@@ -72,7 +76,43 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  */
 public abstract class VectorMapJoinCommonOperator extends MapJoinOperator 
implements VectorizationContextRegion {
   private static final long serialVersionUID = 1L;
-  private static final Logger LOG = 
LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName());
+
+  
//
+
+  private static final String CLASS_NAME = 
VectorMapJoinCommonOperator.class.getName();
+private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected abstract String getLoggingPrefix();
+
+  // For debug tracing: information about the map or reduce task, operator, 
operator class, etc.
+  protected transient String loggingPrefix;
+
+  protected String getLoggingPrefix(String className) {
+if (loggingPrefix == null) {
+  initLoggingPrefix(className);
+}
+return loggingPrefix;
+  }
+
+  protected void initLoggingPrefix(String className) {
+if (hconf == null) {
+  // Constructor time...
+  loggingPrefix = className;
+} else {
+  // Determine the name of our map or reduce task for debug tracing.
+  BaseWork work = Utilities.getMapWork(hconf);
+  if (work == null) {
+work = Utilities.getReduceWork(hconf);
+  }
+  loggingPrefix = className + " " + work.getName() + " " + getOperatorId();
+}
+  }
+
+  
//
+
+  protected VectorMapJoinDesc vectorDesc;
+
+  protected VectorMapJoinInfo vectorMapJoinInfo;
 
   // Whether this operator is an outer join.
   protected boolean isOuterJoin;
@@ -88,10 +128,10 @@ public abstract class VectorMapJoinCommonOperator extends 
MapJoinOperator implem
   // a mixture of input big table columns and new scratch columns.
   protected VectorizationContext vOutContext;
 
-  // The output column projection of the vectorized row batch.  And, the type 
names of the output
+  // The output column projection of the vectorized row batch.  And, the type 
infos of the output
   // columns.
   protected int[] outputProjection;
-  protected String[] outputTypeNames;
+  protected TypeInfo[] outputTypeInfos;
 
   // These are the vectorized batch expressions for filtering, key 
expressions, and value
   // expressions.
@@ -101,15 +141,17 @@ public abstract class VectorMapJoinCommonOperator extends 
MapJoinOperator implem
 
   // This is map of which vectorized row batch columns are the big table key 
columns.  Since
   // we may have key expressions that produce new scratch columns, we need a 
mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[] bigTableKeyColumnMap;
-  protected ArrayList bigTableKeyTypeNames;
+  protected String[] bigTableKeyColumnNames;
+  protected TypeInfo[] bigTableKeyTypeInfos;
 
   // Similarly, this is map of which vectorized row batch columns are the big 
table value columns.
   // Since we may have value expressions that produce new scratch columns, we 
need a mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[]

[14/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out 
b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index edb67f1..911a962 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0  val_0
 10 val_10
 100val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -84,8 +88,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -148,16 +167,20 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -187,8 +210,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -254,12 +292,16 @@ create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -279,36 +321,81 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats:

[49/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
index 4f5ba9a..061e396 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one 
VectorizedRowBatch to
@@ -35,9 +36,9 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
 // Order on sourceColumn.
-vectorColumnMapping.add(sourceColumn, outputColumn, typeName);
+vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo);
   }
 
   @Override
@@ -47,7 +48,7 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
 // Ordered columns are the source columns.
 sourceColumns = mapping.getOrderedColumns();
 outputColumns = mapping.getValueColumns();
-typeNames = mapping.getTypeNames();
+typeInfos = mapping.getTypeInfos();
 
 // Not needed anymore.
 vectorColumnMapping = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c8e0284..911aeb0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -262,8 +262,7 @@ public class VectorCopyRow {
 for (int i = 0; i < count; i++) {
   int inputColumn = columnMapping.getInputColumns()[i];
   int outputColumn = columnMapping.getOutputColumns()[i];
-  String typeName = columnMapping.getTypeNames()[i].toLowerCase();
-  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+  TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
   Type columnVectorType = 
VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
   CopyRow copyRowByValue = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 261246b..bfe22b0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -50,9 +51,8 @@ public class VectorFilterOperator extends FilterOperator {
   public VectorFilterOperator(CompilationOpContext ctx,
   VectorizationContext vContext, OperatorDesc conf) throws HiveException {
 this(ctx);
-ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate();
-conditionEvaluator = vContext.getVectorExpression(oldExpression, 
VectorExpressionDescriptor.Mode.FILTER);
 this.conf = (FilterDesc) conf;
+conditionEvaluator = ((VectorFilterDesc) 
this.conf.getVectorDesc()).getPredicateExpression();
   }
 
   /** Kryo ctor. */

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 2605203..fef7c2a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import

[33/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out 
b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index a510e38..ce05391 100644
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -155,6 +159,12 @@ STAGE PLANS:
   value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Map 3 
 Map Operator Tree:
 TableScan
@@ -175,16 +185,38 @@ STAGE PLANS:
 value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 
(type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: 
timestamp), _col9 (type: decimal(4,2))
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 aggregations: sum(VALUE._col0)
+Group By Vectorization:
+aggregators: VectorUDAFSumLong(col 0) -> bigint
+className: VectorGroupByOperator
+vectorOutput: true
+native: false
+projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -208,16 +240,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
  A masked pattern was here 
 -27832781952
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN

[20/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
index 06e30d8..cf90430 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
@@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1
 POSTHOOK: Lineage: myinput1.key SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: myinput1.value SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ]
 PREHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 PREHOOK: type: QUERY
 POSTHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,12 +75,20 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Map Join Operator
 condition map:
  Inner Join 0 to 1
 keys:
   0 key (type: int)
   1 value (type: int)
+Map Join Vectorization:
+className: VectorMapJoinOperator
+native: false
+nativeConditionsMet: hive.execution.engine tez IN 
[tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS 
true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash 
Join IS true, Small table vectorizes IS true
+nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false
 nullSafes: [true]
 outputColumnNames: _col0, _col1, _col5, _col6
 input vertices:
@@ -85,9 +97,16 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), _col5 
(type: int), _col6 (type: int)
   outputColumnNames: _col0, _col1, _col2, _col3
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -95,19 +114,42 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 2 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Reduce Output Operator
 key expressions: value (type: int)
 sort order: +
 Map-reduce partition columns: value (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native:

[26/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
index 23a977e..61702bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,26 +131,61 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_2
   Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: str1 (type: string), (CAST( str1 AS INTERVAL 
YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( 
str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS 
INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 
AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR 
TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MON
 TH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), 
(CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL 
YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 
>= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) 
(type: boolean)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+selectExpressions: LongColEqualLongColumn(col 6, col 
7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, 
LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 
2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 
7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 
7)(children:

[38/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

2016-10-13 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
index 437770d..1d4163c 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
@@ -87,25 +87,73 @@ POSTHOOK: Lineage: part_add_int_permute_select 
PARTITION(part=1).b SIMPLE [(valu
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num 
EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=2 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=2 width=16)
-
default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_add_int_permute_select
+  Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE 
Column stats: PARTIAL
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4]
+  Select Operator
+expressions: insert_num (type: int), part (type: int), a 
(type: int), b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 4, 1, 2]
+Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 4
+includeColumns: [0, 1, 2]
+dataColumns: insert_num:int, a:int, b:string, c:int
+partitionColumnCount: 1
+partitionColumns: part:int
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,part,a,b from part_add_int_permute_select
@@ -206,25 +254,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).c EXPRES
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d 
SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).insert_num EXPRESSION

1 2 >

1 - 100 of 105 matches

Mail list logo