hive git commit: HIVE-14940: MiniTezCliDriver - switch back to SQL metastore as default (Prasanth Jayachandran reviewed by Siddharth Seth)

2016-10-17 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master 3bab49a32 -> 4b7f373e5


HIVE-14940: MiniTezCliDriver - switch back to SQL metastore as default 
(Prasanth Jayachandran reviewed by Siddharth Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4b7f373e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4b7f373e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4b7f373e

Branch: refs/heads/master
Commit: 4b7f373e58a222cc2bd83ea28b916009d7ebf75b
Parents: 3bab49a
Author: Prasanth Jayachandran 
Authored: Mon Oct 17 21:06:56 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Mon Oct 17 21:06:56 2016 -0700

--
 .../test/resources/testconfiguration.properties |  5 +-
 .../hadoop/hive/cli/control/CliConfigs.java |  2 +-
 ...umn_names_with_leading_and_trailing_spaces.q | 15 -
 .../clientpositive/tez/explainanalyze_5.q.out   |  6 +-
 ...names_with_leading_and_trailing_spaces.q.out | 65 
 .../clientpositive/tez/unionDistinct_2.q.out|  6 +-
 .../tez/vector_join_part_col_char.q.out | 40 ++--
 7 files changed, 29 insertions(+), 110 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/4b7f373e/itests/src/test/resources/testconfiguration.properties
--
diff --git a/itests/src/test/resources/testconfiguration.properties 
b/itests/src/test/resources/testconfiguration.properties
index 97e310d..8aee7f5 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -26,7 +26,8 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
   ql_rewrite_gbtoidx_cbo_1.q,\
   ql_rewrite_gbtoidx_cbo_2.q,\
   rcfile_merge1.q,\
-  smb_mapjoin_8.q
+  smb_mapjoin_8.q,\
+  stats_filemetadata.q
 
 # NOTE: Add tests to minitez only if it is very
 # specific to tez and cannot be added to minillap.
@@ -50,8 +51,6 @@ minitez.query.files=explainuser_3.q,\
   explainanalyze_5.q,\
   hybridgrace_hashjoin_1.q,\
   hybridgrace_hashjoin_2.q,\
-  partition_column_names_with_leading_and_trailing_spaces.q,\
-  stats_filemetadata.q,\
   tez_union_with_udf.q
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4b7f373e/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
--
diff --git 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index 366c7b4..c5e027b 100644
--- 
a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ 
b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -129,7 +129,7 @@ public class CliConfigs {
 
 setHiveConfDir("data/conf/tez");
 setClusterType(MiniClusterType.tez);
-setMetastoreType(MetastoreType.hbase);
+setMetastoreType(MetastoreType.sql);
 setFsType(QTestUtil.FsType.hdfs);
   } catch (Exception e) {
 throw new RuntimeException("can't construct cliconfig", e);

http://git-wip-us.apache.org/repos/asf/hive/blob/4b7f373e/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
--
diff --git 
a/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
 
b/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
deleted file mode 100644
index f087130..0000000
--- 
a/ql/src/test/queries/clientpositive/partition_column_names_with_leading_and_trailing_spaces.q
+++ /dev/null
@@ -1,15 +0,0 @@
-set hive.mapred.mode=nonstrict;
-
-create table foo (d string);
-
-create table foo_p (d string) partitioned by (p string);
-
-insert into foo values ("1");
-
-insert into foo_p partition (p="a ") select foo.d from foo;
-
-insert into foo_p partition (p="a") select foo.d from foo;
-
-select * from foo_p where p="a ";
-
-select * from foo_p where p="a";

http://git-wip-us.apache.org/repos/asf/hive/blob/4b7f373e/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
--
diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out 
b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
index 7da21db..ff04e98 100644
--- a/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out
@@ -308,14 +308,14 @@ Stage-3
   Reducer 2
   File Output Operator [FS_8]
 table:{"name:":"default.acid_uami"}
-Select Operator 

hive git commit: HIVE-14991: JDBC result set iterator has useless DEBUG log (Prasanth Jayachandran reviewed by Vaibhav Gumashta)

2016-10-17 Thread prasanthj
Repository: hive
Updated Branches:
  refs/heads/master ad6ce0781 -> 3bab49a32


HIVE-14991: JDBC result set iterator has useless DEBUG log (Prasanth 
Jayachandran reviewed by Vaibhav Gumashta)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3bab49a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3bab49a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3bab49a3

Branch: refs/heads/master
Commit: 3bab49a3210c857d599cb5706706d2c2a30ee63e
Parents: ad6ce07
Author: Prasanth Jayachandran 
Authored: Mon Oct 17 14:09:27 2016 -0700
Committer: Prasanth Jayachandran 
Committed: Mon Oct 17 14:09:27 2016 -0700

--
 jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java | 5 -
 1 file changed, 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/3bab49a3/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
--
diff --git a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java 
b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
index 92fdbca..6a91381 100644
--- a/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
+++ b/jdbc/src/java/org/apache/hive/jdbc/HiveQueryResultSet.java
@@ -377,7 +377,6 @@ public class HiveQueryResultSet extends HiveBaseResultSet {
 fetchedRowsItr = fetchedRows.iterator();
   }
 
-  String rowStr = "";
   if (fetchedRowsItr.hasNext()) {
 row = fetchedRowsItr.next();
   } else {
@@ -385,10 +384,6 @@ public class HiveQueryResultSet extends HiveBaseResultSet {
   }
 
   rowsFetched++;
-  if (LOG.isDebugEnabled()) {
-LOG.debug("Fetched row string: " + rowStr);
-  }
-
 } catch (SQLException eS) {
   throw eS;
 } catch (Exception ex) {



[24/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index 9e591b8..bb6916b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -22,7 +22,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -30,10 +30,6 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -53,93 +49,40 @@ STAGE PLANS:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string)
   outputColumnNames: _col0
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [0]
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
-Reduce Sink Vectorization:
-className: VectorReduceSinkOperator
-native: false
-nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
-nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string), value (type: string)
   outputColumnNames: _col0, _col1
-  Select Vectorization:
-  className: 

hive git commit: HIVE-14899 : MM: support (or disable) alter table concatenate (Sergey Shelukhin)

2016-10-17 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/hive-14535 edaebb4b2 -> 9ecffcb1b


HIVE-14899 : MM: support (or disable) alter table concatenate (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9ecffcb1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9ecffcb1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9ecffcb1

Branch: refs/heads/hive-14535
Commit: 9ecffcb1bc44558d7959fa0289f27cc49f54d875
Parents: edaebb4
Author: Sergey Shelukhin 
Authored: Mon Oct 17 13:54:16 2016 -0700
Committer: Sergey Shelukhin 
Committed: Mon Oct 17 13:54:16 2016 -0700

--
 .../test/queries/clientnegative/mm_concatenate.q  |  5 +
 .../results/clientnegative/mm_concatenate.q.out   | 18 ++
 2 files changed, 23 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9ecffcb1/ql/src/test/queries/clientnegative/mm_concatenate.q
--
diff --git a/ql/src/test/queries/clientnegative/mm_concatenate.q 
b/ql/src/test/queries/clientnegative/mm_concatenate.q
new file mode 100644
index 0000000..c580767
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/mm_concatenate.q
@@ -0,0 +1,5 @@
+create table concat_mm (id int) stored as orc 
tblproperties('hivecommit'='true');
+
+insert into table concat_mm select key from src limit 10;
+
+alter table concat_mm concatenate;

http://git-wip-us.apache.org/repos/asf/hive/blob/9ecffcb1/ql/src/test/results/clientnegative/mm_concatenate.q.out
--
diff --git a/ql/src/test/results/clientnegative/mm_concatenate.q.out 
b/ql/src/test/results/clientnegative/mm_concatenate.q.out
new file mode 100644
index 0000000..0736409
--- /dev/null
+++ b/ql/src/test/results/clientnegative/mm_concatenate.q.out
@@ -0,0 +1,18 @@
+PREHOOK: query: create table concat_mm (id int) stored as orc 
tblproperties('hivecommit'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@concat_mm
+POSTHOOK: query: create table concat_mm (id int) stored as orc 
tblproperties('hivecommit'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@concat_mm
+PREHOOK: query: insert into table concat_mm select key from src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@concat_mm
+POSTHOOK: query: insert into table concat_mm select key from src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@concat_mm
+POSTHOOK: Lineage: concat_mm.id EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+FAILED: SemanticException org.apache.hadoop.hive.ql.parse.SemanticException: 
Merge is not supported for MM tables



[36/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
index 85116e7..964ce95 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
@@ -282,25 +282,73 @@ POSTHOOK: Lineage: 
part_change_various_various_boolean_to_bigint PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).c9 SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, 
type:boolean, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 insert_num boolean1boolean1boolean1boolean1
boolean1boolean1boolean1boolean1boolean1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   int1int1int1int1int1int1
int1int1int1int1int1bigint1 bigint1 bigint1 bigint1 bigint1 
bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=10 width=4)
-  
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52","_col53","_col54","_col55"]
-  TableScan [TS_0] (rows=10 width=475)
-
default@part_change_various_various_boolean_to_bigint,part_change_various_various_boolean_to_bigint,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","c1","c2","c3","c4","c5","c6","c7","c8","c9","c10","c11","c12","c13","c14","c15","c16","c17","c18","c19","c20","c21","c22","c23","c24","c25","c26","c27","c28","c29","c30","c31","c32","c33","c34","c35","c36","c37","c38","c39","c40","c41","c42","c43","c44","c45","c46","c47","c48","c49","c50","c51","c52","c53","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_change_various_various_boolean_to_bigint
+  Statistics: Num rows: 10 Data size: 4759 Basic stats: 
COMPLETE Column stats: PARTIAL
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 
50, 51, 52, 53, 54, 55]
+  Select Operator
+expressions: insert_num (type: int), part (type: int), c1 
(type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 
(type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: 

[29/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
index ff658d7..9a09b89 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out
@@ -1,6 +1,6 @@
 Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
@@ -8,65 +8,199 @@ where not key in
 order by key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
-explain
+explain vectorization expression
 select *
 from src
 where not key in
 (select key from src)
 order by key
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
-Reducer 4 <- Map 3 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Reducer 2 vectorized, llap
-  File Output Operator [FS_36]
-Select Operator [SEL_35] (rows=1 width=178)
-  Output:["_col0","_col1"]
-<-Map 1 [SIMPLE_EDGE] llap
-  SHUFFLE [RS_21]
-Select Operator [SEL_20] (rows=1 width=178)
-  Output:["_col0","_col1"]
-  Filter Operator [FIL_19] (rows=1 width=265)
-predicate:_col3 is null
-Map Join Operator [MAPJOIN_28] (rows=1219 width=265)
-  Conds:MAPJOIN_27._col0=RS_17._col0(Left 
Outer),Output:["_col0","_col1","_col3"]
-<-Map 5 [BROADCAST_EDGE] llap
-  BROADCAST [RS_17]
-PartitionCols:_col0
-Select Operator [SEL_12] (rows=500 width=87)
-  Output:["_col0"]
-  TableScan [TS_11] (rows=500 width=87)
-
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-<-Map Join Operator [MAPJOIN_27] (rows=500 width=178)
-Conds:(Inner),Output:["_col0","_col1"]
-  <-Reducer 4 [BROADCAST_EDGE] vectorized, llap
-BROADCAST [RS_34]
-  Select Operator [SEL_33] (rows=1 width=8)
-Filter Operator [FIL_32] (rows=1 width=8)
-  predicate:(_col0 = 0)
-  Group By Operator [GBY_31] (rows=1 width=8)
-
Output:["_col0"],aggregations:["count(VALUE._col0)"]
-  <-Map 3 [SIMPLE_EDGE] llap
-SHUFFLE [RS_6]
-  Group By Operator [GBY_5] (rows=1 width=8)
-Output:["_col0"],aggregations:["count()"]
-Select Operator [SEL_4] (rows=1 width=87)
-  Filter Operator [FIL_25] (rows=1 width=87)
-predicate:key is null
-TableScan [TS_2] (rows=500 width=87)
-  
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-  <-Select Operator [SEL_1] (rows=500 width=178)
-  Output:["_col0","_col1"]
-  TableScan [TS_0] (rows=500 width=178)
-
default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 4 <- Map 3 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: src
+  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+Map Join Operator
+  condition map:
+   Inner Join 0 to 1
+  keys:
+0 
+1 
+

[61/67] [abbrv] hive git commit: HIVE-14891: Parallelize TestHCatStorer (Vaibhav Gumashta reviewed by Siddharth Seth)

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/2cae7361/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
--
diff --git 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
index b6f8a6f..7d9271c 100644
--- 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
+++ 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java
@@ -18,98 +18,75 @@
  */
 package org.apache.hive.hcatalog.pig;
 
-import com.google.common.collect.ImmutableSet;
+import static org.junit.Assume.assumeTrue;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
 import java.io.IOException;
-import java.math.BigDecimal;
-import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
-import java.util.Properties;
 import java.util.Set;
 
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.CommandNeedRetryException;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.StorageFormats;
-import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-import org.apache.hive.hcatalog.HcatTestUtils;
-import org.apache.hive.hcatalog.mapreduce.HCatBaseTest;
-import org.apache.pig.EvalFunc;
-import org.apache.pig.ExecType;
-import org.apache.pig.PigException;
-import org.apache.pig.PigServer;
-import org.apache.pig.data.DataByteArray;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.impl.util.LogUtils;
-import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
-import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.junit.Assert.*;
-import static org.junit.Assume.assumeTrue;
-
 @RunWith(Parameterized.class)
-public class TestHCatStorer extends HCatBaseTest {
-  private static final Logger LOG = 
LoggerFactory.getLogger(TestHCatStorer.class);
-
-  private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
+public class TestHCatStorer extends AbstractHCatStorerTest {
+  static Logger LOG = LoggerFactory.getLogger(TestHCatStorer.class);
+  private static final Set allTests = new HashSet() {
+{
+  add("testBagNStruct");
+  add("testDateCharTypes");
+  add("testDynamicPartitioningMultiPartColsInDataNoSpec");
+  add("testDynamicPartitioningMultiPartColsInDataPartialSpec");
+  add("testDynamicPartitioningMultiPartColsNoDataInDataNoSpec");
+  add("testEmptyStore");
+  add("testMultiPartColsInData");
+  add("testNoAlias");
+  add("testPartColsInData");
+  add("testPartitionPublish");
+  add("testStoreFuncAllSimpleTypes");
+  add("testStoreFuncSimple");
+  add("testStoreInPartiitonedTbl");
+  add("testStoreMultiTables");
+  add("testStoreWithNoCtorArgs");
+  add("testStoreWithNoSchema");
+  add("testWriteChar");
+  add("testWriteDate");
+  add("testWriteDate2");
+  add("testWriteDate3");
+  add("testWriteDecimal");
+  add("testWriteDecimalX");
+  add("testWriteDecimalXY");
+  add("testWriteSmallint");
+  add("testWriteTimestamp");
+  add("testWriteTinyint");
+  add("testWriteVarchar");
+}
+  };
 
+  /**
+   * We're disabling these tests as they're going to be run from their
+   * individual TestHCatStorer classes. However, we're still leaving this
+   * test in case new file formats are added in the future.
+   */
   private static final Map DISABLED_STORAGE_FORMATS =
-new HashMap() {{
-  put(IOConstants.AVRO, new HashSet() {{
-add("testDateCharTypes"); // incorrect precision
-  // expected:<0  x   yyy 5.2[]> but was:<0   x   
yyy 5.2[0]>
-add("testWriteDecimalXY"); // incorrect precision
-  // expected:<1.2[]> but was:<1.2[0]>
-add("testWriteSmallint");  // doesn't have a notion of small, and 
saves the full value as an int, so no overflow
-  // expected: but was:<32768>
-add("testWriteTimestamp"); // does not support timestamp
-  // TypeInfoToSchema.createAvroPrimitive : 
UnsupportedOperationException
-add("testWriteTinyint"); // doesn't have a notion of tiny, and saves 
the full value as an int, so no overflow
-  // expected: but was:<300>
-  }});
-  put(IOConstants.PARQUETFILE, new HashSet() {{
-add("testBagNStruct");
-

[56/67] [abbrv] hive git commit: HIVE-14063: beeline to auto connect to the HiveServer2 (Vihang Karajgaonkar, reviewed by Aihua Xu, Szehon Ho, Sergio Peña)

2016-10-17 Thread sershe
HIVE-14063: beeline to auto connect to the HiveServer2 (Vihang Karajgaonkar, 
reviewed by Aihua Xu, Szehon Ho, Sergio Peña)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8029e11b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8029e11b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8029e11b

Branch: refs/heads/hive-14535
Commit: 8029e11b3c913e0aebc0a719a19b85d96db28a32
Parents: 9caf230
Author: Aihua Xu 
Authored: Mon Oct 17 13:06:30 2016 -0400
Committer: Aihua Xu 
Committed: Mon Oct 17 13:08:29 2016 -0400

--
 .../java/org/apache/hive/beeline/BeeLine.java   | 176 ++-
 .../java/org/apache/hive/beeline/Commands.java  |  52 +++--
 .../BeelineHS2ConnectionFileParseException.java |  30 +++
 .../hs2connection/HS2ConnectionFileParser.java  |  88 
 .../hs2connection/HS2ConnectionFileUtils.java   | 119 +++
 .../HiveSiteHS2ConnectionFileParser.java| 172 +++
 .../UserHS2ConnectionFileParser.java| 117 ++
 .../apache/hive/beeline/TestBeeLineHistory.java |   4 +-
 .../TestUserHS2ConnectionFileParser.java| 211 ++
 beeline/src/test/resources/hive-site.xml|   5 +
 .../test-hs2-conn-conf-kerberos-http.xml|  48 +
 .../test-hs2-conn-conf-kerberos-nossl.xml   |  32 +++
 .../test-hs2-conn-conf-kerberos-ssl.xml |  40 
 .../resources/test-hs2-connection-conf-list.xml |  36 
 .../test-hs2-connection-config-noauth.xml   |  28 +++
 .../test-hs2-connection-multi-conf-list.xml |  37 
 .../test-hs2-connection-zookeeper-config.xml|  32 +++
 .../TestBeelineConnectionUsingHiveSite.java | 109 ++
 .../TestBeelineWithHS2ConnectionFile.java   | 214 +++
 .../TestBeelineWithUserHs2ConnectionFile.java   | 129 +++
 20 files changed, 1612 insertions(+), 67 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8029e11b/beeline/src/java/org/apache/hive/beeline/BeeLine.java
--
diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java 
b/beeline/src/java/org/apache/hive/beeline/BeeLine.java
index 79922d2..fdbe0a2 100644
--- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java
+++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java
@@ -28,6 +28,7 @@ import java.io.Closeable;
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
@@ -93,8 +94,15 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hive.beeline.cli.CliOptionsProcessor;
 import org.apache.hive.common.util.ShutdownHookManager;
+import 
org.apache.hive.beeline.hs2connection.BeelineHS2ConnectionFileParseException;
+import org.apache.hive.beeline.hs2connection.HS2ConnectionFileUtils;
+import org.apache.hive.beeline.hs2connection.UserHS2ConnectionFileParser;
+import org.apache.hive.beeline.hs2connection.HS2ConnectionFileParser;
+import org.apache.hive.beeline.hs2connection.HiveSiteHS2ConnectionFileParser;
 import org.apache.thrift.transport.TTransportException;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.hive.jdbc.Utils;
 import org.apache.hive.jdbc.Utils.JdbcConnectionParams;
 
@@ -279,7 +287,6 @@ public class BeeLine implements Closeable {
   "org.apache.hadoop.hive.jdbc.HiveDriver",
   }));
 
-
   static {
 try {
   Class.forName("jline.console.ConsoleReader");
@@ -728,6 +735,46 @@ public class BeeLine implements Closeable {
   return -1;
 }
 
+boolean connSuccessful = connectUsingArgs(cl);
+// checks if default hs2 connection configuration file is present
+// and uses it to connect if found
+// no-op if the file is not present
+if(!connSuccessful && !exit) {
+  connSuccessful = defaultBeelineConnect();
+}
+
+int code = 0;
+if (cl.getOptionValues('e') != null) {
+  commands = Arrays.asList(cl.getOptionValues('e'));
+}
+
+if (!commands.isEmpty() && getOpts().getScriptFile() != null) {
+  error("The '-e' and '-f' options cannot be specified simultaneously");
+  return 1;
+} else if(!commands.isEmpty() && !connSuccessful) {
+  error("Cannot run commands specified using -e. No current connection");
+  return 1;
+}
+if (!commands.isEmpty()) {
+  for (Iterator i = commands.iterator(); i.hasNext();) {
+String command = i.next().toString();
+debug(loc("executing-command", command));
+if (!dispatch(command)) {
+  code++;
+}
+  }
+  exit = true; // execute and exit
+  

[63/67] [abbrv] hive git commit: HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

2016-10-17 Thread sershe
HIVE-14959: Fix DISTINCT with windowing when CBO is enabled/disabled (Jesus 
Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36e810fa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36e810fa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36e810fa

Branch: refs/heads/hive-14535
Commit: 36e810fa6fd1b353ee1d9907927aa472ae53dd48
Parents: 2cae736
Author: Jesus Camacho Rodriguez 
Authored: Thu Oct 13 22:04:37 2016 +0100
Committer: Jesus Camacho Rodriguez 
Committed: Mon Oct 17 20:20:54 2016 +0100

--
 .../hadoop/hive/ql/parse/CalcitePlanner.java|   6 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  37 +-
 .../clientpositive/distinct_windowing_no_cbo.q  |  63 ++
 ql/src/test/queries/clientpositive/windowing.q  |   6 +
 .../distinct_windowing_no_cbo.q.out | 796 +++
 .../llap/cbo_rp_windowing_2.q.out   |   5 +-
 .../results/clientpositive/llap/windowing.q.out | 110 ++-
 .../clientpositive/spark/windowing.q.out| 105 ++-
 8 files changed, 1103 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e6ab947..cf66ad9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2366,6 +2366,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
 }
   }
 
+  // Select DISTINCT + windowing; GBy handled by genSelectForWindowing
+  if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI &&
+  !qb.getAllWindowingSpecs().isEmpty()) {
+return null;
+  }
+
   List grpByAstExprs = getGroupByForClause(qbp, detsClauseName);
   HashMap aggregationTrees = 
qbp.getAggregationExprsForClause(detsClauseName);
   boolean hasGrpByAstExprs = (grpByAstExprs != null && 
!grpByAstExprs.isEmpty()) ? true : false;

http://git-wip-us.apache.org/repos/asf/hive/blob/36e810fa/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 747f387..9d58193 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3814,24 +3814,12 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   List result = new ArrayList(selectExprs == null ? 0
   : selectExprs.getChildCount());
   if (selectExprs != null) {
-HashMap windowingExprs = 
parseInfo.getWindowingExprsForClause(dest);
-
 for (int i = 0; i < selectExprs.getChildCount(); ++i) {
   if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == 
HiveParser.TOK_HINTLIST) {
 continue;
   }
   // table.column AS alias
   ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
-  /*
-   * If this is handled by Windowing then ignore it.
-   */
-  if (windowingExprs != null && 
windowingExprs.containsKey(grpbyExpr.toStringTree())) {
-if (!isCBOExecuted()) {
-  throw new SemanticException("SELECT DISTINCT not allowed in the 
presence of windowing"
-  + " functions when CBO is off");
-}
-continue;
-  }
   result.add(grpbyExpr);
 }
   }
@@ -9316,8 +9304,10 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 // Preserve operator before the GBY - we'll use it to resolve '*'
 Operator gbySource = curr;
 
-if (qbp.getAggregationExprsForClause(dest).size() != 0
-|| getGroupByForClause(qbp, dest).size() > 0) {
+if ((qbp.getAggregationExprsForClause(dest).size() != 0
+|| getGroupByForClause(qbp, dest).size() > 0)
+&& (qbp.getSelForClause(dest).getToken().getType() != 
HiveParser.TOK_SELECTDI
+  || qbp.getWindowingExprsForClause(dest) == null)) {
   // multiple distincts is not supported with skew in data
   if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
   qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
@@ -9401,12 

[51/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt 
McCline, reviewed by Gopal Vijayaraghavan)""

This reverts commit aed21d0b7c83058f77d796f63d699b5bd0d77de1.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/16d28b34
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/16d28b34
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/16d28b34

Branch: refs/heads/hive-14535
Commit: 16d28b343b76c998b8fdbd8a91bae07ac82357de
Parents: 8f886f2
Author: Matt McCline 
Authored: Fri Oct 14 15:12:02 2016 -0700
Committer: Matt McCline 
Committed: Fri Oct 14 15:12:02 2016 -0700

--
 .../org/apache/hive/common/util/DateUtils.java  |20 +
 .../ColumnArithmeticColumn.txt  | 7 +-
 .../ColumnArithmeticColumnDecimal.txt   | 5 +
 .../ColumnArithmeticColumnWithConvert.txt   |   173 -
 .../ColumnArithmeticScalar.txt  | 5 +
 .../ColumnArithmeticScalarDecimal.txt   | 5 +
 .../ColumnArithmeticScalarWithConvert.txt   |   150 -
 .../ExpressionTemplates/ColumnCompareColumn.txt | 5 +
 .../ExpressionTemplates/ColumnCompareScalar.txt | 5 +
 .../ExpressionTemplates/ColumnDivideColumn.txt  | 5 +
 .../ColumnDivideColumnDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnDivideScalar.txt  | 5 +
 .../ColumnDivideScalarDecimal.txt   | 5 +
 .../ExpressionTemplates/ColumnUnaryFunc.txt | 5 +
 .../ExpressionTemplates/ColumnUnaryMinus.txt| 5 +
 ...eColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...eColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 .../DateColumnArithmeticTimestampColumn.txt | 5 +
 .../DateColumnArithmeticTimestampScalar.txt | 5 +
 ...eScalarArithmeticIntervalYearMonthColumn.txt | 5 +
 .../DateScalarArithmeticTimestampColumn.txt | 5 +
 .../DecimalColumnUnaryFunc.txt  | 5 +
 .../ExpressionTemplates/FilterColumnBetween.txt | 7 +-
 .../FilterColumnCompareColumn.txt   | 9 +-
 .../FilterColumnCompareScalar.txt   | 9 +-
 .../FilterDecimalColumnBetween.txt  | 5 +
 .../FilterDecimalColumnCompareDecimalColumn.txt | 5 +
 .../FilterDecimalColumnCompareDecimalScalar.txt | 5 +
 .../FilterDecimalScalarCompareDecimalColumn.txt | 5 +
 ...erLongDoubleColumnCompareTimestampColumn.txt | 5 +
 ...erLongDoubleScalarCompareTimestampColumn.txt | 5 +
 .../FilterScalarCompareColumn.txt   | 9 +-
 .../FilterStringColumnBetween.txt   | 9 +-
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 8 +
 .../FilterTimestampColumnBetween.txt| 5 +
 ...erTimestampColumnCompareLongDoubleColumn.txt | 5 +
 ...erTimestampColumnCompareLongDoubleScalar.txt | 5 +
 ...terTimestampColumnCompareTimestampColumn.txt | 5 +
 ...terTimestampColumnCompareTimestampScalar.txt | 5 +
 ...erTimestampScalarCompareLongDoubleColumn.txt | 5 +
 ...terTimestampScalarCompareTimestampColumn.txt | 5 +
 .../FilterTruncStringColumnBetween.txt  |10 +-
 .../ExpressionTemplates/IfExprColumnScalar.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarColumn.txt  | 5 +
 .../ExpressionTemplates/IfExprScalarScalar.txt  | 5 +
 ...ervalYearMonthColumnArithmeticDateColumn.txt | 5 +
 ...ervalYearMonthColumnArithmeticDateScalar.txt | 5 +
 ...YearMonthColumnArithmeticTimestampColumn.txt | 5 +
 ...YearMonthColumnArithmeticTimestampScalar.txt | 5 +
 ...ervalYearMonthScalarArithmeticDateColumn.txt | 5 +
 ...YearMonthScalarArithmeticTimestampColumn.txt | 5 +
 .../LongDoubleColumnCompareTimestampColumn.txt  | 5 +
 .../LongDoubleColumnCompareTimestampScalar.txt  | 4 +
 .../LongDoubleScalarCompareTimestampColumn.txt  | 5 +
 .../ScalarArithmeticColumn.txt  | 5 +
 .../ScalarArithmeticColumnDecimal.txt   | 5 +
 .../ScalarArithmeticColumnWithConvert.txt   |   163 -
 .../ExpressionTemplates/ScalarCompareColumn.txt | 5 +
 .../ExpressionTemplates/ScalarDivideColumn.txt  | 5 +
 .../ScalarDivideColumnDecimal.txt   | 5 +
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 +
 ...gGroupColumnCompareStringGroupScalarBase.txt | 6 +
 ...tringGroupColumnCompareTruncStringScalar.txt | 7 +
 ...gGroupScalarCompareStringGroupColumnBase.txt | 7 +
 .../TimestampColumnArithmeticDateColumn.txt | 5 +
 .../TimestampColumnArithmeticDateScalar.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthColumn.txt | 5 +
 ...pColumnArithmeticIntervalYearMonthScalar.txt | 5 +
 ...TimestampColumnArithmeticTimestampColumn.txt | 5 +
 

[67/67] [abbrv] hive git commit: HIVE-14932 : handle bucketing for MM tables (Sergey Shelukhin)

2016-10-17 Thread sershe
HIVE-14932 : handle bucketing for MM tables (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/edaebb4b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/edaebb4b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/edaebb4b

Branch: refs/heads/hive-14535
Commit: edaebb4b29e99a0fd4abf50db910449f1c41d06a
Parents: af4ff37
Author: Sergey Shelukhin 
Authored: Mon Oct 17 13:41:20 2016 -0700
Committer: Sergey Shelukhin 
Committed: Mon Oct 17 13:41:20 2016 -0700

--
 .../apache/hadoop/hive/ql/exec/Utilities.java   | 201 +---
 .../hadoop/hive/ql/metadata/Partition.java  |   1 +
 .../hive/ql/optimizer/GenMapRedUtils.java   |   2 +-
 .../hadoop/hive/ql/optimizer/SamplePruner.java  |   7 +-
 .../apache/hadoop/hive/ql/plan/MoveWork.java|   2 +-
 ql/src/test/queries/clientpositive/mm_all.q |   7 +-
 ql/src/test/queries/clientpositive/mm_all2.q|  70 +++
 ql/src/test/queries/clientpositive/mm_current.q |  18 +-
 .../results/clientpositive/llap/mm_all.q.out| 204 +++-
 .../results/clientpositive/llap/mm_all2.q.out   | 503 +++
 .../clientpositive/llap/mm_current.q.out| 217 ++--
 11 files changed, 1094 insertions(+), 138 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index accb237..f1dad71 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1525,23 +1525,22 @@ public final class Utilities {
 int dpLevels = dpCtx == null ? 0 : dpCtx.getNumDPCols(),
 numBuckets = (conf != null && conf.getTable() != null)
   ? conf.getTable().getNumBuckets() : 0;
-return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, 
hconf);
+return removeTempOrDuplicateFiles(fs, fileStats, dpLevels, numBuckets, 
hconf, null);
   }
 
   public static List removeTempOrDuplicateFiles(FileSystem fs, 
FileStatus[] fileStats,
-  int dpLevels, int numBuckets, Configuration hconf) throws IOException {
+  int dpLevels, int numBuckets, Configuration hconf, Long mmWriteId) 
throws IOException {
 if (fileStats == null) {
   return null;
 }
-
 List result = new ArrayList();
 HashMap taskIDToFile = null;
 if (dpLevels > 0) {
   FileStatus parts[] = fileStats;
-
   for (int i = 0; i < parts.length; ++i) {
 assert parts[i].isDir() : "dynamic partition " + parts[i].getPath()
 + " is not a directory";
+Utilities.LOG14535.info("removeTempOrDuplicateFiles looking at DP " + 
parts[i].getPath());
 FileStatus[] items = fs.listStatus(parts[i].getPath());
 
 // remove empty directory since DP insert should not generate empty 
partitions.
@@ -1551,46 +1550,80 @@ public final class Utilities {
 LOG.error("Cannot delete empty directory " + parts[i].getPath());
 throw new IOException("Cannot delete empty directory " + 
parts[i].getPath());
   }
+  parts[i] = null;
+  continue;
 }
 
-taskIDToFile = removeTempOrDuplicateFiles(items, fs);
-// if the table is bucketed and enforce bucketing, we should check and 
generate all buckets
-if (numBuckets > 0 && taskIDToFile != null && 
!"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) {
-  // refresh the file list
-  items = fs.listStatus(parts[i].getPath());
-  // get the missing buckets and generate empty buckets
-  String taskID1 = taskIDToFile.keySet().iterator().next();
-  Path bucketPath = taskIDToFile.values().iterator().next().getPath();
-  Utilities.LOG14535.info("Bucket path " + bucketPath);
-  for (int j = 0; j < numBuckets; ++j) {
-addBucketFileIfMissing(result, taskIDToFile, taskID1, bucketPath, 
j);
+if (mmWriteId != null) {
+  Path mmDir = parts[i].getPath();
+  if 
(!mmDir.getName().equals(ValidWriteIds.getMmFilePrefix(mmWriteId))) {
+throw new IOException("Unexpected non-MM directory name " + mmDir);
   }
+  Utilities.LOG14535.info("removeTempOrDuplicateFiles processing files 
in MM directory " + mmDir);
 }
+taskIDToFile = removeTempOrDuplicateFilesNonMm(items, fs);
+
+// TODO: not clear why two if conditions are different. Preserve the 
existing logic for now.
+addBucketFileToResults(taskIDToFile, numBuckets, hconf, result);
   }
 

[45/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
deleted file mode 100644
index e0a6198..000
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.optimizer.physical;
-
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
-/**
- * Why a node did not vectorize.
- *
- */
-public class VectorizerReason  {
-
-  private static long serialVersionUID = 1L;
-
-  public static enum VectorizerNodeIssue {
-NONE,
-NODE_ISSUE,
-OPERATOR_ISSUE,
-EXPRESSION_ISSUE
-  }
-
-  private final VectorizerNodeIssue vectorizerNodeIssue;
-
-  private final Operator operator;
-
-  private final String expressionTitle;
-
-  private final String issue;
-
-  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
-  Operator operator, String expressionTitle, 
String issue) {
-this.vectorizerNodeIssue = vectorizerNodeIssue;
-this.operator = operator;
-this.expressionTitle = expressionTitle;
-this.issue = issue;
-  }
-
-  public static VectorizerReason createNodeIssue(String issue) {
-return new VectorizerReason(
-VectorizerNodeIssue.NODE_ISSUE,
-null,
-null,
-issue);
-  }
-
-  public static VectorizerReason createOperatorIssue(Operator operator,
-  String issue) {
-return new VectorizerReason(
-VectorizerNodeIssue.OPERATOR_ISSUE,
-operator,
-null,
-issue);
-  }
-
-  public static VectorizerReason createExpressionIssue(Operator operator,
-  String expressionTitle, String issue) {
-return new VectorizerReason(
-VectorizerNodeIssue.EXPRESSION_ISSUE,
-operator,
-expressionTitle,
-issue);
-  }
-
-  @Override
-  public VectorizerReason clone() {
-return new VectorizerReason(vectorizerNodeIssue, operator, 
expressionTitle, issue);
-  }
-
-  public VectorizerNodeIssue getVectorizerNodeIssue() {
-return vectorizerNodeIssue;
-  }
-
-  public Operator getOperator() {
-return operator;
-  }
-
-  public String getExpressionTitle() {
-return expressionTitle;
-  }
-
-  public String getIssue() {
-return issue;
-  }
-
-  @Override
-  public String toString() {
-String reason;
-switch (vectorizerNodeIssue) {
-case NODE_ISSUE:
-  reason = (issue == null ? "unknown" : issue);
-  break;
-case OPERATOR_ISSUE:
-  reason = (operator == null ? "Unknown" : operator.getType()) + " 
operator: " +
-   (issue == null ? "unknown" : issue);
-  break;
-case EXPRESSION_ISSUE:
-  reason = expressionTitle + " expression for " +
-  (operator == null ? "Unknown" : operator.getType()) + " operator: " +
-  (issue == null ? "unknown" : issue);
-  break;
-default:
-  reason = "Unknown " + vectorizerNodeIssue;
-}
-return reason;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 1f118dc..4a8ff15 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -27,27 +27,12 @@ import org.apache.hadoop.fs.Path;
  */
 
 public class ExplainConfiguration {
-
-  public enum VectorizationDetailLevel {
-
-SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1);
-
-public final int rank;
-VectorizationDetailLevel(int rank) {
-  this.rank = rank;
-}
-  };
-
   private boolean extended = false;
   private 

[19/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
index aa8ed4a..b926ab4b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
@@ -1,82 +1,31 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, nvl(cdouble, 
100) as n
+PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, 
nvl(cdouble, 100) as n
+POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 73400 Basic stats: 
COMPLETE Column stats: COMPLETE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
-  Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
-predicate: cdouble is null (type: boolean)
-Statistics: Num rows: 3114 Data size: 18608 Basic stats: 
COMPLETE Column stats: COMPLETE
-Select Operator
-  expressions: null (type: double), 100.0 (type: double)
-  outputColumnNames: _col0, _col1
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [12, 13]
-  selectExpressions: ConstantVectorExpression(val 
null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double
-  Statistics: Num rows: 3114 Data size: 24920 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Limit
-Number of rows: 10
-Limit Vectorization:
-className: VectorLimitOperator
-native: true
-Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
-File Output Operator
-  compressed: false
-  File Sink Vectorization:
-  className: VectorFileSinkOperator
-  native: false
-  Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
-
   Stage: Stage-0
 Fetch Operator
   limit: 10
   Processor Tree:
-ListSink
+TableScan
+  alias: alltypesorc
+  Filter Operator
+predicate: cdouble is null (type: boolean)
+Select Operator
+  expressions: null (type: double), 100.0 (type: double)
+  outputColumnNames: _col0, _col1
+  Limit
+Number of rows: 10
+ListSink
 
 PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
@@ -102,76 +51,30 @@ NULL   100.0
 NULL   100.0
 NULL   100.0
 NULL   100.0
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cfloat, nvl(cfloat, 
1) as n
+PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
 FROM 

[18/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index fbd294e..5729237 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULLNULL-850295959  -1887561756 NULL
NULLWMIgGA734hA4KQj2vD3fI6gX82220d  NULL
 NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
 NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
 NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
-PREHOOK: query: explain vectorization detail formatted
+PREHOOK: query: explain
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail formatted
+POSTHOOK: query: explain
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -244,7 +244,112 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
  A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: c
+  Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Select Operator
+expressions: cint (type: int), cstring1 (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
+Map Join Operator
+  condition map:
+   Left Outer Join0 to 1
+  keys:
+0 _col0 (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col1
+  input vertices:
+1 Map 3
+  Statistics: Num rows: 40 Data size: 3560 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Map Join Operator
+condition map:
+ Left Outer Join0 to 1
+keys:
+  0 _col1 (type: string)
+  1 _col0 (type: string)
+input vertices:
+  1 Map 4
+Statistics: Num rows: 80 Data size: 640 Basic stats: 
COMPLETE Column stats: COMPLETE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Reduce Output Operator
+sort order: 
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+value expressions: _col0 (type: bigint)
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map 3 
+Map Operator Tree:
+TableScan
+  alias: cd
+  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE 
Column stats: COMPLETE
+  Select Operator
+expressions: cint (type: int)
+outputColumnNames: _col0
+Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map 4 
+Map Operator Tree:

[03/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
index d1319b8..b311c49 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
@@ -62,16 +62,12 @@ POSTHOOK: Input: default@orc_table_2
 4  FOUR
 NULL   
 NULL   
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -87,38 +83,15 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 550 Basic stats: 
COMPLETE Column stats: NONE
 Spark HashTable Sink Operator
-  Spark Hash Table Sink Vectorization:
-  className: VectorSparkHashTableSinkOperator
-  native: true
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Execution mode: vectorized
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: true
-usesVectorUDFAdaptor: false
-vectorized: true
-rowBatchContext:
-dataColumnCount: 2
-includeColumns: [0, 1]
-dataColumns: c:int, v2:string
-partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -131,16 +104,9 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 544 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -148,45 +114,18 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
-  Map Join Vectorization:
-  bigTableKeyColumns: [1]
-  bigTableOuterKeyMapping: 1 -> 2
-  bigTableRetainedColumns: [0, 1, 2]
-  bigTableValueColumns: [0, 1]
-  className: VectorMapJoinOuterLongOperator
-  native: true
-  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
-  projectedOutputColumns: [0, 1, 2, 3]
-  smallTableMapping: [3]
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 

[30/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
index 5d28d22..882e83d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
@@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN VECTORIZATION EXPRESSION
+EXPLAIN
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -51,17 +51,13 @@ POSTHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN VECTORIZATION EXPRESSION
+EXPLAIN
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
   round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8)
 FROM decimal_tbl_1_orc ORDER BY d
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -79,61 +75,26 @@ STAGE PLANS:
 TableScan
   alias: decimal_tbl_1_orc
   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0]
   Select Operator
 expressions: round(dec) (type: decimal(21,0)), round(dec, 
0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) 
(type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) 
(type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) 
(type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) 
(type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) 
(type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13]
-selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 
1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) 
-> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 
0, decimalPlaces 3) -> 5:decimal(24,3), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 
6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -3) -> 8:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 
9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -6) -> 11:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7)
  -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -8) -> 13:decimal(21,0)
 Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: decimal(21,0))
   sort order: +
-  Reduce Sink Vectorization:
-  className: VectorReduceSinkOperator
-  native: false
-  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
-  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: decimal(21,0)), _col2 
(type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: 
decimal(24,3)), _col5 (type: 

[23/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
index c21da5f..16603c7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
@@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5  
-PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, 
c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, 
c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index 25066be..b9ffa34 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization 
+PREHOOK: query: explain 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -7,7 +7,7 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization 
+POSTHOOK: query: explain 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -16,10 +16,6 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -72,14 +68,6 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
@@ -96,14 +84,6 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: true
-usesVectorUDFAdaptor: false
-vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -120,23 +100,8 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: true
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true

[36/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
index 964ce95..85116e7 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_primitive.q.out
@@ -282,73 +282,25 @@ POSTHOOK: Lineage: 
part_change_various_various_boolean_to_bigint PARTITION(part=
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).c9 SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:boolean1, 
type:boolean, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_boolean_to_bigint 
PARTITION(part=1).insert_num SIMPLE 
[(schema_evolution_data)schema_evolution_data.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 insert_num boolean1boolean1boolean1boolean1
boolean1boolean1boolean1boolean1boolean1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1tinyint1tinyint1tinyint1tinyint1
tinyint1smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   smallint1   smallint1   smallint1   
smallint1   smallint1   int1int1int1int1int1int1
int1int1int1int1int1bigint1 bigint1 bigint1 bigint1 bigint1 
bigint1 bigint1 bigint1 bigint1 bigint1 bigint1 _c54
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30,c31,c32,c33,c34,c35,c36,c37,c38,c39,c40,c41,c42,c43,c44,c45,c46,c47,c48,c49,c50,c51,c52,c53,b
 from part_change_various_various_boolean_to_bigint
 POSTHOOK: type: QUERY
 Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+Plan optimized by CBO.
 
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: part_change_various_various_boolean_to_bigint
-  Statistics: Num rows: 10 Data size: 4759 Basic stats: 
COMPLETE Column stats: PARTIAL
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 
50, 51, 52, 53, 54, 55]
-  Select Operator
-expressions: insert_num (type: int), part (type: int), c1 
(type: boolean), c2 (type: boolean), c3 (type: boolean), c4 (type: boolean), c5 
(type: boolean), c6 (type: boolean), c7 (type: boolean), c8 (type: boolean), c9 
(type: boolean), c10 (type: tinyint), c11 (type: tinyint), c12 (type: tinyint), 
c13 (type: tinyint), c14 (type: tinyint), c15 (type: tinyint), c16 (type: 
tinyint), c17 (type: tinyint), c18 (type: tinyint), c19 (type: tinyint), c20 
(type: tinyint), c21 (type: smallint), c22 (type: smallint), c23 (type: 
smallint), c24 (type: smallint), c25 (type: smallint), c26 (type: smallint), 
c27 (type: smallint), c28 (type: smallint), c29 (type: smallint), c30 (type: 
smallint), c31 (type: smallint), c32 (type: int), c33 (type: int), c34 (type: 
int), c35 (type: int), c36 (type: int), c37 (type: int), c38 (type: int), c39 
(type: int), c40 (type: int), c41 (type: int), c42 (type: int), c43 (type: 
bigint), c44 (type: bigint), c45 (type: bigint), c46 (type: bigint), c4
 7 (type: bigint), c48 (type: bigint), c49 (type: bigint), c50 (type: bigint), 
c51 (type: bigint), c52 (type: bigint), c53 (type: bigint), b (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, 
_col25, _col26, _col27, _col28, 

[27/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index e7d1963..d8003ba 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
 PREHOOK: query: -- constants/cast from string
-explain vectorization expression
+explain
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -47,17 +47,13 @@ select
 from vector_interval_1 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- constants/cast from string
-explain vectorization expression
+explain
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
   interval '1 2:3:4' day to second, interval_day_time(str2)
 from vector_interval_1 order by str1
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -75,62 +71,26 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_1
   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3]
   Select Operator
 expressions: str1 (type: string), CAST( str1 AS INTERVAL 
YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO 
SECOND) (type: interval_day_time)
 outputColumnNames: _col0, _col2, _col4
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [2, 4, 5]
-selectExpressions: CastStringToIntervalYearMonth(col 
2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 
5:interval_day_time
 Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
-  Reduce Sink Vectorization:
-  className: VectorReduceSinkOperator
-  native: false
-  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
-  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col2 (type: interval_year_month), 
_col4 (type: interval_day_time)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: 
interval_year_month), VALUE._col0 (type: interval_year_month), 1 
02:03:04.0 (type: interval_day_time), VALUE._col1 (type: 
interval_day_time)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 3, 1, 4, 2]
-selectExpressions: ConstantVectorExpression(val 14) -> 
3:long, 

[46/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 6167f48..3a179a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
 
 import java.io.Serializable;
-import java.lang.annotation.Annotation;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -34,7 +33,6 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Pattern;
-import org.apache.commons.lang.ArrayUtils;
 
 import org.apache.calcite.util.Pair;
 import org.apache.commons.lang.ArrayUtils;
@@ -45,8 +43,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
-import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -66,11 +62,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOpe
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator;
-import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
-import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
@@ -81,7 +73,6 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -100,36 +91,18 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
-import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
-import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc;
-import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc;
-import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
-import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc;
-import org.apache.hadoop.hive.ql.plan.VectorizationCondition;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
-import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
-import org.apache.hadoop.hive.ql.plan.VectorLimitDesc;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import 

[14/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out 
b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index 911a962..edb67f1 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -45,20 +45,16 @@ POSTHOOK: Input: default@src
 0  val_0
 10 val_10
 100val_100
-PREHOOK: query: explain vectorization select key, value
+PREHOOK: query: explain select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization select key, value
+POSTHOOK: query: explain select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -88,23 +84,8 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -167,20 +148,16 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain vectorization select key, value
+PREHOOK: query: explain select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization select key, value
+POSTHOOK: query: explain select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -210,23 +187,8 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -292,16 +254,12 @@ create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -321,81 +279,36 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: 

[10/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index c2e1dfd..76c8404 100644
--- 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -34,14 +34,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
  A masked pattern was here 
 11
 12
-PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
+PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
+POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -78,19 +74,8 @@ STAGE PLANS:
 Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
-Map Vectorization:
-enabled: false
-enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
-inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Group By Operator
 keys: KEY._col0 (type: string)
@@ -214,15 +199,11 @@ POSTHOOK: Output: default@srcpart_double_hour
 POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
-EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -253,10 +234,6 @@ STAGE PLANS:
   Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
-Map Vectorization:
-enabled: false
-enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
-inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 4 
 Map Operator Tree:
 TableScan
@@ -292,14 +269,6 @@ STAGE PLANS:
 Target Vertex: Map 1
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: llap
 Reduce Operator Tree:
@@ -321,13 +290,6 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Reducer 3 
 Execution mode: vectorized, llap
-   

[47/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index 77b44fb..c288731 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
-
 import org.apache.commons.lang.ArrayUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -57,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -65,8 +63,6 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
-import com.google.common.base.Preconditions;
-
 /**
  * This class is common operator class for native vectorized map join.
  *
@@ -76,43 +72,7 @@ import com.google.common.base.Preconditions;
  */
 public abstract class VectorMapJoinCommonOperator extends MapJoinOperator 
implements VectorizationContextRegion {
   private static final long serialVersionUID = 1L;
-
-  
//
-
-  private static final String CLASS_NAME = 
VectorMapJoinCommonOperator.class.getName();
-private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
-
-  protected abstract String getLoggingPrefix();
-
-  // For debug tracing: information about the map or reduce task, operator, 
operator class, etc.
-  protected transient String loggingPrefix;
-
-  protected String getLoggingPrefix(String className) {
-if (loggingPrefix == null) {
-  initLoggingPrefix(className);
-}
-return loggingPrefix;
-  }
-
-  protected void initLoggingPrefix(String className) {
-if (hconf == null) {
-  // Constructor time...
-  loggingPrefix = className;
-} else {
-  // Determine the name of our map or reduce task for debug tracing.
-  BaseWork work = Utilities.getMapWork(hconf);
-  if (work == null) {
-work = Utilities.getReduceWork(hconf);
-  }
-  loggingPrefix = className + " " + work.getName() + " " + getOperatorId();
-}
-  }
-
-  
//
-
-  protected VectorMapJoinDesc vectorDesc;
-
-  protected VectorMapJoinInfo vectorMapJoinInfo;
+  private static final Logger LOG = 
LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName());
 
   // Whether this operator is an outer join.
   protected boolean isOuterJoin;
@@ -128,10 +88,10 @@ private static final Logger LOG = 
LoggerFactory.getLogger(CLASS_NAME);
   // a mixture of input big table columns and new scratch columns.
   protected VectorizationContext vOutContext;
 
-  // The output column projection of the vectorized row batch.  And, the type 
infos of the output
+  // The output column projection of the vectorized row batch.  And, the type 
names of the output
   // columns.
   protected int[] outputProjection;
-  protected TypeInfo[] outputTypeInfos;
+  protected String[] outputTypeNames;
 
   // These are the vectorized batch expressions for filtering, key 
expressions, and value
   // expressions.
@@ -141,17 +101,15 @@ private static final Logger LOG = 
LoggerFactory.getLogger(CLASS_NAME);
 
   // This is map of which vectorized row batch columns are the big table key 
columns.  Since
   // we may have key expressions that produce new scratch columns, we need a 
mapping.
-  // And, we have their type infos.
+  // And, we have their type names.
   protected int[] bigTableKeyColumnMap;
-  protected String[] bigTableKeyColumnNames;
-  protected TypeInfo[] bigTableKeyTypeInfos;
+  protected ArrayList bigTableKeyTypeNames;
 
   // Similarly, this is map of which vectorized row batch columns are the big 
table value columns.
   // Since we may have value expressions that produce new scratch columns, we 
need a mapping.
-  // And, we have their type infos.
+  // And, we have their type names.
   protected int[] bigTableValueColumnMap;
-  protected String[] 

[28/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index d9e701a..ca07200 100644
--- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -32,16 +32,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -59,23 +55,12 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -83,13 +68,6 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-Map Join Vectorization:
-bigTableKeyColumns: [0]
-bigTableRetainedColumns: [0]
-className: VectorMapJoinInnerBigOnlyLongOperator
-native: true
-nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
-projectedOutputColumns: [0]
 outputColumnNames: _col1
 input vertices:
   1 Map 2
@@ -97,16 +75,9 @@ STAGE PLANS:
 Select Operator
   expressions: _col1 (type: int)
   outputColumnNames: _col0
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
-File Sink Vectorization:
-className: VectorFileSinkOperator
-native: false
 Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -114,66 +85,25 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-

[44/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 78b2e8b..ebe613e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.TableSample;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde.serdeConstants;
 
 
@@ -397,29 +396,4 @@ public class TableScanDesc extends AbstractOperatorDesc {
 return opProps;
   }
 
-  public class TableScanOperatorExplainVectorization extends 
OperatorExplainVectorization {
-
-private final TableScanDesc tableScanDesc;
-private final VectorTableScanDesc vectorTableScanDesc;
-
-public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, 
VectorDesc vectorDesc) {
-  // Native vectorization supported.
-  super(vectorDesc, true);
-  this.tableScanDesc = tableScanDesc;
-  vectorTableScanDesc = (VectorTableScanDesc) vectorDesc;
-}
-
-@Explain(vectorization = Vectorization.EXPRESSION, displayName = 
"projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-public String getProjectedOutputColumns() {
-  return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns());
-}
-  }
-
-  @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan 
Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
-  public TableScanOperatorExplainVectorization getTableScanVectorization() {
-if (vectorDesc == null) {
-  return null;
-}
-return new TableScanOperatorExplainVectorization(this, vectorDesc);
-  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
index a037ea3..7a70e6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
+
 
 /**
  * TezWork. This class encapsulates all the work objects that can be executed
@@ -49,8 +49,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
  *
  */
 @SuppressWarnings("serial")
-@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
-vectorization = Vectorization.SUMMARY_PATH)
+@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
 public class TezWork extends AbstractOperatorDesc {
 
   public enum VertexType {
@@ -108,8 +107,7 @@ public class TezWork extends AbstractOperatorDesc {
   /**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
-  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED },
-  vectorization = Vectorization.SUMMARY_PATH)
+  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED })
   public Map getWorkMap() {
 Map result = new LinkedHashMap();
 for (BaseWork w: getAllWork()) {
@@ -308,8 +306,7 @@ public class TezWork extends AbstractOperatorDesc {
 }
   }
 
-  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
-  vectorization = Vectorization.SUMMARY_PATH)
+  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
   public Map getDependencyMap() {
 Map result = new LinkedHashMap();
 for (Map.Entry entry: 
invertedWorkGraph.entrySet()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
deleted file mode 100644
index 2e11321..000
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
+++ /dev/null
@@ 

[44/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index ebe613e..78b2e8b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.TableSample;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 import org.apache.hadoop.hive.serde.serdeConstants;
 
 
@@ -396,4 +397,29 @@ public class TableScanDesc extends AbstractOperatorDesc {
 return opProps;
   }
 
+  public class TableScanOperatorExplainVectorization extends 
OperatorExplainVectorization {
+
+private final TableScanDesc tableScanDesc;
+private final VectorTableScanDesc vectorTableScanDesc;
+
+public TableScanOperatorExplainVectorization(TableScanDesc tableScanDesc, 
VectorDesc vectorDesc) {
+  // Native vectorization supported.
+  super(vectorDesc, true);
+  this.tableScanDesc = tableScanDesc;
+  vectorTableScanDesc = (VectorTableScanDesc) vectorDesc;
+}
+
+@Explain(vectorization = Vectorization.EXPRESSION, displayName = 
"projectedOutputColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+public String getProjectedOutputColumns() {
+  return Arrays.toString(vectorTableScanDesc.getProjectedOutputColumns());
+}
+  }
+
+  @Explain(vectorization = Vectorization.OPERATOR, displayName = "TableScan 
Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public TableScanOperatorExplainVectorization getTableScanVectorization() {
+if (vectorDesc == null) {
+  return null;
+}
+return new TableScanOperatorExplainVectorization(this, vectorDesc);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
index 7a70e6b..a037ea3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
 import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-
+import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
 
 /**
  * TezWork. This class encapsulates all the work objects that can be executed
@@ -49,7 +49,8 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level;
  *
  */
 @SuppressWarnings("serial")
-@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+@Explain(displayName = "Tez", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+vectorization = Vectorization.SUMMARY_PATH)
 public class TezWork extends AbstractOperatorDesc {
 
   public enum VertexType {
@@ -107,7 +108,8 @@ public class TezWork extends AbstractOperatorDesc {
   /**
* getWorkMap returns a map of "vertex name" to BaseWork
*/
-  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED })
+  @Explain(displayName = "Vertices", explainLevels = { Level.USER, 
Level.DEFAULT, Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
   public Map getWorkMap() {
 Map result = new LinkedHashMap();
 for (BaseWork w: getAllWork()) {
@@ -306,7 +308,8 @@ public class TezWork extends AbstractOperatorDesc {
 }
   }
 
-  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED })
+  @Explain(displayName = "Edges", explainLevels = { Level.USER, Level.DEFAULT, 
Level.EXTENDED },
+  vectorization = Vectorization.SUMMARY_PATH)
   public Map getDependencyMap() {
 Map result = new LinkedHashMap();
 for (Map.Entry entry: 
invertedWorkGraph.entrySet()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
new file mode 100644
index 000..2e11321
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorAppMasterEventDesc.java
@@ -0,0 +1,35 @@

[45/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
new file mode 100644
index 000..e0a6198
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/VectorizerReason.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+/**
+ * Why a node did not vectorize.
+ *
+ */
+public class VectorizerReason  {
+
+  private static long serialVersionUID = 1L;
+
+  public static enum VectorizerNodeIssue {
+NONE,
+NODE_ISSUE,
+OPERATOR_ISSUE,
+EXPRESSION_ISSUE
+  }
+
+  private final VectorizerNodeIssue vectorizerNodeIssue;
+
+  private final Operator operator;
+
+  private final String expressionTitle;
+
+  private final String issue;
+
+  private VectorizerReason(VectorizerNodeIssue vectorizerNodeIssue,
+  Operator operator, String expressionTitle, 
String issue) {
+this.vectorizerNodeIssue = vectorizerNodeIssue;
+this.operator = operator;
+this.expressionTitle = expressionTitle;
+this.issue = issue;
+  }
+
+  public static VectorizerReason createNodeIssue(String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.NODE_ISSUE,
+null,
+null,
+issue);
+  }
+
+  public static VectorizerReason createOperatorIssue(Operator operator,
+  String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.OPERATOR_ISSUE,
+operator,
+null,
+issue);
+  }
+
+  public static VectorizerReason createExpressionIssue(Operator operator,
+  String expressionTitle, String issue) {
+return new VectorizerReason(
+VectorizerNodeIssue.EXPRESSION_ISSUE,
+operator,
+expressionTitle,
+issue);
+  }
+
+  @Override
+  public VectorizerReason clone() {
+return new VectorizerReason(vectorizerNodeIssue, operator, 
expressionTitle, issue);
+  }
+
+  public VectorizerNodeIssue getVectorizerNodeIssue() {
+return vectorizerNodeIssue;
+  }
+
+  public Operator getOperator() {
+return operator;
+  }
+
+  public String getExpressionTitle() {
+return expressionTitle;
+  }
+
+  public String getIssue() {
+return issue;
+  }
+
+  @Override
+  public String toString() {
+String reason;
+switch (vectorizerNodeIssue) {
+case NODE_ISSUE:
+  reason = (issue == null ? "unknown" : issue);
+  break;
+case OPERATOR_ISSUE:
+  reason = (operator == null ? "Unknown" : operator.getType()) + " 
operator: " +
+   (issue == null ? "unknown" : issue);
+  break;
+case EXPRESSION_ISSUE:
+  reason = expressionTitle + " expression for " +
+  (operator == null ? "Unknown" : operator.getType()) + " operator: " +
+  (issue == null ? "unknown" : issue);
+  break;
+default:
+  reason = "Unknown " + vectorizerNodeIssue;
+}
+return reason;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
index 4a8ff15..1f118dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java
@@ -27,12 +27,27 @@ import org.apache.hadoop.fs.Path;
  */
 
 public class ExplainConfiguration {
+
+  public enum VectorizationDetailLevel {
+
+SUMMARY(4), OPERATOR(3), EXPRESSION(2), DETAIL(1);
+
+public final int rank;
+VectorizationDetailLevel(int rank) {
+  this.rank = rank;
+}
+  };
+
   private boolean extended = false;
   private boolean 

[37/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
index b137894..85f858b 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_table.q.out
@@ -87,25 +87,72 @@ POSTHOOK: Lineage: table_add_int_permute_select.b SIMPLE 
[(values__tmp__table__1
 POSTHOOK: Lineage: table_add_int_permute_select.c EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_permute_select.insert_num EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,a,b from table_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=5 width=20)
-  Output:["_col0","_col1","_col2"]
-  TableScan [TS_0] (rows=5 width=20)
-
default@table_add_int_permute_select,table_add_int_permute_select,Tbl:COMPLETE,Col:NONE,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: table_add_int_permute_select
+  Statistics: Num rows: 5 Data size: 101 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
+  Select Operator
+expressions: insert_num (type: int), a (type: int), b 
(type: string)
+outputColumnNames: _col0, _col1, _col2
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2]
+Statistics: Num rows: 5 Data size: 101 Basic stats: 
COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 5 Data size: 101 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 4
+includeColumns: [0, 1, 2]
+dataColumns: insert_num:int, a:int, b:string, c:int
+partitionColumnCount: 0
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,a,b from table_add_int_permute_select
@@ -212,25 +259,72 @@ POSTHOOK: Lineage: table_add_int_string_permute_select.c 
EXPRESSION [(values__tm
 POSTHOOK: Lineage: table_add_int_string_permute_select.d SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: table_add_int_string_permute_select.insert_num EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3   _col4
-PREHOOK: query: explain

[66/67] [abbrv] hive git commit: HIVE-14932 : handle bucketing for MM tables (Sergey Shelukhin)

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/edaebb4b/ql/src/test/results/clientpositive/llap/mm_current.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/mm_current.q.out 
b/ql/src/test/results/clientpositive/llap/mm_current.q.out
index 5b51fa3..1bbef9d 100644
--- a/ql/src/test/results/clientpositive/llap/mm_current.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_current.q.out
@@ -28,48 +28,205 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@intermediate@p=456
 POSTHOOK: Lineage: intermediate PARTITION(p=456).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: create table ctas1_mm tblproperties ('hivecommit'='true') as
-  select * from intermediate union all select * from intermediate
-PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: query: insert into table intermediate partition(p='457') select 
distinct key from src where key >= 100 order by key asc limit 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@intermediate@p=457
+POSTHOOK: query: insert into table intermediate partition(p='457') select 
distinct key from src where key >= 100 order by key asc limit 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@intermediate@p=457
+POSTHOOK: Lineage: intermediate PARTITION(p=457).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: drop table bucket1_mm
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket1_mm
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 
int)
+clustered by (key) sorted by (key) into 2 buckets
+tblproperties('hivecommit'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@bucket1_mm
+POSTHOOK: query: create table bucket1_mm(key int, id int) partitioned by (key2 
int)
+clustered by (key) sorted by (key) into 2 buckets
+tblproperties('hivecommit'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@bucket1_mm
+PREHOOK: query: insert into table bucket1_mm partition (key2)
+select key + 1, key, key - 1 from intermediate
+union all 
+select key - 1, key, key + 1 from intermediate
+PREHOOK: type: QUERY
 PREHOOK: Input: default@intermediate
 PREHOOK: Input: default@intermediate@p=455
 PREHOOK: Input: default@intermediate@p=456
-PREHOOK: Output: database:default
-PREHOOK: Output: default@ctas1_mm
-POSTHOOK: query: create table ctas1_mm tblproperties ('hivecommit'='true') as
-  select * from intermediate union all select * from intermediate
-POSTHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@intermediate@p=457
+PREHOOK: Output: default@bucket1_mm
+POSTHOOK: query: insert into table bucket1_mm partition (key2)
+select key + 1, key, key - 1 from intermediate
+union all 
+select key - 1, key, key + 1 from intermediate
+POSTHOOK: type: QUERY
 POSTHOOK: Input: default@intermediate
 POSTHOOK: Input: default@intermediate@p=455
 POSTHOOK: Input: default@intermediate@p=456
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@ctas1_mm
-POSTHOOK: Lineage: ctas1_mm.key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
-POSTHOOK: Lineage: ctas1_mm.p EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:p, type:int, comment:null), ]
-PREHOOK: query: select * from ctas1_mm
+POSTHOOK: Input: default@intermediate@p=457
+POSTHOOK: Output: default@bucket1_mm@key2=-1
+POSTHOOK: Output: default@bucket1_mm@key2=1
+POSTHOOK: Output: default@bucket1_mm@key2=101
+POSTHOOK: Output: default@bucket1_mm@key2=102
+POSTHOOK: Output: default@bucket1_mm@key2=104
+POSTHOOK: Output: default@bucket1_mm@key2=11
+POSTHOOK: Output: default@bucket1_mm@key2=9
+POSTHOOK: Output: default@bucket1_mm@key2=96
+POSTHOOK: Output: default@bucket1_mm@key2=97
+POSTHOOK: Output: default@bucket1_mm@key2=98
+POSTHOOK: Output: default@bucket1_mm@key2=99
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).id EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=-1).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).id EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=101).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).id EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=102).key EXPRESSION 
[(intermediate)intermediate.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: bucket1_mm PARTITION(key2=104).id EXPRESSION 

[35/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out 
b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
index 735e4f4..8e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out
@@ -43,48 +43,110 @@ POSTHOOK: Output: default@tbl2
 POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
 PREHOOK: query: -- The join is being performed as part of sub-query. It should 
be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- The join is being performed as part of sub-query. It 
should be converted to a sort-merge join
-explain
+explain vectorization expression
 select count(*) from (
   select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 
b on a.key = b.key
 ) subq1
 POSTHOOK: type: QUERY
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Reducer 2 vectorized, llap
-  File Output Operator [FS_22]
-Group By Operator [GBY_21] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count(VALUE._col0)"]
-<-Map 1 [SIMPLE_EDGE] llap
-  SHUFFLE [RS_11]
-Group By Operator [GBY_10] (rows=1 width=8)
-  Output:["_col0"],aggregations:["count()"]
-  Merge Join Operator [MERGEJOIN_19] (rows=11 width=93)
-Conds:SEL_2._col0=SEL_5._col0(Inner)
-  <-Select Operator [SEL_5] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_18] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_3] (rows=10 width=93)
-  default@tbl2,b,Tbl:COMPLETE,Col:NONE,Output:["key"]
-  <-Select Operator [SEL_2] (rows=10 width=93)
-  Output:["_col0"]
-  Filter Operator [FIL_17] (rows=10 width=93)
-predicate:key is not null
-TableScan [TS_0] (rows=10 width=93)
-  default@tbl1,a,Tbl:COMPLETE,Col:NONE,Output:["key"]
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 10 Data size: 930 Basic stats: 
COMPLETE Column stats: NONE
+  Merge Join Operator
+condition map:
+ Inner Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col0 (type: int)
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Group By Operator
+  aggregations: count()
+  mode: hash
+  outputColumnNames: _col0
+  

[65/67] [abbrv] hive git commit: HIVE-14643 : handle ctas for the MM tables (Sergey Shelukhin)

2016-10-17 Thread sershe
HIVE-14643 : handle ctas for the MM tables (Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af4ff378
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af4ff378
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af4ff378

Branch: refs/heads/hive-14535
Commit: af4ff3787d648a9f4c80b5446d6bcd80b1efc69e
Parents: 2474f06
Author: Sergey Shelukhin 
Authored: Mon Oct 17 12:33:31 2016 -0700
Committer: Sergey Shelukhin 
Committed: Mon Oct 17 12:33:31 2016 -0700

--
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |  12 +-
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |  52 --
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  74 +---
 .../apache/hadoop/hive/ql/metadata/Hive.java|   2 +-
 .../hive/ql/optimizer/GenMapRedUtils.java   |   5 +-
 .../optimizer/unionproc/UnionProcFactory.java   |   1 -
 .../hadoop/hive/ql/parse/GenTezUtils.java   |   1 -
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 182 +++
 .../hadoop/hive/ql/parse/TaskCompiler.java  | 144 +--
 .../hadoop/hive/ql/plan/CreateTableDesc.java|  22 +++
 .../hadoop/hive/ql/plan/FileSinkDesc.java   |  10 +-
 .../apache/hadoop/hive/ql/plan/LoadDesc.java|   5 +-
 .../hadoop/hive/ql/plan/LoadFileDesc.java   |   2 +-
 .../apache/hadoop/hive/ql/plan/MoveWork.java|   2 +-
 ql/src/test/queries/clientpositive/mm_all.q |  30 ++-
 ql/src/test/queries/clientpositive/mm_current.q |  10 +-
 .../results/clientpositive/llap/mm_all.q.out| 138 +-
 .../clientpositive/llap/mm_current.q.out|  42 +
 18 files changed, 463 insertions(+), 271 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/af4ff378/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index acf570f..bb9eaf5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -4059,8 +4059,18 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
   } else {
 db.createTable(tbl, crtTbl.getIfNotExists());
   }
-  if ( crtTbl.isCTAS()) {
+  if (crtTbl.isCTAS()) {
 Table createdTable = db.getTable(tbl.getDbName(), tbl.getTableName());
+if (crtTbl.getInitialWriteId() != null) {
+  // TODO# this would be retrieved via ACID before the query runs; for 
now we rely on it
+  //   being zero at start; we can't create a write ID before we 
create the table here.
+  long initialWriteId = db.getNextTableWriteId(tbl.getDbName(), 
tbl.getTableName());
+  if (initialWriteId != crtTbl.getInitialWriteId()) {
+throw new HiveException("Initial write ID mismatch - expected "
++ crtTbl.getInitialWriteId() + " but got " + initialWriteId);
+  }
+  db.commitMmTableWrite(tbl, initialWriteId);
+}
 DataContainer dc = new DataContainer(createdTable.getTTable());
 SessionState.get().getLineageState().setLineage(
 createdTable.getPath(), dc, createdTable.getCols()

http://git-wip-us.apache.org/repos/asf/hive/blob/af4ff378/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index dda4b51..ef6473a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -112,7 +112,8 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
   protected transient Path parent;
   protected transient HiveOutputFormat hiveOutputFormat;
   protected transient Path specPath;
-  protected transient String childSpecPathDynLinkedPartitions;
+  protected transient String unionPath;
+  protected transient boolean isUnionDp;
   protected transient int dpStartCol; // start column # for DP columns
   protected transient List dpVals; // array of values corresponding to 
DP columns
   protected transient List dpWritables;
@@ -304,7 +305,12 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
   }
   outPaths[filesIdx] = getTaskOutPath(taskId);
 } else {
-  String subdirPath = 
ValidWriteIds.getMmFilePrefix(conf.getMmWriteId()) + "/" + taskId;
+  String subdirPath = 
ValidWriteIds.getMmFilePrefix(conf.getMmWriteId());
+  if (unionPath != null) {
+// Create 

[55/67] [abbrv] hive git commit: HIVE-14822: Add support for credential provider for jobs launched from Hiveserver2 (Vihang Karajgaonkar, reviewed by Barna Zsombor Klara, Mohit Sabharwal)

2016-10-17 Thread sershe
HIVE-14822: Add support for credential provider for jobs launched from 
Hiveserver2 (Vihang Karajgaonkar, reviewed by Barna Zsombor Klara, Mohit 
Sabharwal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9caf2300
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9caf2300
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9caf2300

Branch: refs/heads/hive-14535
Commit: 9caf230013377229e88b2c8eeaf53b7038e8e9a1
Parents: 36bdbcc
Author: Mohit Sabharwal 
Authored: Mon Oct 17 12:48:49 2016 -0400
Committer: Mohit Sabharwal 
Committed: Mon Oct 17 12:48:49 2016 -0400

--
 .../apache/hadoop/hive/common/FileUtils.java|   2 +
 .../org/apache/hadoop/hive/conf/Constants.java  |   4 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   5 +-
 .../apache/hadoop/hive/conf/HiveConfUtil.java   |  91 ++
 .../hive/common/util/HiveStringUtils.java   |  32 ++
 .../hadoop/hive/ql/exec/mr/ExecDriver.java  |   2 +
 .../ql/exec/spark/HiveSparkClientFactory.java   |  14 +
 .../ql/exec/spark/LocalHiveSparkClient.java |   6 +
 .../ql/exec/spark/RemoteHiveSparkClient.java|   4 +
 .../ql/exec/TestHiveCredentialProviders.java| 314 +++
 .../hive/spark/client/SparkClientImpl.java  |  18 +-
 11 files changed, 490 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/9caf2300/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java 
b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 3ed2d08..1d734f9 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.Trash;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConfUtil;
 import org.apache.hadoop.hive.io.HdfsUtils;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -576,6 +577,7 @@ public final class FileUtils {
 srcFS.getFileStatus(src).getLen() > 
conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE)) {
   LOG.info("Source is " + srcFS.getFileStatus(src).getLen() + " bytes. 
(MAX: " + conf.getLongVar(HiveConf.ConfVars.HIVE_EXEC_COPYFILE_MAXSIZE) + ")");
   LOG.info("Launch distributed copy (distcp) job.");
+  HiveConfUtil.updateJobCredentialProviders(conf);
   copied = shims.runDistCp(src, dst, conf);
   if (copied && deleteSource) {
 srcFS.delete(src, true);

http://git-wip-us.apache.org/repos/asf/hive/blob/9caf2300/common/src/java/org/apache/hadoop/hive/conf/Constants.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/Constants.java 
b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
index 77c6aa5..6c42163 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/Constants.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/Constants.java
@@ -30,4 +30,8 @@ public class Constants {
   public static final String DRUID_QUERY_JSON = "druid.query.json";
   public static final String DRUID_QUERY_TYPE = "druid.query.type";
   public static final String DRUID_QUERY_FETCH = "druid.query.fetch";
+
+  public static final String HIVE_SERVER2_JOB_CREDSTORE_PASSWORD_ENVVAR = 
"HIVE_JOB_CREDSTORE_PASSWORD";
+  public static final String HADOOP_CREDENTIAL_PASSWORD_ENVVAR = 
"HADOOP_CREDSTORE_PASSWORD";
+  public static final String HADOOP_CREDENTIAL_PROVIDER_PATH_CONFIG = 
"hadoop.security.credential.provider.path";
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9caf2300/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8ffae3b..6f168b5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2521,7 +2521,10 @@ public class HiveConf extends Configuration {
 "if an X-XSRF-HEADER header is not present"),
 HIVE_SECURITY_COMMAND_WHITELIST("hive.security.command.whitelist", 
"set,reset,dfs,add,list,delete,reload,compile",
 "Comma separated list of non-SQL Hive commands users are authorized to 
execute"),
-
+
HIVE_SERVER2_JOB_CREDENTIAL_PROVIDER_PATH("hive.server2.job.credential.provider.path",
 "",
+"If set, this configuration property should 

[60/67] [abbrv] hive git commit: HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

2016-10-17 Thread sershe
HIVE-14957: HiveSortLimitPullUpConstantsRule misses branches when parent 
operator is Union (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e0e10a93
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e0e10a93
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e0e10a93

Branch: refs/heads/hive-14535
Commit: e0e10a9324cfccc8bef7bccb33d9f1509832eba1
Parents: e9c217f
Author: Pengcheng Xiong 
Authored: Mon Oct 17 11:12:16 2016 -0700
Committer: Pengcheng Xiong 
Committed: Mon Oct 17 11:12:16 2016 -0700

--
 .../calcite/rules/HiveSortLimitPullUpConstantsRule.java  | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e0e10a93/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
index cc318db..3ec9dac 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortLimitPullUpConstantsRule.java
@@ -26,6 +26,7 @@ import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.hep.HepRelVertex;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.RelNode;
@@ -158,7 +159,15 @@ public class HiveSortLimitPullUpConstantsRule extends 
RelOptRule {
 relBuilder.project(topChildExprs, topChildExprsFields);
 relBuilder.convert(sort.getRowType(), false);
 
-call.transformTo(parent.copy(parent.getTraitSet(), 
ImmutableList.of(relBuilder.build(;
+List inputs = new ArrayList<>();
+for (RelNode child : parent.getInputs()) {
+  if (!((HepRelVertex) child).getCurrentRel().equals(sort)) {
+inputs.add(child);
+  } else {
+inputs.add(relBuilder.build());
+  }
+}
+call.transformTo(parent.copy(parent.getTraitSet(), inputs));
   }
 
 }



[43/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q 
b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
index 8ed041b..11df12e 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
@@ -1,4 +1,4 @@
-set hive.explain.user=true;
+set hive.explain.user=false;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int);
 
 insert into table part_add_int_permute_select partition(part=1) VALUES (2, 
, 'new', );
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c 
int, d string);
 
 insert into table part_add_int_string_permute_select partition(part=1) VALUES 
(2, , 'new', , '');
 
-explain
+explain vectorization detail
 select insert_num,part,a,b from part_add_int_string_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns 
(insert_num int, c1
 
 insert into table part_change_string_group_double partition(part=1) SELECT 
insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE 
insert_num = 111;
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
 
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
@@ -117,7 +117,7 @@ alter table 
part_change_date_group_string_group_date_timestamp replace columns(i
 
 insert into table part_change_date_group_string_group_date_timestamp 
partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
@@ -165,7 +165,7 @@ insert into table 
part_change_numeric_group_string_group_multi_ints_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler',
 'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
@@ -208,7 +208,7 @@ insert into table 
part_change_numeric_group_string_group_floating_string_group p
  'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
  'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
@@ -250,7 +250,7 @@ insert into table 
part_change_string_group_string_group_string partition(part=1)
   'filler', 'filler', 'filler',
   'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
@@ -300,7 +300,7 @@ insert into table 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa
 1234.5678, 9876.543, 789.321,
'new');
 
-explain
+explain vectorization detail
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
@@ -331,7 +331,7 @@ alter table 
part_change_lower_to_higher_numeric_group_decimal_to_float replace c
 
 insert into table part_change_lower_to_higher_numeric_group_decimal_to_float 
partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
 
-explain
+explain vectorization detail
 select insert_num,part,c1,c2,c3,b from 
part_change_lower_to_higher_numeric_group_decimal_to_float;
 
 select 

[53/67] [abbrv] hive git commit: HIVE-14799: Query operation are not thread safe during its cancellation (Chaoyu Tang, reviewed by Sergey Shelukhin, Yongzhi Chen)

2016-10-17 Thread sershe
HIVE-14799: Query operation are not thread safe during its cancellation (Chaoyu 
Tang, reviewed by Sergey Shelukhin, Yongzhi Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1901e3a6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1901e3a6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1901e3a6

Branch: refs/heads/hive-14535
Commit: 1901e3a6ab97c150905c04c591b33b2c640e4b87
Parents: c71ef4f
Author: ctang 
Authored: Sat Oct 15 08:55:36 2016 -0400
Committer: ctang 
Committed: Sat Oct 15 08:55:36 2016 -0400

--
 .../java/org/apache/hadoop/hive/ql/Driver.java  | 665 +--
 1 file changed, 468 insertions(+), 197 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/1901e3a6/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java 
b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index dd55434..9e5fd37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -164,8 +164,6 @@ public class Driver implements CommandProcessor {
   private int maxthreads;
   private int tryCount = Integer.MAX_VALUE;
 
-  private boolean destroyed;
-
   private String userName;
 
   // HS2 operation handle guid string
@@ -180,6 +178,28 @@ public class Driver implements CommandProcessor {
   // Query hooks that execute before compilation and after execution
   List queryHooks;
 
+  // a lock is used for synchronizing the state transition and its associated
+  // resource releases
+  private final ReentrantLock stateLock = new ReentrantLock();
+  private DriverState driverState = DriverState.INITIALIZED;
+
+  private enum DriverState {
+INITIALIZED,
+COMPILING,
+COMPILED,
+EXECUTING,
+EXECUTED,
+// a state that the driver enters after close() has been called to 
interrupt its running
+// query in the query cancellation
+INTERRUPT,
+// a state that the driver enters after close() has been called to clean 
the query results
+// and release the resources after the query has been executed
+CLOSED,
+// a state that the driver enters after destroy() is called and it is the 
end of driver life cycle
+DESTROYED,
+ERROR
+  }
+
   private boolean checkConcurrency() {
 boolean supportConcurrency = 
conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
 if (!supportConcurrency) {
@@ -350,9 +370,22 @@ public class Driver implements CommandProcessor {
* @return 0 for ok
*/
   public int compile(String command, boolean resetTaskIds) {
+return compile(command, resetTaskIds, false);
+  }
+
+  // deferClose indicates if the close/destroy should be deferred when the 
process has been
+  // interrupted, it should be set to true if the compile is called within 
another method like
+  // runInternal, which defers the close to the called in that method.
+  public int compile(String command, boolean resetTaskIds, boolean deferClose) 
{
 PerfLogger perfLogger = SessionState.getPerfLogger(true);
 perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.DRIVER_RUN);
 perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.COMPILE);
+stateLock.lock();
+try {
+  driverState = DriverState.COMPILING;
+} finally {
+  stateLock.unlock();
+}
 
 command = new VariableSubstitution(new HiveVariableSource() {
   @Override
@@ -370,8 +403,13 @@ public class Driver implements CommandProcessor {
   LOG.warn("WARNING! Query command could not be redacted." + e);
 }
 
+if (isInterrupted()) {
+  return handleInterruption("at beginning of compilation."); //indicate if 
need clean resource
+}
+
 if (ctx != null && ctx.getExplainAnalyze() != AnalyzeState.RUNNING) {
-  close();
+  // close the existing ctx etc before compiling a new query, but does not 
destroy driver
+  closeInProcess(false);
 }
 
 if (resetTaskIds) {
@@ -411,9 +449,13 @@ public class Driver implements CommandProcessor {
   };
   ShutdownHookManager.addShutdownHook(shutdownRunner, 
SHUTDOWN_HOOK_PRIORITY);
 
+  if (isInterrupted()) {
+return handleInterruption("before parsing and analysing the query");
+  }
   if (ctx == null) {
 ctx = new Context(conf);
   }
+
   ctx.setTryCount(getTryCount());
   ctx.setCmd(command);
   ctx.setHDFSCleanup(true);
@@ -477,9 +519,12 @@ public class Driver implements CommandProcessor {
   acidInQuery = sem.hasAcidInQuery();
   perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.ANALYZE);
 
+  if (isInterrupted()) {
+return handleInterruption("after analyzing query.");
+  }
+
   // get 

[54/67] [abbrv] hive git commit: HIVE-11957 : Add StartedTime and LastHeartbeatTime columns to SHOW TRANSACTIONS output (Wei Zheng, reviewed by Eugene Koifman)

2016-10-17 Thread sershe
HIVE-11957 : Add StartedTime and LastHeartbeatTime columns to SHOW TRANSACTIONS 
output (Wei Zheng, reviewed by Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36bdbcc1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36bdbcc1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36bdbcc1

Branch: refs/heads/hive-14535
Commit: 36bdbcc18e6efa017a28248eb5caef1bf5cfde00
Parents: 1901e3a
Author: Wei Zheng 
Authored: Sun Oct 16 14:28:15 2016 -0700
Committer: Wei Zheng 
Committed: Sun Oct 16 14:28:15 2016 -0700

--
 metastore/if/hive_metastore.thrift  |   2 +
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp |  44 
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |  20 +-
 .../hadoop/hive/metastore/api/TxnInfo.java  | 206 ++-
 .../src/gen/thrift/gen-php/metastore/Types.php  |  46 +
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |  28 ++-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |   6 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java   |  12 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   8 +
 .../hadoop/hive/ql/plan/ShowTxnsDesc.java   |   2 +-
 .../clientpositive/dbtxnmgr_showlocks.q.out |   2 +-
 11 files changed, 362 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/36bdbcc1/metastore/if/hive_metastore.thrift
--
diff --git a/metastore/if/hive_metastore.thrift 
b/metastore/if/hive_metastore.thrift
index c5ba309..9f6ef91 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -625,6 +625,8 @@ struct TxnInfo {
 5: optional string agentInfo = "Unknown",
 6: optional i32 heartbeatCount=0,
 7: optional string metaInfo,
+8: optional i64 startedTime,
+9: optional i64 lastHeartbeatTime,
 }
 
 struct GetOpenTxnsInfoResponse {

http://git-wip-us.apache.org/repos/asf/hive/blob/36bdbcc1/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
--
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp 
b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index 174b539..89541fa 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -11579,6 +11579,16 @@ void TxnInfo::__set_metaInfo(const std::string& val) {
 __isset.metaInfo = true;
 }
 
+void TxnInfo::__set_startedTime(const int64_t val) {
+  this->startedTime = val;
+__isset.startedTime = true;
+}
+
+void TxnInfo::__set_lastHeartbeatTime(const int64_t val) {
+  this->lastHeartbeatTime = val;
+__isset.lastHeartbeatTime = true;
+}
+
 uint32_t TxnInfo::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -11662,6 +11672,22 @@ uint32_t 
TxnInfo::read(::apache::thrift::protocol::TProtocol* iprot) {
   xfer += iprot->skip(ftype);
 }
 break;
+  case 8:
+if (ftype == ::apache::thrift::protocol::T_I64) {
+  xfer += iprot->readI64(this->startedTime);
+  this->__isset.startedTime = true;
+} else {
+  xfer += iprot->skip(ftype);
+}
+break;
+  case 9:
+if (ftype == ::apache::thrift::protocol::T_I64) {
+  xfer += iprot->readI64(this->lastHeartbeatTime);
+  this->__isset.lastHeartbeatTime = true;
+} else {
+  xfer += iprot->skip(ftype);
+}
+break;
   default:
 xfer += iprot->skip(ftype);
 break;
@@ -11718,6 +11744,16 @@ uint32_t 
TxnInfo::write(::apache::thrift::protocol::TProtocol* oprot) const {
 xfer += oprot->writeString(this->metaInfo);
 xfer += oprot->writeFieldEnd();
   }
+  if (this->__isset.startedTime) {
+xfer += oprot->writeFieldBegin("startedTime", 
::apache::thrift::protocol::T_I64, 8);
+xfer += oprot->writeI64(this->startedTime);
+xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.lastHeartbeatTime) {
+xfer += oprot->writeFieldBegin("lastHeartbeatTime", 
::apache::thrift::protocol::T_I64, 9);
+xfer += oprot->writeI64(this->lastHeartbeatTime);
+xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -11732,6 +11768,8 @@ void swap(TxnInfo &a, TxnInfo &b) {
   swap(a.agentInfo, b.agentInfo);
   swap(a.heartbeatCount, b.heartbeatCount);
   swap(a.metaInfo, b.metaInfo);
+  swap(a.startedTime, b.startedTime);
+  swap(a.lastHeartbeatTime, b.lastHeartbeatTime);
   swap(a.__isset, b.__isset);
 }
 
@@ -11743,6 +11781,8 @@ TxnInfo::TxnInfo(const TxnInfo& other507) {
   agentInfo = 

[47/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index c288731..77b44fb 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.commons.lang.ArrayUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -56,6 +57,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -63,6 +65,8 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
+import com.google.common.base.Preconditions;
+
 /**
  * This class is common operator class for native vectorized map join.
  *
@@ -72,7 +76,43 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
  */
 public abstract class VectorMapJoinCommonOperator extends MapJoinOperator 
implements VectorizationContextRegion {
   private static final long serialVersionUID = 1L;
-  private static final Logger LOG = 
LoggerFactory.getLogger(VectorMapJoinCommonOperator.class.getName());
+
+  
//
+
+  private static final String CLASS_NAME = 
VectorMapJoinCommonOperator.class.getName();
+private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME);
+
+  protected abstract String getLoggingPrefix();
+
+  // For debug tracing: information about the map or reduce task, operator, 
operator class, etc.
+  protected transient String loggingPrefix;
+
+  protected String getLoggingPrefix(String className) {
+if (loggingPrefix == null) {
+  initLoggingPrefix(className);
+}
+return loggingPrefix;
+  }
+
+  protected void initLoggingPrefix(String className) {
+if (hconf == null) {
+  // Constructor time...
+  loggingPrefix = className;
+} else {
+  // Determine the name of our map or reduce task for debug tracing.
+  BaseWork work = Utilities.getMapWork(hconf);
+  if (work == null) {
+work = Utilities.getReduceWork(hconf);
+  }
+  loggingPrefix = className + " " + work.getName() + " " + getOperatorId();
+}
+  }
+
+  
//
+
+  protected VectorMapJoinDesc vectorDesc;
+
+  protected VectorMapJoinInfo vectorMapJoinInfo;
 
   // Whether this operator is an outer join.
   protected boolean isOuterJoin;
@@ -88,10 +128,10 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implements
   // a mixture of input big table columns and new scratch columns.
   protected VectorizationContext vOutContext;
 
-  // The output column projection of the vectorized row batch.  And, the type 
names of the output
+  // The output column projection of the vectorized row batch.  And, the type 
infos of the output
   // columns.
   protected int[] outputProjection;
-  protected String[] outputTypeNames;
+  protected TypeInfo[] outputTypeInfos;
 
   // These are the vectorized batch expressions for filtering, key 
expressions, and value
   // expressions.
@@ -101,15 +141,17 @@ public abstract class VectorMapJoinCommonOperator extends 
MapJoinOperator implem
 
   // This is map of which vectorized row batch columns are the big table key 
columns.  Since
   // we may have key expressions that produce new scratch columns, we need a 
mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[] bigTableKeyColumnMap;
-  protected ArrayList bigTableKeyTypeNames;
+  protected String[] bigTableKeyColumnNames;
+  protected TypeInfo[] bigTableKeyTypeInfos;
 
   // Similarly, this is map of which vectorized row batch columns are the big 
table value columns.
   // Since we may have value expressions that produce new scratch columns, we 
need a mapping.
-  // And, we have their type names.
+  // And, we have their type infos.
   protected int[] 

[58/67] [abbrv] hive git commit: HIVE-14935: Add tests for beeline force option (Kavan Suresh via Jason Dere)

2016-10-17 Thread sershe
HIVE-14935: Add tests for beeline force option (Kavan Suresh via Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/df6afdd4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/df6afdd4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/df6afdd4

Branch: refs/heads/hive-14535
Commit: df6afdd4c1ef52f8c8af7a0425298b240230661b
Parents: 8d3925f
Author: Jason Dere 
Authored: Mon Oct 17 11:03:29 2016 -0700
Committer: Jason Dere 
Committed: Mon Oct 17 11:03:29 2016 -0700

--
 .../org/apache/hive/beeline/TestBeeLineWithArgs.java | 15 +++
 1 file changed, 15 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/df6afdd4/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
--
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
index 1ca7623..9e99a91 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java
@@ -911,4 +911,19 @@ public class TestBeeLineWithArgs {
 final String EXPECTED_PATTERN = "hello world";
 testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList,true,false);
   }
+
+  /**
+   * Attempt to execute Beeline with force option to continue running script 
even after errors.
+   * Test for presence of an expected pattern to match the output of a valid 
command at the end.
+   */
+  @Test
+  public void testBeelineWithForce() throws Throwable {
+final String SCRIPT_TEXT = "drop table does_not_exist;\ncreate table 
incomplete_syntax(a, string, );\n "
++ "drop table if exists new_table;\n create table new_table(foo 
int, bar string);\n "
++ "desc new_table;\n";
+final String EXPECTED_PATTERN = "2 rows selected";
+List argList = getBaseArgs(miniHS2.getBaseJdbcURL());
+argList.add("--force");
+testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList);
+  }
 }



[59/67] [abbrv] hive git commit: HIVE-14958. Improve the 'TestClass' did not produce a TEST-*.xml file message to include list of all qfiles in a batch, batch id. (Siddharth Seth, reviewed by Sergio P

2016-10-17 Thread sershe
HIVE-14958. Improve the 'TestClass' did not produce a TEST-*.xml file message 
to include list of all qfiles in a batch, batch id. (Siddharth Seth, reviewed 
by Sergio Peña)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9c217fe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9c217fe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9c217fe

Branch: refs/heads/hive-14535
Commit: e9c217fe69e1da068579ea3b5a1de02ad11730a3
Parents: df6afdd
Author: Siddharth Seth 
Authored: Mon Oct 17 11:07:34 2016 -0700
Committer: Siddharth Seth 
Committed: Mon Oct 17 11:07:34 2016 -0700

--
 .../hive/ptest/execution/ExecutionPhase.java| 21 ++--
 .../ptest/execution/conf/QFileTestBatch.java| 11 --
 .../ptest/execution/TestExecutionPhase.java |  8 
 .../TestScripts.testPrepGit.approved.txt|  4 ++--
 .../TestScripts.testPrepHadoop1.approved.txt|  4 ++--
 .../TestScripts.testPrepNone.approved.txt   |  4 ++--
 .../TestScripts.testPrepSvn.approved.txt|  4 ++--
 7 files changed, 40 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/e9c217fe/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
--
diff --git 
a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
 
b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
index 8a64499..2015187 100644
--- 
a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
+++ 
b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/ExecutionPhase.java
@@ -20,6 +20,7 @@ package org.apache.hive.ptest.execution;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
@@ -28,7 +29,9 @@ import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.TimeUnit;
 
+import com.google.common.base.Joiner;
 import org.apache.hive.ptest.execution.conf.Host;
+import org.apache.hive.ptest.execution.conf.QFileTestBatch;
 import org.apache.hive.ptest.execution.conf.TestBatch;
 import org.apache.hive.ptest.execution.context.ExecutionContext;
 import org.slf4j.Logger;
@@ -116,13 +119,27 @@ public class ExecutionPhase extends Phase {
 }
 JUnitReportParser parser = new JUnitReportParser(logger, batchLogDir);
 executedTests.addAll(parser.getAllExecutedTests());
-failedTests.addAll(parser.getAllFailedTests());
+for (String failedTest : parser.getAllFailedTests()) {
+  failedTests.add(failedTest + " (batchId=" + batch.getBatchId() + 
")");
+}
+
 // if the TEST*.xml was not generated or was corrupt, let someone know
 if (parser.getTestClassesWithReportAvailable().size() < 
batch.getTestClasses().size()) {
   Set expTestClasses = new HashSet<>(batch.getTestClasses());
   expTestClasses.removeAll(parser.getTestClassesWithReportAvailable());
   for (String testClass : expTestClasses) {
-failedTests.add(testClass + " - did not produce a TEST-*.xml 
file");
+StringBuilder messageBuilder = new StringBuilder();
+messageBuilder.append(testClass).append(" - did not produce a 
TEST-*.xml file (likely timed out)")
+.append(" (batchId=").append(batch.getBatchId()).append(")");
+if (batch instanceof QFileTestBatch) {
+  Collection tests = ((QFileTestBatch)batch).getTests();
+  if (tests.size() != 0) {
+messageBuilder.append("\n\t[");
+messageBuilder.append(Joiner.on(",").join(tests));
+messageBuilder.append("]");
+  }
+}
+failedTests.add(messageBuilder.toString());
   }
 }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/e9c217fe/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/conf/QFileTestBatch.java
--
diff --git 
a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/conf/QFileTestBatch.java
 
b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/conf/QFileTestBatch.java
index 405c44b..ced83bf 100644
--- 
a/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/conf/QFileTestBatch.java
+++ 
b/testutils/ptest2/src/main/java/org/apache/hive/ptest/execution/conf/QFileTestBatch.java
@@ -20,11 +20,13 @@ package org.apache.hive.ptest.execution.conf;
 
 import java.util.Collection;
 import java.util.Collections;
+import 

[46/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 3a179a3..6167f48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 import static 
org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
 
 import java.io.Serializable;
+import java.lang.annotation.Annotation;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -33,6 +34,7 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Pattern;
+import org.apache.commons.lang.ArrayUtils;
 
 import org.apache.calcite.util.Pair;
 import org.apache.commons.lang.ArrayUtils;
@@ -43,6 +45,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -62,7 +66,11 @@ import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOpe
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkMultiKeyOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkStringOperator;
+import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
+import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator;
@@ -73,6 +81,7 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -91,18 +100,36 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.Explain;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTableScanDesc;
+import org.apache.hadoop.hive.ql.plan.VectorizationCondition;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
+import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorLimitDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import 

[40/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q 
b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
index 2d3788d..d2ded71 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_partition_pruning.q
@@ -7,33 +7,34 @@ set hive.tez.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
 
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;
 
-EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from 
srcpart group by ds;
+EXPLAIN VECTORIZATION create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds;
 create table srcpart_date stored as orc as select ds as ds, ds as `date` from 
srcpart group by ds;
 create table srcpart_hour stored as orc as select hr as hr, hr as hour from 
srcpart group by hr;
 create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, 
hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as 
hour from srcpart group by hr;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = 
srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) 
 where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
@@ -41,77 +42,77 @@ set hive.tez.dynamic.partition.pruning=true;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.tez.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = 
srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 
srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date_hour on 
(srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where 

[57/67] [abbrv] hive git commit: HIVE-13046: DependencyResolver should not lowercase the dependency URI's authority (Anthony Hsu via Carl Steinbach)

2016-10-17 Thread sershe
HIVE-13046: DependencyResolver should not lowercase the dependency URI's 
authority (Anthony Hsu via Carl Steinbach)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8d3925f6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8d3925f6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8d3925f6

Branch: refs/heads/hive-14535
Commit: 8d3925f6138fce0e0b7c7832f386e89dfd5b0575
Parents: 8029e11
Author: Carl Steinbach 
Authored: Mon Oct 17 10:18:11 2016 -0700
Committer: Carl Steinbach 
Committed: Mon Oct 17 10:18:11 2016 -0700

--
 ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/8d3925f6/ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java 
b/ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java
index 15d0fa1..d080c47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/util/DependencyResolver.java
@@ -81,7 +81,7 @@ public class DependencyResolver {
 if (authority == null) {
   throw new URISyntaxException(authority, "Invalid url: Expected 
'org:module:version', found null");
 }
-String[] authorityTokens = authority.toLowerCase().split(":");
+String[] authorityTokens = authority.split(":");
 
 if (authorityTokens.length != 3) {
   throw new URISyntaxException(authority, "Invalid url: Expected 
'org:module:version', found " + authority);



[64/67] [abbrv] hive git commit: HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)

2016-10-17 Thread sershe
HIVE-14671 : merge master into hive-14535 (Sergey Shelukhin)

Conflicts:
ql/src/java/org/apache/hadoop/hive/ql/Driver.java


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2474f063
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2474f063
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2474f063

Branch: refs/heads/hive-14535
Commit: 2474f063aaaf3a91fdab9d9c5358723072183ddf
Parents: eacf9f9 36e810f
Author: Sergey Shelukhin 
Authored: Mon Oct 17 12:31:12 2016 -0700
Committer: Sergey Shelukhin 
Committed: Mon Oct 17 12:31:12 2016 -0700

--
 .../java/org/apache/hive/beeline/BeeLine.java   |  176 ++-
 .../java/org/apache/hive/beeline/Commands.java  |   52 +-
 .../BeelineHS2ConnectionFileParseException.java |   30 +
 .../hs2connection/HS2ConnectionFileParser.java  |   88 ++
 .../hs2connection/HS2ConnectionFileUtils.java   |  119 ++
 .../HiveSiteHS2ConnectionFileParser.java|  172 +++
 .../UserHS2ConnectionFileParser.java|  117 ++
 .../apache/hive/beeline/TestBeeLineHistory.java |4 +-
 .../TestUserHS2ConnectionFileParser.java|  211 
 beeline/src/test/resources/hive-site.xml|5 +
 .../test-hs2-conn-conf-kerberos-http.xml|   48 +
 .../test-hs2-conn-conf-kerberos-nossl.xml   |   32 +
 .../test-hs2-conn-conf-kerberos-ssl.xml |   40 +
 .../resources/test-hs2-connection-conf-list.xml |   36 +
 .../test-hs2-connection-config-noauth.xml   |   28 +
 .../test-hs2-connection-multi-conf-list.xml |   37 +
 .../test-hs2-connection-zookeeper-config.xml|   32 +
 .../apache/hadoop/hive/common/FileUtils.java|2 +
 .../org/apache/hadoop/hive/conf/Constants.java  |4 +
 .../org/apache/hadoop/hive/conf/HiveConf.java   |8 +-
 .../apache/hadoop/hive/conf/HiveConfUtil.java   |   91 ++
 .../hive/common/util/HiveStringUtils.java   |   32 +
 data/conf/spark/yarn-client/hive-site.xml   |4 +-
 data/files/identity_udf.jar |  Bin 0 -> 710 bytes
 .../hcatalog/pig/AbstractHCatStorerTest.java| 1096 ++
 .../hive/hcatalog/pig/TestAvroHCatStorer.java   |   77 ++
 .../hive/hcatalog/pig/TestHCatStorer.java   | 1036 ++---
 .../hive/hcatalog/pig/TestOrcHCatStorer.java|   33 +
 .../hcatalog/pig/TestParquetHCatStorer.java |  200 
 .../hive/hcatalog/pig/TestRCFileHCatStorer.java |   32 +
 .../pig/TestSequenceFileHCatStorer.java |   33 +
 .../hcatalog/pig/TestTextFileHCatStorer.java|   33 +
 .../hive/minikdc/TestJdbcWithMiniKdcCookie.java |1 -
 .../hive/beeline/TestBeeLineWithArgs.java   |   15 +
 .../TestBeelineConnectionUsingHiveSite.java |  109 ++
 .../TestBeelineWithHS2ConnectionFile.java   |  214 
 .../TestBeelineWithUserHs2ConnectionFile.java   |  129 +++
 metastore/if/hive_metastore.thrift  |2 +
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp |   44 +
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |   20 +-
 .../hadoop/hive/metastore/api/TxnInfo.java  |  206 +++-
 .../src/gen/thrift/gen-php/metastore/Types.php  |   46 +
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |   28 +-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |6 +-
 .../hadoop/hive/metastore/txn/TxnHandler.java   |   12 +-
 .../java/org/apache/hadoop/hive/ql/Driver.java  |  675 +++
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |8 +
 .../hadoop/hive/ql/exec/mr/ExecDriver.java  |2 +
 .../ql/exec/spark/HiveSparkClientFactory.java   |   14 +
 .../ql/exec/spark/LocalHiveSparkClient.java |6 +
 .../ql/exec/spark/RemoteHiveSparkClient.java|4 +
 .../rules/HiveSortLimitPullUpConstantsRule.java |   11 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java|6 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   37 +-
 .../hadoop/hive/ql/plan/ShowTxnsDesc.java   |2 +-
 .../hadoop/hive/ql/util/DependencyResolver.java |2 +-
 .../ql/exec/TestHiveCredentialProviders.java|  314 +
 .../clientpositive/distinct_windowing_no_cbo.q  |   63 +
 ql/src/test/queries/clientpositive/windowing.q  |6 +
 .../clientpositive/dbtxnmgr_showlocks.q.out |2 +-
 .../distinct_windowing_no_cbo.q.out |  796 +
 .../llap/cbo_rp_windowing_2.q.out   |5 +-
 .../results/clientpositive/llap/windowing.q.out |  110 +-
 .../spark/constprog_semijoin.q.out  |   20 +-
 .../clientpositive/spark/index_bitmap3.q.out|4 +-
 .../spark/index_bitmap_auto.q.out   |4 +-
 .../spark/infer_bucket_sort_map_operators.q.out |8 +-
 .../infer_bucket_sort_reducers_power_two.q.out  |2 +-
 .../clientpositive/spark/windowing.q.out|  105 +-
 .../service/cli/operation/OperationManager.java |3 +
 .../service/cli/thrift/ThriftHttpServlet.java   |4 +-
 

[34/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 6b59497..739d0e1 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -69,13 +69,17 @@ POSTHOOK: Lineage: tint.cint SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:cint, t
 POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, 
type:int, comment:null), ]
 tint_txt.rnum  tint_txt.cint
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -93,9 +97,16 @@ STAGE PLANS:
 TableScan
   alias: tint
   Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: rnum (type: int), cint (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 5 Data size: 36 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -103,6 +114,11 @@ STAGE PLANS:
   keys:
 0 
 1 
+  Map Join Vectorization:
+  className: VectorMapJoinOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid 
Hash Join IS true, Small table vectorizes IS true
+  nativeConditionsNotMet: Not empty key IS false
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 1 Map 2
@@ -110,9 +126,17 @@ STAGE PLANS:
   Select Operator
 expressions: _col0 (type: int), _col2 (type: int), 
_col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND 
_col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 2, 1, 3, 5]
+selectExpressions: VectorUDFAdaptor(CASE WHEN 
(_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: 
VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String
 Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -120,21 +144,49 @@ STAGE PLANS:
   serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 

[62/67] [abbrv] hive git commit: HIVE-14891: Parallelize TestHCatStorer (Vaibhav Gumashta reviewed by Siddharth Seth)

2016-10-17 Thread sershe
HIVE-14891: Parallelize TestHCatStorer (Vaibhav Gumashta reviewed by Siddharth 
Seth)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2cae7361
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2cae7361
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2cae7361

Branch: refs/heads/hive-14535
Commit: 2cae7361fdf8347faf0fa2edfc6f52600ab764aa
Parents: e0e10a9
Author: Vaibhav Gumashta 
Authored: Mon Oct 17 11:58:16 2016 -0700
Committer: Vaibhav Gumashta 
Committed: Mon Oct 17 11:58:16 2016 -0700

--
 .../hcatalog/pig/AbstractHCatStorerTest.java| 1096 ++
 .../hive/hcatalog/pig/TestAvroHCatStorer.java   |   77 ++
 .../hive/hcatalog/pig/TestHCatStorer.java   | 1036 ++---
 .../hive/hcatalog/pig/TestOrcHCatStorer.java|   33 +
 .../hcatalog/pig/TestParquetHCatStorer.java |  200 
 .../hive/hcatalog/pig/TestRCFileHCatStorer.java |   32 +
 .../pig/TestSequenceFileHCatStorer.java |   33 +
 .../hcatalog/pig/TestTextFileHCatStorer.java|   33 +
 8 files changed, 1618 insertions(+), 922 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/2cae7361/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
--
diff --git 
a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
new file mode 100644
index 000..2975287
--- /dev/null
+++ 
b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java
@@ -0,0 +1,1096 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.pig;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hive.hcatalog.HcatTestUtils;
+import org.apache.hive.hcatalog.mapreduce.HCatBaseTest;
+import org.apache.pig.EvalFunc;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigException;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.util.LogUtils;
+import org.joda.time.DateTime;
+import org.joda.time.DateTimeZone;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class AbstractHCatStorerTest extends HCatBaseTest {
+  static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class);
+  static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data";
+  String storageFormat;
+
+  public AbstractHCatStorerTest() {
+storageFormat = getStorageFormat();
+  }
+
+  // Start: tests that check values from Pig that are out of range for target 
column
+  @Test
+  public void testWriteTinyint() throws Exception {
+pigValueRangeTest("junitTypeTest1", "tinyint", "int", null, 
Integer.toString(1),
+Integer.toString(1));
+pigValueRangeTestOverflow("junitTypeTest1", "tinyint", "int", null, 
Integer.toString(300));
+pigValueRangeTestOverflow("junitTypeTest2", "tinyint", "int",
+HCatBaseStorer.OOR_VALUE_OPT_VALUES.Null, Integer.toString(300));
+pigValueRangeTestOverflow("junitTypeTest3", "tinyint", "int",
+

[31/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out 
b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
index 3d67664..9e185c6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales 
PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
 POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost 
SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, 
type:decimal(7,2), comment:null), ]
 PREHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 PREHOOK: type: QUERY
 POSTHOOK: query: 
--
 
-explain
+explain vectorization expression
 select count(distinct ws_order_number) from web_sales
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -1257,11 +1261,24 @@ STAGE PLANS:
 TableScan
   alias: web_sales
   Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33]
   Select Operator
 expressions: ws_order_number (type: int)
 outputColumnNames: ws_order_number
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [16]
 Statistics: Num rows: 2000 Data size: 352 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
+  Group By Vectorization:
+  className: VectorGroupByOperator
+  vectorOutput: true
+  keyExpressions: col 16
+  native: false
+  projectedOutputColumns: []
   keys: ws_order_number (type: int)
   mode: hash
   outputColumnNames: _col0
@@ -1270,36 +1287,88 @@ STAGE PLANS:
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 2000 Data size: 352 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)
   

[52/67] [abbrv] hive git commit: HIVE-14966: JDBC: Make cookie-auth work in HTTP mode (Gopal V reviewed by Tao Li, Vaibhav Gumashta)

2016-10-17 Thread sershe
HIVE-14966: JDBC: Make cookie-auth work in HTTP mode (Gopal V reviewed by Tao 
Li, Vaibhav Gumashta)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c71ef4fe
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c71ef4fe
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c71ef4fe

Branch: refs/heads/hive-14535
Commit: c71ef4fed771cdd2373ca693a417d716618bf0ec
Parents: 16d28b3
Author: Vaibhav Gumashta 
Authored: Sat Oct 15 00:45:47 2016 -0700
Committer: Vaibhav Gumashta 
Committed: Sat Oct 15 00:45:47 2016 -0700

--
 common/src/java/org/apache/hadoop/hive/conf/HiveConf.java| 3 ++-
 .../java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java  | 1 -
 .../org/apache/hive/service/cli/thrift/ThriftHttpServlet.java| 4 ++--
 .../hive/service/cli/thrift/ThriftCliServiceTestWithCookie.java  | 1 -
 4 files changed, 4 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c71ef4fe/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
--
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 18b98e9..8ffae3b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2302,8 +2302,9 @@ public class HiveConf extends Configuration {
 "Domain for the HS2 generated cookies"),
 
HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH("hive.server2.thrift.http.cookie.path", 
null,
 "Path for the HS2 generated cookies"),
+@Deprecated
 
HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE("hive.server2.thrift.http.cookie.is.secure",
 true,
-"Secure attribute of the HS2 generated cookie."),
+"Deprecated: Secure attribute of the HS2 generated cookie (this is 
automatically enabled for SSL enabled HiveServer2)."),
 
HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY("hive.server2.thrift.http.cookie.is.httponly",
 true,
 "HttpOnly attribute of the HS2 generated cookie."),
 

http://git-wip-us.apache.org/repos/asf/hive/blob/c71ef4fe/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java
--
diff --git 
a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java
 
b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java
index 98438ed..5e70d68 100644
--- 
a/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java
+++ 
b/itests/hive-minikdc/src/test/java/org/apache/hive/minikdc/TestJdbcWithMiniKdcCookie.java
@@ -55,7 +55,6 @@ public class TestJdbcWithMiniKdcCookie {
 // set a small time unit as cookie max age so that the server sends a 401
 hiveConf.setTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE,
   1, TimeUnit.SECONDS);
-hiveConf.setBoolVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE, 
false);
 hiveConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
 miniHiveKdc = MiniHiveKdc.getMiniHiveKdc(hiveConf);
 miniHS2 = MiniHiveKdc.getMiniHS2WithKerb(miniHiveKdc, hiveConf);

http://git-wip-us.apache.org/repos/asf/hive/blob/c71ef4fe/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java
--
diff --git 
a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java 
b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java
index 50449e0..fbe6da4 100644
--- a/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java
+++ b/service/src/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java
@@ -114,8 +114,8 @@ public class ThriftHttpServlet extends TServlet {
 ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE, TimeUnit.SECONDS);
   this.cookieDomain = 
hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN);
   this.cookiePath = 
hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH);
-  this.isCookieSecure = hiveConf.getBoolVar(
-ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE);
+  // always send secure cookies for SSL mode
+  this.isCookieSecure = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL);
   this.isHttpOnlyCookie = hiveConf.getBoolVar(
 ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY);
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/c71ef4fe/service/src/test/org/apache/hive/service/cli/thrift/ThriftCliServiceTestWithCookie.java
--
diff --git 

[07/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index ceaac4f..636463b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00-12-31 23:59:59.9   3652060 
23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 4092911..ae59b06 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -73,7 +73,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -99,6 +99,10 @@ POSTHOOK: query: EXPLAIN SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -116,26 +120,61 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc_string
   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: to_unix_timestamp(ctimestamp1) (type: 
bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), 
day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), 
minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]

[24/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index bb6916b..9e591b8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -14,7 +14,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc
 POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -22,7 +22,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 FROM 
 (SELECT orcsrc.* FROM orcsrc sort by key) x
 JOIN
@@ -30,6 +30,10 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -49,40 +53,93 @@ STAGE PLANS:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 500 Data size: 88000 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
   alias: orcsrc
   Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: key (type: string), value (type: string)
   outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className: 

[42/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vector_interval_1.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_1.q 
b/ql/src/test/queries/clientpositive/vector_interval_1.q
index 8fefe41..f4f0024 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_1.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_1.q
@@ -1,8 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
-
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_1;
 create table vector_interval_1 (ts timestamp, dt date, str1 string, str2 
string) stored as orc;
@@ -13,7 +12,7 @@ insert into vector_interval_1
   select null, null, null, null from src limit 1;
 
 -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -28,7 +27,7 @@ from vector_interval_1 order by str1;
 
 
 -- interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   interval '1-2' year to month + interval '1-2' year to month,
@@ -49,7 +48,7 @@ select
   interval '1-2' year to month - interval_year_month(str1)
 from vector_interval_1 order by dt;
 
-explain
+explain vectorization expression
 select
   dt,
   interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -72,7 +71,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-interval arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt + interval '1-2' year to month,
@@ -107,7 +106,7 @@ from vector_interval_1 order by dt;
 
 
 -- timestamp-interval arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts + interval '1-2' year to month,
@@ -142,7 +141,7 @@ from vector_interval_1 order by ts;
 
 
 -- timestamp-timestamp arithmetic
-explain
+explain vectorization expression
 select
   ts,
   ts - ts,
@@ -159,7 +158,7 @@ from vector_interval_1 order by ts;
 
 
 -- date-date arithmetic
-explain
+explain vectorization expression
 select
   dt,
   dt - dt,
@@ -176,7 +175,7 @@ from vector_interval_1 order by dt;
 
 
 -- date-timestamp arithmetic
-explain
+explain vectorization expression
 select
   dt,
   ts - dt,

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vector_interval_2.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_interval_2.q 
b/ql/src/test/queries/clientpositive/vector_interval_2.q
index 5afb511..0b78a4b 100644
--- a/ql/src/test/queries/clientpositive/vector_interval_2.q
+++ b/ql/src/test/queries/clientpositive/vector_interval_2.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 drop table if exists vector_interval_2;
 create table vector_interval_2 (ts timestamp, dt date, str1 string, str2 
string, str3 string, str4 string) stored as orc;
@@ -14,7 +14,7 @@ insert into vector_interval_2
 
 -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -77,7 +77,7 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be false
@@ -128,7 +128,7 @@ select
   interval '1-2' year to month != interval_year_month(str1)
 from vector_interval_2 order by str1;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be true
@@ -191,7 +191,7 @@ select
   interval '1 2:3:4' day to second != interval_day_time(str4)
 from vector_interval_2 order by str3;
 
-explain
+explain vectorization expression
 select
   str3,
   -- Should all be false
@@ -244,7 +244,7 @@ from vector_interval_2 order by str3;
 
 
 -- interval expressions in predicates
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_year_month(str1) = interval_year_month(str1)
@@ -293,7 +293,7 @@ where
   and interval '1-3' year to month > interval_year_month(str1)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   interval_day_time(str3) = interval_day_time(str3)
@@ -342,7 +342,7 @@ where
   and interval '1 2:3:5' day to second > interval_day_time(str3)
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   date '2002-03-01' = dt + interval_year_month(str1)
@@ -381,7 +381,7 @@ where
   and dt != dt + interval '1-2' year to month
 order by ts;
 
-explain
+explain vectorization expression
 select ts from vector_interval_2
 where
   timestamp '2002-03-01 01:02:03' = ts + interval '1-2' year to 

[21/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out 
b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..1fde0a9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
@@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li 
on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -65,6 +69,10 @@ STAGE PLANS:
   value expressions: _col2 (type: int)
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 2 
 Map Operator Tree:
 TableScan
@@ -89,6 +97,10 @@ STAGE PLANS:
   Statistics: Num rows: 4 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 3 
 Map Operator Tree:
 TableScan
@@ -109,10 +121,27 @@ STAGE PLANS:
 Statistics: Num rows: 50 Data size: 200 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 4 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
+Group By Vectorization:
+className: VectorGroupByOperator
+vectorOutput: true
+keyExpressions: col 0
+native: false
+projectedOutputColumns: []
 keys: KEY._col0 (type: int)
 mode: mergepartial
 outputColumnNames: _col0
@@ -123,6 +152,10 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
+  Map Join Vectorization:
+  className: VectorMapJoinInnerLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
   outputColumnNames: _col2, _col4
   input vertices:
 0 Map 1
@@ -130,9 +163,16 @@ STAGE PLANS:
   Select Operator
 expressions: _col4 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+

[25/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
index 13a8b35..ab7a103 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_arithmetic.q.out
@@ -36,7 +36,7 @@ POSTHOOK: Lineage: interval_arithmetic_1.dateval EXPRESSION 
[(unique_timestamps)
 POSTHOOK: Lineage: interval_arithmetic_1.tsval SIMPLE 
[(unique_timestamps)unique_timestamps.FieldSchema(name:tsval, type:timestamp, 
comment:null), ]
 tsval  tsval
 PREHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -49,7 +49,7 @@ from interval_arithmetic_1
 order by dateval
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval year-month arithmetic
-explain
+explain vectorization expression
 select
   dateval,
   dateval - interval '2-2' year to month,
@@ -62,6 +62,10 @@ from interval_arithmetic_1
 order by dateval
 POSTHOOK: type: QUERY
 Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -79,26 +83,61 @@ STAGE PLANS:
 TableScan
   alias: interval_arithmetic_1
   Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: dateval (type: date), (dateval - 2-2) (type: 
date), (dateval - -2-2) (type: date), (dateval + 2-2) (type: date), (dateval + 
-2-2) (type: date), (-2-2 + dateval) (type: date), (2-2 + dateval) (type: date)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 2, 3, 4, 5, 6, 7]
+selectExpressions: 
DateColSubtractIntervalYearMonthScalar(col 0, val 2-2) -> 2:long, 
DateColSubtractIntervalYearMonthScalar(col 0, val -2-2) -> 3:long, 
DateColAddIntervalYearMonthScalar(col 0, val 2-2) -> 4:long, 
DateColAddIntervalYearMonthScalar(col 0, val -2-2) -> 5:long, 
IntervalYearMonthScalarAddDateColumn(val -2-2, col 0) -> 6:long, 
IntervalYearMonthScalarAddDateColumn(val 2-2, col 0) -> 7:long
 Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: date)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 50 Data size: 4800 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: date), _col2 (type: 
date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: 
date)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 
(type: date), 

[50/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
index a72b882..9114932 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
@@ -477,6 +477,11 @@ public class  extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
index 8b1c366..b56d451 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -152,4 +154,9 @@ public abstract class  extends VectorExpression {
 this.value = value;
   }
 
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", val " + new String(value, 
StandardCharsets.UTF_8);
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
index 930069c..4fb5035 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
@@ -18,6 +18,8 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -155,4 +157,10 @@ public abstract class  extends VectorExpression 
{
   public void setValue(byte[] value) {
 this.value = value;
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + 
colNum;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
index 4298d79..7863b16 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -154,6 +154,11 @@ public class  extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum + ", left " + leftValue.toString() + ", right " + 
rightValue.toString();
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
 

[48/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index 2162f17..3e4a195 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -131,4 +131,10 @@ public abstract class IfExprTimestampScalarColumnBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 707f574..5273131 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -117,4 +117,10 @@ public abstract class IfExprTimestampScalarScalarBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
+
+  @Override
+  public String vectorExpressionParameters() {
+return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index f19551e..2f6e7b9 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -107,6 +107,11 @@ public class IsNotNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 3169bae..583ab7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -105,6 +105,11 @@ public class IsNull extends VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 VectorExpressionDescriptor.Builder b = new 
VectorExpressionDescriptor.Builder();
 b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index 33f50e0..6fa9779 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -174,6 +174,11 @@ public class LongColDivideLongColumn extends 
VectorExpression {
   }
 
   @Override
+  public String vectorExpressionParameters() {
+return "col " + colNum1 + ", col " + colNum2;
+  }
+
+  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(


[38/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
index 437770d..1d4163c 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
@@ -87,25 +87,73 @@ POSTHOOK: Lineage: part_add_int_permute_select 
PARTITION(part=1).b SIMPLE [(valu
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num 
EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,a,b from part_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 vectorized, llap
-  File Output Operator [FS_4]
-Select Operator [SEL_3] (rows=2 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=2 width=16)
-
default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_add_int_permute_select
+  Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE 
Column stats: PARTIAL
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4]
+  Select Operator
+expressions: insert_num (type: int), part (type: int), a 
(type: int), b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 4, 1, 2]
+Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+groupByVectorOutput: true
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 4
+includeColumns: [0, 1, 2]
+dataColumns: insert_num:int, a:int, b:string, c:int
+partitionColumnCount: 1
+partitionColumns: part:int
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,part,a,b from part_add_int_permute_select
@@ -206,25 +254,73 @@ POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).c EXPRES
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d 
SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).insert_num EXPRESSION 

[30/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
index 882e83d..5d28d22 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
@@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN
 -- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
 -- FROM decimal_tbl_1_orc ORDER BY dec;
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT
   round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
   round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
   round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8)
 FROM decimal_tbl_1_orc ORDER BY d
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -75,26 +79,61 @@ STAGE PLANS:
 TableScan
   alias: decimal_tbl_1_orc
   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Select Operator
 expressions: round(dec) (type: decimal(21,0)), round(dec, 
0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) 
(type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) 
(type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) 
(type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) 
(type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) 
(type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13]
+selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 
1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) 
-> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 
0, decimalPlaces 3) -> 5:decimal(24,3), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 
6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -3) -> 8:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 
9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 
-5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -6) -> 11:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7)
  -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, 
decimalPlaces -8) -> 13:decimal(21,0)
 Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: decimal(21,0))
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: decimal(21,0)), _col2 
(type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: 
decimal(24,3)), _col5 (type: 

[33/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out 
b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index a510e38..ce05391 100644
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -97,14 +97,18 @@ POSTHOOK: Lineage: hundredorc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:
 POSTHOOK: Lineage: hundredorc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: hundredorc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT sum(hash(*))
 FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -155,6 +159,12 @@ STAGE PLANS:
   value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Map 3 
 Map Operator Tree:
 TableScan
@@ -175,16 +185,38 @@ STAGE PLANS:
 value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 
(type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: 
timestamp), _col9 (type: decimal(4,2))
 Execution mode: llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Predicate expression for FILTER operator: 
org.apache.hadoop.hive.ql.metadata.HiveException: No vector type for 
SelectColumnIsNotNull argument #0 type name Binary
+vectorized: false
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 aggregations: sum(VALUE._col0)
+Group By Vectorization:
+aggregators: VectorUDAFSumLong(col 0) -> bigint
+className: VectorGroupByOperator
+vectorOutput: true
+native: false
+projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -208,16 +240,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@hundredorc
  A masked pattern was here 
 -27832781952
-PREHOOK: query: EXPLAIN 
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN 
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT count(*), bin
 FROM hundredorc
 GROUP BY bin
 POSTHOOK: type: QUERY
+PLAN 

[32/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out 
b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
index c7897f7..2789664 100644
--- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
@@ -8,12 +8,16 @@ LIMIT 10
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT cdouble, cstring1, cint, cfloat, csmallint, coalesce(cdouble, 
cstring1, cint, cfloat, csmallint) as c
+EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat, 
csmallint, coalesce(cdouble, cstring1, cint, cfloat, csmallint) as c
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -21,53 +25,62 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
 Tez
- A masked pattern was here 
   Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
   Vertices:
 Map 1 
 Map Operator Tree:
-TableScan
-  alias: alltypesorc
-  Statistics: Num rows: 12288 Data size: 1045942 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Filter Operator
-predicate: cdouble is null (type: boolean)
-Statistics: Num rows: 3114 Data size: 265164 Basic stats: 
COMPLETE Column stats: COMPLETE
-Select Operator
-  expressions: cstring1 (type: string), cint (type: int), 
cfloat (type: float), csmallint (type: smallint), 
COALESCE(null,cstring1,cint,cfloat,csmallint) (type: string)
-  outputColumnNames: _col1, _col2, _col3, _col4, _col5
-  Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-key expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: float), _col4 (type: smallint), _col5 (type: string)
-sort order: +
-Statistics: Num rows: 3114 Data size: 819540 Basic 
stats: COMPLETE Column stats: COMPLETE
-TopN Hash Memory Usage: 0.1
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [6, 2, 4, 1, 16]
+  selectExpressions: VectorCoalesce(columns [12, 6, 
13, 14, 15])(children: ConstantVectorExpression(val null) -> 12:string, col 6, 
CastLongToString(col 2) -> 13:String, VectorUDFAdaptor(null(cfloat)) -> 
14:String, CastLongToString(col 1) -> 15:String) -> 16:string
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+

[41/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vector_outer_join6.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_outer_join6.q 
b/ql/src/test/queries/clientpositive/vector_outer_join6.q
index 06fa385..b39e8ed 100644
--- a/ql/src/test/queries/clientpositive/vector_outer_join6.q
+++ b/ql/src/test/queries/clientpositive/vector_outer_join6.q
@@ -3,6 +3,7 @@ set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.mapjoin.native.enabled=true;
 SET hive.auto.convert.join=true;
+set hive.fetch.task.conversion=none;
 
 -- SORT_QUERY_RESULTS
 
@@ -28,14 +29,14 @@ create table TJOIN2 stored as orc AS SELECT * FROM 
TJOIN2_txt;
 create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt;
 create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 
-explain
+explain vectorization detail formatted
 select tj1rnum, tj2rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
--
diff --git 
a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q 
b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
index f25374d..b825fb3 100644
--- a/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
+++ b/ql/src/test/queries/clientpositive/vector_partition_diff_num_cols.q
@@ -1,7 +1,7 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=minimal;
+set hive.fetch.task.conversion=none;
 
 create table inventory_txt
 (
@@ -27,7 +27,7 @@ partitioned by (par string) stored as orc;
 insert into table inventory_part_0 partition(par='1') select * from 
inventory_txt;
 insert into table inventory_part_0 partition(par='2') select * from 
inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_0;
 
 select sum(inv_quantity_on_hand) from inventory_part_0;
@@ -47,7 +47,7 @@ alter table inventory_part_1 add columns (fifthcol string);
 
 insert into table inventory_part_1 partition(par='5cols') select *, '5th' as 
fifthcol from inventory_txt;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_1;
 
 select sum(inv_quantity_on_hand) from inventory_part_1;
@@ -66,7 +66,7 @@ insert into table inventory_part_2a partition(par='1') select 
* from inventory_t
 insert into table inventory_part_2a partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_2a partition (par='2') change inv_item_sk 
other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2a;
 
 create table inventory_part_2b(
@@ -80,7 +80,7 @@ insert into table inventory_part_2b 
partition(par1='1',par2=4) select * from inv
 insert into table inventory_part_2b partition(par1='2',par2=3) select * from 
inventory_txt;
 alter table inventory_part_2b partition (par1='2',par2=3) change 
inv_quantity_on_hand other_name int;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_2b;
 
 -- Verify we do not vectorize when a partition column type is different.
@@ -97,5 +97,5 @@ insert into table inventory_part_3 partition(par='1') select 
* from inventory_tx
 insert into table inventory_part_3 partition(par='2') select * from 
inventory_txt;
 alter table inventory_part_3 partition (par='2') change inv_warehouse_sk 
inv_warehouse_sk bigint;
 
-explain
+explain vectorization expression
 select sum(inv_quantity_on_hand) from inventory_part_3;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q 
b/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
index f53d8c0..ee22c01 100644
--- a/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
+++ 

[01/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
Repository: hive
Updated Branches:
  refs/heads/hive-14535 eacf9f9b6 -> edaebb4b2


http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 22fe7cd..b297a7d 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -1,7 +1,7 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
MAX(ctinyint),
COUNT(ctinyint),
@@ -12,7 +12,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
 -- Use ORDER BY clauses to generate 2 stages.
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT MIN(ctinyint) as c1,
MAX(ctinyint),
COUNT(ctinyint),
@@ -20,6 +20,10 @@ SELECT MIN(ctinyint) as c1,
 FROM   alltypesorc
 ORDER BY c1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -37,42 +41,100 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Select Operator
 expressions: ctinyint (type: tinyint)
 outputColumnNames: ctinyint
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0]
 Statistics: Num rows: 12288 Data size: 377237 Basic stats: 
COMPLETE Column stats: NONE
 Group By Operator
   aggregations: min(ctinyint), max(ctinyint), 
count(ctinyint), count()
+  Group By Vectorization:
+  aggregators: VectorUDAFMinLong(col 0) -> tinyint, 
VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, 
VectorUDAFCountStar(*) -> bigint
+  className: VectorGroupByOperator
+  vectorOutput: true
+  native: false
+  projectedOutputColumns: [0, 1, 2, 3]
   mode: hash
   outputColumnNames: _col0, _col1, _col2, _col3
   Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
   Reduce Output Operator
 sort order: 
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 aggregations: min(VALUE._col0), max(VALUE._col1), 
count(VALUE._col2), count(VALUE._col3)
+Group By Vectorization:
+aggregators: VectorUDAFMinLong(col 0) -> tinyint, 

[39/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
index 0a01b8c..1511298 100644
--- 
a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
@@ -149,25 +149,55 @@ POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).b SIMPL
 POSTHOOK: Lineage: part_change_various_various_struct1 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_change_various_various_struct1 PARTITION(part=1).s1 
SIMPLE [(complex_struct1_c_txt)complex_struct1_c_txt.FieldSchema(name:s1, 
type:struct,
 comment:null), ]
 complex_struct1_c_txt.insert_num   complex_struct1_c_txt.s1
complex_struct1_c_txt.b
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select insert_num,part,s1,b from part_change_various_various_struct1
 POSTHOOK: type: QUERY
 Explain
-Plan optimized by CBO.
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
-Stage-0
-  Fetch Operator
-limit:-1
-Stage-1
-  Map 1 llap
-  File Output Operator [FS_2]
-Select Operator [SEL_1] (rows=6 width=4)
-  Output:["_col0","_col1","_col2","_col3"]
-  TableScan [TS_0] (rows=6 width=789)
-
default@part_change_various_various_struct1,part_change_various_various_struct1,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","s1","b"]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: part_change_various_various_struct1
+  Statistics: Num rows: 6 Data size: 4734 Basic stats: 
COMPLETE Column stats: PARTIAL
+  Select Operator
+expressions: insert_num (type: int), part (type: int), s1 
(type: 
struct),
 b (type: string)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 6 Data size: 24 Basic stats: 
COMPLETE Column stats: PARTIAL
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: llap
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+notVectorizedReason: Select expression for SELECT operator: 
Data type 
struct
 of Column[s1] not supported
+vectorized: false
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
 
 PREHOOK: query: select insert_num,part,s1,b from 
part_change_various_various_struct1
 PREHOOK: type: QUERY
@@ -413,25 +443,55 @@ POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).b SIMPLE [
 POSTHOOK: Lineage: part_add_various_various_struct2 
PARTITION(part=1).insert_num SIMPLE 
[(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:insert_num, 
type:int, comment:null), ]
 POSTHOOK: Lineage: part_add_various_various_struct2 PARTITION(part=1).s2 
SIMPLE [(complex_struct2_d_txt)complex_struct2_d_txt.FieldSchema(name:s2, 
type:struct,
 comment:null), ]
 

[49/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
index 4f5ba9a..061e396 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSourceMapping.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOrderedMap.Mapping;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 /**
  * This class collects column information for copying a row from one 
VectorizedRowBatch to
@@ -35,9 +36,9 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
   }
 
   @Override
-  public void add(int sourceColumn, int outputColumn, String typeName) {
+  public void add(int sourceColumn, int outputColumn, TypeInfo typeInfo) {
 // Order on sourceColumn.
-vectorColumnMapping.add(sourceColumn, outputColumn, typeName);
+vectorColumnMapping.add(sourceColumn, outputColumn, typeInfo);
   }
 
   @Override
@@ -47,7 +48,7 @@ public class VectorColumnSourceMapping extends 
VectorColumnMapping {
 // Ordered columns are the source columns.
 sourceColumns = mapping.getOrderedColumns();
 outputColumns = mapping.getValueColumns();
-typeNames = mapping.getTypeNames();
+typeInfos = mapping.getTypeInfos();
 
 // Not needed anymore.
 vectorColumnMapping = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
index c8e0284..911aeb0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java
@@ -262,8 +262,7 @@ public class VectorCopyRow {
 for (int i = 0; i < count; i++) {
   int inputColumn = columnMapping.getInputColumns()[i];
   int outputColumn = columnMapping.getOutputColumns()[i];
-  String typeName = columnMapping.getTypeNames()[i].toLowerCase();
-  TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+  TypeInfo typeInfo = columnMapping.getTypeInfos()[i];
   Type columnVectorType = 
VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
 
   CopyRow copyRowByValue = null;

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 261246b..bfe22b0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -50,9 +51,8 @@ public class VectorFilterOperator extends FilterOperator {
   public VectorFilterOperator(CompilationOpContext ctx,
   VectorizationContext vContext, OperatorDesc conf) throws HiveException {
 this(ctx);
-ExprNodeDesc oldExpression = ((FilterDesc) conf).getPredicate();
-conditionEvaluator = vContext.getVectorExpression(oldExpression, 
VectorExpressionDescriptor.Mode.FILTER);
 this.conf = (FilterDesc) conf;
+conditionEvaluator = ((VectorFilterDesc) 
this.conf.getVectorDesc()).getPredicateExpression();
   }
 
   /** Kryo ctor. */

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 2605203..fef7c2a 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import 

[28/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index ca07200..d9e701a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -55,12 +59,23 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -68,6 +83,13 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
+Map Join Vectorization:
+bigTableKeyColumns: [0]
+bigTableRetainedColumns: [0]
+className: VectorMapJoinInnerBigOnlyLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+projectedOutputColumns: [0]
 outputColumnNames: _col1
 input vertices:
   1 Map 2
@@ -75,9 +97,16 @@ STAGE PLANS:
 Select Operator
   expressions: _col1 (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -85,25 +114,66 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+

[26/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
index 23a977e..61702bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_2.q.out
@@ -44,7 +44,7 @@ POSTHOOK: Lineage: vector_interval_2.str4 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_2.ts EXPRESSION []
 PREHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -78,7 +78,7 @@ from vector_interval_2 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- interval comparisons in select clause
 
-explain
+explain vectorization expression
 select
   str1,
   -- Should all be true
@@ -110,6 +110,10 @@ select
   interval '1-2' year to month != interval_year_month(str2)
 from vector_interval_2 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -127,26 +131,61 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_2
   Statistics: Num rows: 2 Data size: 788 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: str1 (type: string), (CAST( str1 AS INTERVAL 
YEAR TO MONTH) = CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( 
str1 AS INTERVAL YEAR TO MONTH) <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) <= CAST( str2 AS 
INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) > CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO 
MONTH) <> CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (CAST( str1 
AS INTERVAL YEAR TO MONTH) = 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR 
TO MONTH) <= 1-2) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MON
 TH) <= 1-3) (type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) < 1-3) 
(type: boolean), (CAST( str1 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: 
boolean), (CAST( str2 AS INTERVAL YEAR TO MONTH) >= 1-2) (type: boolean), 
(CAST( str2 AS INTERVAL YEAR TO MONTH) > 1-2) (type: boolean), (CAST( str1 AS 
INTERVAL YEAR TO MONTH) <> 1-3) (type: boolean), (1-2 = CAST( str1 AS INTERVAL 
YEAR TO MONTH)) (type: boolean), (1-2 <= CAST( str1 AS INTERVAL YEAR TO MONTH)) 
(type: boolean), (1-2 <= CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: 
boolean), (1-2 < CAST( str2 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-2 
>= CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 >= CAST( str1 
AS INTERVAL YEAR TO MONTH)) (type: boolean), (1-3 > CAST( str1 AS INTERVAL YEAR 
TO MONTH)) (type: boolean), (1-2 <> CAST( str2 AS INTERVAL YEAR TO MONTH)) 
(type: boolean)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, 
_col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]
+selectExpressions: LongColEqualLongColumn(col 6, col 
7)(children: CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 9:long, 
LongColLessEqualLongColumn(col 6, col 7)(children: 
CastStringToIntervalYearMonth(col 2) -> 6:interval_year_month, 
CastStringToIntervalYearMonth(col 3) -> 7:interval_year_month) -> 10:long, 
LongColLessLongColumn(col 6, col 7)(children: CastStringToIntervalYearMonth(col 
2) -> 6:interval_year_month, CastStringToIntervalYearMonth(col 3) -> 
7:interval_year_month) -> 11:long, LongColGreaterEqualLongColumn(col 6, col 
7)(children: 

[27/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index d8003ba..e7d1963 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -39,7 +39,7 @@ POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
 POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
 PREHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
@@ -47,13 +47,17 @@ select
 from vector_interval_1 order by str1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- constants/cast from string
-explain
+explain vectorization expression
 select
   str1,
   interval '1-2' year to month, interval_year_month(str1),
   interval '1 2:3:4' day to second, interval_day_time(str2)
 from vector_interval_1 order by str1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,26 +75,62 @@ STAGE PLANS:
 TableScan
   alias: vector_interval_1
   Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Select Operator
 expressions: str1 (type: string), CAST( str1 AS INTERVAL 
YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO 
SECOND) (type: interval_day_time)
 outputColumnNames: _col0, _col2, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 4, 5]
+selectExpressions: CastStringToIntervalYearMonth(col 
2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 
5:interval_day_time
 Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: string)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2 Data size: 442 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col2 (type: interval_year_month), 
_col4 (type: interval_day_time)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: 
interval_year_month), VALUE._col0 (type: interval_year_month), 1 
02:03:04.0 (type: interval_day_time), VALUE._col1 (type: 
interval_day_time)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 3, 1, 4, 2]
+selectExpressions: ConstantVectorExpression(val 14) -> 
3:long, 

[08/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 996b893..423fdbf 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -120,7 +120,7 @@ POSTHOOK: Lineage: part_orc.p_size SIMPLE 
[(part_staging)part_staging.FieldSchem
 POSTHOOK: Lineage: part_orc.p_type SIMPLE 
[(part_staging)part_staging.FieldSchema(name:p_type, type:string, 
comment:null), ]
 PREHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -132,7 +132,7 @@ from noop(on part_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: --1. test1
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name, p_size,
 rank() over (partition by p_mfgr order by p_name) as r,
 dense_rank() over (partition by p_mfgr order by p_name) as dr,
@@ -142,6 +142,10 @@ from noop(on part_orc
   order by p_name
   )
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -172,6 +176,14 @@ STAGE PLANS:
 auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Path -> Alias:
  A masked pattern was here 
 Path -> Partition:
@@ -224,6 +236,11 @@ STAGE PLANS:
 Reducer 2 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -255,6 +272,11 @@ STAGE PLANS:
 Reducer 3 
 Execution mode: llap
 Needs Tagging: false
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+notVectorizedReason: PTF Operator (PTF) not supported
+vectorized: false
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: 
double)
@@ -377,7 +399,7 @@ Manufacturer#5  almond aquamarine dodger light 
gainsboro46  4   4   6208.18
 Manufacturer#5 almond azure blanched chiffon midnight  23  5   5   
7672.66
 PREHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
@@ -386,13 +408,17 @@ sort by j.p_name)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- 2. testJoinWithNoop
 
-explain extended
+explain vectorization extended
 select p_mfgr, p_name,
 p_size, p_size - lag(p_size,1,p_size) over (partition by p_mfgr order by 
p_name) as deltaSz
 from noop (on (select p1.* from part_orc p1 join part_orc p2 on p1.p_partkey = 
p2.p_partkey) j
 distribute by j.p_mfgr
 sort by j.p_name)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -428,6 +454,14 @@ STAGE PLANS:
   auto parallelism: true
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true

[17/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
index 9369661..9a95606 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
@@ -126,113 +126,15 @@ POSTHOOK: Output: default@TJOIN4
 POSTHOOK: Lineage: tjoin4.c1 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin4.c2 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin4.rnum SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: tjoin1
-  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-expressions: rnum (type: int), c1 (type: int)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 3 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col1 (type: int)
-1 _col1 (type: int)
-  outputColumnNames: _col0, _col2, _col3
-  input vertices:
-1 Map 2
-  Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col2 (type: int), 
_col3 (type: int)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col2 (type: int)
-1 _col1 (type: int)
-  outputColumnNames: _col0, _col1, _col3
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 4 Data size: 449 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: _col0 (type: int), _col1 (type: int), 
_col3 (type: int)
-outputColumnNames: _col0, _col1, _col2
-Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 2 
-Map Operator Tree:
-TableScan
-  alias: tjoin2
-  Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE 
Column stats: NONE
-  Select Operator
-expressions: rnum (type: int), c1 (type: int)
-outputColumnNames: _col0, _col1
-

[12/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index a14d515..d6c405e 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -30,7 +30,8 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -98,7 +99,8 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN SELECT AVG(cint),
+EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -134,6 +136,10 @@ WHERE  ((762 = cbigint)
 AND ((79.553 != cint)
  AND (cboolean2 != cboolean1)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -151,15 +157,33 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2601650 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprOrExpr(children: 
FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 
4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, 
FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: 
CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, 
FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val 
-1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, 
FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, 
FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: 
CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, 
FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) 
 -> boolean) -> boolean
 predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < 
cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> 
UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= 
-1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and 
(cboolean2 <> cboolean1))) (type: boolean)
 Statistics: Num rows: 5466 Data size: 1157380 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int), cdouble (type: double), 
csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
   outputColumnNames: cint, cdouble, csmallint, cfloat, 
ctinyint
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2, 5, 1, 4, 0]
   Statistics: Num rows: 5466 Data size: 1157380 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
+Group By Vectorization:
+aggregators: VectorUDAFAvgLong(col 2) -> 
struct, VectorUDAFSumDouble(col 5) -> double, 
VectorUDAFStdPopLong(col 2) -> struct, 
VectorUDAFStdSampLong(col 1) -> 
struct, VectorUDAFVarSampLong(col 2) 
-> 

[09/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 1bab6f7..a7c0d10 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -1,15 +1,19 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -28,12 +32,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 2) -> 
boolean
 predicate: cint is not null (type: boolean)
 Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2]
   Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
   Map Join Operator
 condition map:
@@ -41,6 +56,10 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
+Map Join Vectorization:
+className: VectorMapJoinInnerBigOnlyLongOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
 outputColumnNames: _col0, _col1
 input vertices:
   1 Map 3
@@ -48,9 +67,21 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), 
(_col0 + _col1) (type: int)
   outputColumnNames: _col0, _col1, _col2
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [2, 2, 12]
+  selectExpressions: LongColAddLongColumn(col 2, 
col 2) -> 12:long
   Statistics: Num rows: 19518 Data size: 156144 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: count(_col0), max(_col1), 
min(_col0), avg(_col2)
+Group By Vectorization:
+aggregators: VectorUDAFCount(col 2) -> bigint, 
VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, 
VectorUDAFAvgLong(col 12) -> struct
+className: VectorGroupByOperator
+vectorOutput: false
+native: false
+projectedOutputColumns: [0, 1, 2, 3]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 12) -> struct output type 

[19/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
index b926ab4b..aa8ed4a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nvl.q.out
@@ -1,31 +1,82 @@
-PREHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, nvl(cdouble, 
100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble, nvl(cdouble, 100) as n
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cdouble, 
nvl(cdouble, 100) as n
 FROM alltypesorc
 WHERE (cdouble IS NULL)
 LIMIT 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: alltypesorc
+  Statistics: Num rows: 12288 Data size: 73400 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
+  Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNull(col 5) -> 
boolean
+predicate: cdouble is null (type: boolean)
+Statistics: Num rows: 3114 Data size: 18608 Basic stats: 
COMPLETE Column stats: COMPLETE
+Select Operator
+  expressions: null (type: double), 100.0 (type: double)
+  outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [12, 13]
+  selectExpressions: ConstantVectorExpression(val 
null) -> 12:double, ConstantVectorExpression(val 100.0) -> 13:double
+  Statistics: Num rows: 3114 Data size: 24920 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Limit
+Number of rows: 10
+Limit Vectorization:
+className: VectorLimitOperator
+native: true
+Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
+File Output Operator
+  compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
+  Statistics: Num rows: 10 Data size: 88 Basic stats: 
COMPLETE Column stats: COMPLETE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
+
   Stage: Stage-0
 Fetch Operator
   limit: 10
   Processor Tree:
-TableScan
-  alias: alltypesorc
-  Filter Operator
-predicate: cdouble is null (type: boolean)
-Select Operator
-  expressions: null (type: double), 100.0 (type: double)
-  outputColumnNames: _col0, _col1
-  Limit
-Number of rows: 10
-ListSink
+ListSink
 
 PREHOOK: query: SELECT cdouble, nvl(cdouble, 100) as n
 FROM alltypesorc
@@ -51,30 +102,76 @@ NULL   100.0
 NULL   100.0
 NULL   100.0
 NULL   100.0
-PREHOOK: query: EXPLAIN SELECT cfloat, nvl(cfloat, 1) as n
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT cfloat, nvl(cfloat, 
1) as n
 FROM 

[14/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out 
b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index edb67f1..911a962 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -45,16 +45,20 @@ POSTHOOK: Input: default@src
 0  val_0
 10 val_10
 100val_100
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key asc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -84,8 +88,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -148,16 +167,20 @@ POSTHOOK: Input: default@src
 97 val_97
 97 val_97
 96 val_96
-PREHOOK: query: explain select key, value
+PREHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select key, value
+POSTHOOK: query: explain vectorization select key, value
 from varchar_2
 order by key desc
 limit 5
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -187,8 +210,23 @@ STAGE PLANS:
   value expressions: _col1 (type: varchar(20))
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: varchar(20))
@@ -254,12 +292,16 @@ create table varchar_3 (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@varchar_3
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 insert into table varchar_3 select cint from alltypesorc limit 10
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -279,36 +321,81 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: 

[05/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out 
b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
index dbaf14d..79638c1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE 
[(over1k)over1k.FieldSchema(name:s, type:s
 POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, 
type:smallint, comment:null), ]
 POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, 
type:tinyint, comment:null), ]
 POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, 
type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: false
+  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -189,10 +193,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over1korc
  A masked pattern was here 
 -17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, 
s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM 
over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, 
bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -209,29 +217,66 @@ STAGE PLANS:
 TableScan
   alias: over1korc
   Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
   Select Operator
 expressions: t (type: tinyint), si (type: smallint), i 
(type: int), b (type: bigint), f (type: float), d (type: double), bo (type: 
boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin 
(type: binary)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]
 Statistics: Num rows: 1049 Data size: 311170 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: int)
   sort order: +++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 1049 Data size: 311170 Basic 
stats: COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col3 (type: bigint), _col4 (type: 
float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), 
_col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 

[06/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index fbb43c4..7d722d0 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION 
[(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,27 +36,65 @@ STAGE PLANS:
 TableScan
   alias: decimal_date_test
   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterLongColumnInList(col 3, 
values [-67, -171]) -> boolean
 predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: 
boolean)
 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: cdate (type: date)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [3]
   Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: date)
 sort order: +
+Reduce Sink Vectorization:
+className: VectorReduceSinkOperator
+native: false
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: 

[15/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
index 4c252c7..0bab7bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce1.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE 
[(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select b from vectortab2korc order by b
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,25 +132,59 @@ STAGE PLANS:
 TableScan
   alias: vectortab2korc
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12]
   Select Operator
 expressions: b (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [3]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col0 (type: bigint)
   sort order: +
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: Uniform Hash IS false
   Statistics: Num rows: 2000 Data size: 918712 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey0 (type: bigint)
 outputColumnNames: _col0
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0]
 Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
+  File Sink Vectorization:
+  className: VectorFileSinkOperator
+  native: false
   Statistics: Num rows: 2000 Data size: 918712 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_reduce2.q.out

[23/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
index 16603c7..c21da5f 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out
@@ -97,9 +97,9 @@ POSTHOOK: type: SHOWPARTITIONS
 POSTHOOK: Input: default@char_tbl2
 gpa=3
 gpa=3.5  
-PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+PREHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, c2.name, 
c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, 
c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
+POSTHOOK: query: explain vectorization select c1.name, c1.age, c1.gpa, 
c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa)
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 

http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index b9ffa34..25066be 100644
--- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain 
+PREHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -7,7 +7,7 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain 
+POSTHOOK: query: explain vectorization 
 select count(*) from (select c.ctinyint 
 from alltypesorc c
 left outer join alltypesorc cd
@@ -16,6 +16,10 @@ left outer join alltypesorc hd
   on hd.ctinyint = c.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -68,6 +72,14 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
@@ -84,6 +96,14 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 4 
 Map Operator Tree:
 TableScan
@@ -100,8 +120,23 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 36696 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true

[13/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 8cf503f..f0d2a50 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -1,6 +1,6 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -35,7 +35,7 @@ LIMIT 40
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cboolean1,
  ctinyint,
  ctimestamp1,
@@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, 
c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -86,15 +90,34 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2028982 Basic stats: 
COMPLETE Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: FilterExprOrExpr(children: 
FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) 
-> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> 
boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 
11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: 
CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
 predicate: (((cfloat < 3569) and (10.175 >= cdouble) and 
(cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and 
(UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 
9763215.5639))) (type: boolean)
 Statistics: Num rows: 5461 Data size: 901772 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)
   outputColumnNames: cboolean1, ctinyint, ctimestamp1, 
cfloat, cstring1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [10, 0, 8, 4, 6]
   Statistics: Num rows: 5461 Data size: 901772 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: max(ctinyint), sum(cfloat), 
stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+Group By Vectorization:
+aggregators: VectorUDAFMaxLong(col 0) -> tinyint, 
VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> 
struct, VectorUDAFStdPopLong(col 0) -> 
struct, VectorUDAFMaxDouble(col 4) -> 
float, VectorUDAFMinLong(col 0) -> tinyint
+className: VectorGroupByOperator
+vectorOutput: false
+keyExpressions: col 10, col 0, col 8, col 4, col 6
+native: false
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+vectorOutputConditionsNotMet: Vector output of 
VectorUDAFStdPopDouble(col 4) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> 
struct<count:bigint,sum:double,variance:double> output type STRUCT requires 
PRIMITIVE IS false
 keys: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: 
string)
 

[02/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index bd9b852..eb61044 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,105 +66,21 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE 
STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
  A masked pattern was here 
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-2
-Spark
- A masked pattern was here 
-  Vertices:
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: st
-  Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
-Spark HashTable Sink Operator
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-
-  Stage: Stage-1
-Spark
-  Edges:
-Reducer 2 <- Map 1 (GROUP, 1)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: s
-  Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-  Select Operator
-expressions: ctinyint (type: tinyint)
-outputColumnNames: _col0
-Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: tinyint)
-1 _col0 (type: tinyint)
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 6663 Data size: 2229 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-aggregations: count()
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  sort order: 
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
-  value expressions: _col0 (type: bigint)
-Execution mode: vectorized
-Local Work:
-  Map Reduce Local Work
-Reducer 2 
-Execution mode: vectorized
-Reduce Operator Tree:
-  Group By Operator
-aggregations: count(VALUE._col0)
-mode: mergepartial
-outputColumnNames: _col0
-Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
-
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -184,111 +100,21 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
  A masked pattern was here 
 6876
-PREHOOK: query: explain
+PREHOOK: 

[11/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 6c6c6d6..14606ed 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,12 +94,16 @@ POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@vsmb_bucket_txt
 POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
 POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, 
comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -117,33 +121,71 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
   Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 

[18/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
index 5729237..fbd294e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out
@@ -226,7 +226,7 @@ NULLNULL-850295959  -1887561756 NULL
NULLWMIgGA734hA4KQj2vD3fI6gX82220d  NULL
 NULL   NULL-886426182  -1887561756 NULLNULL
0i88xYq3gx1nW4vKjp7vBp3 4hA4KQj2vD3fI6gX82220d  NULL1969-12-31 16:00:04.472 
truefalse
 NULL   NULL-89947  -1645852809 NULLNULL73xdw4X 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:07.395 false   false
 NULL   NULL-971543377  -1645852809 NULLNULLuN803aW 
xH7445Rals48VOulSyR5F   NULL1969-12-31 16:00:05.43  false   false
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -235,7 +235,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail formatted
 select count(*) from (select c.cstring1 
 from small_alltypesorc_a c
 left outer join small_alltypesorc_a cd
@@ -244,112 +244,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
  A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: c
-  Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int), cstring1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 20 Data size: 1023 Basic stats: 
COMPLETE Column stats: COMPLETE
-Map Join Operator
-  condition map:
-   Left Outer Join0 to 1
-  keys:
-0 _col0 (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 40 Data size: 3560 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Map Join Operator
-condition map:
- Left Outer Join0 to 1
-keys:
-  0 _col1 (type: string)
-  1 _col0 (type: string)
-input vertices:
-  1 Map 4
-Statistics: Num rows: 80 Data size: 640 Basic stats: 
COMPLETE Column stats: COMPLETE
-Group By Operator
-  aggregations: count()
-  mode: hash
-  outputColumnNames: _col0
-  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-  Reduce Output Operator
-sort order: 
-Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
-value expressions: _col0 (type: bigint)
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: cd
-  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE 
Column stats: COMPLETE
-  Select Operator
-expressions: cint (type: int)
-outputColumnNames: _col0
-Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 20 Data size: 44 Basic stats: 
COMPLETE Column stats: COMPLETE
-Execution mode: vectorized, llap
-LLAP IO: all inputs
-Map 4 
-Map Operator Tree:

[03/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
index b311c49..d1319b8 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out
@@ -62,12 +62,16 @@ POSTHOOK: Input: default@orc_table_2
 4  FOUR
 NULL   
 NULL   
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.v1, t1.a, t2.c, t2.v2 from orc_table_1 t1 left outer join 
orc_table_2 t2 on t1.a = t2.c
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -83,15 +87,38 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: c (type: int), v2 (type: string)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 550 Basic stats: 
COMPLETE Column stats: NONE
 Spark HashTable Sink Operator
+  Spark Hash Table Sink Vectorization:
+  className: VectorSparkHashTableSinkOperator
+  native: true
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 2
+includeColumns: [0, 1]
+dataColumns: c:int, v2:string
+partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -104,9 +131,16 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: v1 (type: string), a (type: int)
 outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1]
 Statistics: Num rows: 6 Data size: 544 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -114,18 +148,45 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col0 (type: int)
+  Map Join Vectorization:
+  bigTableKeyColumns: [1]
+  bigTableOuterKeyMapping: 1 -> 2
+  bigTableRetainedColumns: [0, 1, 2]
+  bigTableValueColumns: [0, 1]
+  className: VectorMapJoinOuterLongOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+  projectedOutputColumns: [0, 1, 2, 3]
+  smallTableMapping: [3]
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 

[22/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
index 69911f5..f3ffee8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
@@ -132,91 +132,17 @@ POSTHOOK: query: select * from t4
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t4
  A masked pattern was here 
-PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-PREHOOK: type: QUERY
-POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
-POSTHOOK: type: QUERY
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+PREHOOK: query: explain vectorization only summary
 
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Edges:
-Map 1 <- Map 3 (BROADCAST_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE)
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: a
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Map Join Operator
-  condition map:
-   Left Semi Join 0 to 1
-  keys:
-0 key (type: int)
-1 _col0 (type: int)
-  outputColumnNames: _col0, _col1
-  input vertices:
-1 Map 3
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  Reduce Output Operator
-key expressions: _col0 (type: int), _col1 (type: 
string)
-sort order: ++
-Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Map 3 
-Map Operator Tree:
-TableScan
-  alias: b
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Filter Operator
-predicate: key is not null (type: boolean)
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Select Operator
-  expressions: key (type: int)
-  outputColumnNames: _col0
-  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-  Group By Operator
-keys: _col0 (type: int)
-mode: hash
-outputColumnNames: _col0
-Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
-Reduce Output Operator
-  key expressions: _col0 (type: int)
-  sort order: +
-  Map-reduce partition columns: _col0 (type: int)
-  Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE
-Execution mode: llap
-LLAP IO: all inputs
-Reducer 2 
-Execution mode: llap
-Reduce Operator Tree:
-  Select Operator
-expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE
-File Output Operator
-  compressed: false
-  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization only summary
 
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
+select * from t1 a left semi join t2 b on 

[04/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out 
b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
index 511bd79..ef19bad 100644
--- a/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_inner_join.q.out
@@ -32,12 +32,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@orc_table_2a
 POSTHOOK: Lineage: orc_table_2a.c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization detail
 select t1.a from orc_table_2a t2 join orc_table_1a t1 on t1.a = t2.c where 
t1.a > 2
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
@@ -53,18 +57,45 @@ STAGE PLANS:
 TableScan
   alias: t2
   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (c > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: c (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 3 Basic stats: 
COMPLETE Column stats: NONE
   Spark HashTable Sink Operator
+Spark Hash Table Sink Vectorization:
+className: VectorSparkHashTableSinkOperator
+native: true
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
 Execution mode: vectorized
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+rowBatchContext:
+dataColumnCount: 1
+includeColumns: [0]
+dataColumns: c:int
+partitionColumnCount: 0
 Local Work:
   Map Reduce Local Work
 
@@ -77,12 +108,23 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0]
   Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicateExpression: 
FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
 predicate: (a > 2) (type: boolean)
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: NONE
 Select Operator
   expressions: a (type: int)
   outputColumnNames: _col0
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0]
   Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: NONE
   Map Join Operator
 condition map:
@@ -90,6 +132,14 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   

[10/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 76c8404..c2e1dfd 100644
--- 
a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -34,10 +34,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
  A masked pattern was here 
 11
 12
-PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+PREHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as 
`date` from srcpart group by ds
+POSTHOOK: query: EXPLAIN VECTORIZATION create table srcpart_date as select ds 
as ds, ds as `date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -74,8 +78,19 @@ STAGE PLANS:
 Statistics: Num rows: 2 Data size: 368 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Group By Operator
 keys: KEY._col0 (type: string)
@@ -199,11 +214,15 @@ POSTHOOK: Output: default@srcpart_double_hour
 POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION 
[(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
+EXPLAIN VECTORIZATION select count(*) from srcpart join srcpart_date on 
(srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -234,6 +253,10 @@ STAGE PLANS:
   Statistics: Num rows: 2000 Data size: 368000 Basic 
stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: false
+enabledConditionsNotMet: 
hive.vectorized.use.vector.serde.deserialize IS false
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
 Map 4 
 Map Operator Tree:
 TableScan
@@ -269,6 +292,14 @@ STAGE PLANS:
 Target Vertex: Map 1
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: llap
 Reduce Operator Tree:
@@ -290,6 +321,13 @@ STAGE PLANS:
 value expressions: _col0 (type: bigint)
 Reducer 3 
 Execution mode: vectorized, llap
+   

[20/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
index 06e30d8..cf90430 100644
--- a/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out
@@ -49,11 +49,15 @@ POSTHOOK: Output: default@myinput1
 POSTHOOK: Lineage: myinput1.key SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:key, type:int, comment:null), ]
 POSTHOOK: Lineage: myinput1.value SIMPLE 
[(myinput1_txt)myinput1_txt.FieldSchema(name:value, type:int, comment:null), ]
 PREHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 PREHOOK: type: QUERY
 POSTHOOK: query: -- merging
-explain select * from myinput1 a join myinput1 b on a.key<=>b.value
+explain vectorization expression select * from myinput1 a join myinput1 b on 
a.key<=>b.value
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,12 +75,20 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Map Join Operator
 condition map:
  Inner Join 0 to 1
 keys:
   0 key (type: int)
   1 value (type: int)
+Map Join Vectorization:
+className: VectorMapJoinOperator
+native: false
+nativeConditionsMet: hive.execution.engine tez IN 
[tez, spark] IS true, One MapJoin Condition IS true, Supports Key Types IS 
true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash 
Join IS true, Small table vectorizes IS true
+nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false
 nullSafes: [true]
 outputColumnNames: _col0, _col1, _col5, _col6
 input vertices:
@@ -85,9 +97,16 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), _col5 
(type: int), _col6 (type: int)
   outputColumnNames: _col0, _col1, _col2, _col3
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3]
   Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 6 Data size: 26 Basic stats: 
COMPLETE Column stats: NONE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -95,19 +114,42 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 2 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1]
   Reduce Output Operator
 key expressions: value (type: int)
 sort order: +
 Map-reduce partition columns: value (type: int)
+Reduce Sink Vectorization:
+className: VectorReduceSinkLongOperator
+native: 

[16/67] [abbrv] [partial] hive git commit: Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)""

2016-10-17 Thread sershe
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out 
b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 9eeb0d6..26fa9d9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -256,12 +256,16 @@ POSTHOOK: Input: default@flights_tiny_orc
 2010-10-29 12
 2010-10-30 11
 2010-10-31 8
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -280,46 +284,102 @@ STAGE PLANS:
 TableScan
   alias: flights_tiny_orc
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: origin_city_name (type: string), 
dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), 
arr_delay (type: float), fl_num (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [0, 1, 2, 3, 4, 5]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col5 (type: int), _col2 (type: date)
   sort order: ++
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkOperator
+  native: false
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
+  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: timestamp), _col4 (type: float)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+groupByVectorOutput: true
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
+Reduce Vectorization:
+enabled: true
+enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
+groupByVectorOutput: true
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), 
VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+projectedOutputColumns: [2, 3, 1, 4, 5, 0]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Limit
   Number of rows: 25
+  Limit Vectorization:
+  className: VectorLimitOperator
+  native: true
   Statistics: Num rows: 25 Data size: 7200 

[16/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out 
b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 26fa9d9..9eeb0d6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -256,16 +256,12 @@ POSTHOOK: Input: default@flights_tiny_orc
 2010-10-29 12
 2010-10-30 11
 2010-10-31 8
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 select * from flights_tiny_orc sort by fl_num, fl_date limit 25
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -284,102 +280,46 @@ STAGE PLANS:
 TableScan
   alias: flights_tiny_orc
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4, 5]
   Select Operator
 expressions: origin_city_name (type: string), 
dest_city_name (type: string), fl_date (type: date), fl_time (type: timestamp), 
arr_delay (type: float), fl_num (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 1, 2, 3, 4, 5]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: _col5 (type: int), _col2 (type: date)
   sort order: ++
-  Reduce Sink Vectorization:
-  className: VectorReduceSinkOperator
-  native: false
-  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true
-  nativeConditionsNotMet: No TopN IS false, Uniform 
Hash IS false
   Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
   TopN Hash Memory Usage: 0.1
   value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: timestamp), _col4 (type: float)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized, llap
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine tez IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: VALUE._col0 (type: string), VALUE._col1 (type: 
string), KEY.reducesinkkey1 (type: date), VALUE._col2 (type: timestamp), 
VALUE._col3 (type: float), KEY.reducesinkkey0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [2, 3, 1, 4, 5, 0]
 Statistics: Num rows: 137 Data size: 39456 Basic stats: 
COMPLETE Column stats: NONE
 Limit
   Number of rows: 25
-  Limit Vectorization:
-  className: VectorLimitOperator
-  native: true
   Statistics: Num rows: 25 Data size: 7200 

[07/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 636463b..ceaac4f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -19,10 +19,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -48,10 +48,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -87,10 +87,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00-12-31 23:59:59.9   3652060 
23:59:59.9
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
@@ -116,10 +116,10 @@ POSTHOOK: Input: default@test
  A masked pattern was here 
 0001-01-01 00:00:00
 -12-31 23:59:59.9
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 Plan optimized by CBO.

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index ae59b06..4092911 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -73,7 +73,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@alltypesorc_wrong
 POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
+PREHOOK: query: EXPLAIN SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -86,7 +86,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
 FROM alltypesorc_string
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
+POSTHOOK: query: EXPLAIN SELECT
   to_unix_timestamp(ctimestamp1) AS c1,
   year(ctimestamp1),
   month(ctimestamp1),
@@ -99,10 +99,6 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION  SELECT
 FROM alltypesorc_string
 ORDER BY c1
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -120,61 +116,26 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc_string
   Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: to_unix_timestamp(ctimestamp1) (type: 
bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), 
day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), 
weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), 
minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-

[43/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
--
diff --git a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q 
b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
index 11df12e..8ed041b 100644
--- a/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
+++ b/ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part.q
@@ -1,4 +1,4 @@
-set hive.explain.user=false;
+set hive.explain.user=true;
 set hive.mapred.mode=nonstrict;
 set hive.cli.print.header=true;
 SET hive.exec.schema.evolution=true;
@@ -39,7 +39,7 @@ alter table part_add_int_permute_select add columns(c int);
 
 insert into table part_add_int_permute_select partition(part=1) VALUES (2, 
, 'new', );
 
-explain vectorization detail
+explain
 select insert_num,part,a,b from part_add_int_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -62,7 +62,7 @@ alter table part_add_int_string_permute_select add columns(c 
int, d string);
 
 insert into table part_add_int_string_permute_select partition(part=1) VALUES 
(2, , 'new', , '');
 
-explain vectorization detail
+explain
 select insert_num,part,a,b from part_add_int_string_permute_select;
 
 -- SELECT permutation columns to make sure NULL defaulting works right
@@ -94,7 +94,7 @@ alter table part_change_string_group_double replace columns 
(insert_num int, c1
 
 insert into table part_change_string_group_double partition(part=1) SELECT 
insert_num, double1, double1, double1, 'new' FROM schema_evolution_data WHERE 
insert_num = 111;
 
-explain vectorization detail
+explain
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
 
 select insert_num,part,c1,c2,c3,b from part_change_string_group_double;
@@ -117,7 +117,7 @@ alter table 
part_change_date_group_string_group_date_timestamp replace columns(i
 
 insert into table part_change_date_group_string_group_date_timestamp 
partition(part=1) VALUES (111, 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 'new');
 
-explain vectorization detail
+explain
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_date_group_string_group_date_timestamp;
@@ -165,7 +165,7 @@ insert into table 
part_change_numeric_group_string_group_multi_ints_string_group
 'filler', 'filler', 'filler', 'filler', 'filler', 'filler', 
'filler', 'filler',
 'new');
 
-explain vectorization detail
+explain
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,b
 from part_change_numeric_group_string_group_multi_ints_string_group;
@@ -208,7 +208,7 @@ insert into table 
part_change_numeric_group_string_group_floating_string_group p
  'filler', 'filler', 'filler', 'filler', 'filler', 'filler',
  'new');
 
-explain vectorization detail
+explain
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,b 
from part_change_numeric_group_string_group_floating_string_group;
@@ -250,7 +250,7 @@ insert into table 
part_change_string_group_string_group_string partition(part=1)
   'filler', 'filler', 'filler',
   'new');
 
-explain vectorization detail
+explain
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
 
 select insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,b from 
part_change_string_group_string_group_string;
@@ -300,7 +300,7 @@ insert into table 
part_change_lower_to_higher_numeric_group_tinyint_to_bigint pa
 1234.5678, 9876.543, 789.321,
'new');
 
-explain vectorization detail
+explain
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
 
 select 
insert_num,part,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,b
 from part_change_lower_to_higher_numeric_group_tinyint_to_bigint;
@@ -331,7 +331,7 @@ alter table 
part_change_lower_to_higher_numeric_group_decimal_to_float replace c
 
 insert into table part_change_lower_to_higher_numeric_group_decimal_to_float 
partition(part=1) VALUES (111, 1234.5678, 9876.543, 1234.5678, 'new');
 
-explain vectorization detail
+explain
 select insert_num,part,c1,c2,c3,b from 
part_change_lower_to_higher_numeric_group_decimal_to_float;
 
 select 

[50/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
index 9114932..a72b882 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt
@@ -477,11 +477,6 @@ public class  extends VectorExpression {
   }
 
   @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum1 + ", col " + + colNum2;
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
index b56d451..8b1c366 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt
@@ -18,8 +18,6 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
-import java.nio.charset.StandardCharsets;
-
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -154,9 +152,4 @@ public abstract class  extends VectorExpression {
 this.value = value;
   }
 
-  @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum + ", val " + new String(value, 
StandardCharsets.UTF_8);
-  }
-
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
index 4fb5035..930069c 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt
@@ -18,8 +18,6 @@
  
 package org.apache.hadoop.hive.ql.exec.vector.expressions.gen;
 
-import java.nio.charset.StandardCharsets;
-
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -157,10 +155,4 @@ public abstract class  extends VectorExpression 
{
   public void setValue(byte[] value) {
 this.value = value;
   }
-
-  @Override
-  public String vectorExpressionParameters() {
-return "val " + new String(value, StandardCharsets.UTF_8) + ", col " + + 
colNum;
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
index 7863b16..4298d79 100644
--- 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
+++ 
b/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnBetween.txt
@@ -154,11 +154,6 @@ public class  extends VectorExpression {
   }
 
   @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum + ", left " + leftValue.toString() + ", right " + 
rightValue.toString();
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
--
diff --git 
a/ql/src/gen/vectorization/ExpressionTemplates/FilterTimestampColumnCompareLongDoubleColumn.txt
 

[11/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 14606ed..6c6c6d6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,16 +94,12 @@ POSTHOOK: Input: default@alltypesorc
 POSTHOOK: Output: default@vsmb_bucket_txt
 POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
 POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, 
comment:null), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = 
b.key
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -121,71 +117,33 @@ STAGE PLANS:
 TableScan
   alias: a
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
-  Reduce Sink Vectorization:
-  className: VectorReduceSinkLongOperator
-  native: true
-  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
   Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: value (type: string)
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: true
-usesVectorUDFAdaptor: false
-vectorized: true
 Map 3 
 Map Operator Tree:
 TableScan
   alias: b
   Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNotNull(col 0) -> 
boolean
 predicate: key is not null (type: boolean)
 Statistics: Num rows: 2 Data size: 208 Basic stats: 
COMPLETE Column stats: NONE
 Reduce Output Operator
   key expressions: key (type: int)
   sort order: +
   Map-reduce partition columns: key (type: int)
-  Reduce Sink Vectorization:
-  className: VectorReduceSinkLongOperator
-  native: true
-  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, 

[06/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index 7d722d0..fbb43c4 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -12,14 +12,10 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION 
[(alltypesorc)alltypesorc.
 POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
 POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE 
[(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), 
]
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM 
decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" 
AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN 
(CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -36,65 +32,27 @@ STAGE PLANS:
 TableScan
   alias: decimal_date_test
   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: FilterLongColumnInList(col 3, 
values [-67, -171]) -> boolean
 predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: 
boolean)
 Statistics: Num rows: 6144 Data size: 1233808 Basic stats: 
COMPLETE Column stats: NONE
 Select Operator
   expressions: cdate (type: date)
   outputColumnNames: _col0
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [3]
   Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
   Reduce Output Operator
 key expressions: _col0 (type: date)
 sort order: +
-Reduce Sink Vectorization:
-className: VectorReduceSinkOperator
-native: false
-nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS 
true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true
-nativeConditionsNotMet: Uniform Hash IS false
 Statistics: Num rows: 6144 Data size: 1233808 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
-groupByVectorOutput: true
-inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reducer 2 
 Execution mode: vectorized
-Reduce Vectorization:
-enabled: true
-enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-groupByVectorOutput: true
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
 Reduce Operator Tree:
   Select Operator
 expressions: 

[48/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
index 3e4a195..2162f17 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarColumnBase.java
@@ -131,10 +131,4 @@ public abstract class IfExprTimestampScalarColumnBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
-
-  @Override
-  public String vectorExpressionParameters() {
-return "col " + arg1Column + ", val "+ arg2Scalar + ", col "+ arg3Column;
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
index 5273131..707f574 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampScalarScalarBase.java
@@ -117,10 +117,4 @@ public abstract class IfExprTimestampScalarScalarBase 
extends VectorExpression {
   public String getOutputType() {
 return "timestamp";
   }
-
-  @Override
-  public String vectorExpressionParameters() {
-return "col " + arg1Column + ", val "+ arg2Scalar + ", val "+ arg3Scalar;
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
index 2f6e7b9..f19551e 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -107,11 +107,6 @@ public class IsNotNull extends VectorExpression {
   }
 
   @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum;
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
index 583ab7a..3169bae 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -105,11 +105,6 @@ public class IsNull extends VectorExpression {
   }
 
   @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum;
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 VectorExpressionDescriptor.Builder b = new 
VectorExpressionDescriptor.Builder();
 b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)

http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
index 6fa9779..33f50e0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColDivideLongColumn.java
@@ -174,11 +174,6 @@ public class LongColDivideLongColumn extends 
VectorExpression {
   }
 
   @Override
-  public String vectorExpressionParameters() {
-return "col " + colNum1 + ", col " + colNum2;
-  }
-
-  @Override
   public VectorExpressionDescriptor.Descriptor getDescriptor() {
 return (new VectorExpressionDescriptor.Builder())
 .setMode(


[09/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index a7c0d10..1bab6f7 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -1,19 +1,15 @@
 PREHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 PREHOOK: type: QUERY
 POSTHOOK: query: -- SORT_QUERY_RESULTS
 
-EXPLAIN VECTORIZATION EXPRESSION  SELECT COUNT(t1.cint), MAX(t2.cint), 
MIN(t1.cint), AVG(t1.cint+t2.cint)
+EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG(t1.cint+t2.cint)
   FROM alltypesorc t1
   JOIN alltypesorc t2 ON t1.cint = t2.cint
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -32,23 +28,12 @@ STAGE PLANS:
 TableScan
   alias: t1
   Statistics: Num rows: 12288 Data size: 36696 Basic stats: 
COMPLETE Column stats: COMPLETE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: SelectColumnIsNotNull(col 2) -> 
boolean
 predicate: cint is not null (type: boolean)
 Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int)
   outputColumnNames: _col0
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [2]
   Statistics: Num rows: 9173 Data size: 27396 Basic stats: 
COMPLETE Column stats: COMPLETE
   Map Join Operator
 condition map:
@@ -56,10 +41,6 @@ STAGE PLANS:
 keys:
   0 _col0 (type: int)
   1 _col0 (type: int)
-Map Join Vectorization:
-className: VectorMapJoinInnerBigOnlyLongOperator
-native: true
-nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, 
then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
 outputColumnNames: _col0, _col1
 input vertices:
   1 Map 3
@@ -67,21 +48,9 @@ STAGE PLANS:
 Select Operator
   expressions: _col0 (type: int), _col1 (type: int), 
(_col0 + _col1) (type: int)
   outputColumnNames: _col0, _col1, _col2
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [2, 2, 12]
-  selectExpressions: LongColAddLongColumn(col 2, 
col 2) -> 12:long
   Statistics: Num rows: 19518 Data size: 156144 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: count(_col0), max(_col1), 
min(_col0), avg(_col2)
-Group By Vectorization:
-aggregators: VectorUDAFCount(col 2) -> bigint, 
VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, 
VectorUDAFAvgLong(col 12) -> struct
-className: VectorGroupByOperator
-vectorOutput: false
-native: false
-projectedOutputColumns: [0, 1, 2, 3]
-vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 12) -> struct output type 

[22/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
index f3ffee8..69911f5 100644
--- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
@@ -132,17 +132,91 @@ POSTHOOK: query: select * from t4
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t4
  A masked pattern was here 
-PREHOOK: query: explain vectorization only summary
-
-select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization only summary
-
-select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value
+POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key 
sort by a.key, a.value
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Semi Join 0 to 1
+  keys:
+0 key (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col0, _col1
+  input vertices:
+1 Map 3
+  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: int), _col1 (type: 
string)
+sort order: ++
+Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
+Execution mode: llap
+LLAP IO: all inputs
+Map 3 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+keys: _col0 (type: int)
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 11 Data size: 1023 Basic 
stats: COMPLETE Column stats: NONE
+Execution mode: llap
+LLAP IO: all inputs
+Reducer 2 
+Execution mode: llap
+Reduce Operator Tree:
+  Select Operator
+expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 12 Data size: 1125 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 12 Data size: 1125 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 

[12/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index d6c405e..a14d515 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -30,8 +30,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN VECTORIZATION EXPRESSION
-SELECT AVG(cint),
+EXPLAIN SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -99,8 +98,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- ArithmeticOps: Add, Multiply, Subtract, Divide
 -- FilterOps: Equal, NotEqual, GreaterThan, LessThan, LessThanOrEqual
 -- GroupBy: NoGroupByProjectAggs
-EXPLAIN VECTORIZATION EXPRESSION
-SELECT AVG(cint),
+EXPLAIN SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728),
@@ -136,10 +134,6 @@ WHERE  ((762 = cbigint)
 AND ((79.553 != cint)
  AND (cboolean2 != cboolean1)
 POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -157,33 +151,15 @@ STAGE PLANS:
 TableScan
   alias: alltypesorc
   Statistics: Num rows: 12288 Data size: 2601650 Basic stats: 
COMPLETE Column stats: COMPLETE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
   Filter Operator
-Filter Vectorization:
-className: VectorFilterOperator
-native: true
-predicateExpression: FilterExprOrExpr(children: 
FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, 
FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 
4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, 
FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: 
CastTimestampToDouble(col 9) -> 12:double) -> boolean, 
FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: 
CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, 
FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, 
FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val 
-1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, 
FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, 
FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: 
CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, 
FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) 
 -> boolean) -> boolean
 predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < 
cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> 
UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= 
-1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and 
(cboolean2 <> cboolean1))) (type: boolean)
 Statistics: Num rows: 5466 Data size: 1157380 Basic stats: 
COMPLETE Column stats: COMPLETE
 Select Operator
   expressions: cint (type: int), cdouble (type: double), 
csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
   outputColumnNames: cint, cdouble, csmallint, cfloat, 
ctinyint
-  Select Vectorization:
-  className: VectorSelectOperator
-  native: true
-  projectedOutputColumns: [2, 5, 1, 4, 0]
   Statistics: Num rows: 5466 Data size: 1157380 Basic 
stats: COMPLETE Column stats: COMPLETE
   Group By Operator
 aggregations: avg(cint), sum(cdouble), 
stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), 
stddev_samp(cint), min(ctinyint), count(csmallint)
-Group By Vectorization:
-aggregators: VectorUDAFAvgLong(col 2) -> 
struct, VectorUDAFSumDouble(col 5) -> double, 
VectorUDAFStdPopLong(col 2) -> struct, 
VectorUDAFStdSampLong(col 1) -> 
struct, VectorUDAFVarSampLong(col 2) 
-> 

[02/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out 
b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
index eb61044..bd9b852 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out
@@ -66,21 +66,105 @@ POSTHOOK: query: ANALYZE TABLE small_table COMPUTE 
STATISTICS FOR COLUMNS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_table
  A masked pattern was here 
-PREHOOK: query: explain vectorization detail formatted
+PREHOOK: query: explain
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail formatted
+POSTHOOK: query: explain
 select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
 on s.ctinyint = st.ctinyint
 ) t1
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-2
+Spark
+ A masked pattern was here 
+  Vertices:
+Map 3 
+Map Operator Tree:
+TableScan
+  alias: st
+  Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: ctinyint (type: tinyint)
+outputColumnNames: _col0
+Statistics: Num rows: 100 Data size: 380 Basic stats: 
COMPLETE Column stats: NONE
+Spark HashTable Sink Operator
+  keys:
+0 _col0 (type: tinyint)
+1 _col0 (type: tinyint)
+Execution mode: vectorized
+Local Work:
+  Map Reduce Local Work
+
+  Stage: Stage-1
+Spark
+  Edges:
+Reducer 2 <- Map 1 (GROUP, 1)
  A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: s
+  Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: ctinyint (type: tinyint)
+outputColumnNames: _col0
+Statistics: Num rows: 6058 Data size: 2027 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Outer Join0 to 1
+  keys:
+0 _col0 (type: tinyint)
+1 _col0 (type: tinyint)
+  input vertices:
+1 Map 3
+  Statistics: Num rows: 6663 Data size: 2229 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+aggregations: count()
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: bigint)
+Execution mode: vectorized
+Local Work:
+  Map Reduce Local Work
+Reducer 2 
+Execution mode: vectorized
+Reduce Operator Tree:
+  Group By Operator
+aggregations: count(VALUE._col0)
+mode: mergepartial
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
+
 PREHOOK: query: select count(*) from (select s.*, st.*
 from sorted_mod_4 s
 left outer join small_table st
@@ -100,21 +184,111 @@ POSTHOOK: Input: default@small_table
 POSTHOOK: Input: default@sorted_mod_4
  A masked pattern was here 
 6876
-PREHOOK: query: explain 

[51/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization 
(Matt McCline, reviewed by Gopal Vijayaraghavan)"""

This reverts commit 16d28b343b76c998b8fdbd8a91bae07ac82357de.


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad6ce078
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad6ce078
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad6ce078

Branch: refs/heads/master
Commit: ad6ce0781a4e68fad2960c1053c325753a9504db
Parents: 36e810f
Author: Matt McCline 
Authored: Mon Oct 17 13:31:42 2016 -0700
Committer: Matt McCline 
Committed: Mon Oct 17 13:31:42 2016 -0700

--
 .../org/apache/hive/common/util/DateUtils.java  |20 -
 .../ColumnArithmeticColumn.txt  | 7 +-
 .../ColumnArithmeticColumnDecimal.txt   | 5 -
 .../ColumnArithmeticColumnWithConvert.txt   |   173 +
 .../ColumnArithmeticScalar.txt  | 5 -
 .../ColumnArithmeticScalarDecimal.txt   | 5 -
 .../ColumnArithmeticScalarWithConvert.txt   |   150 +
 .../ExpressionTemplates/ColumnCompareColumn.txt | 5 -
 .../ExpressionTemplates/ColumnCompareScalar.txt | 5 -
 .../ExpressionTemplates/ColumnDivideColumn.txt  | 5 -
 .../ColumnDivideColumnDecimal.txt   | 5 -
 .../ExpressionTemplates/ColumnDivideScalar.txt  | 5 -
 .../ColumnDivideScalarDecimal.txt   | 5 -
 .../ExpressionTemplates/ColumnUnaryFunc.txt | 5 -
 .../ExpressionTemplates/ColumnUnaryMinus.txt| 5 -
 ...eColumnArithmeticIntervalYearMonthColumn.txt | 5 -
 ...eColumnArithmeticIntervalYearMonthScalar.txt | 5 -
 .../DateColumnArithmeticTimestampColumn.txt | 5 -
 .../DateColumnArithmeticTimestampScalar.txt | 5 -
 ...eScalarArithmeticIntervalYearMonthColumn.txt | 5 -
 .../DateScalarArithmeticTimestampColumn.txt | 5 -
 .../DecimalColumnUnaryFunc.txt  | 5 -
 .../ExpressionTemplates/FilterColumnBetween.txt | 7 +-
 .../FilterColumnCompareColumn.txt   | 9 +-
 .../FilterColumnCompareScalar.txt   | 9 +-
 .../FilterDecimalColumnBetween.txt  | 5 -
 .../FilterDecimalColumnCompareDecimalColumn.txt | 5 -
 .../FilterDecimalColumnCompareDecimalScalar.txt | 5 -
 .../FilterDecimalScalarCompareDecimalColumn.txt | 5 -
 ...erLongDoubleColumnCompareTimestampColumn.txt | 5 -
 ...erLongDoubleScalarCompareTimestampColumn.txt | 5 -
 .../FilterScalarCompareColumn.txt   | 9 +-
 .../FilterStringColumnBetween.txt   | 9 +-
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 -
 ...gGroupColumnCompareStringGroupScalarBase.txt | 7 -
 ...gGroupScalarCompareStringGroupColumnBase.txt | 8 -
 .../FilterTimestampColumnBetween.txt| 5 -
 ...erTimestampColumnCompareLongDoubleColumn.txt | 5 -
 ...erTimestampColumnCompareLongDoubleScalar.txt | 5 -
 ...terTimestampColumnCompareTimestampColumn.txt | 5 -
 ...terTimestampColumnCompareTimestampScalar.txt | 5 -
 ...erTimestampScalarCompareLongDoubleColumn.txt | 5 -
 ...terTimestampScalarCompareTimestampColumn.txt | 5 -
 .../FilterTruncStringColumnBetween.txt  |10 +-
 .../ExpressionTemplates/IfExprColumnScalar.txt  | 5 -
 .../ExpressionTemplates/IfExprScalarColumn.txt  | 5 -
 .../ExpressionTemplates/IfExprScalarScalar.txt  | 5 -
 ...ervalYearMonthColumnArithmeticDateColumn.txt | 5 -
 ...ervalYearMonthColumnArithmeticDateScalar.txt | 5 -
 ...YearMonthColumnArithmeticTimestampColumn.txt | 5 -
 ...YearMonthColumnArithmeticTimestampScalar.txt | 5 -
 ...ervalYearMonthScalarArithmeticDateColumn.txt | 5 -
 ...YearMonthScalarArithmeticTimestampColumn.txt | 5 -
 .../LongDoubleColumnCompareTimestampColumn.txt  | 5 -
 .../LongDoubleColumnCompareTimestampScalar.txt  | 4 -
 .../LongDoubleScalarCompareTimestampColumn.txt  | 5 -
 .../ScalarArithmeticColumn.txt  | 5 -
 .../ScalarArithmeticColumnDecimal.txt   | 5 -
 .../ScalarArithmeticColumnWithConvert.txt   |   163 +
 .../ExpressionTemplates/ScalarCompareColumn.txt | 5 -
 .../ExpressionTemplates/ScalarDivideColumn.txt  | 5 -
 .../ScalarDivideColumnDecimal.txt   | 5 -
 ...tringGroupColumnCompareStringGroupColumn.txt | 5 -
 ...gGroupColumnCompareStringGroupScalarBase.txt | 6 -
 ...tringGroupColumnCompareTruncStringScalar.txt | 7 -
 ...gGroupScalarCompareStringGroupColumnBase.txt | 7 -
 .../TimestampColumnArithmeticDateColumn.txt | 5 -
 .../TimestampColumnArithmeticDateScalar.txt | 5 -
 ...pColumnArithmeticIntervalYearMonthColumn.txt | 5 -
 ...pColumnArithmeticIntervalYearMonthScalar.txt | 5 -
 ...TimestampColumnArithmeticTimestampColumn.txt | 5 -
 

[17/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
index 9a95606..9369661 100644
--- a/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_outer_join6.q.out
@@ -126,15 +126,113 @@ POSTHOOK: Output: default@TJOIN4
 POSTHOOK: Lineage: tjoin4.c1 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin4.c2 SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin4.rnum SIMPLE 
[(tjoin4_txt)tjoin4_txt.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain vectorization detail formatted
+PREHOOK: query: explain
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail formatted
+POSTHOOK: query: explain
 select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from
(select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from 
tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1
 POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
  A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: tjoin1
+  Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: rnum (type: int), c1 (type: int)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 3 Data size: 32 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Outer Join0 to 1
+  keys:
+0 _col1 (type: int)
+1 _col1 (type: int)
+  outputColumnNames: _col0, _col2, _col3
+  input vertices:
+1 Map 2
+  Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: _col0 (type: int), _col2 (type: int), 
_col3 (type: int)
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 4 Data size: 409 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Outer Join0 to 1
+  keys:
+0 _col2 (type: int)
+1 _col1 (type: int)
+  outputColumnNames: _col0, _col1, _col3
+  input vertices:
+1 Map 3
+  Statistics: Num rows: 4 Data size: 449 Basic stats: 
COMPLETE Column stats: NONE
+  Select Operator
+expressions: _col0 (type: int), _col1 (type: int), 
_col3 (type: int)
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 4 Data size: 449 Basic 
stats: COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map 2 
+Map Operator Tree:
+TableScan
+  alias: tjoin2
+  Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: rnum (type: int), c1 (type: int)
+outputColumnNames: _col0, _col1
+

[34/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
index 739d0e1..6b59497 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_columns.q.out
@@ -69,17 +69,13 @@ POSTHOOK: Lineage: tint.cint SIMPLE 
[(tint_txt)tint_txt.FieldSchema(name:cint, t
 POSTHOOK: Lineage: tint.rnum SIMPLE [(tint_txt)tint_txt.FieldSchema(name:rnum, 
type:int, comment:null), ]
 tint_txt.rnum  tint_txt.cint
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
 select tint.rnum, tsint.rnum, tint.cint, tsint.csint, (case when (tint.cint 
between tsint.csint and tsint.csint) then "Ok" else "NoOk" end) as between_col 
from tint , tsint
 POSTHOOK: type: QUERY
 Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -97,16 +93,9 @@ STAGE PLANS:
 TableScan
   alias: tint
   Statistics: Num rows: 5 Data size: 36 Basic stats: COMPLETE 
Column stats: NONE
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1]
   Select Operator
 expressions: rnum (type: int), cint (type: int)
 outputColumnNames: _col0, _col1
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 1]
 Statistics: Num rows: 5 Data size: 36 Basic stats: 
COMPLETE Column stats: NONE
 Map Join Operator
   condition map:
@@ -114,11 +103,6 @@ STAGE PLANS:
   keys:
 0 
 1 
-  Map Join Vectorization:
-  className: VectorMapJoinOperator
-  native: false
-  nativeConditionsMet: 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Supports Key Types IS true, When Fast Hash Table, then requires no Hybrid 
Hash Join IS true, Small table vectorizes IS true
-  nativeConditionsNotMet: Not empty key IS false
   outputColumnNames: _col0, _col1, _col2, _col3
   input vertices:
 1 Map 2
@@ -126,17 +110,9 @@ STAGE PLANS:
   Select Operator
 expressions: _col0 (type: int), _col2 (type: int), 
_col1 (type: int), _col3 (type: smallint), CASE WHEN (_col1 BETWEEN _col3 AND 
_col3) THEN ('Ok') ELSE ('NoOk') END (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 2, 1, 3, 5]
-selectExpressions: VectorUDFAdaptor(CASE WHEN 
(_col1 BETWEEN _col3 AND _col3) THEN ('Ok') ELSE ('NoOk') END)(children: 
VectorUDFAdaptor(_col1 BETWEEN _col3 AND _col3) -> 4:Long) -> 5:String
 Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
 File Output Operator
   compressed: false
-  File Sink Vectorization:
-  className: VectorFileSinkOperator
-  native: false
   Statistics: Num rows: 25 Data size: 385 Basic stats: 
COMPLETE Column stats: NONE
   table:
   input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -144,49 +120,21 @@ STAGE PLANS:
   serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: all inputs
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 

[38/51] [partial] hive git commit: Revert "Revert "Revert "HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)"""

2016-10-17 Thread mmccline
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out 
b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
index 1d4163c..437770d 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part.q.out
@@ -87,73 +87,25 @@ POSTHOOK: Lineage: part_add_int_permute_select 
PARTITION(part=1).b SIMPLE [(valu
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).c EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_permute_select PARTITION(part=1).insert_num 
EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 _col0  _col1   _col2   _col3
-PREHOOK: query: explain vectorization detail
+PREHOOK: query: explain
 select insert_num,part,a,b from part_add_int_permute_select
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
+POSTHOOK: query: explain
 select insert_num,part,a,b from part_add_int_permute_select
 POSTHOOK: type: QUERY
 Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+Plan optimized by CBO.
 
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-Tez
- A masked pattern was here 
-  Vertices:
-Map 1 
-Map Operator Tree:
-TableScan
-  alias: part_add_int_permute_select
-  Statistics: Num rows: 2 Data size: 33 Basic stats: COMPLETE 
Column stats: PARTIAL
-  TableScan Vectorization:
-  native: true
-  projectedOutputColumns: [0, 1, 2, 3, 4]
-  Select Operator
-expressions: insert_num (type: int), part (type: int), a 
(type: int), b (type: string)
-outputColumnNames: _col0, _col1, _col2, _col3
-Select Vectorization:
-className: VectorSelectOperator
-native: true
-projectedOutputColumns: [0, 4, 1, 2]
-Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE 
Column stats: PARTIAL
-File Output Operator
-  compressed: false
-  File Sink Vectorization:
-  className: VectorFileSinkOperator
-  native: false
-  Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: PARTIAL
-  table:
-  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-Execution mode: vectorized, llap
-Map Vectorization:
-enabled: true
-enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
-groupByVectorOutput: true
-inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
-allNative: false
-usesVectorUDFAdaptor: false
-vectorized: true
-rowBatchContext:
-dataColumnCount: 4
-includeColumns: [0, 1, 2]
-dataColumns: insert_num:int, a:int, b:string, c:int
-partitionColumnCount: 1
-partitionColumns: part:int
-
-  Stage: Stage-0
-Fetch Operator
-  limit: -1
-  Processor Tree:
-ListSink
+Stage-0
+  Fetch Operator
+limit:-1
+Stage-1
+  Map 1 vectorized, llap
+  File Output Operator [FS_4]
+Select Operator [SEL_3] (rows=2 width=4)
+  Output:["_col0","_col1","_col2","_col3"]
+  TableScan [TS_0] (rows=2 width=16)
+
default@part_add_int_permute_select,part_add_int_permute_select,Tbl:COMPLETE,Col:PARTIAL,Output:["insert_num","a","b"]
 
 PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting 
works right
 select insert_num,part,a,b from part_add_int_permute_select
@@ -254,73 +206,25 @@ POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).c EXPRES
 POSTHOOK: Lineage: part_add_int_string_permute_select PARTITION(part=1).d 
SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, 
type:string, comment:), ]
 POSTHOOK: Lineage: part_add_int_string_permute_select 
PARTITION(part=1).insert_num EXPRESSION 

  1   2   >