[1/2] hive git commit: HIVE-17006 : LLAP: Parquet caching v1 (Sergey Shelukhin, reviewed by Gunther Hagleitner)

sershe Thu, 31 Aug 2017 14:27:08 -0700

Repository: hive
Updated Branches:
  refs/heads/master 642acdf76 -> 9e673a73d



http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/parquet_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_types.q 
b/ql/src/test/queries/clientpositive/parquet_types.q
index db37d2e..1a8844b 100644
--- a/ql/src/test/queries/clientpositive/parquet_types.q
+++ b/ql/src/test/queries/clientpositive/parquet_types.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.llap.cache.allow.synthetic.fileid=true;
 DROP TABLE parquet_types_staging;
 DROP TABLE parquet_types;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_types_vectorization.q 
b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
index bb0e5b2..1f353ae 100644
--- a/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
+++ b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q
@@ -7,6 +7,7 @@ set hive.vectorized.execution.reduce.enabled=true;
 set hive.vectorized.use.row.serde.deserialize=true;
 set hive.vectorized.use.vector.serde.deserialize=true;
 set hive.vectorized.execution.reduce.groupby.enabled = true;
+set hive.llap.cache.allow.synthetic.fileid=true;
 
 CREATE TABLE parquet_types_staging (
   cint int,

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/vectorized_parquet.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet.q 
b/ql/src/test/queries/clientpositive/vectorized_parquet.q
index db02ec0..1efc60e 100644
--- a/ql/src/test/queries/clientpositive/vectorized_parquet.q
+++ b/ql/src/test/queries/clientpositive/vectorized_parquet.q
@@ -1,6 +1,7 @@
 set hive.explain.user=false;
 set hive.exec.submitviachild=false;
 set hive.exec.submit.local.task.via.child=false;
+set hive.llap.cache.allow.synthetic.fileid=true;
 
 create table if not exists alltypes_parquet (
   cint int, 

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/vectorized_parquet_types.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q 
b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q
index 7467cb3..63f811b 100644
--- a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q
+++ b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+set hive.llap.cache.allow.synthetic.fileid=true;
 
 DROP TABLE parquet_types_staging;
 DROP TABLE parquet_types;

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out 
b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
index aecbcfd..01458fe 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out
@@ -140,7 +140,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -198,7 +198,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -540,7 +540,7 @@ STAGE PLANS:
                         sort order: ++
                         Statistics: Num rows: 29 Data size: 319 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -606,7 +606,7 @@ STAGE PLANS:
                         sort order: ++
                         Statistics: Num rows: 29 Data size: 319 Basic stats: 
COMPLETE Column stats: NONE
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -739,7 +739,7 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -818,7 +818,7 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1008,7 +1008,7 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1108,7 +1108,7 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: tinyint), _col1 (type: 
smallint), _col2 (type: double)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1220,7 +1220,7 @@ STAGE PLANS:
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col1 (type: int), _col2 (type: 
bigint)
             Execution mode: llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out 
b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
new file mode 100644
index 0000000..66bfb81
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
@@ -0,0 +1,849 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary string,
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>,
+  d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary string,
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>,
+  d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary binary,
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>,
+  d date
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary binary,
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>,
+  d date
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' 
OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' 
OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+100    1       1       1.0     0.0     abc     2011-01-01 01:01:01.111111111   
a       a       B4F3CAFDBEDD    {"k1":"v1"}     [101,200]       
{"c1":10,"c2":"a"}      2011-01-01
+101    2       2       1.1     0.3     def     2012-02-02 02:02:02.222222222   
ab      ab      68692CCAC0BDE7  {"k2":"v2"}     [102,200]       
{"c1":10,"c2":"d"}      2012-02-02
+102    3       3       1.2     0.6     ghi     2013-03-03 03:03:03.333333333   
abc     abc     B4F3CAFDBEDD    {"k3":"v3"}     [103,200]       
{"c1":10,"c2":"g"}      2013-03-03
+103    1       4       1.3     0.9     jkl     2014-04-04 04:04:04.444444444   
abcd    abcd    68692CCAC0BDE7  {"k4":"v4"}     [104,200]       
{"c1":10,"c2":"j"}      2014-04-04
+104    2       5       1.4     1.2     mno     2015-05-05 05:05:05.555555555   
abcde   abcde   B4F3CAFDBEDD    {"k5":"v5"}     [105,200]       
{"c1":10,"c2":"m"}      2015-05-05
+105    3       1       1.0     1.5     pqr     2016-06-06 06:06:06.666666666   
abcde   abcdef  68692CCAC0BDE7  {"k6":"v6"}     [106,200]       
{"c1":10,"c2":"p"}      2016-06-06
+106    1       2       1.1     1.8     stu     2017-07-07 07:07:07.777777777   
abcde   abcdefg B4F3CAFDBEDD    {"k7":"v7"}     [107,200]       
{"c1":10,"c2":"s"}      2017-07-07
+107    2       3       1.2     2.1     vwx     2018-08-08 08:08:08.888888888   
bcdef   abcdefgh        68692CCAC0BDE7  {"k8":"v8"}     [108,200]       
{"c1":10,"c2":"v"}      2018-08-08
+108    3       4       1.3     2.4     yza     2019-09-09 09:09:09.999999999   
cdefg   B4F3CAFDBE      68656C6C6F      {"k9":"v9"}     [109,200]       
{"c1":10,"c2":"y"}      2019-09-09
+109    1       5       1.4     2.7     bcd     2020-10-10 10:10:10.101010101   
klmno   abcdedef        68692CCAC0BDE7  {"k10":"v10"}   [110,200]       
{"c1":10,"c2":"b"}      2020-10-10
+110    2       1       1.0     3.0     efg     2021-11-11 11:11:11.111111111   
pqrst   abcdede B4F3CAFDBEDD    {"k11":"v11"}   [111,200]       
{"c1":10,"c2":"e"}      2021-11-11
+111    3       2       1.1     3.3     hij     2022-12-12 12:12:12.121212121   
nopqr   abcded  68692CCAC0BDE7  {"k12":"v12"}   [112,200]       
{"c1":10,"c2":"h"}      2022-12-12
+112    1       3       1.2     3.6     klm     2023-01-02 13:13:13.131313131   
opqrs   abcdd   B4F3CAFDBEDD    {"k13":"v13"}   [113,200]       
{"c1":10,"c2":"k"}      2023-01-02
+113    2       4       1.3     3.9     nop     2024-02-02 14:14:14.141414141   
pqrst   abc     68692CCAC0BDE7  {"k14":"v14"}   [114,200]       
{"c1":10,"c2":"n"}      2024-02-02
+114    3       5       1.4     4.2     qrs     2025-03-03 15:15:15.151515151   
qrstu   b       B4F3CAFDBEDD    {"k15":"v15"}   [115,200]       
{"c1":10,"c2":"q"}      2025-03-03
+115    1       1       1.0     4.5     qrs     2026-04-04 16:16:16.161616161   
rstuv   abcded  68692CCAC0BDE7  {"k16":"v16"}   [116,200]       
{"c1":10,"c2":"q"}      2026-04-04
+116    2       2       1.1     4.8     wxy     2027-05-05 17:17:17.171717171   
stuvw   abcded  B4F3CAFDBEDD    {"k17":"v17"}   [117,200]       
{"c1":10,"c2":"w"}      2027-05-05
+117    3       3       1.2     5.1     zab     2028-06-06 18:18:18.181818181   
tuvwx   abcded  68692CCAC0BDE7  {"k18":"v18"}   [118,200]       
{"c1":10,"c2":"z"}      2028-06-06
+118    1       4       1.3     5.4     cde     2029-07-07 19:19:19.191919191   
uvwzy   abcdede B4F3CAFDBEDD    {"k19":"v19"}   [119,200]       
{"c1":10,"c2":"c"}      2029-07-07
+119    2       5       1.4     5.7     fgh     2030-08-08 20:20:20.202020202   
vwxyz   abcdede 68692CCAC0BDE7  {"k20":"v20"}   [120,200]       
{"c1":10,"c2":"f"}      2030-08-08
+120    3       1       1.0     6.0     ijk     2031-09-09 21:21:21.212121212   
wxyza   abcde   B4F3CAFDBEDD    {"k21":"v21"}   [121,200]       
{"c1":10,"c2":"i"}      2031-09-09
+121    1       2       1.1     6.3     lmn     2032-10-10 22:22:22.222222222   
bcdef   abcde           {"k22":"v22"}   [122,200]       {"c1":10,"c2":"l"}      
2032-10-10
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, 
cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, 
cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, 
type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cchar SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, 
type:char(5), comment:null), ]
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, 
type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, 
type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, 
comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, 
type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, 
type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, 
type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, 
type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.d SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, 
comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, 
type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, 
type:map<string,varchar(3)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, 
type:struct<c1:int,c2:char(1)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE 
[(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, 
type:timestamp, comment:null), ]
+PREHOOK: query: EXPLAIN SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cint (type: int), ctinyint (type: tinyint), 
csmallint (type: smallint), cfloat (type: float), cdouble (type: double), 
cstring1 (type: string)
+                    outputColumnNames: cint, ctinyint, csmallint, cfloat, 
cdouble, cstring1
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: max(cint), min(csmallint), 
count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                      keys: ctinyint (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int), _col2 (type: 
smallint), _col3 (type: bigint), _col4 (type: 
struct<count:bigint,sum:double,input:float>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), min(VALUE._col1), 
count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+                keys: KEY._col0 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: tinyint), _col1 (type: int), _col2 
(type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), 
round(_col5, 5) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: tinyint)
+                    sort order: +
+                    Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: int), _col2 (type: 
smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 
(type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), 
VALUE._col3 (type: double), VALUE._col4 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1      121     1       8       1.175   2.06216
+2      119     1       7       1.21429 1.8
+3      120     1       7       1.17143 1.8
+PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY 
cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY 
cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cfloat (type: float)
+                    outputColumnNames: cfloat
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: cfloat (type: float)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: float)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: float)
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: float)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: float)
+                  sort order: +
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat 
ORDER BY cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat 
ORDER BY cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1.0    5
+1.1    5
+1.2    4
+1.3    4
+1.4    4
+PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY 
cchar ORDER BY cchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY 
cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cchar (type: char(5))
+                    outputColumnNames: cchar
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: cchar (type: char(5))
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: char(5))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: char(5))
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: char(5))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(5))
+                  sort order: +
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER 
BY cchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar 
ORDER BY cchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+a      1
+ab     1
+abc    1
+abcd   1
+abcde  3
+bcdef  2
+cdefg  1
+klmno  1
+nopqr  1
+opqrs  1
+pqrst  2
+qrstu  1
+rstuv  1
+stuvw  1
+tuvwx  1
+uvwzy  1
+vwxyz  1
+wxyza  1
+PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY 
cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY 
cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cvarchar (type: varchar(10))
+                    outputColumnNames: cvarchar
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: cvarchar (type: varchar(10))
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: varchar(10))
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: varchar(10))
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: varchar(10))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: varchar(10))
+                  sort order: +
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: varchar(10)), 
VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar 
ORDER BY cvarchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY 
cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+B4F3CAFDBE     1
+a      1
+ab     1
+abc    2
+abcd   1
+abcdd  1
+abcde  3
+abcded 4
+abcdede        3
+abcdedef       1
+abcdef 1
+abcdefg        1
+abcdefgh       1
+b      1
+PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY 
cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY 
cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cstring1 (type: string)
+                    outputColumnNames: cstring1
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: cstring1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 
ORDER BY cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY 
cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+abc    1
+bcd    1
+cde    1
+def    1
+efg    1
+fgh    1
+ghi    1
+hij    1
+ijk    1
+jkl    1
+klm    1
+lmn    1
+mno    1
+nop    1
+pqr    1
+qrs    2
+stu    1
+vwx    1
+wxy    1
+yza    1
+zab    1
+PREHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER 
BY t
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t 
ORDER BY t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: t (type: timestamp)
+                    outputColumnNames: t
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: t (type: timestamp)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: timestamp)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: timestamp)
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: timestamp)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: timestamp)
+                  sort order: +
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 
(type: bigint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111  1
+2012-02-02 02:02:02.222222222  1
+2013-03-03 03:03:03.333333333  1
+2014-04-04 04:04:04.444444444  1
+2015-05-05 05:05:05.555555555  1
+2016-06-06 06:06:06.666666666  1
+2017-07-07 07:07:07.777777777  1
+2018-08-08 08:08:08.888888888  1
+2019-09-09 09:09:09.999999999  1
+2020-10-10 10:10:10.101010101  1
+2021-11-11 11:11:11.111111111  1
+2022-12-12 12:12:12.121212121  1
+2023-01-02 13:13:13.131313131  1
+2024-02-02 14:14:14.141414141  1
+2025-03-03 15:15:15.151515151  1
+2026-04-04 16:16:16.161616161  1
+2027-05-05 17:17:17.171717171  1
+2028-06-06 18:18:18.181818181  1
+2029-07-07 19:19:19.191919191  1
+2030-08-08 20:20:20.202020202  1
+2031-09-09 21:21:21.212121212  1
+2032-10-10 22:22:22.222222222  1
+PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP 
BY cbinary
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types 
GROUP BY cbinary
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: parquet_types
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: cbinary (type: binary)
+                    outputColumnNames: cbinary
+                    Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: cbinary (type: binary)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: binary)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: binary)
+                        Statistics: Num rows: 22 Data size: 308 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: binary)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: hex(_col0) (type: string), _col1 (type: bigint)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 11 Data size: 154 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY 
cbinary
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY 
cbinary
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+68692CCAC0BDE7 10
+       1
+68656C6C6F     1
+B4F3CAFDBEDD   10

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out 
b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index c5f7128..dd62df6 100644
--- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -2734,7 +2734,7 @@ STAGE PLANS:
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: timestamp), _col4 (type: float)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -2915,7 +2915,7 @@ STAGE PLANS:
                         Statistics: Num rows: 137 Data size: 822 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -3411,7 +3411,7 @@ STAGE PLANS:
                           output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                           serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -3646,7 +3646,7 @@ STAGE PLANS:
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: timestamp), _col3 (type: float)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -3851,7 +3851,7 @@ STAGE PLANS:
                         Statistics: Num rows: 12 Data size: 768 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -4371,7 +4371,7 @@ STAGE PLANS:
                           output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                           serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -4606,7 +4606,7 @@ STAGE PLANS:
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: date), _col3 (type: float)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
@@ -4811,7 +4811,7 @@ STAGE PLANS:
                         Statistics: Num rows: 12 Data size: 576 Basic stats: 
COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 8a84d3d..781c8a1 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -159,7 +159,7 @@ STAGE PLANS:
                         Statistics: Num rows: 12288 Data size: 73728 Basic 
stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
smallint), _col3 (type: bigint), _col4 (type: 
struct<count:bigint,sum:double,input:float>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index cdf6b3d..0303e0e 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -312,7 +312,7 @@ STAGE PLANS:
                         Statistics: Num rows: 22 Data size: 242 Basic stats: 
COMPLETE Column stats: NONE
                         value expressions: _col1 (type: int), _col2 (type: 
smallint), _col3 (type: bigint), _col4 (type: 
struct<count:bigint,sum:double,input:float>), _col5 (type: 
struct<count:bigint,sum:double,variance:double>), _col6 (type: decimal(4,2))
             Execution mode: vectorized, llap
-            LLAP IO: no inputs
+            LLAP IO: all inputs (cache only)
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java
----------------------------------------------------------------------
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java 
b/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java
new file mode 100644
index 0000000..403c3ad
--- /dev/null
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers;
+
+public interface FileMetadataCache {
+  /**
+   * @return Metadata for a given file (ORC or Parquet footer).
+   *         The caller must decref this buffer when done.
+   */
+  MemoryBufferOrBuffers getFileMetadata(Object fileKey);
+
+  // TODO: add BB put method(s) when merging with ORC off-heap metadata cache
+
+  /**
+   * Puts the metadata for a given file (e.g. a footer buffer into cache).
+   * @param fileKey The file key.
+   * @param length The footer length.
+   * @param is The stream to read the footer from.
+   * @return The buffer or buffers representing the cached footer.
+   *         The caller must decref this buffer when done.
+   */
+  MemoryBufferOrBuffers putFileMetadata(
+      Object fileKey, int length, InputStream is) throws IOException;
+
+  /**
+   * Releases the buffer returned from getFileMetadata or putFileMetadata 
method.
+   * @param buffer The buffer to release.
+   */
+  void decRefBuffer(MemoryBufferOrBuffers buffer);
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java
----------------------------------------------------------------------
diff --git 
a/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java
 
b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java
new file mode 100644
index 0000000..c04310a
--- /dev/null
+++ 
b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java
@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.io.encoded;
+
+public interface MemoryBufferOrBuffers {
+  MemoryBuffer getSingleBuffer();
+  MemoryBuffer[] getMultipleBuffers();
+}
\ No newline at end of file

[1/2] hive git commit: HIVE-17006 : LLAP: Parquet caching v1 (Sergey Shelukhin, reviewed by Gunther Hagleitner)

Reply via email to