Repository: hive Updated Branches: refs/heads/master 642acdf76 -> 9e673a73d
http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/parquet_types.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_types.q b/ql/src/test/queries/clientpositive/parquet_types.q index db37d2e..1a8844b 100644 --- a/ql/src/test/queries/clientpositive/parquet_types.q +++ b/ql/src/test/queries/clientpositive/parquet_types.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.llap.cache.allow.synthetic.fileid=true; DROP TABLE parquet_types_staging; DROP TABLE parquet_types; http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/parquet_types_vectorization.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_types_vectorization.q b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q index bb0e5b2..1f353ae 100644 --- a/ql/src/test/queries/clientpositive/parquet_types_vectorization.q +++ b/ql/src/test/queries/clientpositive/parquet_types_vectorization.q @@ -7,6 +7,7 @@ set hive.vectorized.execution.reduce.enabled=true; set hive.vectorized.use.row.serde.deserialize=true; set hive.vectorized.use.vector.serde.deserialize=true; set hive.vectorized.execution.reduce.groupby.enabled = true; +set hive.llap.cache.allow.synthetic.fileid=true; CREATE TABLE parquet_types_staging ( cint int, http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/vectorized_parquet.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet.q b/ql/src/test/queries/clientpositive/vectorized_parquet.q index db02ec0..1efc60e 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet.q @@ -1,6 +1,7 @@ set hive.explain.user=false; set hive.exec.submitviachild=false; set hive.exec.submit.local.task.via.child=false; +set hive.llap.cache.allow.synthetic.fileid=true; create table if not exists alltypes_parquet ( cint int, http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/queries/clientpositive/vectorized_parquet_types.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q index 7467cb3..63f811b 100644 --- a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q +++ b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -1,5 +1,6 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; +set hive.llap.cache.allow.synthetic.fileid=true; DROP TABLE parquet_types_staging; DROP TABLE parquet_types; http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index aecbcfd..01458fe 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -140,7 +140,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -198,7 +198,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -540,7 +540,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -606,7 +606,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -739,7 +739,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -818,7 +818,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1008,7 +1008,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1108,7 +1108,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1220,7 +1220,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Execution mode: llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Reducer 2 Execution mode: llap Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out new file mode 100644 index 0000000..66bfb81 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out @@ -0,0 +1,849 @@ +PREHOOK: query: DROP TABLE parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_types +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types +POSTHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + m1 map<string, varchar(3)>, + l1 array<int>, + st1 struct<c1:int, c2:char(1)>, + d date +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: SELECT * FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +#### A masked pattern was here #### +100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01 +101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02 +102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03 +103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} 2014-04-04 +104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} 2015-05-05 +105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 {"k6":"v6"} [106,200] {"c1":10,"c2":"p"} 2016-06-06 +106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} 2017-07-07 +107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} 2018-08-08 +108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} 2019-09-09 +109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} 2020-10-10 +110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} 2021-11-11 +111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} 2022-12-12 +112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} 2023-01-02 +113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} 2024-02-02 +114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} 2025-03-03 +115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded 68692CCAC0BDE7 {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} 2026-04-04 +116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} 2027-05-05 +117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} 2028-06-06 +118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} 2029-07-07 +119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} 2030-08-08 +120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD {"k21":"v21"} [121,200] {"c1":10,"c2":"i"} 2031-09-09 +121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde {"k22":"v22"} [122,200] {"c1":10,"c2":"l"} 2032-10-10 +PREHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types +POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: parquet_types.d SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, comment:null), ] +POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ] +POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ] +POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] +PREHOOK: query: EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) + outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +1 121 1 8 1.175 2.06216 +2 119 1 7 1.21429 1.8 +3 120 1 7 1.17143 1.8 +PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cfloat (type: float) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Map-reduce partition columns: _col0 (type: float) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +1.0 5 +1.1 5 +1.2 4 +1.3 4 +1.4 4 +PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cchar (type: char(5)) + outputColumnNames: cchar + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cchar (type: char(5)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(5)) + sort order: + + Map-reduce partition columns: _col0 (type: char(5)) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(5)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(5)) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +a 1 +ab 1 +abc 1 +abcd 1 +abcde 3 +bcdef 2 +cdefg 1 +klmno 1 +nopqr 1 +opqrs 1 +pqrst 2 +qrstu 1 +rstuv 1 +stuvw 1 +tuvwx 1 +uvwzy 1 +vwxyz 1 +wxyza 1 +PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cvarchar (type: varchar(10)) + outputColumnNames: cvarchar + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cvarchar (type: varchar(10)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(10)) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: varchar(10)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +B4F3CAFDBE 1 +a 1 +ab 1 +abc 2 +abcd 1 +abcdd 1 +abcde 3 +abcded 4 +abcdede 3 +abcdedef 1 +abcdef 1 +abcdefg 1 +abcdefgh 1 +b 1 +PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring1 (type: string) + outputColumnNames: cstring1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cstring1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +abc 1 +bcd 1 +cde 1 +def 1 +efg 1 +fgh 1 +ghi 1 +hij 1 +ijk 1 +jkl 1 +klm 1 +lmn 1 +mno 1 +nop 1 +pqr 1 +qrs 2 +stu 1 +vwx 1 +wxy 1 +yza 1 +zab 1 +PREHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: t (type: timestamp) + outputColumnNames: t + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: t (type: timestamp) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: timestamp) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +2011-01-01 01:01:01.111111111 1 +2012-02-02 02:02:02.222222222 1 +2013-03-03 03:03:03.333333333 1 +2014-04-04 04:04:04.444444444 1 +2015-05-05 05:05:05.555555555 1 +2016-06-06 06:06:06.666666666 1 +2017-07-07 07:07:07.777777777 1 +2018-08-08 08:08:08.888888888 1 +2019-09-09 09:09:09.999999999 1 +2020-10-10 10:10:10.101010101 1 +2021-11-11 11:11:11.111111111 1 +2022-12-12 12:12:12.121212121 1 +2023-01-02 13:13:13.131313131 1 +2024-02-02 14:14:14.141414141 1 +2025-03-03 15:15:15.151515151 1 +2026-04-04 16:16:16.161616161 1 +2027-05-05 17:17:17.171717171 1 +2028-06-06 18:18:18.181818181 1 +2029-07-07 19:19:19.191919191 1 +2030-08-08 20:20:20.202020202 1 +2031-09-09 21:21:21.212121212 1 +2032-10-10 22:22:22.222222222 1 +PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbinary (type: binary) + outputColumnNames: cbinary + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cbinary (type: binary) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: binary) + sort order: + + Map-reduce partition columns: _col0 (type: binary) + Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs (cache only) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: binary) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hex(_col0) (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +68692CCAC0BDE7 10 + 1 +68656C6C6F 1 +B4F3CAFDBEDD 10 http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index c5f7128..dd62df6 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -2734,7 +2734,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col4 (type: float) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -2915,7 +2915,7 @@ STAGE PLANS: Statistics: Num rows: 137 Data size: 822 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -3411,7 +3411,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -3646,7 +3646,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -3851,7 +3851,7 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -4371,7 +4371,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -4606,7 +4606,7 @@ STAGE PLANS: TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: date), _col3 (type: float) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true @@ -4811,7 +4811,7 @@ STAGE PLANS: Statistics: Num rows: 12 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 8a84d3d..781c8a1 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -159,7 +159,7 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index cdf6b3d..0303e0e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -312,7 +312,7 @@ STAGE PLANS: Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: decimal(4,2)) Execution mode: vectorized, llap - LLAP IO: no inputs + LLAP IO: all inputs (cache only) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java new file mode 100644 index 0000000..403c3ad --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/FileMetadataCache.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.io; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers; + +public interface FileMetadataCache { + /** + * @return Metadata for a given file (ORC or Parquet footer). + * The caller must decref this buffer when done. + */ + MemoryBufferOrBuffers getFileMetadata(Object fileKey); + + // TODO: add BB put method(s) when merging with ORC off-heap metadata cache + + /** + * Puts the metadata for a given file (e.g. a footer buffer into cache). + * @param fileKey The file key. + * @param length The footer length. + * @param is The stream to read the footer from. + * @return The buffer or buffers representing the cached footer. + * The caller must decref this buffer when done. + */ + MemoryBufferOrBuffers putFileMetadata( + Object fileKey, int length, InputStream is) throws IOException; + + /** + * Releases the buffer returned from getFileMetadata or putFileMetadata method. + * @param buffer The buffer to release. + */ + void decRefBuffer(MemoryBufferOrBuffers buffer); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/9e673a73/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java ---------------------------------------------------------------------- diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java new file mode 100644 index 0000000..c04310a --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/encoded/MemoryBufferOrBuffers.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.io.encoded; + +public interface MemoryBufferOrBuffers { + MemoryBuffer getSingleBuffer(); + MemoryBuffer[] getMultipleBuffers(); +} \ No newline at end of file
