hive git commit: HIVE-20787 : MapJoinBytesTableContainer dummyRow case doesn't handle reuse (Sergey Shelukhin, reviewed by Jason Dere)
Repository: hive
Updated Branches:
  refs/heads/master 148e7acba -> ccbc5c383

HIVE-20787 : MapJoinBytesTableContainer dummyRow case doesn't handle reuse (Sergey Shelukhin, reviewed by Jason Dere)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccbc5c38
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccbc5c38
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccbc5c38

Branch: refs/heads/master
Commit: ccbc5c383b13f81855d58e8b1d2dc168a7f2893e
Parents: 148e7ac
Author: sergey
Authored: Tue Nov 13 13:38:40 2018 -0800
Committer: sergey
Committed: Tue Nov 13 13:38:40 2018 -0800

----------------------------------------------------------------------
 .../persistence/MapJoinBytesTableContainer.java | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/ccbc5c38/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
index 0e4b8df..bf4250d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
@@ -708,6 +708,8 @@ public class MapJoinBytesTableContainer
      * This container does not normally support adding rows; this is for the dummy row.
      */
     private List<Object> dummyRow = null;
+    // TODO: the API here is not ideal, first/next + hasRows are redundant.
+    private boolean wasFirstCalledOnDummyRow = false;
     private final ByteArrayRef uselessIndirection; // LBStruct needs ByteArrayRef
     private final LazyBinaryStruct valueStruct;
@@ -747,6 +749,7 @@ public class MapJoinBytesTableContainer
       aliasFilter = hashMap.getValueResult(
           output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null);
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       if (hashMapResult.hasRows()) {
         return JoinUtil.JoinResult.MATCH;
       } else {
@@ -760,6 +763,7 @@ public class MapJoinBytesTableContainer
       aliasFilter = hashMap.getValueResult(
           output.getData(), 0, output.getLength(), hashMapResult, matchTracker);
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       if (hashMapResult.hasRows()) {
         return JoinUtil.JoinResult.MATCH;
       } else {
@@ -774,7 +778,7 @@ public class MapJoinBytesTableContainer

     @Override
     public boolean hasRows() {
-      return hashMapResult.hasRows() || (dummyRow != null);
+      return hashMapResult.hasRows() || (dummyRow != null && !wasFirstCalledOnDummyRow);
     }

     @Override
@@ -803,6 +807,7 @@ public class MapJoinBytesTableContainer
       // Doesn't clear underlying hashtable
       hashMapResult.forget();
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       aliasFilter = (byte) 0xff;
     }

@@ -819,12 +824,9 @@ public class MapJoinBytesTableContainer
     // Implementation of row iterator
     @Override
     public List<Object> first() throws HiveException {
-
-      // A little strange that we forget the dummy row on read.
       if (dummyRow != null) {
-        List<Object> result = dummyRow;
-        dummyRow = null;
-        return result;
+        wasFirstCalledOnDummyRow = true;
+        return dummyRow;
       }

       WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
@@ -838,6 +840,13 @@ public class MapJoinBytesTableContainer

     @Override
     public List<Object> next() throws HiveException {
+      if (dummyRow != null) {
+        // TODO: what should we do if first was never called? for now, assert for clarity
+        if (!wasFirstCalledOnDummyRow) {
+          throw new AssertionError("next called without first");
+        }
+        return null;
+      }

       WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next();
       if (byteSegmentRef == null) {
@@ -874,6 +883,7 @@ public class MapJoinBytesTableContainer
         throw new RuntimeException("Cannot add rows when not empty");
       }
       dummyRow = t;
+      wasFirstCalledOnDummyRow = false;
     }

     // Various unsupported methods.
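In short, first() used to consume the dummy row (nulling dummyRow on read), so a reused adaptor that walked the same result a second time saw nothing. The fixed contract, reduced to a standalone sketch, with a hypothetical SingleRowIterator standing in for Hive's ReusableRowContainer:

    import java.util.List;

    // Hypothetical stand-in for Hive's ReusableRowContainer; illustrates the
    // first()/next()/hasRows() contract the patch restores for the dummy row.
    class SingleRowIterator {
      private List<Object> dummyRow = null;
      private boolean wasFirstCalled = false;

      // Analogous to addRow(): (re)loading the row re-arms the iterator.
      void setRow(List<Object> row) {
        dummyRow = row;
        wasFirstCalled = false;
      }

      // The row counts as pending only until first() has read it; the old code
      // nulled dummyRow inside first(), so a reused iterator looked empty.
      boolean hasRows() {
        return dummyRow != null && !wasFirstCalled;
      }

      // Marks the row as read but keeps it, so iterating again still works.
      List<Object> first() {
        wasFirstCalled = true;
        return dummyRow;
      }

      // A single-row container never has a second row; calling next() before
      // first() is a caller bug, asserted here as in the patch.
      List<Object> next() {
        if (dummyRow != null && !wasFirstCalled) {
          throw new AssertionError("next called without first");
        }
        return null;
      }
    }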
[57/59] [abbrv] hive git commit: HIVE-20676 : HiveServer2: PrivilegeSynchronizer is not set to daemon status (Vaibhav Gumashta via Thejas Nair)
HIVE-20676 : HiveServer2: PrivilegeSynchronizer is not set to daemon status (Vaibhav Gumashta via Thejas Nair)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/52f94b8f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/52f94b8f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/52f94b8f

Branch: refs/heads/master-tez092
Commit: 52f94b8f1dd99950f9323a6c2a5e3a694db46269
Parents: 1ceb4eb
Author: Vaibhav Gumashta
Authored: Tue Nov 13 11:34:10 2018 -0700
Committer: Thejas M Nair
Committed: Tue Nov 13 11:34:10 2018 -0700

----------------------------------------------------------------------
 service/src/java/org/apache/hive/service/server/HiveServer2.java | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/52f94b8f/service/src/java/org/apache/hive/service/server/HiveServer2.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 4335574..9376e87 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -1017,6 +1017,7 @@ public class HiveServer2 extends CompositeService {
         LOG.info("Find " + policyContainer.size() + " policy to synchronize, start PrivilegeSynchronizer");
         Thread privilegeSynchronizerThread = new Thread(
             new PrivilegeSynchronizer(privilegeSynchronizerLatch, policyContainer, hiveConf), "PrivilegeSynchronizer");
+        privilegeSynchronizerThread.setDaemon(true);
         privilegeSynchronizerThread.start();
       } else {
         LOG.warn(
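The fix itself is the single setDaemon(true) call. The failure mode it closes: the JVM only exits once every non-daemon thread has finished, so a forgotten background synchronizer thread can keep a stopped server process alive. A minimal sketch of the rule (hypothetical demo class, not HiveServer2 code):

    // Hypothetical demo: a long-lived background thread must be a daemon,
    // or it pins the JVM after the main work is done.
    public class DaemonExitDemo {
      public static void main(String[] args) {
        Thread worker = new Thread(() -> {
          while (!Thread.currentThread().isInterrupted()) {
            try {
              Thread.sleep(1000L);   // periodic background work, like PrivilegeSynchronizer
            } catch (InterruptedException ie) {
              return;
            }
          }
        }, "background-worker");
        worker.setDaemon(true);      // must be set before start(); mirrors the HIVE-20676 change
        worker.start();
        // main() returns here: with the daemon flag the JVM exits immediately;
        // without it the process would hang until the worker is killed.
      }
    }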
[46/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join_filters_overlap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
index 7b5c9f2..847b45d 100644
--- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
@@ -18,15 +18,6 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on (
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@a_n4
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
-FROM `default`.`a_n4`) AS `t`
-LEFT JOIN (SELECT `key`, CAST(50 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 50) AS `t1` ON `t`.`key` = `t1`.`key` AND `t`.`value` = 50
-LEFT JOIN (SELECT `key`, CAST(60 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 60) AS `t3` ON `t`.`key` = `t3`.`key` AND `t`.`value` = 60
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -40,8 +31,8 @@ STAGE PLANS:
             Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Select Operator
-              expressions: key (type: int), value (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: int), value (type: int), (value = 60) (type: boolean), (value = 50) (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -50,7 +41,7 @@ STAGE PLANS:
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
-                value expressions: _col1 (type: int)
+                value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
                 auto parallelism: false
           TableScan
             alias: b
@@ -158,37 +149,41 @@ STAGE PLANS:
          filter mappings:
            0 [1, 1, 2, 1]
          filter predicates:
-           0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)}
+           0 {VALUE._col2} {VALUE._col1}
            1 
            2 
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
            2 _col0 (type: int)
-         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+         outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7
          Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-         File Output Operator
-           compressed: false
-           GlobalTableId: 0
-#### A masked pattern was here ####
-           NumFilesPerFileSink: 1
+         Select Operator
+           expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
            Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-           table:
-               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-               properties:
-                 columns _col0,_col1,_col2,_col3,_col4,_col5
-                 columns.types int:int:int:int:int:int
-                 escape.delim \
-                 hive.serialization.extend.additional.nesting.levels true
-                 serialization.escape.crlf true
-                 serialization.format 1
-                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-           TotalFiles: 1
-           GatherStats: false
-           MultiFileSpray: false
+           File Output Operator
+             compressed: false
+             GlobalTableId: 0
+#### A masked pattern was here ####
+             NumFilesPerFileSink: 1
+             Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+             table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2,_col3,_col4,_col5
+                   columns.types int:int:int:int:int:int
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels
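All of the golden-file churn in these partial commits comes from one planner rewrite: an expression the join needs (here value = 50 and value = 60) is projected as a boolean column on the table-scan side, and the join's filter predicate then reads the precomputed column ({VALUE._col2}) instead of re-evaluating the comparison per joined row. A standalone sketch of that evaluate-once idea in plain Java (hypothetical Row type, not Hive's operator classes):

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical illustration of the HIVE-20850 rewrite: push the filter
    // expression into the projection so the join only reads a boolean flag.
    class PushdownSketch {
      static final class Row {
        final int key;
        final int value;
        final boolean valueIs50;   // extra projected column, like _col3 in the plan
        Row(int key, int value) {
          this.key = key;
          this.value = value;
          this.valueIs50 = (value == 50);   // evaluated once, at "scan" time
        }
      }

      // The join-side filter is now a column read, not an expression evaluation.
      static List<Row> filterForJoin(List<Row> probeSide) {
        List<Row> out = new ArrayList<>();
        for (Row r : probeSide) {
          if (r.valueIs50) {       // was: (r.value == 50) re-evaluated per probe
            out.add(r);
          }
        }
        return out;
      }
    }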
[07/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query75.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
index 9968ade..f4bd046 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
@@ -244,7 +244,7 @@ Stage-0
                 Select Operator [SEL_539] (rows=170474971 width=131)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                   Merge Join Operator [MERGEJOIN_538] (rows=170474971 width=234)
-                    Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+                    Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
                   <-Map 44 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_625]
                       PartitionCols:_col0, _col1
@@ -258,12 +258,12 @@ Stage-0
                     SHUFFLE [RS_103]
                       PartitionCols:_col1, _col2
                       Merge Join Operator [MERGEJOIN_510] (rows=96821196 width=138)
-                        Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"]
+                        Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"]
                       <-Map 37 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_599]
                           PartitionCols:_col0
-                          Select Operator [SEL_592] (rows=45745 width=109)
-                            Output:["_col0","_col1","_col2","_col3","_col5"]
+                          Select Operator [SEL_592] (rows=45745 width=19)
+                            Output:["_col0","_col1","_col2","_col3","_col4"]
                             Filter Operator [FIL_591] (rows=45745 width=109)
                               predicate:((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null)
                               TableScan [TS_6] (rows=462000 width=109)
@@ -276,7 +276,7 @@ Stage-0
                       <-Map 11 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_571]
                           PartitionCols:_col0
-                          Select Operator [SEL_562] (rows=652 width=8)
+                          Select Operator [SEL_562] (rows=652 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_558] (rows=652 width=8)
                               predicate:((d_year = 2002) and d_date_sk is not null)
@@ -321,7 +321,7 @@ Stage-0
                 Select Operator [SEL_548] (rows=450703984 width=131)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                   Merge Join Operator [MERGEJOIN_547] (rows=450703984 width=204)
-                    Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+                    Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
                   <-Map 46 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_649]
                       PartitionCols:_col0, _col1
@@ -335,7 +335,7 @@ Stage-0
                     SHUFFLE [RS_125]
                       PartitionCols:_col1, _col2
[48/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out
index 6cf6c33..7865e0e 100644
--- a/ql/src/test/results/clientpositive/join45.q.out
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -363,24 +363,24 @@ STAGE PLANS:
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+                value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+                value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -388,21 +388,25 @@ STAGE PLANS:
          keys:
            0 
            1 
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D) (type: boolean)
-           Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
-           Limit
-             Number of rows: 10
-             Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
-             File Output Operator
-               compressed: false
+           predicate: ((_col0 = _col3) or _col2 or _col5) (type: boolean)
+           Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+           Select Operator
+             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+             outputColumnNames: _col0, _col1, _col2, _col3
+             Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+             Limit
+               Number of rows: 10
                Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
-               table:
-                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -474,24 +478,24 @@ STAGE PLANS:
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(key)
[13/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
index 59cca4f..d7fd2ed 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
@@ -94,7 +94,7 @@ Stage-0
             Select Operator [SEL_27] (rows=1427275 width=186)
               Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
               Merge Join Operator [MERGEJOIN_100] (rows=1427275 width=186)
-                Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"]
+                Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"]
              <-Map 14 [SIMPLE_EDGE] vectorized
                 SHUFFLE [RS_130]
                   PartitionCols:_col0
@@ -108,7 +108,7 @@ Stage-0
                 SHUFFLE [RS_24]
                   PartitionCols:_col1
                   Merge Join Operator [MERGEJOIN_99] (rows=1427275 width=90)
-                    Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"]
+                    Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"]
                  <-Map 12 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_119]
                       PartitionCols:_col0
@@ -126,7 +126,7 @@ Stage-0
                      <-Map 10 [SIMPLE_EDGE] vectorized
                         SHUFFLE [RS_111]
                           PartitionCols:_col0
-                          Select Operator [SEL_110] (rows=652 width=8)
+                          Select Operator [SEL_110] (rows=652 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_109] (rows=652 width=8)
                               predicate:((d_year = 2001) and d_date_sk is not null)
@@ -140,7 +140,7 @@ Stage-0
                      <-Map 8 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_103]
                           PartitionCols:_col0
-                          Select Operator [SEL_102] (rows=14776 width=269)
+                          Select Operator [SEL_102] (rows=14776 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_101] (rows=14776 width=268)
                               predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null)

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
index a21c3c7..19f121e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
@@ -144,20 +144,20 @@ Stage-0
                 SHUFFLE [RS_49]
                   PartitionCols:_col0, _col1, _col2, _col3
                   Group By Operator [GBY_48] (rows=21091879 width=496)
-                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23
                     Top N Key Operator [TNK_93] (rows=4156223234 width=483)
-                      keys:_col7, _col8, _col27, _col28,sort order:,top n:100
+                      keys:_col6, _col7, _col22, _col23,sort order:,top n:100
                       Merge Join Operator [MERGEJOIN_205] (rows=4156223234 width=483)
-                        Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"]
+                        Conds:RS_44._col2, _col1=RS_45._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"]
                       <-Reducer 2 [SIMPLE_EDGE]
                         PARTITION_ONLY_SHUFFLE [RS_44]
-                          PartitionCols:_col1,
[03/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
index 98789d7..ddb436b 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
@@ -187,25 +187,28 @@ STAGE PLANS:
             alias: test1_n5
             Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: int), col_1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
               Sorted Merge Bucket Map Join Operator
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {_col0 BETWEEN 100 AND 102}
+                  0 {_col3}
                   1 
                 keys:
                   0 _col1 (type: int)
                   1 _col1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -283,7 +286,7 @@ STAGE PLANS:
             Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
             HashTable Sink Operator
               filter predicates:
-                0 {_col0 BETWEEN 100 AND 102}
+                0 {_col3}
                 1 
               keys:
                 0 
@@ -296,27 +299,31 @@ STAGE PLANS:
             alias: test1_n5
             Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: int), col_1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
                condition map:
                     Left Outer Join 0 to 1
                filter predicates:
-                 0 {_col0 BETWEEN 100 AND 102}
+                 0 {_col3}
                  1 
                keys:
                  0 
                  1 
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
                Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
[21/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index ace7cf5..b55f2c1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
-Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
-Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
+Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
+Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
@@ -128,16 +128,15 @@ HiveSortLimit(fetch=[100])
                 HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveProject($f0=[$0])
                     HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject($f0=[$0], $f1=[$1])
-                        HiveAggregate(group=[{0}], agg#0=[sum($1)])
-                          HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), $2)])
-                            HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
-                              HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13])
-                                HiveFilter(condition=[IS NOT NULL($3)])
-                                  HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
-                              HiveProject(c_customer_sk=[$0])
-                                HiveFilter(condition=[IS NOT NULL($0)])
-                                  HiveTableScan(table=[[default, customer]], table:alias=[customer])
+                      HiveProject(c_customer_sk=[$0], $f1=[$1])
+                        HiveAggregate(group=[{2}], agg#0=[sum($1)])
+                          HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                            HiveProject(ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)])
+                              HiveFilter(condition=[IS NOT NULL($3)])
+                                HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                            HiveProject(c_customer_sk=[$0])
+                              HiveFilter(condition=[IS NOT NULL($0)])
+                                HiveTableScan(table=[[default, customer]], table:alias=[customer])
                       HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveProject(cnt=[$0])
                           HiveFilter(condition=[<=(sq_count_check($0), 1)])
@@ -146,75 +145,71 @@ HiveSortLimit(fetch=[100])
                             HiveProject
                               HiveProject($f0=[$0])
                                 HiveAggregate(group=[{}], agg#0=[count($0)])
-                                  HiveProject($f0=[$0], $f1=[$1])
-                                    HiveAggregate(group=[{0}], agg#0=[sum($1)])
-                                      HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)])
-                                        HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                                          HiveProject(c_customer_sk=[$0])
-                                            HiveFilter(condition=[IS NOT NULL($0)])
-                                              HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                                          HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
-                                            HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13])
-                                              HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
-                                                HiveTableScan(table=[[default,
[02/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 3165970..2fc9a3d 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -2662,7 +2662,7 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
+                      expressions: key (type: int), substr(value, 5) (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -2703,22 +2703,18 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col1 (type: int), substr(_col2, 5) (type: string)
+                        Group By Operator
+                          aggregations: sum(_col2)
+                          keys: _col1 (type: int)
+                          mode: hash
                           outputColumnNames: _col0, _col1
                           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: sum(_col1)
-                            keys: _col0 (type: int)
-                            mode: hash
-                            outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: int)
-                              sort order: +
-                              Map-reduce partition columns: _col0 (type: int)
-                              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col1 (type: double)
+                            value expressions: _col1 (type: double)
         Reducer 5 
            Execution mode: vectorized
            Reduce Operator Tree:
@@ -2827,7 +2823,7 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
+                      expressions: key (type: int), substr(value, 5) (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -2868,22 +2864,18 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col1 (type: int), substr(_col2, 5) (type: string)
+                        Group By Operator
+                          aggregations: sum(_col2)
+                          keys: _col1 (type: int)
+                          mode: hash
                           outputColumnNames: _col0, _col1
                           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: sum(_col1)
-                            keys: _col0 (type: int)
-                            mode: hash
-                            outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: int)
-                              sort order: +
-                              Map-reduce partition columns: _col0 (type: int)
-                              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col1 (type:
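Here the pushed expression is a function rather than a predicate: substr(value, 5) moves from a post-join Select into the table scan's projection, which lets the Group By consume the join output directly, with one fewer operator on the hot path. A small sketch of that reordering (hypothetical method, not Hive code):

    import java.util.List;
    import java.util.stream.Collectors;

    // Hypothetical sketch of the operator reordering: the substring is applied
    // in the scan projection (map side), not in a Select after the join.
    // Old shape: scan -> join -> substr -> group-by.
    // New shape: scan+substr -> join -> group-by.
    class EarlyProjection {
      static List<String> scanProjection(List<String> rawValues) {
        return rawValues.stream()
            .map(v -> v.substring(4))   // substr(value, 5) done once at scan time
            .collect(Collectors.toList());
      }
    }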
[19/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
index 2504d78..b4410ff 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
@@ -141,10 +141,10 @@ POSTHOOK: Input: default@web_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
-  HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))])
-    HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $1, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available])
-      HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available])
-        HiveProject(i_item_id=[$0], $f1=[$1])
+  HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))])
+    HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
+      HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available])
+        HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
          HiveAggregate(group=[{4}], agg#0=[sum($2)])
            HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
              HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
                 HiveProject(d_week_seq=[$4])
                   HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))])
                     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-        HiveProject(i_item_id=[$0], $f1=[$1])
+        HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
           HiveAggregate(group=[{4}], agg#0=[sum($2)])
             HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
               HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -206,7 +206,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
                 HiveProject(d_week_seq=[$4])
                   HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))])
                     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-      HiveProject(i_item_id=[$0], $f1=[$1])
+      HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
        HiveAggregate(group=[{4}], agg#0=[sum($2)])
          HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
            HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
index bb92a1f..8674a8a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
@@ -94,24 +94,24 @@ POSTHOOK: Input: default@store_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100])
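In the CBO plans the rewrite also hoists constant arithmetic: the old join condition recomputed 0.9 * rev and 1.1 * rev inside every BETWEEN, while the new HiveProject emits them once as extra columns (the *=[*(0.9, $1)] and *3=[*(1.1, $1)] fields) and the join compares against those column references. Sketch of the same idea (hypothetical record, not Calcite/Hive classes):

    // Hypothetical sketch: materialize the BETWEEN bounds with the aggregate,
    // as HiveProject($f0, $f1, *(0.9,$1), *(1.1,$1)) does in the new plan.
    class PrecomputedBounds {
      static final class Revenue {
        final String itemId;
        final double rev;
        final double lo;   // 0.9 * rev, computed once per aggregated row
        final double hi;   // 1.1 * rev, computed once per aggregated row
        Revenue(String itemId, double rev) {
          this.itemId = itemId;
          this.rev = rev;
          this.lo = 0.9 * rev;
          this.hi = 1.1 * rev;
        }
      }

      // The join condition reads precomputed columns instead of multiplying per pair.
      static boolean within(Revenue a, Revenue b) {
        return a.rev >= b.lo && a.rev <= b.hi;
      }
    }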
[41/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/join46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out
index fcd0d83..ec58429 100644
--- a/ql/src/test/results/clientpositive/llap/join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/join46.q.out
@@ -187,15 +187,15 @@ STAGE PLANS:
                   alias: test1_n2
                   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: int), value (type: int), col_1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col1 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: int), _col2 (type: string)
+                      Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 3 
@@ -226,20 +226,24 @@ STAGE PLANS:
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {VALUE._col0 BETWEEN 100 AND 102}
+                  0 {VALUE._col2}
                   1 
                 keys:
                   0 _col1 (type: int)
                   1 _col1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
                 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -308,13 +312,13 @@ STAGE PLANS:
                   alias: test1_n2
                   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: int), value (type: int), col_1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                      Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
[04/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
index a5fa5e8..a3056a5 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -391,15 +391,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@pcr_t1_n1
 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT `t4`.`key`, `t4`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t4`.`ds2`, `t4`.`key1`, `t4`.`value1`, `t4`.`ds11`, CAST('2001-04-08' AS STRING) AS `ds21`
-FROM (SELECT `t0`.`key`, `t0`.`value`, `t0`.`ds2`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`, `t2`.`ds1` AS `ds11`
-FROM (SELECT `key`, `value`, CAST('2000-04-08' AS STRING) AS `ds1`, `ds2`
+OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, `t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21`
+FROM (SELECT *
+FROM (SELECT `key`, `value`, `ds2`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds1` = '2000-04-08' AND `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`, `ds1`, CAST('2001-04-08' AS STRING) AS `ds2`
+INNER JOIN (SELECT `key`, `value`, `ds1`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds2` = '2001-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key`
-ORDER BY `t2`.`key`, `t2`.`value`) AS `t4`
+ORDER BY `t2`.`key`, `t2`.`value`) AS `t3`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -420,7 +420,7 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds2 (type: string)
-              outputColumnNames: _col0, _col1, _col3
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -429,7 +429,7 @@ STAGE PLANS:
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
-                value expressions: _col1 (type: string), _col3 (type: string)
+                value expressions: _col1 (type: string), _col2 (type: string)
                 auto parallelism: false
           TableScan
             alias: t2
@@ -515,30 +515,26 @@ STAGE PLANS:
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
-         outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string)
-           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-           Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             GlobalTableId: 0
+         File Output Operator
+           compressed: false
+           GlobalTableId: 0
 #### A masked pattern was here ####
-             NumFilesPerFileSink: 1
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 properties:
-                   column.name.delimiter ,
-                   columns _col0,_col1,_col2,_col3,_col4,_col5
-                   columns.types int,string,string,int,string,string
-                   escape.delim \
-                   serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-             TotalFiles: 1
-             GatherStats: false
-             MultiFileSpray: false
+           NumFilesPerFileSink: 1
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               properties:
+                 column.name.delimiter ,
+                 columns _col0,_col1,_col2,_col3,_col4,_col5
+                 columns.types int,string,string,int,string,string
+                 escape.delim \
[36/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
index 37970ab..7e09d5e 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
@@ -51,14 +51,14 @@ STAGE PLANS:
                     predicate: (csmallint < 100S) (type: boolean)
                     Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                      Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: UDFToInteger(_col1) (type: int)
+                        key expressions: _col12 (type: int)
                         sort order: +
-                        Map-reduce partition columns: UDFToInteger(_col1) (type: int)
-                        Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map-reduce partition columns: _col12 (type: int)
+                        Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -72,14 +72,14 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: UDFToInteger(key) (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: UDFToInteger(_col0) (type: int)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 5 
@@ -92,14 +92,14 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: (UDFToInteger(key) + 0) (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats:
[49/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
index 1e195bc..b6d726e 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
@@ -542,13 +542,13 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@encrypted_table
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
+OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`
+FROM (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`src`
 WHERE `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`
+INNER JOIN (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`encrypted_table`
-WHERE `key` IS NOT NULL) AS `t2` ON CAST(`t0`.`key` AS DOUBLE) = CAST(`t2`.`key` AS DOUBLE)
+WHERE `key` IS NOT NULL) AS `t2` ON `t0`.`CAST` = `t2`.`CAST`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -567,14 +567,14 @@ STAGE PLANS:
             predicate: key is not null (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: UDFToDouble(_col0) (type: double)
+                key expressions: _col2 (type: double)
                 null sort order: a
                 sort order: +
-                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                Map-reduce partition columns: _col2 (type: double)
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
                 value expressions: _col0 (type: string), _col1 (type: string)
@@ -589,14 +589,14 @@ STAGE PLANS:
             predicate: key is not null (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: UDFToDouble(_col0) (type: double)
+                key expressions: _col2 (type: double)
                 null sort order: a
                 sort order: +
-                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                Map-reduce partition columns: _col2 (type: double)
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 tag: 1
                 value expressions: _col0 (type: int), _col1 (type: string)
@@ -714,32 +714,36 @@ STAGE PLANS:
          condition map:
               Inner Join 0 to 1
          keys:
-           0 UDFToDouble(_col0) (type: double)
-           1 UDFToDouble(_col0) (type: double)
-         outputColumnNames: _col0, _col1, _col2, _col3
+           0 _col2 (type: double)
+           1 _col2 (type: double)
+         outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-         File Output Operator
-           compressed: false
-           GlobalTableId: 0
-           directory: hdfs://### HDFS PATH ###
-           NumFilesPerFileSink: 1
+         Select Operator
+           expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: string)
+           outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-           Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
-           table:
-               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-               output format:
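The encrypted-table plan shows the same rewrite applied to join keys: CAST(key AS DOUBLE) becomes a projected column (_col2) that both the shuffle and the join reference, so the cast runs once per row instead of once wherever the key expression appears. A small sketch of precomputing a derived join key (hypothetical types, not Hive operators):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Hypothetical sketch: derive the join key once per row up front,
    // like projecting UDFToDouble(key) as _col2 before the shuffle.
    class PrecomputedJoinKey {
      static Map<Double, String> buildSide(List<String> keys) {
        Map<Double, String> index = new HashMap<>();
        for (String k : keys) {
          index.put(Double.parseDouble(k), k);   // cast computed once here
        }
        return index;
      }

      static String probe(Map<Double, String> index, double precomputedKey) {
        return index.get(precomputedKey);        // no per-probe re-cast
      }
    }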
[10/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query56.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
index 18f64cc..17458f4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
@@ -204,9 +204,9 @@ Stage-0
                 SHUFFLE [RS_71]
                   PartitionCols:_col0
                   Group By Operator [GBY_70] (rows=355 width=212)
-                    Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
                     Merge Join Operator [MERGEJOIN_303] (rows=339151 width=100)
-                      Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+                      Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"]
                     <-Reducer 2 [SIMPLE_EDGE]
                       SHUFFLE [RS_66]
                         PartitionCols:_col0
@@ -239,15 +239,15 @@ Stage-0
                             default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"]
                     <-Reducer 23 [SIMPLE_EDGE]
                       SHUFFLE [RS_67]
-                        PartitionCols:_col4
+                        PartitionCols:_col3
                         Select Operator [SEL_62] (rows=1550375 width=13)
-                          Output:["_col4","_col5"]
+                          Output:["_col3","_col4"]
                           Merge Join Operator [MERGEJOIN_298] (rows=1550375 width=13)
                             Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"]
                           <-Map 28 [SIMPLE_EDGE] vectorized
                             PARTITION_ONLY_SHUFFLE [RS_346]
                               PartitionCols:_col0
-                              Select Operator [SEL_343] (rows=800 width=116)
+                              Select Operator [SEL_343] (rows=800 width=4)
                                 Output:["_col0"]
                                 Filter Operator [FIL_342] (rows=800 width=112)
                                   predicate:((ca_gmt_offset = -8) and ca_address_sk is not null)
@@ -261,7 +261,7 @@ Stage-0
                           <-Map 20 [SIMPLE_EDGE] vectorized
                             PARTITION_ONLY_SHUFFLE [RS_330]
                               PartitionCols:_col0
-                              Select Operator [SEL_327] (rows=50 width=12)
+                              Select Operator [SEL_327] (rows=50 width=4)
                                 Output:["_col0"]
                                 Filter Operator [FIL_326] (rows=50 width=12)
                                   predicate:((d_moy = 1) and (d_year = 2000) and d_date_sk is not null)
@@ -320,18 +320,18 @@ Stage-0
                 SHUFFLE [RS_109]
                   PartitionCols:_col0
                   Group By Operator [GBY_108] (rows=355 width=212)
-                    Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
                     Merge Join Operator [MERGEJOIN_304] (rows=172427 width=188)
-                      Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"]
+                      Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"]
                     <-Reducer 2 [SIMPLE_EDGE]
                       SHUFFLE [RS_104]
                         PartitionCols:_col0
                         Please refer to the previous Merge Join Operator [MERGEJOIN_293]
                     <-Reducer 26 [SIMPLE_EDGE]
                       SHUFFLE [RS_105]
-                        PartitionCols:_col3
+                        PartitionCols:_col2
                         Select Operator [SEL_100] (rows=788222 width=110)
-                          Output:["_col3","_col5"]
+                          Output:["_col2","_col4"]
                           Merge Join Operator [MERGEJOIN_301] (rows=788222 width=110)
                             Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"]
                           <-Map 28 [SIMPLE_EDGE] vectorized
@@ -400,18 +400,18 @@
[26/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index 07d14b5..354c178 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -304,7 +304,7 @@ STAGE PLANS: predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not null) THEN (ws_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -312,7 +312,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)) Execution mode: vectorized Map 10 Map Operator Tree: @@ -343,7 +343,7 @@ STAGE PLANS: predicate: ((wr_return_amt > 1) and wr_item_sk is not null and wr_order_number is not null) (type: boolean) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + expressions: wr_item_sk (type: int), wr_order_number (type: int), CASE WHEN (wr_return_quantity is not null) THEN (wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not null) THEN (wr_return_amt) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -351,7 +351,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) +value expressions: _col2 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 12 Map Operator Tree: @@ -363,7 +363,7 @@ STAGE PLANS: predicate: ((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and cs_item_sk is not null and cs_order_number is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_net_paid (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), 
CASE WHEN (cs_quantity is not null) THEN (cs_quantity) ELSE (0) END (type: int), CASE WHEN (cs_net_paid is not null) THEN (cs_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -371,7 +371,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
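For reference, the shape of query this golden-file change reflects: the null guards that used to run above the join are now materialized in the scan-side Select Operators, widened to decimal(12,2) before the shuffle. A minimal sketch in the spirit of TPC-DS query49 (illustrative, not the exact test query):

  -- COALESCE(x, 0) is planned as CASE WHEN (x is not null) THEN (x) ELSE (0) END,
  -- which after HIVE-20850 is evaluated at the web_sales/web_returns scans
  -- so only the guarded value crosses the shuffle.
  SELECT ws.ws_item_sk,
         SUM(COALESCE(wr.wr_return_quantity, 0)) AS returned_qty,
         SUM(COALESCE(ws.ws_quantity, 0))        AS sold_qty
  FROM web_sales ws
  LEFT JOIN web_returns wr
    ON ws.ws_order_number = wr.wr_order_number
   AND ws.ws_item_sk = wr.wr_item_sk
  GROUP BY ws.ws_item_sk;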
[50/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/auto_join_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index 43a248b..42e165d 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -63,8 +63,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -82,8 +82,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -91,7 +91,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -119,13 +119,13 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) Stage: Stage-5 Map Reduce @@ -135,17 +135,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -166,8 +170,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -
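The auto_join_stats change above shows the same rewrite applied to join keys: UDFToDouble(key) is projected at each scan, so the second join compares the precomputed (_col1 + _col3) instead of casting after the first join. A hedged sketch of the test's join shape (smalltable and the column list are approximations):

  -- key is a string, so (a.key + b.key) implies double casts; those casts
  -- now happen once in the scan-side Select Operators.
  SELECT a.key, b.key, c.key
  FROM src a
  JOIN src b ON a.key = b.key
  JOIN smalltable c ON (a.key + b.key) = c.key;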
[37/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_select.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 6870ad1..0435530 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -32,14 +32,15 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_size (type: int) -outputColumnNames: _col0 -Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_size (type: int), p_size is null (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Select Operator expressions: p_size (type: int) outputColumnNames: p_size @@ -77,12 +78,12 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2 -Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3 +Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: boolean) + Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -92,10 +93,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col2, _col3, _col4 -Statistics: Num rows: 27 Data size: 548 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3, _col4, _col5 +Statistics: Num rows: 27 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), CASE WHEN ((_col3 = 0L)) THEN (false) WHEN (_col2 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (null) ELSE (false) END (type: boolean) + expressions: _col0 (type: int), CASE WHEN (_col4) THEN (false) WHEN (_col3 is not null) THEN (true) WHEN (_col1) THEN (null) WHEN (_col5) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -136,10 +137,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator -sort order: -Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -value expressions: _col0 (type: bigint), _col1 (type: bigint) + Select Operator +expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: boolean), _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -219,15 +224,15 @@ STAGE
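The subquery_select change follows the same pattern for IN subqueries: the null flag (p_size is null) and the count tests (_col0 = 0L) and (_col1 < _col0) are computed once as booleans before the shuffle, so the final CASE consults flags instead of repeating the comparisons per row. A sketch of the query shape, inferred from the plan above (part is the test's standard fixture):

  SELECT p_size,
         p_size IN (SELECT p_size FROM part) AS p_size_in_subq
  FROM part;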
[54/59] [abbrv] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)

Signed-off-by: Ashutosh Chauhan

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af401702
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af401702
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af401702
Branch: refs/heads/master-tez092
Commit: af401702847391ab41fcf2ef8216a94a1b7bfc76
Parents: bc39c49
Author: Miklos Gergely
Authored: Thu Oct 25 13:03:00 2018 -0700
Committer: Ashutosh Chauhan
Committed: Mon Nov 12 15:28:18 2018 -0800
--
 bin/ext/llapstatus.sh                           |   4 +-
 .../hadoop/hive/llap/cli/LlapSliderUtils.java   |  55 +-
 .../llap/cli/LlapStatusOptionsProcessor.java    | 278 ---
 .../hive/llap/cli/LlapStatusServiceDriver.java  | 811 ---
 .../hadoop/hive/llap/cli/status/AmInfo.java     |  93 +++
 .../hive/llap/cli/status/AppStatusBuilder.java  | 231 ++
 .../hadoop/hive/llap/cli/status/ExitCode.java   |  44 +
 .../hive/llap/cli/status/LlapInstance.java      | 134 +++
 .../llap/cli/status/LlapStatusCliException.java |  40 +
 .../hive/llap/cli/status/LlapStatusHelpers.java | 449 --
 .../status/LlapStatusServiceCommandLine.java    | 302 +++
 .../cli/status/LlapStatusServiceDriver.java     | 775 ++
 .../hadoop/hive/llap/cli/status/State.java      |  31 +
 .../hive/llap/cli/status/package-info.java      |  24 +
 .../llap/cli/TestLlapStatusServiceDriver.java   |  98 ---
 .../TestLlapStatusServiceCommandLine.java       |  91 +++
 .../hive/llap/cli/status/package-info.java      |  23 +
 .../java/org/apache/hive/http/LlapServlet.java  |   9 +-
 18 files changed, 1799 insertions(+), 1693 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/bin/ext/llapstatus.sh
--

diff --git a/bin/ext/llapstatus.sh b/bin/ext/llapstatus.sh
index 2d2c8f4..23e6be6 100644
--- a/bin/ext/llapstatus.sh
+++ b/bin/ext/llapstatus.sh
@@ -17,7 +17,7 @@ THISSERVICE=llapstatus
 export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "

 llapstatus () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
     echo "Missing Hive CLI Jar"
     exit 3;
@@ -36,7 +36,7 @@ llapstatus () {
 }

 llapstatus_help () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   execHiveCmd $CLASS "--help"
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
--

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
index af47b26..5ec9e1d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
@@ -24,69 +24,24 @@ import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.service.api.records.Service;
 import org.apache.hadoop.yarn.service.client.ServiceClient;
 import org.apache.hadoop.yarn.service.utils.CoreFileSystem;
-import org.apache.hadoop.yarn.util.Clock;
-import org.apache.hadoop.yarn.util.SystemClock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 public class LlapSliderUtils {
-  private static final Logger LOG = LoggerFactory
-      .getLogger(LlapSliderUtils.class);
+  private static final Logger LOG = LoggerFactory.getLogger(LlapSliderUtils.class);
   private static final String LLAP_PACKAGE_DIR = ".yarn/package/LLAP/";

-  public static ServiceClient createServiceClient(
-      Configuration conf) throws Exception {
+  public static ServiceClient createServiceClient(Configuration conf) throws Exception {
     ServiceClient serviceClient = new ServiceClient();
     serviceClient.init(conf);
     serviceClient.start();
     return serviceClient;
   }

-  public static ApplicationReport getAppReport(String appName, ServiceClient serviceClient,
-      long timeoutMs) throws
-      LlapStatusServiceDriver.LlapStatusCliException {
-    Clock clock = SystemClock.getInstance();
-    long startTime = clock.getTime();
-    long timeoutTime =
[06/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index f5800b9..1ada394 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,15 +183,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 15 (BROADCAST_EDGE) +Map 11 <- Reducer 13 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -200,134 +200,138 @@ Stage-0 limit:-1 Stage-1 Reducer 10 vectorized - File Output Operator [FS_239] -Limit [LIM_238] (rows=72 width=832) + File Output Operator [FS_240] +Limit [LIM_239] (rows=7 width=832) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=832) + Select Operator [SEL_238] (rows=7 width=832) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=832) +SHUFFLE [RS_237] + Select Operator [SEL_236] (rows=7 width=832) Output:["_col4","_col5","_col6","_col7"] -Group By Operator [GBY_234] (rows=72 width=353) +Group By Operator [GBY_235] (rows=7 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 -Group By Operator [GBY_48] (rows=72 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 - Merge Join Operator [MERGEJOIN_206] (rows=8055 width=100) -Conds:RS_44._col3, _col24, _col25=RS_232._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_232] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_231] (rows=265971 width=183) -Output:["_col0","_col1","_col2"] -Filter Operator [FIL_230] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_21] (rows=1861800 width=183) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_44] - PartitionCols:_col3, _col24, _col25 - Filter Operator [FIL_43] (rows=8055 width=390) -predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') 
and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) -Merge Join Operator [MERGEJOIN_205] (rows=24166 width=390) - Conds:RS_40._col1=RS_233._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] -<-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] -PartitionCols:_col0 - Please refer to the previous Select Operator
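In the query85 plan above, the marital-status and education tests move onto the customer_demographics scan as boolean columns, and the join ORs those flags with the per-band price checks instead of re-evaluating the string comparisons per joined row. A reduced sketch of the predicate, assuming the standard TPC-DS query85 shape:

  SELECT AVG(ws.ws_quantity)
  FROM web_sales ws
  JOIN customer_demographics cd1
    ON ws.ws_bill_cdemo_sk = cd1.cd_demo_sk
   AND ((cd1.cd_marital_status = 'M' AND cd1.cd_education_status = '4 yr Degree'
         AND ws.ws_sales_price BETWEEN 100.00 AND 150.00)
     OR (cd1.cd_marital_status = 'D' AND cd1.cd_education_status = 'Primary'
         AND ws.ws_sales_price BETWEEN 50.00 AND 100.00)
     OR (cd1.cd_marital_status = 'U' AND cd1.cd_education_status = 'Advanced Degree'
         AND ws.ws_sales_price BETWEEN 150.00 AND 200.00));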
[25/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index e8ef1dc..80723d8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -624,8 +624,8 @@ STAGE PLANS: predicate: (ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_paid_inc_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -633,7 +633,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 6 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE @@ -642,7 +642,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -656,8 +656,8 @@ STAGE PLANS: predicate: (cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_paid_inc_ship_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -665,7 +665,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 13 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE @@ -674,7 +674,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
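query66 shows the rewrite extending to arithmetic: (price * quantity) is now computed in the scan-side Select as a decimal(18,2), so the shuffle carries one derived value per measure instead of both source columns. A minimal sketch over the TPC-DS schema (the aggregation is simplified relative to the test query):

  SELECT w.w_warehouse_sk,
         SUM(ws.ws_sales_price * ws.ws_quantity) AS sales_amt
  FROM web_sales ws
  JOIN warehouse w ON ws.ws_warehouse_sk = w.w_warehouse_sk
  GROUP BY w.w_warehouse_sk;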
[42/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 067a43c..c86450a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -457,16 +457,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 -Select Operator [SEL_21] (rows=2 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=2 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 +Select Operator [SEL_21] (rows=1 width=24) + Output:["_col1","_col5"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=24) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=88) +Output:["_col0","_col1","_col2"] Filter Operator [FIL_36] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) @@ -547,16 +547,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 -Select Operator [SEL_21] (rows=1 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 +Select Operator [SEL_21] (rows=1 width=36) + Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=36) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} {((_col6 + _col9) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=99) +Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_36] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) @@ -630,16 +630,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_21] (rows=1 width=20) -Output:["_col1","_col4"] -Merge Join Operator [MERGEJOIN_54] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 
0))} {((_col3 + _col6) >= 2)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 + Select Operator [SEL_21] (rows=1
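The explainuser_1 plans show the residual filter predicates of a multi-way join rewritten against precomputed booleans. A hedged sketch of the join shape (t1/t2/t3 and the constants are placeholders mirroring the residual predicates visible in the plan, not the test's actual tables):

  -- Comparisons such as (c_int >= 0) become scan-side boolean columns, so the
  -- residual filter after the merge join tests flags like _col2 instead of
  -- re-evaluating the comparison on each joined row.
  SELECT t1.c_int, t3.c_int, COUNT(*)
  FROM t1
  JOIN t2 ON t1.key = t2.key
  JOIN t3 ON t2.key = t3.key
  WHERE (t2.c_int > 0 OR t1.c_int >= 0)
    AND (t2.c_int + t3.c_int) >= 0
  GROUP BY t1.c_int, t3.c_int;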
[47/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join47.q.out -- diff --git a/ql/src/test/results/clientpositive/join47.q.out b/ql/src/test/results/clientpositive/join47.q.out index 2892b8b..169244e 100644 --- a/ql/src/test/results/clientpositive/join47.q.out +++ b/ql/src/test/results/clientpositive/join47.q.out @@ -363,24 +363,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -388,19 +388,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3 - residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} - Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE - Limit -Number of rows: 10 -Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Limit + Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -472,24 +476,24 @@ STAGE PLANS: alias: src1 Statistics: 
Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
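join47 exercises a cross product with a disjunctive residual predicate; after the change the two BETWEEN tests are evaluated once per scan row, so the residual OR reduces to {((_col0 = _col3) or _col2 or _col5)}. The test query is roughly of this form:

  SELECT *
  FROM src1 JOIN src
    ON (src1.key = src.key
        OR src1.value BETWEEN 100 AND 102
        OR src.value BETWEEN 100 AND 102)
  LIMIT 10;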
[58/59] [abbrv] hive git commit: HIVE-20905 : querying streaming table fails with out of memory exception (mahesh kumar behera via Thejas Nair)
HIVE-20905 : querying streaming table fails with out of memory exception (mahesh kumar behera via Thejas Nair)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/148e7acb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/148e7acb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/148e7acb
Branch: refs/heads/master-tez092
Commit: 148e7acba46da997a023b57794c7f1f209097320
Parents: 52f94b8
Author: Mahesh Kumar Behera
Authored: Tue Nov 13 14:03:23 2018 -0700
Committer: Thejas M Nair
Committed: Tue Nov 13 14:03:23 2018 -0700
--
 .../apache/hadoop/hive/ql/txn/compactor/Cleaner.java    | 12 +++-
 .../org/apache/hadoop/hive/metastore/HiveMetaStore.java |  3 +++
 2 files changed, 10 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/148e7acb/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
--

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
index 3565616..3bc1f8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
@@ -20,8 +20,7 @@ package org.apache.hadoop.hive.ql.txn.compactor;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.metastore.ReplChangeManager;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileStatus;
@@ -58,6 +57,8 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;

+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
+
 /**
  * A class to clean directories after compactions. This will run in a separate thread.
  */
@@ -323,7 +324,7 @@ public class Cleaner extends CompactorThread {
     return " id=" + ci.id;
   }
   private void removeFiles(String location, ValidWriteIdList writeIdList, CompactionInfo ci)
-      throws IOException, HiveException {
+      throws IOException, NoSuchObjectException {
     Path locPath = new Path(location);
     AcidUtils.Directory dir = AcidUtils.getAcidState(locPath, conf, writeIdList);
     List obsoleteDirs = dir.getObsolete();
@@ -349,11 +350,12 @@ public class Cleaner extends CompactorThread {
     }

     FileSystem fs = filesToDelete.get(0).getFileSystem(conf);
-    Database db = Hive.get().getDatabase(ci.dbname);
+    Database db = rs.getDatabase(getDefaultCatalog(conf), ci.dbname);
+    Boolean isSourceOfRepl = ReplChangeManager.isSourceOfReplication(db);

     for (Path dead : filesToDelete) {
       LOG.debug("Going to delete path " + dead.toString());
-      if (ReplChangeManager.isSourceOfReplication(db)) {
+      if (isSourceOfRepl) {
         replChangeManager.recycle(dead, ReplChangeManager.RecycleType.MOVE, true);
       }
       fs.delete(dead, true);

http://git-wip-us.apache.org/repos/asf/hive/blob/148e7acb/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
--

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 0485184..23a78ca 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -9085,6 +9085,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         conf.set((String) item.getKey(), (String) item.getValue());
       }

+      //for metastore process, all metastore call should be embedded metastore call.
+      conf.set(ConfVars.THRIFT_URIS.getHiveName(), "");
+
       // Add shutdown hook.
       shutdownHookMgr.addShutdownHook(() -> {
         String shutdownMsg = "Shutting down hive metastore.";
[17/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index 50474bc..f5a71b4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -184,36 +184,36 @@ CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) - HiveAggregate(group=[{7}], agg#0=[sum($26)], agg#1=[count($26)], agg#2=[sum($21)], agg#3=[count($21)], agg#4=[sum($20)], agg#5=[count($20)]) -HiveJoin(condition=[AND(AND(=($0, $17), =($4, $1)), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{14}], agg#0=[sum($32)], agg#1=[count($32)], agg#2=[sum($27)], agg#3=[count($27)], agg#4=[sum($26)], agg#5=[count($26)]) +HiveJoin(condition=[AND(AND(AND(=($1, $18), =($2, $19)), =($0, $21)), OR(AND($3, $4, $36), AND($5, $6, $37), AND($7, $8, $38)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $24, 100, 150)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $24, 50, 100)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $24, 150, 200], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) -HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) -HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[AND(=($0, $13), OR(AND($1, $24), AND($2, $25), AND($3, $26)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', 
_UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) +HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) +HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, reason]], table:alias=[reason]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wp_web_page_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) -
[28/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query26.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index b6ee41e..48c0e11 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -221,11 +221,11 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col18 +outputColumnNames: _col4, _col5, _col6, _col7, _col12 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col18 (type: string) + keys: _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query27.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out index 4063c4f..6c64664 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out @@ -207,7 +207,7 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15 + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col11 input vertices: 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE @@ -216,7 +216,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE -value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) +value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -225,10 +225,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17 +outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col13 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + expressions: _col13 (type: string), _col11 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query29.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index 2e5c0f3..f4a4524 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ 
b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -295,7 +295,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 +outputColumnNames: _col3, _col5, _col8, _col9, _col11, _col14, _col15 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
[34/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index a8f06eb..def06a5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -122,18 +122,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), ((key > 40) and (value > 50) and (key = value)) (type: boolean) +outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: VectorUDFAdaptor(((key > 40) and (value > 50) and (key = value)))(children: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean) -> 6:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} keys: 0 1 @@ -368,18 +369,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} {_col3} {_col4} keys: 0 _col0 (type: int) 1 _col1 (type: int) @@ -397,7 +399,7 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true -selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int +selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 8:int Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) @@ -541,18 +543,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map:
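vector_join_filters shows the vectorized variant: the ON-clause filters of the outer join become scan-side projections (LongColGreaterLongScalar and friends), and the Map Join Operator's filter predicates shrink to the precomputed boolean columns {_col2} {_col3} {_col4}. The queries take roughly this form (myinput1 and the constants appear in the plan; the hash-sum is the test suite's usual checksum convention):

  SELECT SUM(HASH(a.key, a.value, b.key, b.value))
  FROM myinput1 a
  RIGHT OUTER JOIN myinput1 b
    ON a.key = b.value
   AND a.key > 40 AND a.value > 50 AND a.key = a.value;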
[20/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out index 9633df1..cbf9bca 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out @@ -75,7 +75,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -90,7 +90,7 @@ HiveSortLimit(fetch=[100]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -105,7 +105,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out index fd3038e..51bb901 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out @@ -68,8 +68,8 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) - HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) -HiveJoin(condition=[=($3, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) +HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], 
cost=[not available]) HiveProject(i_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -78,7 +78,7 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))])
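The cbo_query38/39 changes above are a downstream effect of the same rewrite: once the BETWEEN on d_month_seq is fully absorbed by the HiveFilter, the HiveProject above the date_dim scan can drop the column, exactly as the CBO plan diff shows. Schematically (the query is an illustrative reduction, not the TPC-DS original):

  -- before: HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3])
  -- after:  HiveProject(d_date_sk=[$0], d_date=[$2])
  SELECT ss.ss_customer_sk, d.d_date
  FROM store_sales ss
  JOIN date_dim d ON ss.ss_sold_date_sk = d.d_date_sk
  WHERE d.d_month_seq BETWEEN 1212 AND 1223;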
[12/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 27ce7b5..bb0d7ba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -271,367 +271,355 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_575] -Limit [LIM_574] (rows=100 width=85) + File Output Operator [FS_557] +Limit [LIM_556] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_573] (rows=7323197 width=85) + Select Operator [SEL_555] (rows=7323197 width=85) Output:["_col0"] <-Reducer 9 [SIMPLE_EDGE] -SHUFFLE [RS_147] - Select Operator [SEL_146] (rows=7323197 width=85) +SHUFFLE [RS_141] + Select Operator [SEL_140] (rows=7323197 width=85) Output:["_col0"] -Filter Operator [FIL_145] (rows=7323197 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_478] (rows=14646395 width=533) - Conds:RS_142._col2=RS_572._col0(Inner),Output:["_col3","_col5","_col9","_col11","_col12"] +Filter Operator [FIL_139] (rows=7323197 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE ((null > (_col14 / _col3))) END) ELSE (CASE WHEN (_col9) THEN (((_col11 / _col8) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_472] (rows=14646395 width=537) + Conds:RS_136._col2=RS_554._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"] <-Reducer 30 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_572] +SHUFFLE [RS_554] PartitionCols:_col0 - Select Operator [SEL_571] (rows=8000 width=297) + Select Operator [SEL_553] (rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_570] (rows=8000 width=764) +Group By Operator [GBY_552] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_126] + SHUFFLE [RS_120] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_125] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_123] (rows=187573258 width=1043) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_473] (rows=187573258 width=1043) - Conds:RS_120._col1=RS_518._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] -<-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_518] -PartitionCols:_col0 -Select Operator [SEL_517] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_516] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_114] (rows=8000 width=656) - 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] -<-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_120] -
[29/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query18.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index e77a918..1d73576 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -106,15 +106,15 @@ STAGE PLANS: predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) +value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 13 Map Operator Tree: @@ -145,15 +145,15 @@ STAGE PLANS: predicate: ((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), cd_dep_count (type: int) - outputColumnNames: _col0, _col3 + expressions: cd_demo_sk (type: int), CAST( cd_dep_count AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE -value expressions: _col3 (type: int) +value expressions: _col1 (type: decimal(12,2)) Execution mode: vectorized Map 15 Map Operator Tree: @@ -224,7 +224,7 @@ STAGE PLANS: predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS 
decimal(12,2)) (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -232,7 +232,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
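The recurring change in the query18 plan above is that integer columns feeding a decimal aggregation are now cast to decimal(12,2) inside the map-side Select Operator, so every downstream operator sees one uniform type. A minimal sketch of the same idea in plain Java (illustrative only, not Hive's vectorized code; class and variable names are invented):

import java.math.BigDecimal;
import java.util.List;

// Widen int -> decimal once in the "projection" so every consumer works on
// one uniform type, mirroring CAST(... AS decimal(12,2)) in the Select Operator.
public class WidenOnceSketch {
  public static void main(String[] args) {
    List<Integer> quantities = List.of(3, 7, 12);

    // "Projection": convert each value exactly once at the source.
    List<BigDecimal> widened = quantities.stream()
        .map(q -> new BigDecimal(q).setScale(2)) // decimal(12,2)-style scale
        .toList();

    // Downstream consumers (sum here) need no per-use conversions.
    BigDecimal sum = widened.stream().reduce(BigDecimal.ZERO, BigDecimal::add);
    System.out.println("sum = " + sum); // prints 22.00
  }
}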
[51/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55887646 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55887646 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55887646 Branch: refs/heads/master-tez092 Commit: 558876462d2589423d7131b51c24dbf61b8a22b9 Parents: dca389b Author: Zoltan Haindrich Authored: Mon Nov 12 10:03:28 2018 -0800 Committer: Jesus Camacho Rodriguez Committed: Mon Nov 12 10:03:57 2018 -0800 -- .../results/positive/accumulo_queries.q.out | 34 +- .../test/results/positive/hbase_queries.q.out | 34 +- .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +- .../jdbc/TestTriggersMoveWorkloadManager.java |9 +- .../jdbc/TestTriggersTezSessionPoolManager.java | 21 +- .../hadoop/hive/ql/parse/CalcitePlanner.java|2 +- .../bucket_mapjoin_mismatch1.q.out |4 +- .../clientpositive/allcolref_in_udf.q.out | 28 +- .../annotate_stats_join_pkfk.q.out | 40 +- .../results/clientpositive/auto_join13.q.out|6 +- .../results/clientpositive/auto_join19.q.out|4 +- .../clientpositive/auto_join19_inclause.q.out |4 +- .../results/clientpositive/auto_join2.q.out | 24 +- .../results/clientpositive/auto_join32.q.out|6 +- .../results/clientpositive/auto_join9.q.out |4 +- .../clientpositive/auto_join_stats.q.out| 122 +- .../clientpositive/auto_join_stats2.q.out | 90 +- .../materialized_view_create_rewrite.q.out |8 +- .../clientpositive/bucket_map_join_spark1.q.out |4 +- .../clientpositive/bucket_map_join_spark2.q.out |4 +- .../clientpositive/bucket_map_join_spark3.q.out |4 +- .../clientpositive/bucket_map_join_spark4.q.out |4 +- .../bucketsortoptimize_insert_4.q.out |4 +- .../bucketsortoptimize_insert_5.q.out |4 +- .../bucketsortoptimize_insert_8.q.out |8 +- .../test/results/clientpositive/cbo_const.q.out | 82 +- .../results/clientpositive/cbo_rp_join1.q.out | 80 +- .../clientpositive/cbo_rp_outer_join_ppr.q.out |8 +- .../constantPropagateForSubQuery.q.out | 10 +- .../results/clientpositive/constprog2.q.out | 16 +- .../clientpositive/constprog_partitioner.q.out |8 +- .../clientpositive/correlationoptimizer8.q.out | 58 +- .../test/results/clientpositive/cte_mat_5.q.out | 18 +- .../results/clientpositive/deleteAnalyze.q.out | 18 +- .../clientpositive/druid/druidmini_mv.q.out | 16 +- .../encryption_join_unencrypted_tbl.q.out | 76 +- .../clientpositive/filter_cond_pushdown.q.out | 90 +- .../clientpositive/filter_join_breaktask.q.out | 24 +- .../infer_bucket_sort_map_operators.q.out | 24 +- .../clientpositive/infer_join_preds.q.out | 22 +- .../results/clientpositive/innerjoin1.q.out | 76 +- .../test/results/clientpositive/input23.q.out |8 +- .../results/clientpositive/interval_3.q.out | 16 +- ql/src/test/results/clientpositive/join13.q.out |8 +- ql/src/test/results/clientpositive/join2.q.out | 32 +- ql/src/test/results/clientpositive/join26.q.out |2 +- ql/src/test/results/clientpositive/join32.q.out |2 +- ql/src/test/results/clientpositive/join33.q.out |2 +- ql/src/test/results/clientpositive/join42.q.out | 28 +- ql/src/test/results/clientpositive/join45.q.out | 392 +- ql/src/test/results/clientpositive/join46.q.out | 290 +- ql/src/test/results/clientpositive/join47.q.out | 392 +- ql/src/test/results/clientpositive/join9.q.out |6 +- .../clientpositive/join_cond_pushdown_1.q.out | 18 +- .../clientpositive/join_cond_pushdown_3.q.out | 18 +- .../join_cond_pushdown_unqual1.q.out| 18 +- 
.../join_cond_pushdown_unqual3.q.out| 18 +- .../clientpositive/join_emit_interval.q.out | 52 +- .../clientpositive/join_filters_overlap.q.out | 338 +- .../results/clientpositive/join_merging.q.out | 152 +- .../test/results/clientpositive/join_view.q.out |4 +- .../clientpositive/llap/auto_join_filters.q.out | 60 +- .../llap/auto_sortmerge_join_6.q.out| 174 +- .../llap/bucket_map_join_tez2.q.out | 116 +- .../clientpositive/llap/bucketmapjoin1.q.out| 20 +- .../clientpositive/llap/bucketmapjoin2.q.out|4 +- .../clientpositive/llap/bucketmapjoin3.q.out| 16 +- .../llap/bucketsortoptimize_insert_2.q.out | 24 +- .../llap/bucketsortoptimize_insert_6.q.out | 301 +- .../llap/bucketsortoptimize_insert_7.q.out |8 +- .../clientpositive/llap/check_constraint.q.out | 43 +- .../llap/constprog_semijoin.q.out | 54 +- .../llap/constraints_optimization.q.out
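For readers skimming the plan diffs in these [partial] messages: the rewrite moves a conditional or derived expression out of projections above a join and into the projection on the dimension-table side, so it is evaluated once per dimension row rather than once per joined fact row. A toy sketch of the effect (plain Java, not Hive's operator code; all names are invented for illustration):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Toy hash join: the conditional runs |dim| times on the build side instead
// of |fact| times after the join, which is the intent of HIVE-20850.
public class PushConditionalSketch {
  record DimRow(int key, String status) {}
  record FactRow(int dimKey, double amount) {}

  public static void main(String[] args) {
    List<DimRow> dim = List.of(new DimRow(1, "College"), new DimRow(2, "HighSchool"));
    List<FactRow> fact = List.of(new FactRow(1, 10.0), new FactRow(1, 20.0), new FactRow(2, 5.0));

    // Build side: CASE WHEN status = 'College' THEN 1 ELSE 0 END, once per dim row.
    Map<Integer, Integer> dimFlag = new HashMap<>();
    for (DimRow d : dim) {
      dimFlag.put(d.key(), "College".equals(d.status()) ? 1 : 0);
    }

    // Probe side: fact rows only look the precomputed flag up.
    double sum = 0;
    for (FactRow f : fact) {
      Integer flag = dimFlag.get(f.dimKey());
      if (flag != null && flag == 1) {
        sum += f.amount();
      }
    }
    System.out.println("sum over 'College' rows = " + sum); // prints 30.0
  }
}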
[01/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
Repository: hive Updated Branches: refs/heads/master-tez092 c55347d52 -> 750daa4a6 http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index 44f9b5d..4190a21 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -396,14 +396,14 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: _col1 (type: string) +key expressions: _col0 (type: string) sort order: + -Map-reduce partition columns: _col1 (type: string) +Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) +value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: @@ -426,13 +426,13 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col0 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -441,10 +441,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 +outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), 1 (type: int), _col10 (type:
[08/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out index 225b62f..767d47b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -511,10 +511,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_62] (rows=5559759 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_60] (rows=5559759 width=680) + Select Operator [SEL_60] (rows=5559759 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] -Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=680) - Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] +Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=750) + Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_259] PartitionCols:_col0 @@ -527,12 +527,12 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col3 -Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=205) - Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=274) + Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_245] PartitionCols:_col0 -Select Operator [SEL_242] (rows=1 width=88) +Select Operator [SEL_242] (rows=1 width=4) Output:["_col0"] Filter Operator [FIL_241] (rows=1 width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) @@ -541,13 +541,13 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_202] (rows=9518 width=224) - Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_202] (rows=9518 width=278) + Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 18
[33/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index bd40d8f..1b4d343 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -234,17 +234,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +selectExpressions: LongColumnBetween(col 0:int, left 100, right 102) -> 4:boolean +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) @@ -253,20 +254,27 @@ STAGE PLANS: className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false -File Sink Vectorization: -className: VectorFileSinkOperator -native: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -396,17 +404,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN
[05/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query91.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index 5b4952d..98e8adf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -104,13 +104,13 @@ Stage-0 SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_41] (rows=1 width=585) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19 + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5, _col6, _col14, _col15, _col16 Merge Join Operator [MERGEJOIN_144] (rows=10438 width=473) - Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col14","_col15","_col16"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] PartitionCols:_col0 -Select Operator [SEL_164] (rows=3600 width=96) +Select Operator [SEL_164] (rows=3600 width=4) Output:["_col0"] Filter Operator [FIL_163] (rows=3600 width=96) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) @@ -120,12 +120,12 @@ Stage-0 SHUFFLE [RS_37] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_143] (rows=20876 width=473) - Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col11","_col14","_col15","_col16"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_142] (rows=657590 width=312) - Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] + Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_162] PartitionCols:_col0 @@ -152,7 +152,7 @@ Stage-0 <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_159] PartitionCols:_col0 -Select Operator [SEL_158] (rows=50 width=12) +Select Operator [SEL_158] (rows=50 width=4) Output:["_col0"] Filter Operator [FIL_157] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) @@ -166,7 +166,7 @@ Stage-0 <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_153] PartitionCols:_col0 -Select Operator [SEL_152] (rows=800 width=116) +Select Operator [SEL_152] (rows=800 width=4) Output:["_col0"] Filter Operator [FIL_151] (rows=800 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query92.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index 50918f0..1f837dd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -104,9 +104,9 @@ Stage-0 Select Operator [SEL_34] (rows=2478 width=112) Output:["_col2"] Filter Operator [FIL_33] (rows=2478 width=112) - predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) + predicate:(_col2 > _col5) Merge Join
[52/59] [abbrv] hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc39c499 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc39c499 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc39c499 Branch: refs/heads/master-tez092 Commit: bc39c49988c8a5d881a23ed7dd5d4adba0509ee9 Parents: 5588764 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:24 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bc39c499/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
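The two deleted lines above had unconditionally rewritten any keytab path to hdfs:///user/hive/..., which is what made the keytab URI HDFS-only. A sketch of the scheme-aware behavior the fix permits (plain Java with java.net.URI; resolveKeytab is a hypothetical helper, not an actual Hive or YARN API):

import java.net.URI;

// A fully qualified keytab URI (s3a://..., file:///..., hdfs://...) passes
// through untouched; only a bare relative path gets a default prefix.
public class KeytabUriSketch {
  static String resolveKeytab(String keytab, String defaultDir) {
    URI uri = URI.create(keytab);
    if (uri.getScheme() != null) {
      return keytab; // already qualified; do not force hdfs://
    }
    return defaultDir + "/" + keytab;
  }

  public static void main(String[] args) {
    System.out.println(resolveKeytab("s3a://bucket/keytabs/hive.keytab", "hdfs:///user/hive"));
    System.out.println(resolveKeytab("hive.keytab", "hdfs:///user/hive"));
  }
}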
[27/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 67e8b4f..93e83ef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -237,13 +237,13 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) -Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) -Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 (PARTITION-LEVEL SORT, 873) -Reducer 18 <- Reducer 17 (GROUP, 369) +Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) +Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) +Reducer 18 <- Reducer 17 (GROUP, 186) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) -Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) -Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) -Reducer 24 <- Reducer 23 (GROUP, 186) +Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) +Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) +Reducer 24 <- Reducer 23 (GROUP, 369) Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306) Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) @@ -266,16 +266,15 @@ STAGE PLANS: predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) -Execution mode: vectorized +value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 13 Map Operator Tree: TableScan @@ -318,23 +317,22 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: 
(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) -Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE +predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) +Statistics: Num rows: 144002668 Data size: 19580198212 Basic
[55/59] [abbrv] hive git commit: HIVE-20682: Async query execution can potentially fail if shared sessionHive is closed by master thread (Sankar Hariappan, reviewed by Mahesh Kumar Behera, Anishek Agarwal)
HIVE-20682: Async query execution can potentially fail if shared sessionHive is closed by master thread (Sankar Hariappan, reviewed by Mahesh Kumar Behera, Anishek Agarwal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99d25f02 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99d25f02 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99d25f02 Branch: refs/heads/master-tez092 Commit: 99d25f02421a84bf0f96660f9248fd6518dc7c8a Parents: af40170 Author: Sankar Hariappan Authored: Tue Nov 13 16:26:04 2018 +0530 Committer: Sankar Hariappan Committed: Tue Nov 13 16:26:04 2018 +0530 -- .../hive/ql/parse/TestReplicationScenarios.java | 2 +- .../apache/hadoop/hive/ql/metadata/Hive.java| 129 +++ .../service/cli/operation/SQLOperation.java | 20 +-- .../service/cli/session/HiveSessionImpl.java| 56 +++- .../cli/session/TestSessionManagerMetrics.java | 2 + 5 files changed, 138 insertions(+), 71 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/99d25f02/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 75cd68a..5a88550 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -389,7 +389,7 @@ public class TestReplicationScenarios { Task replLoadTask = TaskFactory.get(replLoadWork, confTemp); replLoadTask.initialize(null, null, new DriverContext(driver.getContext()), null); replLoadTask.executeTask(null); -Hive.getThreadLocal().closeCurrent(); +Hive.closeCurrent(); return replLoadWork.getRootTask(); } http://git-wip-us.apache.org/repos/asf/hive/blob/99d25f02/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 180b41e..e185bf4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -167,35 +167,36 @@ public class Hive { private IMetaStoreClient metaStoreClient; private SynchronizedMetaStoreClient syncMetaStoreClient; private UserGroupInformation owner; + private boolean isAllowClose = true; // metastore calls timing information private final ConcurrentHashMap metaCallTimeMap = new ConcurrentHashMap<>(); - // Static class to store thread local Hive object and allowClose flag. + // Static class to store thread local Hive object. private static class ThreadLocalHive extends ThreadLocal { -private ThreadLocal allowClose = ThreadLocal.withInitial(() -> true); - @Override protected Hive initialValue() { return null; } @Override -public synchronized void remove() { - if (allowClose() && (this.get() != null)) { -this.get().close(); +public synchronized void set(Hive hiveObj) { + Hive currentHive = this.get(); + if (currentHive != hiveObj) { +// Remove/close current thread-local Hive object before overwriting with new Hive object. 
+remove(); +super.set(hiveObj); } - super.remove(); - this.allowClose.set(true); -} - -public synchronized void set(Hive hiveObj, boolean allowClose) { - super.set(hiveObj); - this.allowClose.set(allowClose); } -boolean allowClose() { - return this.allowClose.get(); +@Override +public synchronized void remove() { + Hive currentHive = this.get(); + if (currentHive != null) { +// Close the metastore connections before removing it from thread local hiveDB. +currentHive.close(false); +super.remove(); + } } } @@ -317,7 +318,12 @@ public class Hive { Hive db = hiveDB.get(); if (db == null || !db.isCurrentUserOwner() || needsRefresh || (c != null && !isCompatible(db, c, isFastCheck))) { - db = create(c, false, db, doRegisterAllFns); + if (db != null) { +LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh + +", db.isCurrentUserOwner = " + db.isCurrentUserOwner()); +closeCurrent(); + } + db = create(c, doRegisterAllFns); } if (c != null) { db.conf = c; @@ -325,26 +331,16 @@ public class Hive { return db; }
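The restructured ThreadLocalHive above folds the old allowClose flag away: set() closes any previous thread-local instance before overwriting it, and remove() closes before clearing. A minimal sketch of the same pattern for a generic resource (illustrative only, not the actual Hive class):

// Overwriting a thread-local resource first closes the value being replaced,
// so nothing leaks when a thread swaps its per-thread handle.
public class ClosingThreadLocalSketch {
  static class Resource implements AutoCloseable {
    final String name;
    Resource(String name) { this.name = name; }
    @Override public void close() { System.out.println("closed " + name); }
  }

  static class ClosingThreadLocal extends ThreadLocal<Resource> {
    @Override public synchronized void set(Resource next) {
      if (get() != next) {
        remove();        // close-then-clear the old value first
        super.set(next);
      }
    }
    @Override public synchronized void remove() {
      Resource current = get();
      if (current != null) {
        current.close(); // release before detaching from the thread
        super.remove();
      }
    }
  }

  public static void main(String[] args) {
    ClosingThreadLocal tl = new ClosingThreadLocal();
    tl.set(new Resource("A"));
    tl.set(new Resource("B")); // prints: closed A
    tl.remove();               // prints: closed B
  }
}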
[09/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query64.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index f670c4f..7c77e9f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,9 +265,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE) -Map 44 <- Reducer 40 (BROADCAST_EDGE) -Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE) +Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) +Map 43 <- Reducer 40 (BROADCAST_EDGE) +Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE) Map 56 <- Reducer 42 (BROADCAST_EDGE) Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) @@ -276,18 +276,18 @@ Reducer 13 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 14 <- Map 54 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 18 <- Map 43 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) +Reducer 19 <- Map 51 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 21 <- Map 52 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Map 36 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Map 53 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 24 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 25 <- Map 16 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 50 (ONE_TO_ONE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 49 (ONE_TO_ONE_EDGE) +Reducer 27 <- Map 51 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 29 <- Map 52 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Map 36 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) @@ -299,13 +299,13 @@ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) Reducer 42 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) -Reducer 46 <- Reducer 45 (SIMPLE_EDGE) -Reducer 47 <- Reducer 46 (CUSTOM_SIMPLE_EDGE) -Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) +Reducer 45 <- Reducer 44 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE) +Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE) Reducer 5 <- Map 36 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Reducer 49 (SIMPLE_EDGE) -Reducer 51 <- Reducer 50 
(CUSTOM_SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 54 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 54 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -320,10 +320,10 @@ Stage-0 Select Operator [SEL_1200] (rows=2169965329 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_259] -Select Operator [SEL_258] (rows=2169965329 width=1694) + SHUFFLE [RS_257] +Select Operator [SEL_256] (rows=2169965329 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_257] (rows=2169965329 width=1694) + Filter Operator [FIL_255] (rows=2169965329 width=1694) predicate:(_col19 <= _col12) Merge Join Operator [MERGEJOIN_1087] (rows=6509895988 width=1694) Conds:RS_1171._col2, _col1, _col3=RS_1199._col1, _col0,
[35/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index b1eec43..93791ac 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -667,238 +667,184 @@ POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### +100val_100 104val_104 105val_105 113val_113 118val_118 +119val_119 12 val_12 120val_120 +128val_128 +129val_129 133val_133 136val_136 +145val_145 155val_155 158val_158 160val_160 162val_162 +167val_167 168val_168 17 val_17 170val_170 +174val_174 175val_175 +178val_178 18 val_18 180val_180 +181val_181 186val_186 19 val_19 +193val_193 197val_197 +199val_199 +20 val_20 200val_200 +201val_201 +213val_213 +214val_214 216val_216 218val_218 +219val_219 +221val_221 222val_222 +223val_223 224val_224 +226val_226 228val_228 +233val_233 235val_235 237val_237 239val_239 +241val_241 244val_244 247val_247 +249val_249 256val_256 +260val_260 +262val_262 263val_263 273val_273 +277val_277 28 val_28 +281val_281 283val_283 286val_286 +287val_287 +288val_288 292val_292 +298val_298 +302val_302 306val_306 308val_308 +310val_310 +323val_323 327val_327 33 val_33 +336val_336 +341val_341 +344val_344 348val_348 +351val_351 353val_353 362val_362 366val_366 +375val_375 +382val_382 +384val_384 +393val_393 +395val_395 396val_396 397val_397 +399val_399 401val_401 +403val_403 +406val_406 409val_409 411val_411 +418val_418 419val_419 427val_427 43 val_43 432val_432 +435val_435 436val_436 439val_439 443val_443 453val_453 +455val_455 +459val_459 460val_460 462val_462 47 val_47 472val_472 -485val_485 -496val_496 -54 val_54 -64 val_64 -70 val_70 -8 val_8 -83 val_83 -84 val_84 -85 val_85 -90 val_90 -0 val_0 -103val_103 -114val_114 -125val_125 -138val_138 -146val_146 -150val_150 -152val_152 -153val_153 -156val_156 -157val_157 -165val_165 -172val_172 -177val_177 -179val_179 -187val_187 -195val_195 -196val_196 -217val_217 -242val_242 -248val_248 -252val_252 -265val_265 -27 val_27 -272val_272 -280val_280 -291val_291 -305val_305 -309val_309 -311val_311 -315val_315 -317val_317 -322val_322 -333val_333 -34 val_34 -345val_345 -35 val_35 -356val_356 -364val_364 -368val_368 -369val_369 -37 val_37 -373val_373 -377val_377 -4 val_4 -402val_402 -404val_404 -413val_413 -42 val_42 -430val_430 -431val_431 -444val_444 -449val_449 -452val_452 -454val_454 -457val_457 -463val_463 -466val_466 -470val_470 -475val_475 -481val_481 -489val_489 -491val_491 -57 val_57 -65 val_65 -66 val_66 -74 val_74 -76 val_76 -78 val_78 -9 val_9 -92 val_92 -95 val_95 -100val_100 -119val_119 -128val_128 -129val_129 -145val_145 -167val_167 -174val_174 -178val_178 -181val_181 -193val_193 -199val_199 -20 val_20 -201val_201 -213val_213 -214val_214 -219val_219 -221val_221 -223val_223 -226val_226 -233val_233 -241val_241 -249val_249 -260val_260 -262val_262 -277val_277 -281val_281 -287val_287 -288val_288 -298val_298 -302val_302 -310val_310 -323val_323 -336val_336 -341val_341 -344val_344 -351val_351 -375val_375 -382val_382 -384val_384 -393val_393 -395val_395 -399val_399 -403val_403 -406val_406 -418val_418 -435val_435 -455val_455 -459val_459 477val_477 478val_478 479val_479 482val_482 +485val_485 493val_493 494val_494 495val_495 +496val_496 497val_497 5 val_5 +54 
val_54 58 val_58 +64 val_64 67 val_67 +70 val_70 77 val_77 +8 val_8 80 val_80 +83 val_83 +84 val_84 +85 val_85 86 val_86 +90 val_90 97 val_97 98 val_98 +0 val_0 10 val_10 +103val_103 11 val_11 111
[43/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index accb3a7..6954647 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -535,14 +535,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -555,16 +555,16 @@ STAGE PLANS: predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: day(CAST( _col0 AS DATE)) (type: int) +key expressions: _col0 (type: int) sort order: + -Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) +Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: day(CAST( _col0 AS DATE)) (type: int) +expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -587,9 +587,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 day(CAST( _col0 AS DATE)) (type: int) - 1 day(CAST( _col0 AS DATE)) (type: int) -Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) +Statistics: Num rows: 2200 Data size: 8800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -678,14 +678,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +
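The pattern in the dynamic_partition_pruning diff above: day(CAST(ds AS DATE)) is now computed inside the map-side Select Operator, so a 4-byte int is shuffled instead of the full date string, which is why Data size drops from 368000 to 8000. A rough sketch of the effect (plain Java, invented names, not Hive code):

import java.time.LocalDate;
import java.util.List;

// Deriving the small join key before the shuffle means only ints cross the
// network; the wider source strings stay on the map side.
public class ComputeBeforeShuffleSketch {
  public static void main(String[] args) {
    List<String> ds = List.of("2008-04-08", "2008-04-09");

    // Map-side "Select Operator": compute day(CAST(ds AS DATE)) eagerly.
    int[] dayKeys = ds.stream()
        .mapToInt(s -> LocalDate.parse(s).getDayOfMonth())
        .toArray();

    for (int k : dayKeys) {
      System.out.println("shuffle key = " + k); // 8, then 9
    }
  }
}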
[18/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out index fca31ef..e49b44b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -82,10 +82,10 @@ CBO PLAN: HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) - HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 0)], $f4=[CASE(IS NOT NULL($28), 1, 0)]) -HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], d_date0=[$7], p_promo_sk=[$26]) -HiveJoin(condition=[AND(=($0, $27), =($20, $28))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 0)], $f4=[CASE(IS NOT NULL($25), 1, 0)]) +HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$6], CAST=[$7], p_promo_sk=[$23]) +HiveJoin(condition=[AND(=($0, $24), =($19, $25))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) @@ -94,29 +94,29 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) -HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], p_promo_sk=[$20]) - HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, 
+(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_date=[$2]) +HiveProject(d_date_sk=[$0], CAST=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], hd_demo_sk=[$16], p_promo_sk=[$17]) + HiveJoin(condition=[AND(=($5, $0), >($1, $14))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)])
[32/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/mapjoin47.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index d3e61f8..dadac0d 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -385,8 +385,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -400,8 +400,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -409,19 +409,23 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3 -residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} -Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE -Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Limit +Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -501,8 +505,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -516,8 +520,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string),
[38/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index c72e4b2..c43ad91 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -109,13 +109,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 -Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 +Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -147,16 +147,16 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 -residual filter predicates: {(UDFToDouble(_col5) > _col9)} -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 +residual filter predicates: {(_col9 > _col10)} +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,12 +397,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4,
[56/59] [abbrv] hive git commit: HIVE-19701: getDelegationTokenFromMetaStore doesn't need to be synchronized (Sankar Hariappan, reviewed by Thejas M Nair)
HIVE-19701: getDelegationTokenFromMetaStore doesn't need to be synchronized (Sankar Hariappan, reviewed by Thejas M Nair) Signed-off-by: Sankar Hariappan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ceb4eb6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ceb4eb6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ceb4eb6 Branch: refs/heads/master-tez092 Commit: 1ceb4eb6a8329d858241f0aee8880ef68787802a Parents: 99d25f0 Author: Sankar Hariappan Authored: Tue Nov 13 23:18:09 2018 +0530 Committer: Sankar Hariappan Committed: Tue Nov 13 23:18:09 2018 +0530 -- service/src/java/org/apache/hive/service/cli/CLIService.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ceb4eb6/service/src/java/org/apache/hive/service/cli/CLIService.java -- diff --git a/service/src/java/org/apache/hive/service/cli/CLIService.java b/service/src/java/org/apache/hive/service/cli/CLIService.java index 9cbe7e1..22c4026 100644 --- a/service/src/java/org/apache/hive/service/cli/CLIService.java +++ b/service/src/java/org/apache/hive/service/cli/CLIService.java @@ -569,8 +569,7 @@ public class CLIService extends CompositeService implements ICLIService { } // obtain delegation token for the give user from metastore - // TODO: why is this synchronized? - public synchronized String getDelegationTokenFromMetaStore(String owner) + public String getDelegationTokenFromMetaStore(String owner) throws HiveSQLException, UnsupportedOperationException, LoginException, IOException { HiveConf hiveConf = getHiveConf(); if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL) ||
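The diff answers the TODO it deletes: every variable involved in fetching the token is local to the call, so concurrent callers cannot interfere, and the lock only serialized independent metastore round trips. A toy illustration of the distinction (not the real CLIService; names invented):

import java.util.concurrent.atomic.AtomicLong;

// A method whose state is all local variables (plus thread-safe helpers)
// can run concurrently without synchronized; only shared mutable fields
// would need the lock.
public class StatelessMethodSketch {
  private final AtomicLong calls = new AtomicLong();

  public String getTokenFor(String owner) {
    calls.incrementAndGet();      // thread-safe counter, no lock needed
    return "token-for-" + owner;  // stands in for the per-call metastore fetch
  }

  public static void main(String[] args) throws InterruptedException {
    StatelessMethodSketch s = new StatelessMethodSketch();
    Thread t1 = new Thread(() -> System.out.println(s.getTokenFor("alice")));
    Thread t2 = new Thread(() -> System.out.println(s.getTokenFor("bob")));
    t1.start(); t2.start();
    t1.join(); t2.join();
  }
}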
[14/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
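The pattern visible throughout the plan diffs that follow: deterministic CASE/IN conditions that reference only dimension-table columns are now evaluated once in the dimension side's projection (emitted as extra boolean columns such as =($2, _UTF-16LE'M')), and the join combines the precomputed booleans instead of re-evaluating the expressions against every joined fact row. A minimal Java sketch of that trade-off, using plain arrays rather than Hive operators (names and data are illustrative only):

import java.util.ArrayList;
import java.util.List;

public class PushConditionalSketch {
  public static void main(String[] args) {
    // Hypothetical dimension table: a few demographic rows.
    String[] maritalStatus = {"M", "D", "U", "M"};
    // Fact table: many rows, each referencing a dimension row by index.
    int[] factToDim = {0, 1, 2, 3, 0, 1, 2, 3};

    // Before: the predicate is evaluated once per joined fact row.
    List<Integer> before = new ArrayList<>();
    for (int i = 0; i < factToDim.length; i++) {
      if ("M".equals(maritalStatus[factToDim[i]])) {
        before.add(i);
      }
    }

    // After: push the conditional into the dimension projection, so it is
    // evaluated once per dimension row and carried as a boolean column.
    boolean[] isMarried = new boolean[maritalStatus.length];
    for (int d = 0; d < maritalStatus.length; d++) {
      isMarried[d] = "M".equals(maritalStatus[d]);
    }
    List<Integer> after = new ArrayList<>();
    for (int i = 0; i < factToDim.length; i++) {
      if (isMarried[factToDim[i]]) {
        after.add(i);
      }
    }
    System.out.println(before.equals(after)); // true: same rows, fewer comparisons
  }
}

The result set is identical; the per-row expression work moves from the large fact side to the small dimension side.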
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 7784792..059195a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -166,399 +166,391 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_699] -Limit [LIM_698] (rows=1 width=112) + File Output Operator [FS_689] +Limit [LIM_688] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_697] (rows=1 width=112) + Group By Operator [GBY_687] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_608] -Group By Operator [GBY_607] (rows=1 width=112) + Reduce Output Operator [RS_598] +Group By Operator [GBY_597] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_605] (rows=1 width=112) + Select Operator [SEL_595] (rows=1 width=112) Output:["_col0"] -Merge Join Operator [MERGEJOIN_604] (rows=1 width=116) - Conds:RS_248._col2=RS_249._col0(Inner),Output:["_col3","_col4"] +Merge Join Operator [MERGEJOIN_594] (rows=1 width=116) + Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_248] + PARTITION_ONLY_SHUFFLE [RS_240] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_592] (rows=155 width=0) - Conds:RS_245._col1=RS_642._col0(Inner),Output:["_col2","_col3","_col4"] +Merge Join Operator [MERGEJOIN_582] (rows=155 width=0) + Conds:RS_237._col1=RS_632._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_642] + SHUFFLE [RS_632] PartitionCols:_col0 -Group By Operator [GBY_639] (rows=2235 width=4) +Group By Operator [GBY_629] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_638] (rows=6548799 width=12) + Select Operator [SEL_628] (rows=6548799 width=290) Output:["_col1"] -Filter Operator [FIL_637] (rows=6548799 width=12) +Filter Operator [FIL_627] (rows=6548799 width=290) predicate:(_col3 > 4L) - Select Operator [SEL_636] (rows=19646398 width=12) -Output:["_col0","_col3"] -Group By Operator [GBY_635] (rows=19646398 width=290) + Select Operator [SEL_626] (rows=19646398 width=290) +Output:["_col1","_col3"] +Group By Operator [GBY_625] 
(rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 17 [SIMPLE_EDGE] -
[31/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_3.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 15a8963..725d905 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -54,12 +54,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -72,20 +76,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -93,21 +97,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1
[53/59] [abbrv] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java new file mode 100644 index 000..bee5079 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import java.util.Arrays; +import java.util.Properties; + +import jline.TerminalFactory; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Parses, verifies, prints and provides the command line arguments of the Llap Status program. + */ +public class LlapStatusServiceCommandLine { + private static final Logger LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); + + @VisibleForTesting + static final long DEFAULT_FIND_YARN_APP_TIMEOUT_MS = 20 * 1000L; + @VisibleForTesting + static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000L; + @VisibleForTesting + static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000L; + @VisibleForTesting + static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f; + + @SuppressWarnings("static-access") + private static final Option NAME = OptionBuilder + .withLongOpt("name") + .withDescription("LLAP cluster name") + .withArgName("name") + .hasArg() + .create('n'); + + @SuppressWarnings("static-access") + private static final Option FIND_APP_TIMEOUT = OptionBuilder + .withLongOpt("findAppTimeout") + .withDescription("Amount of time(s) that the tool will sleep to wait for the YARN application to start." + + "negative values=wait forever, 0=Do not wait. 
default=" + (DEFAULT_FIND_YARN_APP_TIMEOUT_MS / 1000) + "s") + .withArgName("findAppTimeout") + .hasArg() + .create('f'); + + @SuppressWarnings("static-access") + private static final Option OUTPUT_FILE = OptionBuilder + .withLongOpt("outputFile") + .withDescription("File to which output should be written (Default stdout)") + .withArgName("outputFile") + .hasArg() + .create('o'); + + @SuppressWarnings("static-access") + private static final Option WATCH_MODE = OptionBuilder + .withLongOpt("watch") + .withDescription("Watch mode waits until all LLAP daemons are running or subset of the nodes are running " + + "(threshold can be specified via -r option) (Default wait until all nodes are running)") + .withArgName("watch") + .create('w'); + + @SuppressWarnings("static-access") + private static final Option NOT_LAUNCHED = OptionBuilder + .withLongOpt("notLaunched") + .withDescription("In watch mode, do not assume that the application was already launched if there's doubt " + + "(e.g. if the last application instance has failed).") + .withArgName("notLaunched") + .create('l'); + + @SuppressWarnings("static-access") + private static final Option RUNNING_NODES_THRESHOLD = OptionBuilder + .withLongOpt("runningNodesThreshold") + .withDescription("When watch mode is enabled (-w), wait until the specified threshold of nodes are running " + + "(Default 1.0 which means 100% nodes are running)") + .withArgName("runningNodesThreshold") + .hasArg() + .create('r'); + + @SuppressWarnings("static-access") + private static final Option REFRESH_INTERVAL = OptionBuilder + .withLongOpt("refreshInterval") + .withDescription("Amount of time in seconds to wait until subsequent status
[22/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index ccad088..19f3039 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -114,28 +114,28 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) - HiveAggregate(group=[{}], agg#0=[sum($16)], agg#1=[count($16)], agg#2=[sum($18)], agg#3=[count($18)], agg#4=[sum($19)], agg#5=[count($19)]) -HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $17, 100, 150), =($7, 3)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $17, 50, 100), =($7, 1)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $17, 150, 200), =($7, 1], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)], agg#2=[sum($22)], agg#3=[count($22)], agg#4=[sum($23)], agg#5=[count($23)]) +HiveJoin(condition=[AND(=($0, $17), OR(AND($1, $2, $27, $12), AND($3, $4, $28, $13), AND($5, $6, $29, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 50, 250], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveJoin(condition=[AND(=($12, $0), OR(AND($1, $17), AND($2, $18), AND($3, $19)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) HiveTableScan(table=[[default, 
household_demographics]], table:alias=[household_demographics]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_net_profit=[$22]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) +HiveTableScan(table=[[default, store]], table:alias=[store]) +
[59/59] [abbrv] hive git commit: HIVE-20605 : Merge branch 'master' into master-tez092
HIVE-20605 : Merge branch 'master' into master-tez092 Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/750daa4a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/750daa4a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/750daa4a Branch: refs/heads/master-tez092 Commit: 750daa4a639c8704a14b6970741f8600cbf5a863 Parents: c55347d 148e7ac Author: sergey Authored: Tue Nov 13 13:34:22 2018 -0800 Committer: sergey Committed: Tue Nov 13 13:34:22 2018 -0800 -- .../results/positive/accumulo_queries.q.out | 34 +- bin/ext/llapstatus.sh |4 +- .../apache/hadoop/hive/common/FileUtils.java|5 + .../hadoop/hive/common/type/HiveChar.java |7 + .../hadoop/hive/common/type/TimestampUtils.java | 23 + .../org/apache/hadoop/hive/conf/Constants.java | 17 - .../org/apache/hadoop/hive/conf/HiveConf.java | 18 +- .../hadoop/hive/common/TestFileUtils.java | 12 +- .../hadoop/hive/druid/DruidKafkaUtils.java | 167 + .../hadoop/hive/druid/DruidStorageHandler.java | 879 ++--- .../hive/druid/DruidStorageHandlerInfo.java | 53 +- .../hive/druid/DruidStorageHandlerUtils.java| 883 +++-- .../hadoop/hive/druid/io/DruidOutputFormat.java | 32 +- .../druid/io/DruidQueryBasedInputFormat.java| 63 +- .../hadoop/hive/druid/io/DruidRecordWriter.java | 212 +- .../hadoop/hive/druid/io/HiveDruidSplit.java| 19 +- .../druid/json/KafkaSupervisorIOConfig.java | 199 +- .../hive/druid/json/KafkaSupervisorReport.java | 157 +- .../hive/druid/json/KafkaSupervisorSpec.java| 119 +- .../druid/json/KafkaSupervisorTuningConfig.java | 152 +- .../hive/druid/json/KafkaTuningConfig.java | 175 +- .../hadoop/hive/druid/json/TaskReportData.java | 68 +- .../hive/druid/security/DruidKerberosUtil.java | 58 +- .../hive/druid/security/KerberosHttpClient.java | 86 +- .../druid/security/ResponseCookieHandler.java | 44 +- .../RetryIfUnauthorizedResponseHandler.java | 62 +- .../druid/security/RetryResponseHolder.java | 23 +- .../serde/DruidGroupByQueryRecordReader.java| 19 +- .../druid/serde/DruidQueryRecordReader.java | 171 +- .../druid/serde/DruidScanQueryRecordReader.java | 35 +- .../serde/DruidSelectQueryRecordReader.java | 34 +- .../hadoop/hive/druid/serde/DruidSerDe.java | 185 +- .../hive/druid/serde/DruidSerDeUtils.java | 48 +- .../serde/DruidTimeseriesQueryRecordReader.java |3 +- .../druid/serde/DruidTopNQueryRecordReader.java | 46 +- .../hadoop/hive/druid/serde/DruidWritable.java | 60 +- .../hive/druid/DerbyConnectorTestUtility.java | 13 +- .../hadoop/hive/druid/QTestDruidSerDe.java | 73 +- .../hive/druid/TestDruidStorageHandler.java | 107 +- .../TestHiveDruidQueryBasedInputFormat.java |4 +- .../hadoop/hive/druid/serde/TestDruidSerDe.java | 1234 +++ .../hive/ql/io/TestDruidRecordWriter.java | 238 +- .../results/positive/external_table_ppd.q.out |1 + .../positive/hbase_binary_storage_queries.q.out |2 + .../src/test/results/positive/hbase_ddl.q.out |2 + .../test/results/positive/hbase_queries.q.out | 35 +- .../src/test/results/positive/hbasestats.q.out |5 + .../listener/DummyRawStoreFailEvent.java| 44 +- .../hive/ql/parse/TestReplicationScenarios.java |2 +- .../hive/ql/txn/compactor/TestCompactor.java|1 - .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +- .../org/apache/hive/jdbc/TestJdbcDriver2.java | 30 +- .../jdbc/TestTriggersMoveWorkloadManager.java |9 +- .../jdbc/TestTriggersTezSessionPoolManager.java | 21 +- .../test/resources/testconfiguration.properties |6 +- .../hadoop/hive/cli/control/CliConfigs.java |6 + .../org/apache/hadoop/hive/ql/QTestUtil.java|4 +- 
.../hive/storage/jdbc/JdbcInputFormat.java |2 +- .../hive/storage/jdbc/JdbcInputSplit.java | 27 +- .../hive/storage/jdbc/JdbcRecordReader.java |3 +- .../jdbc/dao/GenericJdbcDatabaseAccessor.java |4 +- .../hadoop/hive/kafka/KafkaInputFormat.java | 13 +- .../hadoop/hive/kafka/KafkaRecordIterator.java | 15 +- .../hadoop/hive/kafka/KafkaRecordReader.java|2 +- .../apache/hadoop/hive/kafka/KafkaSerDe.java| 49 +- .../hadoop/hive/kafka/SimpleKafkaWriter.java|5 +- .../hive/kafka/VectorizedKafkaRecordReader.java | 186 + .../hive/kafka/SimpleKafkaWriterTest.java | 14 +- .../hive/llap/tez/LlapProtocolClientProxy.java | 22 + .../daemon/rpc/LlapDaemonProtocolProtos.java| 1433 +++- .../org/apache/hadoop/hive/llap/LlapUtil.java | 17 + .../hive/llap/impl/LlapProtocolClientImpl.java | 13 + .../src/protobuf/LlapDaemonProtocol.proto |9 + .../hadoop/hive/llap/cli/LlapSliderUtils.java | 55 +-
[15/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query17.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index bb18527..642a67f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -147,7 +147,7 @@ Stage-0 Select Operator [SEL_47] (rows=8581091759 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] Merge Join Operator [MERGEJOIN_213] (rows=8581091759 width=381) - Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_257] PartitionCols:_col0 @@ -161,12 +161,12 @@ Stage-0 SHUFFLE [RS_44] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_212] (rows=8581091759 width=299) -Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] +Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] - PartitionCols:_col7, _col8, _col9 + PartitionCols:_col6, _col7, _col8 Merge Join Operator [MERGEJOIN_211] (rows=1640229377 width=19) -Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] +Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] PartitionCols:_col2, _col1 @@ -175,7 +175,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_218] (rows=304 width=94) + Select Operator [SEL_218] (rows=304 width=4) Output:["_col0"] Filter Operator [FIL_215] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -205,7 +205,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_219] (rows=304 width=94) + Select Operator [SEL_219] (rows=304 width=4) Output:["_col0"] Filter Operator [FIL_216] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -249,7 +249,7 @@ Stage-0 SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 Merge Join Operator [MERGEJOIN_208] (rows=27749405 width=294) - Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_254] PartitionCols:_col0 @@ -267,7 +267,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized
[45/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 68fc903..4ed9b60 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -135,10 +135,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -326,11 +326,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -584,10 +584,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -775,11 +775,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index acb1e87..8039d0f 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -125,7 +125,7 @@ STAGE PLANS: Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -137,17 +137,17 @@ STAGE PLANS: Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1780
[11/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query48.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index 76b4ce1..1f63e95 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,15 +143,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -165,103 +165,103 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=25203 width=86) + Select Operator [SEL_28] (rows=20247 width=24) Output:["_col5"] -Filter Operator [FIL_27] (rows=25203 width=86) - predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_96] (rows=75613 width=86) - Conds:RS_24._col3=RS_118._col0(Inner),Output:["_col5","_col7","_col14"] +Filter Operator [FIL_27] (rows=20247 width=24) + predicate:((_col12 and _col6) or (_col13 and _col7) or (_col14 and _col8)) + Merge Join Operator [MERGEJOIN_96] (rows=26999 width=24) + Conds:RS_24._col3=RS_115._col0(Inner),Output:["_col5","_col6","_col7","_col8","_col12","_col13","_col14"] <-Map 12 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_118] +SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_117] (rows=3529412 width=187) -Output:["_col0","_col1"] -Filter Operator [FIL_116] (rows=3529412 width=187) + Select Operator [SEL_114] (rows=3529412 width=16) +Output:["_col0","_col1","_col2","_col3"] +Filter Operator [FIL_113] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) TableScan [TS_12] (rows=4000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=856941 width=0) - Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_95] (rows=305980 width=12) + Conds:RS_21._col4=RS_126._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8"] + <-Map 11 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_109] (rows=29552 width=184) + Select 
Operator [SEL_125] (rows=1704 width=4) Output:["_col0"] -Filter Operator [FIL_108] (rows=29552 width=183) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=183) -
[39/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out index ba9e81d..f84d13f 100644 --- a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out @@ -98,12 +98,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -118,12 +118,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -152,10 +152,10 @@ STAGE PLANS: keys: 0 _col9 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 100 Data size: 62700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), 'bar' (type: string) + expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: int), 'bar' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 100 Data size: 80400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -240,17 +240,17 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition 
columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40
[16/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query11.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index 2f453f3..da1c349 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -189,249 +189,241 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_358] -Limit [LIM_357] (rows=100 width=85) + File Output Operator [FS_354] +Limit [LIM_353] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_356] (rows=12248093 width=85) + Select Operator [SEL_352] (rows=12248093 width=85) Output:["_col0"] <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_97] - Select Operator [SEL_96] (rows=12248093 width=85) +SHUFFLE [RS_93] + Select Operator [SEL_92] (rows=12248093 width=85) Output:["_col0"] -Filter Operator [FIL_95] (rows=12248093 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=533) - Conds:RS_92._col2=RS_355._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] +Filter Operator [FIL_91] (rows=12248093 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_287] (rows=24496186 width=537) + Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] <-Reducer 20 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_355] +SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_354] (rows=8000 width=297) + Select Operator [SEL_350] (rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_353] (rows=8000 width=764) +Group By Operator [GBY_349] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_83] + SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_82] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_80] (rows=187573258 width=847) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_288] (rows=187573258 width=847) - Conds:RS_77._col1=RS_321._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] -<-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] -PartitionCols:_col0 -Select Operator [SEL_320] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_319] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_71] (rows=8000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] 
-<-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_77] -PartitionCols:_col1 -
[24/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index 85e6dca..553d11a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -219,14 +219,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 11 Map Operator Tree: @@ -397,14 +397,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 34 Map Operator Tree: @@ -530,23 +530,23 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12 +outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 -outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 +outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 
(type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) + expressions: _col7 (type: int), _col8
[23/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 6bdbf7e..a7bf288 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,8 +182,7 @@ POSTHOOK: Input: default@web_sales A masked pattern was here STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -191,47 +190,42 @@ STAGE PLANS: Spark A masked pattern was here Vertices: -Map 13 +Map 12 Map Operator Tree: TableScan - alias: reason - filterExpr: r_reason_sk is not null (type: boolean) - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: r_reason_sk is not null (type: boolean) -Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE +predicate: wp_web_page_sk is not null (type: boolean) +Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: r_reason_sk (type: int), r_reason_desc (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 -Spark - A masked pattern was here - Vertices: -Map 11 +Map 13 Map Operator Tree: TableScan - alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: reason + filterExpr: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: wp_web_page_sk is not null (type: boolean) -Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE +predicate: r_reason_sk is not null (type: boolean) +Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + expressions: r_reason_sk (type: int), r_reason_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -241,11 +235,11 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28) -Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 178), Reducer 2 (PARTITION-LEVEL SORT, 178) -Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 65), Reducer 3 (PARTITION-LEVEL SORT, 65) -Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 83), Reducer 4 
(PARTITION-LEVEL SORT, 83) -Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 13), Reducer 5 (PARTITION-LEVEL SORT, 13) -Reducer 7
[40/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index a1fe936..d9d2396 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -180,29 +180,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs Map 2 @@ -294,29 +298,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false +
[30/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_4.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 60cbd0f..54861b0 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ b/ql/src/test/results/clientpositive/masking_4.q.out @@ -210,12 +210,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -228,20 +232,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -249,21 +253,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage:
[2/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java -- diff --git a/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java b/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java index 8fecc1e..7659140 100644 --- a/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java +++ b/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java @@ -11465,6 +11465,1155 @@ public final class LlapDaemonProtocolProtos { // @@protoc_insertion_point(class_scope:SubmitWorkRequestProto) } + public interface RegisterDagRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + +// optional string user = 1; +/** + * optional string user = 1; + */ +boolean hasUser(); +/** + * optional string user = 1; + */ +java.lang.String getUser(); +/** + * optional string user = 1; + */ +com.google.protobuf.ByteString +getUserBytes(); + +// required .QueryIdentifierProto query_identifier = 2; +/** + * required .QueryIdentifierProto query_identifier = 2; + */ +boolean hasQueryIdentifier(); +/** + * required .QueryIdentifierProto query_identifier = 2; + */ + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto getQueryIdentifier(); +/** + * required .QueryIdentifierProto query_identifier = 2; + */ + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProtoOrBuilder getQueryIdentifierOrBuilder(); + +// optional bytes credentials_binary = 3; +/** + * optional bytes credentials_binary = 3; + */ +boolean hasCredentialsBinary(); +/** + * optional bytes credentials_binary = 3; + */ +com.google.protobuf.ByteString getCredentialsBinary(); + } + /** + * Protobuf type {@code RegisterDagRequestProto} + */ + public static final class RegisterDagRequestProto extends + com.google.protobuf.GeneratedMessage + implements RegisterDagRequestProtoOrBuilder { +// Use RegisterDagRequestProto.newBuilder() to construct. 
+private RegisterDagRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); +} +private RegisterDagRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + +private static final RegisterDagRequestProto defaultInstance; +public static RegisterDagRequestProto getDefaultInstance() { + return defaultInstance; +} + +public RegisterDagRequestProto getDefaultInstanceForType() { + return defaultInstance; +} + +private final com.google.protobuf.UnknownFieldSet unknownFields; +@java.lang.Override +public final com.google.protobuf.UnknownFieldSet +getUnknownFields() { + return this.unknownFields; +} +private RegisterDagRequestProto( +com.google.protobuf.CodedInputStream input, +com.google.protobuf.ExtensionRegistryLite extensionRegistry) +throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { +boolean done = false; +while (!done) { + int tag = input.readTag(); + switch (tag) { +case 0: + done = true; + break; +default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { +done = true; + } + break; +} +case 10: { + bitField0_ |= 0x0001; + user_ = input.readBytes(); + break; +} +case 18: { + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto.Builder subBuilder = null; + if (((bitField0_ & 0x0002) == 0x0002)) { +subBuilder = queryIdentifier_.toBuilder(); + } + queryIdentifier_ = input.readMessage(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto.PARSER, extensionRegistry); + if (subBuilder != null) { +subBuilder.mergeFrom(queryIdentifier_); +queryIdentifier_ = subBuilder.buildPartial(); + } + bitField0_ |= 0x0002; + break; +} +case 26: { +
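For readers skimming the generated code above, a minimal sketch of constructing the new message via its builder; the QueryIdentifierProto field names (application_id_string, dag_index) are assumptions based on other LLAP call sites and are not shown in this excerpt:

    import com.google.protobuf.ByteString;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;

    public class RegisterDagRequestSketch {
      public static RegisterDagRequestProto build() {
        // query_identifier is the only required field in the .proto definition.
        return RegisterDagRequestProto.newBuilder()
            .setUser("hive")
            .setQueryIdentifier(QueryIdentifierProto.newBuilder()
                .setApplicationIdString("application_1542000000000_0001") // assumed field name
                .setDagIndex(1)                                           // assumed field name
                .build())
            .setCredentialsBinary(ByteString.EMPTY) // optional; real callers pass serialized tokens
            .build();
      }
    }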
[1/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 01cef9230 -> 10f4eadd5 http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-common/src/protobuf/LlapDaemonProtocol.proto -- diff --git a/llap-common/src/protobuf/LlapDaemonProtocol.proto b/llap-common/src/protobuf/LlapDaemonProtocol.proto index d70dd41..3aeacb2 100644 --- a/llap-common/src/protobuf/LlapDaemonProtocol.proto +++ b/llap-common/src/protobuf/LlapDaemonProtocol.proto @@ -134,6 +134,14 @@ message SubmitWorkRequestProto { optional bool is_guaranteed = 12 [default = false]; } +message RegisterDagRequestProto { + optional string user = 1; + required QueryIdentifierProto query_identifier = 2; + optional bytes credentials_binary = 3; +} + +message RegisterDagResponseProto { +} enum SubmissionStateProto { ACCEPTED = 1; @@ -204,6 +212,7 @@ message PurgeCacheResponseProto { } service LlapDaemonProtocol { + rpc registerDag(RegisterDagRequestProto) returns (RegisterDagResponseProto); rpc submitWork(SubmitWorkRequestProto) returns (SubmitWorkResponseProto); rpc sourceStateUpdated(SourceStateUpdatedRequestProto) returns (SourceStateUpdatedResponseProto); rpc queryComplete(QueryCompleteRequestProto) returns (QueryCompleteResponseProto); http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java index 035960e..582f518 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java @@ -16,6 +16,7 @@ package org.apache.hadoop.hive.llap.daemon; import java.io.IOException; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedRequestProto; @@ -29,6 +30,10 @@ import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.UpdateFra public interface ContainerRunner { + LlapDaemonProtocolProtos.RegisterDagResponseProto registerDag( + LlapDaemonProtocolProtos.RegisterDagRequestProto request) + throws IOException; + SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException; SourceStateUpdatedResponseProto sourceStateUpdated( http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index ef5922e..7a3ca2f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -27,10 +27,9 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.UgiFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.DaemonId; import org.apache.hadoop.hive.llap.LlapNodeId; +import 
org.apache.hadoop.hive.llap.LlapUtil; import org.apache.hadoop.hive.llap.NotTezEventHelper; import org.apache.hadoop.hive.llap.counters.FragmentCountersMap; import org.apache.hadoop.hive.llap.counters.LlapWmCounters; @@ -55,6 +54,8 @@ import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceSta import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmissionStateProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.UpdateFragmentRequestProto; @@ -65,7 +66,6 @@ import
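A minimal sketch of the shape a ContainerRunner implementation of the new method takes; the body here is an assumption for illustration, not the committed ContainerRunnerImpl code, which wires the request into the daemon's QueryTracker and ShuffleHandler:

    import java.io.IOException;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto;

    class RegisterDagHandlerSketch {
      RegisterDagResponseProto registerDag(RegisterDagRequestProto request) throws IOException {
        if (!request.hasQueryIdentifier()) {
          throw new IOException("query_identifier is required");
        }
        // The real implementation registers the DAG (and its credentials, when
        // present) with the shuffle handler before any fragment for the DAG
        // runs; that wiring is not shown in this excerpt.
        return RegisterDagResponseProto.getDefaultInstance();
      }
    }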
[3/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10f4eadd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10f4eadd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10f4eadd Branch: refs/heads/master Commit: 10f4eadd5934c4bfa575c40c98abfe6b6e148d0f Parents: 01cef92 Author: Jaume Marhuenda Authored: Fri Nov 9 12:41:56 2018 -0800 Committer: sergey Committed: Fri Nov 9 12:41:56 2018 -0800 -- .../hive/llap/tez/LlapProtocolClientProxy.java | 22 + .../daemon/rpc/LlapDaemonProtocolProtos.java| 1433 -- .../org/apache/hadoop/hive/llap/LlapUtil.java | 17 + .../hive/llap/impl/LlapProtocolClientImpl.java | 13 + .../src/protobuf/LlapDaemonProtocol.proto |9 + .../hive/llap/daemon/ContainerRunner.java |5 + .../llap/daemon/impl/ContainerRunnerImpl.java | 48 +- .../hive/llap/daemon/impl/LlapDaemon.java | 27 +- .../daemon/impl/LlapProtocolServerImpl.java | 12 + .../hive/llap/daemon/impl/QueryTracker.java | 17 + .../llap/shufflehandler/ShuffleHandler.java | 26 +- .../hive/llap/daemon/LlapDaemonTestUtils.java | 73 + .../daemon/impl/TestContainerRunnerImpl.java| 180 +++ .../TestFirstInFirstOutComparator.java | 63 +- .../llap/tezplugins/LlapTaskCommunicator.java | 74 +- .../tezplugins/LlapTaskSchedulerService.java| 49 +- .../hive/llap/tezplugins/LlapTezUtils.java | 14 + 17 files changed, 1894 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java -- diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java index 211696a..bc74c55 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java @@ -26,6 +26,8 @@ import org.apache.hadoop.hive.llap.AsyncPbRpcProxy; import org.apache.hadoop.hive.llap.LlapNodeId; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteResponseProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto; @@ -55,6 +57,12 @@ public class LlapProtocolClientProxy TimeUnit.MILLISECONDS), -1, 1); } + public void registerDag(RegisterDagRequestProto request, String host, int port, + final ExecuteRequestCallback callback) { +LlapNodeId nodeId = LlapNodeId.getInstance(host, port); +queueRequest(new RegisterDagCallable(nodeId, request, callback)); + } + public void sendSubmitWork(SubmitWorkRequestProto request, String host, int port, final ExecuteRequestCallback callback) { LlapNodeId nodeId = LlapNodeId.getInstance(host, port); @@ -86,6 +94,20 @@ public class LlapProtocolClientProxy queueRequest(new SendUpdateFragmentCallable(nodeId, request, callback)); } + private class RegisterDagCallable 
extends + NodeCallableRequest { +protected RegisterDagCallable(LlapNodeId nodeId, +RegisterDagRequestProto registerDagRequestProto, +ExecuteRequestCallback callback) { + super(nodeId, registerDagRequestProto, callback); +} + +@Override public +RegisterDagResponseProto call() throws Exception { + return getProxy(nodeId, null).registerDag(null, request); +} + } + private class SubmitWorkCallable extends NodeCallableRequest { protected SubmitWorkCallable(LlapNodeId nodeId,
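A hedged usage sketch of the new client entry point above; the ExecuteRequestCallback method names (setResponse, indicateError) are assumptions based on the callback type referenced in this diff:

    import org.apache.hadoop.hive.llap.AsyncPbRpcProxy;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto;
    import org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy;

    class RegisterDagClientSketch {
      static void register(LlapProtocolClientProxy proxy, RegisterDagRequestProto request) {
        proxy.registerDag(request, "llap-host-1", 15001, // placeholder host and port
            new AsyncPbRpcProxy.ExecuteRequestCallback<RegisterDagResponseProto>() {
              @Override
              public void setResponse(RegisterDagResponseProto response) {
                // DAG registered; fragments can now be submitted to this daemon.
              }
              @Override
              public void indicateError(Throwable t) {
                // Assumed handling: surface the failure to the caller.
              }
            });
      }
    }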
[4/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java index 114cdde..555bc5d 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java @@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory; private static final org.apache.thrift.protocol.TField RESOURCE_PLAN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("resourcePlanName", org.apache.thrift.protocol.TType.STRING, (short)1); private static final org.apache.thrift.protocol.TField POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("poolPath", org.apache.thrift.protocol.TType.STRING, (short)2); + private static final org.apache.thrift.protocol.TField NS_FIELD_DESC = new org.apache.thrift.protocol.TField("ns", org.apache.thrift.protocol.TType.STRING, (short)3); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -49,11 +50,13 @@ import org.slf4j.LoggerFactory; private String resourcePlanName; // optional private String poolPath; // optional + private String ns; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { RESOURCE_PLAN_NAME((short)1, "resourcePlanName"), -POOL_PATH((short)2, "poolPath"); +POOL_PATH((short)2, "poolPath"), +NS((short)3, "ns"); private static final Map byName = new HashMap(); @@ -72,6 +75,8 @@ import org.slf4j.LoggerFactory; return RESOURCE_PLAN_NAME; case 2: // POOL_PATH return POOL_PATH; +case 3: // NS + return NS; default: return null; } @@ -112,7 +117,7 @@ import org.slf4j.LoggerFactory; } // isset id assignments - private static final _Fields optionals[] = {_Fields.RESOURCE_PLAN_NAME,_Fields.POOL_PATH}; + private static final _Fields optionals[] = {_Fields.RESOURCE_PLAN_NAME,_Fields.POOL_PATH,_Fields.NS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -120,6 +125,8 @@ import org.slf4j.LoggerFactory; new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.POOL_PATH, new org.apache.thrift.meta_data.FieldMetaData("poolPath", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); +tmpMap.put(_Fields.NS, new org.apache.thrift.meta_data.FieldMetaData("ns", org.apache.thrift.TFieldRequirementType.OPTIONAL, +new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(WMDropPoolRequest.class, metaDataMap); } @@ -137,6 +144,9 @@ import org.slf4j.LoggerFactory; if (other.isSetPoolPath()) { this.poolPath = other.poolPath; } +if (other.isSetNs()) { + 
this.ns = other.ns; +} } public WMDropPoolRequest deepCopy() { @@ -147,6 +157,7 @@ import org.slf4j.LoggerFactory; public void clear() { this.resourcePlanName = null; this.poolPath = null; +this.ns = null; } public String getResourcePlanName() { @@ -195,6 +206,29 @@ import org.slf4j.LoggerFactory; } } + public String getNs() { +return this.ns; + } + + public void setNs(String ns) { +this.ns = ns; + } + + public void unsetNs() { +this.ns = null; + } + + /** Returns true if field ns is set (has been assigned a value) and false otherwise */ + public boolean isSetNs() { +return this.ns != null; + } + + public void setNsIsSet(boolean value) { +if (!value) { + this.ns = null; +} + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case RESOURCE_PLAN_NAME: @@ -213,6 +247,14 @@ import org.slf4j.LoggerFactory; } break; +case NS: + if (value == null) { +unsetNs(); + } else { +setNs((String)value); + } + break; + } } @@
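A minimal sketch of populating the new optional field through the generated Thrift setters shown above; the plan and pool names are placeholders:

    import org.apache.hadoop.hive.metastore.api.WMDropPoolRequest;

    public class WMDropPoolNsSketch {
      public static void main(String[] args) {
        WMDropPoolRequest req = new WMDropPoolRequest();
        req.setResourcePlanName("daily_plan"); // placeholder name
        req.setPoolPath("etl.high");           // placeholder path
        req.setNs("cluster_a");                // the new optional namespace field
        System.out.println(req.isSetNs());     // true; old clients simply leave it unset
      }
    }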
[6/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5258c67e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5258c67e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5258c67e Branch: refs/heads/master Commit: 5258c67e9558bd2d98e4887d3dd8e3eb8aa5d763 Parents: b701720 Author: sergey Authored: Tue Oct 30 12:44:14 2018 -0700 Committer: sergey Committed: Tue Oct 30 12:57:17 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../listener/DummyRawStoreFailEvent.java| 44 +- .../upgrade/hive/hive-schema-4.0.0.hive.sql | 11 + .../hive/upgrade-3.1.0-to-4.0.0.hive.sql| 137 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 5 + .../apache/hadoop/hive/ql/metadata/Hive.java| 75 +- .../hadoop/hive/ql/metadata/TestHive.java | 68 +- .../test/queries/clientpositive/resourceplan.q | 6 +- .../clientpositive/llap/resourceplan.q.out | 732 ++- .../api/WMAlterResourcePlanRequest.java | 114 ++- ...CreateOrDropTriggerToPoolMappingRequest.java | 114 ++- .../hive/metastore/api/WMDropPoolRequest.java | 114 ++- .../api/WMDropResourcePlanRequest.java | 114 ++- .../metastore/api/WMDropTriggerRequest.java | 114 ++- .../api/WMGetActiveResourcePlanRequest.java | 112 ++- .../api/WMGetAllResourcePlanRequest.java| 112 ++- .../metastore/api/WMGetResourcePlanRequest.java | 114 ++- .../api/WMGetTriggersForResourePlanRequest.java | 114 ++- .../hadoop/hive/metastore/api/WMMapping.java| 114 ++- .../hive/metastore/api/WMNullablePool.java | 114 ++- .../metastore/api/WMNullableResourcePlan.java | 114 ++- .../hadoop/hive/metastore/api/WMPool.java | 114 ++- .../hive/metastore/api/WMPoolTrigger.java | 112 ++- .../hive/metastore/api/WMResourcePlan.java | 114 ++- .../hadoop/hive/metastore/api/WMTrigger.java| 114 ++- .../api/WMValidateResourcePlanRequest.java | 114 ++- .../src/gen/thrift/gen-php/metastore/Types.php | 399 +- .../gen/thrift/gen-py/hive_metastore/ttypes.py | 263 ++- .../gen/thrift/gen-rb/hive_metastore_types.rb | 66 +- .../hive/metastore/HiveMetaStoreClient.java | 35 +- .../hadoop/hive/metastore/IMetaStoreClient.java | 20 +- .../src/main/thrift/hive_metastore.thrift | 17 + .../hadoop/hive/metastore/HiveMetaStore.java| 26 +- .../hadoop/hive/metastore/ObjectStore.java | 204 -- .../apache/hadoop/hive/metastore/RawStore.java | 28 +- .../hive/metastore/cache/CachedStore.java | 46 +- .../hive/metastore/model/MWMResourcePlan.java | 9 + .../src/main/resources/package.jdo | 4 + .../main/sql/derby/hive-schema-4.0.0.derby.sql | 4 +- .../sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql | 7 + .../main/sql/mssql/hive-schema-4.0.0.mssql.sql | 3 +- .../sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql | 6 + .../main/sql/mysql/hive-schema-4.0.0.mysql.sql | 3 +- .../sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql | 7 + .../sql/oracle/hive-schema-4.0.0.oracle.sql | 3 +- .../oracle/upgrade-3.2.0-to-4.0.0.oracle.sql| 6 + .../sql/postgres/hive-schema-4.0.0.postgres.sql | 3 +- .../upgrade-3.2.0-to-4.0.0.postgres.sql | 7 + .../DummyRawStoreControlledCommit.java | 44 +- .../DummyRawStoreForJdoConnection.java | 22 +- .../HiveMetaStoreClientPreCatalog.java | 36 +- 51 files changed, 3603 insertions(+), 679 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 
917aaeb..102e6c6 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3206,6 +3206,10 @@ public class HiveConf extends Configuration { HIVE_SERVER2_TEZ_INTERACTIVE_QUEUE("hive.server2.tez.interactive.queue", "", "A single YARN queues to use for Hive Interactive sessions. When this is specified,\n" + "workload management is enabled and used for these sessions."), +HIVE_SERVER2_WM_NAMESPACE("hive.server2.wm.namespace", "default", +"The WM namespace to use when one metastore is used by multiple compute clusters each \n" + +"with their own workload management. The special value 'default' (the default) will \n" + +"also include any resource plans created before the namespaces were introduced."),
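A minimal sketch of reading the new setting, assuming the standard HiveConf accessor pattern:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class WmNamespaceSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Resolves to "default" unless the cluster overrides
        // hive.server2.wm.namespace, per the ConfVar added above.
        String ns = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_SERVER2_WM_NAMESPACE);
        System.out.println("WM namespace: " + ns);
      }
    }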
[2/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 03c2a4e..bdfb480 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -18352,6 +18352,7 @@ class WMResourcePlan: - status - queryParallelism - defaultPoolPath + - ns """ thrift_spec = ( @@ -18360,13 +18361,15 @@ class WMResourcePlan: (2, TType.I32, 'status', None, None, ), # 2 (3, TType.I32, 'queryParallelism', None, None, ), # 3 (4, TType.STRING, 'defaultPoolPath', None, None, ), # 4 +(5, TType.STRING, 'ns', None, None, ), # 5 ) - def __init__(self, name=None, status=None, queryParallelism=None, defaultPoolPath=None,): + def __init__(self, name=None, status=None, queryParallelism=None, defaultPoolPath=None, ns=None,): self.name = name self.status = status self.queryParallelism = queryParallelism self.defaultPoolPath = defaultPoolPath +self.ns = ns def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -18397,6 +18400,11 @@ class WMResourcePlan: self.defaultPoolPath = iprot.readString() else: iprot.skip(ftype) + elif fid == 5: +if ftype == TType.STRING: + self.ns = iprot.readString() +else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -18423,6 +18431,10 @@ class WMResourcePlan: oprot.writeFieldBegin('defaultPoolPath', TType.STRING, 4) oprot.writeString(self.defaultPoolPath) oprot.writeFieldEnd() +if self.ns is not None: + oprot.writeFieldBegin('ns', TType.STRING, 5) + oprot.writeString(self.ns) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -18438,6 +18450,7 @@ class WMResourcePlan: value = (value * 31) ^ hash(self.status) value = (value * 31) ^ hash(self.queryParallelism) value = (value * 31) ^ hash(self.defaultPoolPath) +value = (value * 31) ^ hash(self.ns) return value def __repr__(self): @@ -18460,6 +18473,7 @@ class WMNullableResourcePlan: - isSetQueryParallelism - defaultPoolPath - isSetDefaultPoolPath + - ns """ thrift_spec = ( @@ -18471,15 +18485,17 @@ class WMNullableResourcePlan: (5, TType.BOOL, 'isSetQueryParallelism', None, None, ), # 5 (6, TType.STRING, 'defaultPoolPath', None, None, ), # 6 (7, TType.BOOL, 'isSetDefaultPoolPath', None, None, ), # 7 +(8, TType.STRING, 'ns', None, None, ), # 8 ) - def __init__(self, name=None, status=None, queryParallelism=None, isSetQueryParallelism=None, defaultPoolPath=None, isSetDefaultPoolPath=None,): + def __init__(self, name=None, status=None, queryParallelism=None, isSetQueryParallelism=None, defaultPoolPath=None, isSetDefaultPoolPath=None, ns=None,): self.name = name self.status = status self.queryParallelism = queryParallelism self.isSetQueryParallelism = isSetQueryParallelism self.defaultPoolPath = defaultPoolPath self.isSetDefaultPoolPath = isSetDefaultPoolPath +self.ns = ns def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -18520,6 +18536,11 @@ class WMNullableResourcePlan: self.isSetDefaultPoolPath = 
iprot.readBool() else: iprot.skip(ftype) + elif fid == 8: +if ftype == TType.STRING: + self.ns = iprot.readString() +else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -18554,6 +18575,10 @@ class WMNullableResourcePlan: oprot.writeFieldBegin('isSetDefaultPoolPath', TType.BOOL, 7) oprot.writeBool(self.isSetDefaultPoolPath) oprot.writeFieldEnd() +if self.ns is not None: + oprot.writeFieldBegin('ns', TType.STRING, 8) + oprot.writeString(self.ns) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -18569,6 +18594,7 @@ class WMNullableResourcePlan: value = (value * 31) ^ hash(self.isSetQueryParallelism) value = (value * 31) ^ hash(self.defaultPoolPath) value = (value * 31) ^ hash(self.isSetDefaultPoolPath) +value = (value * 31) ^ hash(self.ns) return value def __repr__(self): @@ -18590,6
[3/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java index 4621e10..15bb764 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java @@ -44,6 +44,7 @@ import org.slf4j.LoggerFactory; private static final org.apache.thrift.protocol.TField IS_SET_QUERY_PARALLELISM_FIELD_DESC = new org.apache.thrift.protocol.TField("isSetQueryParallelism", org.apache.thrift.protocol.TType.BOOL, (short)5); private static final org.apache.thrift.protocol.TField DEFAULT_POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("defaultPoolPath", org.apache.thrift.protocol.TType.STRING, (short)6); private static final org.apache.thrift.protocol.TField IS_SET_DEFAULT_POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("isSetDefaultPoolPath", org.apache.thrift.protocol.TType.BOOL, (short)7); + private static final org.apache.thrift.protocol.TField NS_FIELD_DESC = new org.apache.thrift.protocol.TField("ns", org.apache.thrift.protocol.TType.STRING, (short)8); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -57,6 +58,7 @@ import org.slf4j.LoggerFactory; private boolean isSetQueryParallelism; // optional private String defaultPoolPath; // optional private boolean isSetDefaultPoolPath; // optional + private String ns; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -69,7 +71,8 @@ import org.slf4j.LoggerFactory; QUERY_PARALLELISM((short)4, "queryParallelism"), IS_SET_QUERY_PARALLELISM((short)5, "isSetQueryParallelism"), DEFAULT_POOL_PATH((short)6, "defaultPoolPath"), -IS_SET_DEFAULT_POOL_PATH((short)7, "isSetDefaultPoolPath"); +IS_SET_DEFAULT_POOL_PATH((short)7, "isSetDefaultPoolPath"), +NS((short)8, "ns"); private static final Map byName = new HashMap(); @@ -96,6 +99,8 @@ import org.slf4j.LoggerFactory; return DEFAULT_POOL_PATH; case 7: // IS_SET_DEFAULT_POOL_PATH return IS_SET_DEFAULT_POOL_PATH; +case 8: // NS + return NS; default: return null; } @@ -140,7 +145,7 @@ import org.slf4j.LoggerFactory; private static final int __ISSETQUERYPARALLELISM_ISSET_ID = 1; private static final int __ISSETDEFAULTPOOLPATH_ISSET_ID = 2; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.NAME,_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH}; + private static final _Fields optionals[] = {_Fields.NAME,_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH,_Fields.NS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -156,6 +161,8 @@ import org.slf4j.LoggerFactory; new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.IS_SET_DEFAULT_POOL_PATH, new org.apache.thrift.meta_data.FieldMetaData("isSetDefaultPoolPath", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); +tmpMap.put(_Fields.NS, new org.apache.thrift.meta_data.FieldMetaData("ns", org.apache.thrift.TFieldRequirementType.OPTIONAL, +new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(WMNullableResourcePlan.class, metaDataMap); } @@ -180,6 +187,9 @@ import org.slf4j.LoggerFactory; this.defaultPoolPath = other.defaultPoolPath; } this.isSetDefaultPoolPath = other.isSetDefaultPoolPath; +if (other.isSetNs()) { + this.ns = other.ns; +} } public WMNullableResourcePlan deepCopy() { @@ -197,6 +207,7 @@ import org.slf4j.LoggerFactory;
[1/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master b701720f5 -> 5258c67e9 http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java -- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 0755483..03e3a2d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -11593,13 +11593,15 @@ public class ObjectStore implements RawStore, Configurable { } else { rp = new MWMResourcePlan(rpName, null, Status.DISABLED); } +rp.setNs(resourcePlan.getNs()); try { openTransaction(); pm.makePersistent(rp); if (copyFromName != null) { -MWMResourcePlan copyFrom = getMWMResourcePlan(copyFromName, false); +String ns = getNsOrDefault(resourcePlan.getNs()); +MWMResourcePlan copyFrom = getMWMResourcePlan(copyFromName, ns, false); if (copyFrom == null) { - throw new NoSuchObjectException(copyFromName); + throw new NoSuchObjectException(copyFromName + " in " + ns); } copyRpContents(rp, copyFrom); } else { @@ -11627,6 +11629,7 @@ public class ObjectStore implements RawStore, Configurable { private void copyRpContents(MWMResourcePlan dest, MWMResourcePlan src) { dest.setQueryParallelism(src.getQueryParallelism()); +dest.setNs(src.getNs()); Map pools = new HashMap<>(); Map> triggersToPools = new HashMap<>(); for (MWMPool copyPool : src.getPools()) { @@ -11686,6 +11689,7 @@ public class ObjectStore implements RawStore, Configurable { } WMResourcePlan rp = new WMResourcePlan(); rp.setName(mplan.getName()); +rp.setNs(mplan.getNs()); rp.setStatus(WMResourcePlanStatus.valueOf(mplan.getStatus().name())); if (mplan.getQueryParallelism() != null) { rp.setQueryParallelism(mplan.getQueryParallelism()); @@ -11724,6 +11728,7 @@ public class ObjectStore implements RawStore, Configurable { assert mPool.getQueryParallelism() != null; result.setQueryParallelism(mPool.getQueryParallelism()); result.setSchedulingPolicy(mPool.getSchedulingPolicy()); +result.setNs(mPool.getResourcePlan().getNs()); return result; } @@ -11736,15 +11741,24 @@ public class ObjectStore implements RawStore, Configurable { if (mMapping.getOrdering() != null) { result.setOrdering(mMapping.getOrdering()); } +result.setNs(mMapping.getResourcePlan().getNs()); return result; } + private final String getNsOrDefault(String ns) { +// This is only needed for old clients not setting NS in requests. +// Not clear how to handle this... this is properly a HS2 config but metastore needs its default +// value for backward compat, and we don't want it configurable separately because it's also +// used in upgrade scripts, were it cannot be configured. + return normalizeIdentifier(ns == null ? 
"default" : ns); + } + @Override - public WMFullResourcePlan getResourcePlan(String name) throws NoSuchObjectException { + public WMFullResourcePlan getResourcePlan(String name, String ns) throws NoSuchObjectException { boolean commited = false; try { openTransaction(); - WMFullResourcePlan fullRp = fullFromMResourcePlan(getMWMResourcePlan(name, false)); + WMFullResourcePlan fullRp = fullFromMResourcePlan(getMWMResourcePlan(name, ns, false)); commited = commitTransaction(); return fullRp; } catch (InvalidOperationException e) { @@ -11755,12 +11769,12 @@ public class ObjectStore implements RawStore, Configurable { } } - private MWMResourcePlan getMWMResourcePlan(String name, boolean editCheck) + private MWMResourcePlan getMWMResourcePlan(String name, String ns, boolean editCheck) throws NoSuchObjectException, InvalidOperationException { -return getMWMResourcePlan(name, editCheck, true); +return getMWMResourcePlan(name, ns, editCheck, true); } - private MWMResourcePlan getMWMResourcePlan(String name, boolean editCheck, boolean mustExist) + private MWMResourcePlan getMWMResourcePlan(String name, String ns, boolean editCheck, boolean mustExist) throws NoSuchObjectException, InvalidOperationException { MWMResourcePlan resourcePlan; boolean commited = false; @@ -11768,18 +11782,16 @@ public class ObjectStore implements RawStore, Configurable { name = normalizeIdentifier(name); try { - openTransaction(); - query = pm.newQuery(MWMResourcePlan.class, "name == rpname"); -
[5/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/ql/src/test/results/clientpositive/llap/resourceplan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/resourceplan.q.out b/ql/src/test/results/clientpositive/llap/resourceplan.q.out index c11daf7..7fd7278 100644 --- a/ql/src/test/results/clientpositive/llap/resourceplan.q.out +++ b/ql/src/test/results/clientpositive/llap/resourceplan.q.out @@ -1838,14 +1838,14 @@ FROM POSTHOOK: type: CREATETABLE POSTHOOK: Output: SYS@PART_COL_STATS POSTHOOK: Output: database:sys -PREHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '3.1.0' AS `SCHEMA_VERSION`, - 'Hive release version 3.1.0' AS `VERSION_COMMENT` +PREHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '4.0.0' AS `SCHEMA_VERSION`, + 'Hive release version 4.0.0' AS `VERSION_COMMENT` PREHOOK: type: CREATEVIEW PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: SYS@VERSION PREHOOK: Output: database:sys -POSTHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '3.1.0' AS `SCHEMA_VERSION`, - 'Hive release version 3.1.0' AS `VERSION_COMMENT` +POSTHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '4.0.0' AS `SCHEMA_VERSION`, + 'Hive release version 4.0.0' AS `VERSION_COMMENT` POSTHOOK: type: CREATEVIEW POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: SYS@VERSION @@ -2107,6 +2107,7 @@ POSTHOOK: Lineage: PARTITION_STATS_VIEW.total_size EXPRESSION [(partition_params POSTHOOK: Lineage: PARTITION_STATS_VIEW.transient_last_ddl_time EXPRESSION [(partition_params)partition_params.FieldSchema(name:param_key, type:string, comment:from deserializer), (partition_params)partition_params.FieldSchema(name:param_value, type:string, comment:from deserializer), ] PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_RESOURCEPLANS` ( `NAME` string, + `NS` string, `STATUS` string, `QUERY_PARALLELISM` int, `DEFAULT_POOL_PATH` string @@ -2117,6 +2118,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"STATUS\", \"WM_RESOURCEPLAN\".\"QUERY_PARALLELISM\", \"WM_POOL\".\"PATH\" @@ -2128,6 +2130,7 @@ PREHOOK: Output: SYS@WM_RESOURCEPLANS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_RESOURCEPLANS` ( `NAME` string, + `NS` string, `STATUS` string, `QUERY_PARALLELISM` int, `DEFAULT_POOL_PATH` string @@ -2138,6 +2141,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"STATUS\", \"WM_RESOURCEPLAN\".\"QUERY_PARALLELISM\", \"WM_POOL\".\"PATH\" @@ -2149,6 +2153,7 @@ POSTHOOK: Output: SYS@WM_RESOURCEPLANS POSTHOOK: Output: database:sys PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_TRIGGERS` ( `RP_NAME` string, + `NS` string, `NAME` string, `TRIGGER_EXPRESSION` string, `ACTION_EXPRESSION` string @@ -2159,6 +2164,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT r.\"NAME\" AS RP_NAME, + case when r.\"NS\" is null then 'default' else r.\"NS\" end, t.\"NAME\" AS NAME, \"TRIGGER_EXPRESSION\", \"ACTION_EXPRESSION\" @@ -2174,6 +2180,7 @@ PREHOOK: Output: SYS@WM_TRIGGERS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_TRIGGERS` ( `RP_NAME` string, + `NS` string, `NAME` string, `TRIGGER_EXPRESSION` string, `ACTION_EXPRESSION` string @@ -2184,6 +2191,7 @@ TBLPROPERTIES ( 
"hive.sql.query" = "SELECT r.\"NAME\" AS RP_NAME, + case when r.\"NS\" is null then 'default' else r.\"NS\" end, t.\"NAME\" AS NAME, \"TRIGGER_EXPRESSION\", \"ACTION_EXPRESSION\" @@ -2199,6 +2207,7 @@ POSTHOOK: Output: SYS@WM_TRIGGERS POSTHOOK: Output: database:sys PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_POOLS` ( `RP_NAME` string, + `NS` string, `PATH` string, `ALLOC_FRACTION` double, `QUERY_PARALLELISM` int, @@ -2210,6 +2219,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"WM_POOL\".\"PATH\", \"WM_POOL\".\"ALLOC_FRACTION\", \"WM_POOL\".\"QUERY_PARALLELISM\", @@ -2226,6 +2236,7 @@ PREHOOK: Output: SYS@WM_POOLS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_POOLS` ( `RP_NAME` string, + `NS` string, `PATH` string, `ALLOC_FRACTION` double, `QUERY_PARALLELISM` int, @@ -2237,6 +2248,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS,
[55/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4302bb7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4302bb7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4302bb7 Branch: refs/heads/master-tez092 Commit: b4302bb7ad967f15ca1b708685b2ac669e3cf037 Parents: b829955 Author: Anishek Agarwal Authored: Mon Oct 22 13:51:43 2018 +0530 Committer: Anishek Agarwal Committed: Mon Oct 22 13:51:43 2018 +0530 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../listener/DbNotificationListener.java| 182 +--- .../json/JSONCreateFunctionMessage.java | 3 +- .../messaging/json/JSONDropFunctionMessage.java | 3 +- .../messaging/json/JSONMessageFactory.java | 39 +- .../listener/TestDbNotificationListener.java| 14 +- .../TestReplAcidTablesWithJsonMessage.java | 43 ++ ...eplAcrossInstancesWithJsonMessageFormat.java | 45 ++ ...ncrementalLoadAcidTablesWithJsonMessage.java | 46 ++ .../ql/parse/TestReplWithJsonMessageFormat.java | 39 ++ .../hive/ql/parse/TestReplicationScenarios.java | 82 ++-- .../TestReplicationScenariosAcidTables.java | 61 +-- ...TestReplicationScenariosAcrossInstances.java | 103 +++-- ...ationScenariosIncrementalLoadAcidTables.java | 55 ++- .../hadoop/hive/ql/parse/WarehouseInstance.java | 2 +- .../ql/cache/results/QueryResultsCache.java | 14 +- .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 5 +- .../repl/bootstrap/load/LoadConstraint.java | 4 +- .../parse/repl/dump/events/AbortTxnHandler.java | 12 +- .../events/AbstractConstraintEventHandler.java | 3 +- .../repl/dump/events/AbstractEventHandler.java | 32 +- .../repl/dump/events/AddForeignKeyHandler.java | 12 +- .../events/AddNotNullConstraintHandler.java | 12 +- .../repl/dump/events/AddPartitionHandler.java | 10 +- .../repl/dump/events/AddPrimaryKeyHandler.java | 12 +- .../dump/events/AddUniqueConstraintHandler.java | 13 +- .../repl/dump/events/AllocWriteIdHandler.java | 12 +- .../repl/dump/events/AlterDatabaseHandler.java | 12 +- .../repl/dump/events/AlterPartitionHandler.java | 21 +- .../repl/dump/events/AlterTableHandler.java | 18 +- .../repl/dump/events/CommitTxnHandler.java | 28 +- .../repl/dump/events/CreateDatabaseHandler.java | 13 +- .../repl/dump/events/CreateFunctionHandler.java | 13 +- .../repl/dump/events/CreateTableHandler.java| 15 +- .../parse/repl/dump/events/DefaultHandler.java | 9 + .../repl/dump/events/DropConstraintHandler.java | 13 +- .../repl/dump/events/DropDatabaseHandler.java | 12 +- .../repl/dump/events/DropFunctionHandler.java | 12 +- .../repl/dump/events/DropPartitionHandler.java | 12 +- .../repl/dump/events/DropTableHandler.java | 12 +- .../repl/dump/events/EventHandlerFactory.java | 44 +- .../parse/repl/dump/events/InsertHandler.java | 22 +- .../parse/repl/dump/events/OpenTxnHandler.java | 12 +- .../repl/dump/io/ConstraintsSerializer.java | 10 +- .../load/message/AbstractMessageHandler.java| 4 +- .../dump/events/TestEventHandlerFactory.java| 7 +- .../hive/metastore/conf/MetastoreConf.java | 2 +- .../hive/metastore/messaging/EventMessage.java | 64 +-- .../metastore/messaging/MessageBuilder.java | 425 ++ .../metastore/messaging/MessageEncoder.java | 27 ++ .../metastore/messaging/MessageFactory.java | 367 +++- .../metastore/messaging/MessageSerializer.java | 24 ++ .../event/filters/DatabaseAndTableFilter.java | 8 +- .../messaging/json/JSONAcidWriteMessage.java| 9 +- 
.../json/JSONAddForeignKeyMessage.java | 5 +- .../json/JSONAddNotNullConstraintMessage.java | 5 +- .../messaging/json/JSONAddPartitionMessage.java | 11 +- .../json/JSONAddPrimaryKeyMessage.java | 5 +- .../json/JSONAddUniqueConstraintMessage.java| 5 +- .../messaging/json/JSONAlterCatalogMessage.java | 9 +- .../json/JSONAlterDatabaseMessage.java | 9 +- .../json/JSONAlterPartitionMessage.java | 15 +- .../messaging/json/JSONAlterTableMessage.java | 9 +- .../messaging/json/JSONCommitTxnMessage.java| 5 +- .../json/JSONCreateDatabaseMessage.java | 5 +- .../json/JSONCreateFunctionMessage.java | 5 +- .../messaging/json/JSONCreateTableMessage.java | 5 +- .../json/JSONDropPartitionMessage.java | 5 +- .../messaging/json/JSONDropTableMessage.java| 5 +- .../messaging/json/JSONInsertMessage.java | 9 +- .../messaging/json/JSONMessageEncoder.java | 70 +++ .../messaging/json/JSONMessageFactory.java | 432 --- .../messaging/json/gzip/DeSerializer.java | 181
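The file list above introduces MessageEncoder/MessageSerializer plus a gzip codec for notification messages. A minimal sketch of the compress-then-encode idea, assuming the serializer's goal is keeping large JSON payloads storable in a string column; this is not the committed gzip Serializer:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.Base64;
    import java.util.zip.GZIPOutputStream;

    public class GzipMessageSketch {
      static String encode(String jsonMessage) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (GZIPOutputStream gz = new GZIPOutputStream(bos)) {
          gz.write(jsonMessage.getBytes(StandardCharsets.UTF_8));
        }
        // Base64 keeps the compressed bytes representable as a string value.
        return Base64.getEncoder().encodeToString(bos.toByteArray());
      }

      public static void main(String[] args) throws IOException {
        System.out.println(encode("{\"eventType\":\"CREATE_TABLE\"}"));
      }
    }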
[40/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out new file mode 100644 index 000..aa04df8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: explain cbo +select +sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from +store_sales + ,date_dim d1 + ,store + where +d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select +sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from +store_sales + ,date_dim d1 + ,store + where +d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) +HiveProject(total_sum=[$2], s_state=[$0], s_county=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col s_state))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) +HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], 
GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$2]) +HiveJoin(condition=[=($7, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) +HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) +
[16/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out new file mode 100644 index 000..324eef2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: explain +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from +web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 1 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) +,date_dim + where + cr.cr_return_amount > 1 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) +,date_dim + where + sr.sr_return_amt > 1 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 
10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input:
[21/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out new file mode 100644 index 000..c7fd970 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out @@ -0,0 +1,296 @@ +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP +,count(ss_list_price) B1_CNT +,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 +and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP +,count(ss_list_price) B2_CNT +,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 +and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP +,count(ss_list_price) B3_CNT +,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 +and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP +,count(ss_list_price) B4_CNT +,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 +and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP +,count(ss_list_price) B5_CNT +,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 +and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP +,count(ss_list_price) B6_CNT +,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 +and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP +,count(ss_list_price) B1_CNT +,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 +and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP +,count(ss_list_price) B2_CNT +,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 +and (ss_list_price between 91 and 91+10 + or 
ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP +,count(ss_list_price) B3_CNT +,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 +and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP +,count(ss_list_price) B4_CNT +,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 +and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP +
[33/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out new file mode 100644 index 000..8f2f79f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(d_year=[CAST(1998):INTEGER], i_category_id=[$0], i_category=[$1], _o__c3=[$2]) +HiveSortLimit(sort0=[$3], sort1=[$0], sort2=[$1], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC]) + HiveProject(i_category_id=[$0], i_category=[$1], _o__c3=[$2], (tok_function sum (tok_table_or_col ss_ext_sales_price))=[$2]) +HiveAggregate(group=[{5, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) +HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) +HiveFilter(condition=[AND(=($8, 12), =($6, 1998))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) +HiveProject(i_item_sk=[$0], i_category_id=[$11], i_category=[$12]) + HiveFilter(condition=[=($20, 1)]) +HiveTableScan(table=[[default, item]], table:alias=[item]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out new file mode 100644 index 000..6b21ee4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when 
(d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK:
[58/75] [abbrv] hive git commit: HIVE-20542: Incremental REPL DUMP progress information log message is incorrect (Ashutosh Bapat, reviewed by Sankar Hariappan)
HIVE-20542: Incremental REPL DUMP progress information log message is incorrect (Ashutosh Bapat, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d4d03fd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d4d03fd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d4d03fd Branch: refs/heads/master-tez092 Commit: 0d4d03fd1daeb3b75182b73f7b40de7a3b7d48ea Parents: 7765e90 Author: Ashutosh Bapat Authored: Tue Oct 23 17:56:47 2018 +0530 Committer: Sankar Hariappan Committed: Tue Oct 23 17:56:47 2018 +0530 -- .../listener/DbNotificationListener.java| 75 ++- .../listener/TestDbNotificationListener.java| 59 ++ .../TestReplicationScenariosAcidTables.java | 6 +- .../hadoop/hive/ql/parse/WarehouseInstance.java | 17 ++ .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 9 +- .../hive/ql/metadata/events/EventUtils.java | 16 +- .../api/NotificationEventsCountRequest.java | 206 ++- .../src/gen/thrift/gen-php/metastore/Types.php | 46 + .../gen/thrift/gen-py/hive_metastore/ttypes.py | 28 ++- .../gen/thrift/gen-rb/hive_metastore_types.rb | 6 +- .../src/main/thrift/hive_metastore.thrift | 4 +- .../hadoop/hive/metastore/ObjectStore.java | 58 +- .../hadoop/hive/metastore/txn/TxnHandler.java | 2 +- 13 files changed, 503 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0d4d03fd/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java -- diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index c23aab2..fe101d3 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -24,6 +24,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Arrays; +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -939,19 +940,71 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener long nextNLId = getNextNLId(stmt, sqlGenerator, "org.apache.hadoop.hive.metastore.model.MNotificationLog"); - String insertVal = "(" + nextNLId + "," + nextEventId + "," + now() + ", ?, ?," + - quoteString(" ") + ",?, ?)"; + String insertVal; + String columns; + List params = new ArrayList(); + + // Construct the values string, parameters and column string step by step simultaneously so + // that the positions of columns and of their corresponding values do not go out of sync. 
+ + // Notification log id + columns = "\"NL_ID\""; + insertVal = "" + nextNLId; + + // Event id + columns = columns + ", \"EVENT_ID\""; + insertVal = insertVal + "," + nextEventId; + + // Event time + columns = columns + ", \"EVENT_TIME\""; + insertVal = insertVal + "," + now(); + + // Event type + columns = columns + ", \"EVENT_TYPE\""; + insertVal = insertVal + ", ?"; + params.add(event.getEventType()); + + // Message + columns = columns + ", \"MESSAGE\""; + insertVal = insertVal + ", ?"; + params.add(event.getMessage()); + + // Message format + columns = columns + ", \"MESSAGE_FORMAT\""; + insertVal = insertVal + ", ?"; + params.add(event.getMessageFormat()); + + // Database name, optional + String dbName = event.getDbName(); + if (dbName != null) { +assert dbName.equals(dbName.toLowerCase()); +columns = columns + ", \"DB_NAME\""; +insertVal = insertVal + ", ?"; +params.add(dbName); + } - s = "insert into \"NOTIFICATION_LOG\" (\"NL_ID\", \"EVENT_ID\", \"EVENT_TIME\", " + - " \"EVENT_TYPE\", \"DB_NAME\", " + - " \"TBL_NAME\", \"MESSAGE\", \"MESSAGE_FORMAT\") VALUES " + insertVal; - List params = Arrays.asList( - event.getEventType(), event.getDbName(), event.getMessage(), event.getMessageFormat()); - pst = sqlGenerator.prepareStmtWithParameters(dbConn, s, params); + // Table name, optional + String tableName = event.getTableName(); + if (tableName != null) { +assert tableName.equals(tableName.toLowerCase()); +columns = columns + ", \"TBL_NAME\""; +insertVal = insertVal +
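The comment in the hunk above states the invariant the rewrite protects: every field appends its column name, its VALUES entry, and (for placeholders) its bind parameter in a single step, so the three can never drift out of position when an optional column such as DB_NAME or TBL_NAME is skipped. A minimal, self-contained sketch of that idiom, assuming illustrative class and method names rather than the actual metastore code:

import java.util.ArrayList;
import java.util.List;

// Sketch of the "grow columns, values and parameters together" pattern from
// the hunk above; only the structure mirrors the actual change.
public class InsertStatementBuilder {
    private final StringBuilder columns = new StringBuilder();
    private final StringBuilder values = new StringBuilder();
    private final List<String> params = new ArrayList<>();

    // Numeric/literal fields are inlined directly into the VALUES clause.
    public void addLiteral(String column, String literal) {
        appendColumn(column);
        appendValue(literal);
    }

    // String fields become '?' placeholders with a matching bind parameter.
    // Optional fields are skipped when null, which removes the column and its
    // placeholder at the same time.
    public void addParameter(String column, String value) {
        if (value == null) {
            return;
        }
        appendColumn(column);
        appendValue("?");
        params.add(value);
    }

    public String toSql(String table) {
        return "insert into \"" + table + "\" (" + columns + ") values (" + values + ")";
    }

    public List<String> getParams() {
        return params;
    }

    private void appendColumn(String column) {
        if (columns.length() > 0) {
            columns.append(", ");
        }
        columns.append('"').append(column).append('"');
    }

    private void appendValue(String value) {
        if (values.length() > 0) {
            values.append(", ");
        }
        values.append(value);
    }
}

Built this way, dropping an optional column cannot leave a stray '?' behind, which is exactly the positional mismatch the original fixed-column insert statement was vulnerable to.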
[53/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b4302bb7/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java -- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java new file mode 100644 index 000..787b9b2 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.messaging; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.regex.PatternSyntaxException; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Catalog; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TxnToWriteId; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.events.AcidWriteEvent; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAbortTxnMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAcidWriteMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddForeignKeyMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddNotNullConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddPrimaryKeyMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddUniqueConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAllocWriteIdMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterCatalogMessage; +import 
org.apache.hadoop.hive.metastore.messaging.json.JSONAlterDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCommitTxnMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateCatalogMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateFunctionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropCatalogMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropFunctionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONInsertMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONOpenTxnMessage; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.thrift.TBase; +import
[63/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/query24.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 902358a..43ece85 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -114,234 +116,242 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Map 24 <- Reducer 20 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Map 23 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- 
Map 7 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_91] -Select Operator [SEL_90] (rows=78393744 width=380) + Reducer 6 + File Output Operator [FS_94] +Select Operator [SEL_93] (rows=1313165 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=78393744 width=492) + Filter Operator [FIL_92] (rows=1313165 width=492) predicate:(_col3 > _col4) -Merge Join Operator [MERGEJOIN_290] (rows=235181232 width=492) +Merge Join Operator [MERGEJOIN_301] (rows=3939496 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] -<-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] -Select Operator [SEL_344] (rows=1 width=112) +<-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] +Select Operator [SEL_350] (rows=1 width=112) Output:["_col0"] - Group By Operator [GBY_343] (rows=1 width=120) + Group By Operator [GBY_349] (rows=1 width=120)
[18/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out new file mode 100644 index 000..812928b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy +,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk +and inv_warehouse_sk = w_warehouse_sk +and inv_date_sk = d_date_sk +and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov +,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov +,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy +,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk +and inv_warehouse_sk = w_warehouse_sk +and inv_date_sk = d_date_sk +and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov +,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov +,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 6 vectorized + File Output Operator [FS_166] +Select Operator [SEL_165] (rows=859 width=56) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] +<-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] +Select Operator [SEL_43] (rows=859 width=48) + Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=859 width=40) +Conds:RS_160._col0, _col1=RS_164._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized +FORWARD [RS_160] + PartitionCols:_col0, _col1 + Select Operator [SEL_159] (rows=859 width=24) +Output:["_col0","_col1","_col2","_col3"] +Filter Operator [FIL_158] (rows=859 width=40) + predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END + Group By Operator [GBY_157] (rows=1719 width=40) +
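The filter predicate in this plan is Hive's expansion of the query's case mean when 0 then 0 else stdev/mean end > 1 condition into raw aggregates. Reading the columns by the shape of the expression (an assumption, since the plan does not label them: _col3 as the count n, _col5 as the running sum, _col4 as the running sum of squares), the power(..., 0.5) term is the usual sample-standard-deviation identity:

\[
\operatorname{stddev\_samp}(x)
  \;=\; \sqrt{\frac{\sum x^{2} \;-\; \bigl(\sum x\bigr)^{2}/n}{\,n-1\,}},
\qquad
\text{filter: } \frac{\operatorname{stddev\_samp}(x)}{\operatorname{avg}(x)} > 1,
\]

with the inner CASE guard mapping n = 1 to null so the division by n - 1 is never evaluated, exactly as the predicate above does.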
[05/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out new file mode 100644 index 000..93bce2e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -0,0 +1,397 @@ +PREHOOK: query: explain +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '0','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', +
[12/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out new file mode 100644 index 000..d9543ad --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out @@ -0,0 +1,388 @@ +Warning: Shuffle Join MERGEJOIN[266][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +PREHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales +,store +,promotion +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales +,store +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales +,store +,promotion +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales +,store +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 12 <- Reducer 18 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 30 <- Reducer 10 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 29 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 16
[17/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out new file mode 100644 index 000..495b6bd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] +Limit [LIM_78] (rows=100 width=972) + Number of rows:100 + Select Operator [SEL_77] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 4 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] +SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + Group By Operator [GBY_17] (rows=142538 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 +Top N Key Operator [TNK_33] (rows=37536846 width=257) + keys:_col0, _col1,sort order:++,top n:100 + Select Operator [SEL_15] (rows=37536846 width=257) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] +Merge Join Operator [MERGEJOIN_55] (rows=37536846 width=257) +
[48/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query67.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query67.q b/ql/src/test/queries/clientpositive/perf/cbo_query67.q new file mode 100644 index 000..5781aac --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query67.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query67.tpl and seed 1819994127 +explain cbo +select * +from (select i_category +,i_class +,i_brand +,i_product_name +,d_year +,d_qoy +,d_moy +,s_store_id +,sumsales +,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales +from store_sales +,date_dim +,store +,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category +,i_class +,i_brand +,i_product_name +,d_year +,d_qoy +,d_moy +,s_store_id +,sumsales +,rk +limit 100; + +-- end query 1 in stream 0 using template query67.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query68.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query68.q b/ql/src/test/queries/clientpositive/perf/cbo_query68.q new file mode 100644 index 000..520b9d7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query68.q @@ -0,0 +1,44 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query68.tpl and seed 803547492 +explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk +and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk +and store_sales.ss_addr_sk = customer_address.ca_address_sk +and date_dim.d_dom between 1 and 2 +and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) +and date_dim.d_year in (1998,1998+1,1998+2) +and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100; + +-- end query 1 in stream 0 using template query68.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query69.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query69.q b/ql/src/test/queries/clientpositive/perf/cbo_query69.q new file mode 100644 index 000..01183fb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query69.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query69.tpl and seed 797269820 +explain cbo +select + 
cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and +ss_sold_date_sk = d_date_sk and +
[43/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out new file mode 100644 index 000..9c31d61 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from +web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 1 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) +,date_dim + where + cr.cr_return_amount > 1 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) +,date_dim + where + sr.sr_return_amt > 1 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank 
<= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input:
[28/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out new file mode 100644 index 000..ee94ea3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, +sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, +sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, +sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, +sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, +sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, +sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by
i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns
[24/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out new file mode 100644 index 000..b41b4e3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 7 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 5 vectorized + File Output Operator [FS_97] +Limit [LIM_96] (rows=100 width=201) + Number of rows:100 + Select Operator [SEL_95] (rows=2555 width=201) +Output:["_col0","_col1"] + <-Reducer 4 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=2555 width=201) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] +SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=43435 width=201) + Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 +Top N Key Operator [TNK_43] (rows=20154874 width=205) + keys:_col3,sort order:+,top n:100 + Select Operator [SEL_22] (rows=20154874 width=205) +Output:["_col3","_col8"] +Filter Operator [FIL_21] (rows=20154874 width=205) + predicate:(_col4 or _col5 or _col9) + Merge Join Operator [MERGEJOIN_76] (rows=20154874 width=205) + Conds:RS_18._col0=RS_19._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] + <-Reducer 2 [SIMPLE_EDGE] +SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_74] (rows=8000 width=101) + Conds:RS_79._col1=RS_81._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_79] + PartitionCols:_col1 + Select Operator [SEL_78] (rows=8000 width=8) +Output:["_col0","_col1"] +Filter Operator [FIL_77] (rows=8000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=8000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] +
[01/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master-tez092 8151911b4 -> c55347d52 http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out new file mode 100644 index 000..673050e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out @@ -0,0 +1,178 @@ +PREHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and +((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') +)) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and +((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') +)) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 7 vectorized + File Output Operator [FS_115] +Limit [LIM_114] (rows=100 width=801) + Number of rows:100 + Select Operator [SEL_113] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_112] + Select Operator [SEL_111] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] +Filter Operator [FIL_110] (rows=4804228 width=689) + predicate:CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END + Select Operator [SEL_109] (rows=9608456 width=577) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] +PTF Operator [PTF_108] (rows=9608456 width=577) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] + Select Operator [SEL_107] (rows=9608456 width=577) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] +
[07/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out new file mode 100644 index 000..fee4e83 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out @@ -0,0 +1,662 @@ +PREHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number +AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk +JOIN date_dim ON d_date_sk=ss_sold_date_sk +LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number +AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number +AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON 
i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number +AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id +
[50/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ff9f758..da2091a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1732,7 +1732,104 @@ spark.only.query.negative.files=spark_job_max_tasks.q,\ spark_submit_negative_executor_memory.q spark.perf.disabled.query.files=query14.q,\ - query64.q + query64.q,\ + cbo_query1.q,\ + cbo_query10.q,\ + cbo_query11.q,\ + cbo_query12.q,\ + cbo_query13.q,\ + cbo_query14.q,\ + cbo_query15.q,\ + cbo_query16.q,\ + cbo_query17.q,\ + cbo_query18.q,\ + cbo_query19.q,\ + cbo_query2.q,\ + cbo_query20.q,\ + cbo_query21.q,\ + cbo_query22.q,\ + cbo_query23.q,\ + cbo_query24.q,\ + cbo_query25.q,\ + cbo_query26.q,\ + cbo_query27.q,\ + cbo_query28.q,\ + cbo_query29.q,\ + cbo_query3.q,\ + cbo_query30.q,\ + cbo_query31.q,\ + cbo_query32.q,\ + cbo_query33.q,\ + cbo_query34.q,\ + cbo_query35.q,\ + cbo_query36.q,\ + cbo_query37.q,\ + cbo_query38.q,\ + cbo_query39.q,\ + cbo_query4.q,\ + cbo_query40.q,\ + cbo_query42.q,\ + cbo_query43.q,\ + cbo_query44.q,\ + cbo_query45.q,\ + cbo_query46.q,\ + cbo_query47.q,\ + cbo_query48.q,\ + cbo_query49.q,\ + cbo_query5.q,\ + cbo_query50.q,\ + cbo_query51.q,\ + cbo_query52.q,\ + cbo_query53.q,\ + cbo_query54.q,\ + cbo_query55.q,\ + cbo_query56.q,\ + cbo_query57.q,\ + cbo_query58.q,\ + cbo_query59.q,\ + cbo_query6.q,\ + cbo_query60.q,\ + cbo_query61.q,\ + cbo_query63.q,\ + cbo_query64.q,\ + cbo_query65.q,\ + cbo_query66.q,\ + cbo_query67.q,\ + cbo_query68.q,\ + cbo_query69.q,\ + cbo_query7.q,\ + cbo_query70.q,\ + cbo_query71.q,\ + cbo_query72.q,\ + cbo_query73.q,\ + cbo_query74.q,\ + cbo_query75.q,\ + cbo_query76.q,\ + cbo_query77.q,\ + cbo_query78.q,\ + cbo_query79.q,\ + cbo_query8.q,\ + cbo_query80.q,\ + cbo_query81.q,\ + cbo_query82.q,\ + cbo_query83.q,\ + cbo_query84.q,\ + cbo_query85.q,\ + cbo_query86.q,\ + cbo_query87.q,\ + cbo_query88.q,\ + cbo_query89.q,\ + cbo_query9.q,\ + cbo_query90.q,\ + cbo_query91.q,\ + cbo_query92.q,\ + cbo_query93.q,\ + cbo_query94.q,\ + cbo_query95.q,\ + cbo_query96.q,\ + cbo_query97.q,\ + cbo_query98.q,\ + cbo_query99.q druid.query.files=druidmini_test1.q,\ druidmini_test_ts.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java -- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 5e1e88e..afff0df 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -280,7 +280,7 @@ public class CliConfigs { } public static class TezPerfCliConfig extends AbstractCliConfig { -public TezPerfCliConfig() { +public TezPerfCliConfig(boolean useConstraints) { super(CorePerfCliDriver.class); try { setQueryDir("ql/src/test/queries/clientpositive/perf"); @@ -290,10 +290,21 @@ public class CliConfigs { excludesFrom(testConfigProps, "encrypted.query.files"); excludesFrom(testConfigProps, "erasurecoding.only.query.files"); -setResultsDir("ql/src/test/results/clientpositive/perf/tez"); +excludeQuery("cbo_query44.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query45.q"); // TODO: Enable when we move to Calcite 1.18 
+excludeQuery("cbo_query67.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query70.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query86.q"); // TODO: Enable when we move to Calcite 1.18 + setLogDir("itests/qtest/target/qfile-results/clientpositive/tez"); -setInitScript("q_perf_test_init.sql"); +if (useConstraints) { + setInitScript("q_perf_test_init_constraints.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez/constraints"); +} else { + setInitScript("q_perf_test_init.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez"); +} setCleanupScript("q_perf_test_cleanup.sql"); setHiveConfDir("data/conf/perf-reg/tez"); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/Context.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
[31/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out new file mode 100644 index 000..ea098f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss +union all +select * from cs +union all +select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss +union all +select * from cs +union all +select * from 
ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output:
[57/75] [abbrv] hive git commit: HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran)
HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7765e90a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7765e90a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7765e90a

Branch: refs/heads/master-tez092
Commit: 7765e90aad44747860b3c1adbe8a4857d864912d
Parents: cbe3228
Author: Jaume Marhuenda
Authored: Mon Oct 22 14:18:20 2018 -0700
Committer: Prasanth Jayachandran
Committed: Mon Oct 22 14:18:49 2018 -0700

--
 .../streaming/AbstractStreamingTransaction.java |  6 ++-
 .../hive/streaming/HiveStreamingConnection.java | 13 +--
 .../hive/streaming/StreamingConnection.java     | 23 ---
 .../hive/streaming/StreamingTransaction.java    | 14 ++-
 .../apache/hive/streaming/TransactionBatch.java | 26 +++--
 .../streaming/UnManagedSingleTransaction.java   |  3 +-
 .../apache/hive/streaming/TestStreaming.java    | 41 +++-
 7 files changed, 109 insertions(+), 17 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
index a99fdba..6ab3ffe 100644
--- a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
+++ b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
 import java.io.InputStream;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 /**
@@ -151,6 +152,9 @@ abstract class AbstractStreamingTransaction
   }
   public void commit() throws StreamingException {
-    commitWithPartitions(null);
+    commit(null);
+  }
+  public void commit(Set partitions) throws StreamingException {
+    commit(partitions, null, null);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
index f79b844..74fc531 100644
--- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
+++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
@@ -146,6 +146,7 @@ public class HiveStreamingConnection implements StreamingConnection {
   private boolean manageTransactions;
   private int countTransactions = 0;
   private Set partitions;
+  private Long tableId;
   private HiveStreamingConnection(Builder builder) throws StreamingException {
     this.database = builder.database.toLowerCase();
@@ -574,12 +575,18 @@ public class HiveStreamingConnection implements StreamingConnection {
   @Override
   public void commitTransaction() throws StreamingException {
-    commitTransactionWithPartition(null);
+    commitTransaction(null);
   }
   @Override
-  public void commitTransactionWithPartition(Set partitions)
+  public void commitTransaction(Set partitions)
       throws StreamingException {
+    commitTransaction(partitions, null, null);
+  }
+
+  @Override
+  public void commitTransaction(Set partitions, String key,
+      String value) throws StreamingException {
     checkState();
     Set createdPartitions = new HashSet<>();
@@ -598,7 +605,7 @@ public class HiveStreamingConnection implements StreamingConnection {
       connectionStats.incrementTotalPartitions(partitions.size());
     }
-    currentTransactionBatch.commitWithPartitions(createdPartitions);
+    currentTransactionBatch.commit(createdPartitions, key, value);
     this.partitions.addAll(
         currentTransactionBatch.getPartitions());
     connectionStats.incrementCreatedPartitions(createdPartitions.size());

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
index 92016e5..ba4c6a5 100644
--- a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
+++ b/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
@@ -66,13 +66,26 @@ public interface
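For context, a usage sketch of the new commit surface: a client writes records and then commits the transaction together with a key/value pair that is stored atomically with it. The table name, delimiter, and key/value below are invented for illustration and error handling is omitted; the builder calls are the existing streaming v2 API, and the three-argument commitTransaction is the overload this change adds.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hive.streaming.HiveStreamingConnection;
    import org.apache.hive.streaming.StrictDelimitedInputWriter;

    // Assumes an unpartitioned ACID table default.alerts with matching columns.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("default")
        .withTable("alerts")
        .withAgentInfo("example-agent")
        .withRecordWriter(writer)
        .withHiveConf(new HiveConf())
        .connect();
    connection.beginTransaction();
    connection.write("1,hello".getBytes());
    // The key/value pair is committed atomically with the transaction;
    // a null partition set is fine for an unpartitioned table.
    connection.commitTransaction(null, "replication-checkpoint", "offset-42");
    connection.close();

The atomicity is the point: a consumer such as a replication offset tracker can never observe the committed rows without the matching key/value, or the key/value without the rows.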
[73/75] [abbrv] hive git commit: HIVE-20638 : Upgrade version of Jetty to 9.3.25.v20180904 (Laszlo Bodor via Thejas Nair)
HIVE-20638 : Upgrade version of Jetty to 9.3.25.v20180904 (Laszlo Bodor via Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1002e89b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1002e89b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1002e89b Branch: refs/heads/master-tez092 Commit: 1002e89b6501afca7f886323e96f2f37b4b9ac60 Parents: 0d70154 Author: Thejas M Nair Authored: Fri Oct 26 09:37:34 2018 -0700 Committer: Thejas M Nair Committed: Fri Oct 26 09:37:34 2018 -0700 -- hbase-handler/pom.xml| 12 +++- hcatalog/webhcat/svr/pom.xml | 10 +- pom.xml | 8 +++- serde/pom.xml| 20 4 files changed, 43 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/hbase-handler/pom.xml -- diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml index 06939a4..58666f8 100644 --- a/hbase-handler/pom.xml +++ b/hbase-handler/pom.xml @@ -51,7 +51,7 @@ ${hadoop.version} true - + org.slf4j slf4j-log4j12 @@ -59,6 +59,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -130,6 +134,12 @@ ${hadoop.version} tests test + + + org.eclipse.jetty + jetty-util + + org.apache.hbase http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/hcatalog/webhcat/svr/pom.xml -- diff --git a/hcatalog/webhcat/svr/pom.xml b/hcatalog/webhcat/svr/pom.xml index d19e99a..4dfade5 100644 --- a/hcatalog/webhcat/svr/pom.xml +++ b/hcatalog/webhcat/svr/pom.xml @@ -164,13 +164,13 @@ ${hadoop.version} - org.mortbay.jetty + org.eclipse.jetty jetty - org.mortbay.jetty + org.eclipse.jetty jetty-util - + @@ -179,11 +179,11 @@ ${hadoop.version} - org.mortbay.jetty + org.eclipse.jetty jetty - org.mortbay.jetty + org.eclipse.jetty jetty-util http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/pom.xml -- diff --git a/pom.xml b/pom.xml index 716db28..842a143 100644 --- a/pom.xml +++ b/pom.xml @@ -171,7 +171,7 @@ 5.5.1 3.0.1 1.1 -9.3.20.v20170531 +9.3.25.v20180904 1.19 2.22.2 @@ -1003,6 +1003,12 @@ org.mockito mockito-all test + + + org.eclipse.jetty + jetty-util + + http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/serde/pom.xml -- diff --git a/serde/pom.xml b/serde/pom.xml index 8c37414..3756582 100644 --- a/serde/pom.xml +++ b/serde/pom.xml @@ -114,6 +114,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -163,6 +167,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -176,6 +184,12 @@ hadoop-hdfs ${hadoop.version} test + + + org.eclipse.jetty + jetty-util + + org.apache.hadoop @@ -183,6 +197,12 @@ ${hadoop.version} tests test + + + org.eclipse.jetty + jetty-util + +
[15/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out new file mode 100644 index 000..9862559 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales +,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales +,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk +,d_date +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales +,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales +,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative 
> store_cumulative +order by item_sk +,d_date +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 6 vectorized + File Output Operator [FS_117] +Limit [LIM_116] (rows=100 width=636) +
[67/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out index ff4c05f..b7f9778 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out @@ -99,56 +99,56 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_179] -Limit [LIM_178] (rows=100 width=1165) + File Output Operator [FS_177] +Limit [LIM_176] (rows=100 width=1165) Number of rows:100 - Select Operator [SEL_177] (rows=10969055 width=1165) + Select Operator [SEL_175] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 5 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_176] - Select Operator [SEL_175] (rows=10969055 width=1165) +SHUFFLE [RS_174] + Select Operator [SEL_173] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] -Group By Operator [GBY_174] (rows=10969055 width=1229) +Group By Operator [GBY_172] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_39] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)","sum(_col18)","count(_col18)","sum(_col19)","count(_col19)","sum(_col3)","count(_col3)","sum(_col22)","count(_col22)"],keys:_col5, _col6, _col7, _col10, 0L - Merge Join Operator [MERGEJOIN_142] (rows=2193811 width=811) + Merge Join Operator [MERGEJOIN_140] (rows=2193811 width=811) Conds:RS_35._col0=RS_36._col3(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col15","_col16","_col17","_col18","_col19","_col22"] <-Reducer 3 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_35] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_138] (rows=4959744 width=368) - Conds:RS_32._col1=RS_150._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_136] (rows=4959744 width=368) + Conds:RS_32._col1=RS_148._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_150] +SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_149] (rows=1861800 width=4) + Select Operator [SEL_147] (rows=1861800 width=4) Output:["_col0"] TableScan [TS_6] (rows=1861800 width=4) default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_137] (rows=4890586 width=371) - 
Conds:RS_145._col2=RS_148._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_135] (rows=4890586 width=371) + Conds:RS_143._col2=RS_146._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_145] +
[37/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out new file mode 100644 index 000..60b7557 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: explain cbo +select ss_customer_sk +,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price +else (ss_quantity*ss_sales_price) end act_sales +from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) +,reason +where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk +,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price +else (ss_quantity*ss_sales_price) end act_sales +from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) +,reason +where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) +HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) +HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) +HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) +HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) +HiveProject(r_reason_sk=[$0], r_reason_desc=[CAST(_UTF-16LE'Did not like the warranty'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) +HiveTableScan(table=[[default, reason]], table:alias=[reason]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out new file mode 100644 index 
000..4f97a67 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where +d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name =
[25/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out new file mode 100644 index 000..e8a6eaa --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -0,0 +1,1400 @@ +Warning: Shuffle Join MERGEJOIN[1431][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1433][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1456][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1435][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1469][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +PREHOOK: query: explain +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having 
sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK:
[38/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out new file mode 100644 index 000..50474bc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( +( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 +) + or +( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 +) + or +( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 +) + ) + and + ( +( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 +) +or +( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 +) +or +( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 +) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) +,avg(ws_quantity) +,avg(wr_refunded_cash) +,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( +( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 +) + or +( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + 
cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 +) + or +( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 +) + ) + and + ( +( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 +) +or +( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 +) +or +( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 +) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) +
[54/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b4302bb7/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java index 3fe8b58..314ca48 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java @@ -17,32 +17,19 @@ */ package org.apache.hadoop.hive.ql.parse; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse; -import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; -import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; -import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; -import org.apache.hadoop.hive.metastore.txn.TxnStore; -import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection; + import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; + import org.junit.rules.TestName; -import org.junit.rules.TestRule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -50,13 +37,11 @@ import org.junit.BeforeClass; import org.junit.AfterClass; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; -import javax.annotation.Nullable; -import java.util.Collections; +import java.util.Map; + import com.google.common.collect.Lists; -import org.junit.Ignore; /** * TestReplicationScenariosAcidTables - test replication for ACID tables @@ -65,11 +50,9 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { @Rule public final TestName testName = new TestName(); - @Rule - public TestRule replV1BackwardCompat; - protected static final Logger LOG = LoggerFactory.getLogger(TestReplicationScenariosIncrementalLoadAcidTables.class); - private static WarehouseInstance primary, replica, replicaNonAcid; + static WarehouseInstance primary; + private static WarehouseInstance replica, replicaNonAcid; private static HiveConf conf; private String primaryDbName, replicatedDbName, primaryDbNameExtra; private enum OperationType { @@ -80,12 +63,21 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { @BeforeClass public static void classLevelSetup() throws Exception { -conf = new 
HiveConf(TestReplicationScenariosAcidTables.class); +HashMap overrides = new HashMap<>(); +overrides.put(MetastoreConf.ConfVars.EVENT_MESSAGE_FACTORY.getHiveName(), +GzipJSONMessageEncoder.class.getCanonicalName()); + +internalBeforeClassSetup(overrides, TestReplicationScenariosAcidTables.class); + } + + static void internalBeforeClassSetup(Map overrides, Class clazz) + throws Exception { +conf = new HiveConf(clazz); conf.set("dfs.client.use.datanode.hostname", "true"); conf.set("hadoop.proxyuser." + Utils.getUGI().getShortUserName() + ".hosts", "*"); MiniDFSCluster miniDFSCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); -HashMap overridesForHiveConf = new HashMap() {{ +HashMap acidConfs = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); put("hive.support.concurrency", "true"); put("hive.txn.manager", "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager"); @@ -97,9 +89,11 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { put("mapred.input.dir.recursive", "true"); put("hive.metastore.disallow.incompatible.col.type.changes", "false"); }}; -primary = new WarehouseInstance(LOG, miniDFSCluster,
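The refactoring above threads a config-override map through the test setup so subclasses can choose the notification-message encoder. The interesting override is the metastore event message factory; below is a sketch of just that piece, where the config key and encoder class are taken from the diff and everything else is simplified:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder;

    // Route DBNotification messages through the gzip JSON encoder so large
    // DDL events produce compact entries in the notification log.
    Map<String, String> overrides = new HashMap<>();
    overrides.put(MetastoreConf.ConfVars.EVENT_MESSAGE_FACTORY.getHiveName(),
        GzipJSONMessageEncoder.class.getCanonicalName());
    // The map is then passed to internalBeforeClassSetup(overrides, clazz),
    // which applies it when constructing the warehouse instances.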
[10/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out new file mode 100644 index 000..f82272c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out @@ -0,0 +1,702 @@ +PREHOOK: query: explain +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country +,ship_carriers +,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( +(select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * 
ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net +
[02/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out new file mode 100644 index 000..08079cd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out @@ -0,0 +1,946 @@ +Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and 
((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk =
[66/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out index c82c415..6d7c620 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out @@ -194,57 +194,57 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_372] -Limit [LIM_371] (rows=59 width=115) + File Output Operator [FS_368] +Limit [LIM_367] (rows=59 width=115) Number of rows:100 - Select Operator [SEL_370] (rows=59 width=115) + Select Operator [SEL_366] (rows=59 width=115) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_369] - Group By Operator [GBY_368] (rows=59 width=115) +SHUFFLE [RS_365] + Group By Operator [GBY_364] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_392] + Reduce Output Operator [RS_388] PartitionCols:_col0 -Group By Operator [GBY_391] (rows=59 width=115) +Group By Operator [GBY_387] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_390] (rows=19 width=115) + Group By Operator [GBY_386] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 Group By Operator [GBY_108] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 -Merge Join Operator [MERGEJOIN_308] (rows=11364 width=3) +Merge Join Operator [MERGEJOIN_304] (rows=11364 width=3) Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col0 -Merge Join Operator [MERGEJOIN_297] (rows=461514 width=7) - Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] +Merge Join Operator [MERGEJOIN_293] (rows=461514 width=7) + Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + SHUFFLE [RS_319] PartitionCols:_col1 -Select Operator [SEL_322] (rows=460848 width=7) +Select Operator [SEL_318] (rows=460848 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=460848 width=7) + Filter Operator [FIL_317] (rows=460848 width=7) predicate:i_manufact_id is not null TableScan [TS_0] (rows=462000 width=7) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_329] + FORWARD [RS_325] PartitionCols:_col0 -Group By Operator [GBY_328] (rows=692 width=3) +Group By Operator [GBY_324] (rows=692 width=3) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_323] PartitionCols:_col0 -Group By Operator [GBY_326] (rows=692 width=3) +Group By Operator [GBY_322] (rows=692 width=3)
[04/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out new file mode 100644 index 000..bcfe19e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk +,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk +,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Reducer 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 15 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 4 vectorized + File Output Operator [FS_210] +Select Operator [SEL_209] (rows=100 width=1506) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Limit [LIM_208] (rows=100 width=1420) +Number of rows:100 +Select Operator [SEL_207] (rows=1577696 width=1418) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] +<-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_63] +Select Operator [SEL_62] (rows=1577696