hive git commit: HIVE-20787 : MapJoinBytesTableContainer dummyRow case doesn't handle reuse (Sergey Shelukhin, reviewed by Jason Dere)
Repository: hive
Updated Branches:
  refs/heads/master 148e7acba -> ccbc5c383

HIVE-20787 : MapJoinBytesTableContainer dummyRow case doesn't handle reuse (Sergey Shelukhin, reviewed by Jason Dere)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccbc5c38
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccbc5c38
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccbc5c38

Branch: refs/heads/master
Commit: ccbc5c383b13f81855d58e8b1d2dc168a7f2893e
Parents: 148e7ac
Author: sergey
Authored: Tue Nov 13 13:38:40 2018 -0800
Committer: sergey
Committed: Tue Nov 13 13:38:40 2018 -0800

----------------------------------------------------------------------
 .../persistence/MapJoinBytesTableContainer.java | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/ccbc5c38/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
index 0e4b8df..bf4250d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
@@ -708,6 +708,8 @@ public class MapJoinBytesTableContainer
      * This container does not normally support adding rows; this is for the dummy row.
      */
     private List<Object> dummyRow = null;
+    // TODO: the API here is not ideal, first/next + hasRows are redundant.
+    private boolean wasFirstCalledOnDummyRow = false;
     private final ByteArrayRef uselessIndirection; // LBStruct needs ByteArrayRef
     private final LazyBinaryStruct valueStruct;
@@ -747,6 +749,7 @@ public class MapJoinBytesTableContainer
       aliasFilter = hashMap.getValueResult(
           output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null);
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       if (hashMapResult.hasRows()) {
         return JoinUtil.JoinResult.MATCH;
       } else {
@@ -760,6 +763,7 @@ public class MapJoinBytesTableContainer
       aliasFilter = hashMap.getValueResult(
           output.getData(), 0, output.getLength(), hashMapResult, matchTracker);
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       if (hashMapResult.hasRows()) {
         return JoinUtil.JoinResult.MATCH;
       } else {
@@ -774,7 +778,7 @@ public class MapJoinBytesTableContainer

     @Override
     public boolean hasRows() {
-      return hashMapResult.hasRows() || (dummyRow != null);
+      return hashMapResult.hasRows() || (dummyRow != null && !wasFirstCalledOnDummyRow);
     }

     @Override
@@ -803,6 +807,7 @@ public class MapJoinBytesTableContainer
       // Doesn't clear underlying hashtable
       hashMapResult.forget();
       dummyRow = null;
+      wasFirstCalledOnDummyRow = false;
       aliasFilter = (byte) 0xff;
     }

@@ -819,12 +824,9 @@ public class MapJoinBytesTableContainer
     // Implementation of row iterator
     @Override
     public List<Object> first() throws HiveException {
-
-      // A little strange that we forget the dummy row on read.
       if (dummyRow != null) {
-        List<Object> result = dummyRow;
-        dummyRow = null;
-        return result;
+        wasFirstCalledOnDummyRow = true;
+        return dummyRow;
       }

       WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
@@ -838,6 +840,13 @@ public class MapJoinBytesTableContainer

     @Override
     public List<Object> next() throws HiveException {
+      if (dummyRow != null) {
+        // TODO: what should we do if first was never called? for now, assert for clarity
+        if (!wasFirstCalledOnDummyRow) {
+          throw new AssertionError("next called without first");
+        }
+        return null;
+      }

       WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next();
       if (byteSegmentRef == null) {
@@ -874,6 +883,7 @@ public class MapJoinBytesTableContainer
         throw new RuntimeException("Cannot add rows when not empty");
       }
       dummyRow = t;
+      wasFirstCalledOnDummyRow = false;
     }

     // Various unsupported methods.
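In short, first() used to consume the dummy row (nulling dummyRow on read), so a reused adaptor that walked the same result a second time saw nothing. The fixed contract, reduced to a standalone sketch, with a hypothetical SingleRowIterator standing in for Hive's ReusableRowContainer:

    import java.util.List;

    // Hypothetical stand-in for Hive's ReusableRowContainer; illustrates the
    // first()/next()/hasRows() contract the patch restores for the dummy row.
    class SingleRowIterator {
      private List<Object> dummyRow = null;
      private boolean wasFirstCalled = false;

      // Analogous to addRow(): (re)loading the row re-arms the iterator.
      void setRow(List<Object> row) {
        dummyRow = row;
        wasFirstCalled = false;
      }

      // The row counts as pending only until first() has read it; the old code
      // nulled dummyRow inside first(), so a reused iterator looked empty.
      boolean hasRows() {
        return dummyRow != null && !wasFirstCalled;
      }

      // Marks the row as read but keeps it, so iterating again still works.
      List<Object> first() {
        wasFirstCalled = true;
        return dummyRow;
      }

      // A single-row container never has a second row; calling next() before
      // first() is a caller bug, asserted here as in the patch.
      List<Object> next() {
        if (dummyRow != null && !wasFirstCalled) {
          throw new AssertionError("next called without first");
        }
        return null;
      }
    }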
[57/59] [abbrv] hive git commit: HIVE-20676 : HiveServer2: PrivilegeSynchronizer is not set to daemon status (Vaibhav Gumashta via Thejas Nair)
HIVE-20676 : HiveServer2: PrivilegeSynchronizer is not set to daemon status (Vaibhav Gumashta via Thejas Nair)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/52f94b8f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/52f94b8f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/52f94b8f

Branch: refs/heads/master-tez092
Commit: 52f94b8f1dd99950f9323a6c2a5e3a694db46269
Parents: 1ceb4eb
Author: Vaibhav Gumashta
Authored: Tue Nov 13 11:34:10 2018 -0700
Committer: Thejas M Nair
Committed: Tue Nov 13 11:34:10 2018 -0700

----------------------------------------------------------------------
 service/src/java/org/apache/hive/service/server/HiveServer2.java | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/52f94b8f/service/src/java/org/apache/hive/service/server/HiveServer2.java
----------------------------------------------------------------------
diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java
index 4335574..9376e87 100644
--- a/service/src/java/org/apache/hive/service/server/HiveServer2.java
+++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java
@@ -1017,6 +1017,7 @@ public class HiveServer2 extends CompositeService {
         LOG.info("Find " + policyContainer.size() + " policy to synchronize, start PrivilegeSynchronizer");
         Thread privilegeSynchronizerThread = new Thread(
             new PrivilegeSynchronizer(privilegeSynchronizerLatch, policyContainer, hiveConf), "PrivilegeSynchronizer");
+        privilegeSynchronizerThread.setDaemon(true);
         privilegeSynchronizerThread.start();
       } else {
         LOG.warn(
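The fix itself is the single setDaemon(true) call. The failure mode it closes: the JVM only exits once every non-daemon thread has finished, so a forgotten background synchronizer thread can keep a stopped server process alive. A minimal sketch of the rule (hypothetical demo class, not HiveServer2 code):

    // Hypothetical demo: a long-lived background thread must be a daemon,
    // or it pins the JVM after the main work is done.
    public class DaemonExitDemo {
      public static void main(String[] args) {
        Thread worker = new Thread(() -> {
          while (!Thread.currentThread().isInterrupted()) {
            try {
              Thread.sleep(1000L);   // periodic background work, like PrivilegeSynchronizer
            } catch (InterruptedException ie) {
              return;
            }
          }
        }, "background-worker");
        worker.setDaemon(true);      // must be set before start(); mirrors the HIVE-20676 change
        worker.start();
        // main() returns here: with the daemon flag the JVM exits immediately;
        // without it the process would hang until the worker is killed.
      }
    }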
[46/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join_filters_overlap.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_filters_overlap.q.out b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
index 7b5c9f2..847b45d 100644
--- a/ql/src/test/results/clientpositive/join_filters_overlap.q.out
+++ b/ql/src/test/results/clientpositive/join_filters_overlap.q.out
@@ -18,15 +18,6 @@ POSTHOOK: query: explain extended select * from a_n4 left outer join a_n4 b on (
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@a_n4
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
-FROM `default`.`a_n4`) AS `t`
-LEFT JOIN (SELECT `key`, CAST(50 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 50) AS `t1` ON `t`.`key` = `t1`.`key` AND `t`.`value` = 50
-LEFT JOIN (SELECT `key`, CAST(60 AS INTEGER) AS `value`
-FROM `default`.`a_n4`
-WHERE `value` = 60) AS `t3` ON `t`.`key` = `t3`.`key` AND `t`.`value` = 60
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -40,8 +31,8 @@ STAGE PLANS:
             Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Select Operator
-              expressions: key (type: int), value (type: int)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: int), value (type: int), (value = 60) (type: boolean), (value = 50) (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -50,7 +41,7 @@ STAGE PLANS:
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
-                value expressions: _col1 (type: int)
+                value expressions: _col1 (type: int), _col2 (type: boolean), _col3 (type: boolean)
                 auto parallelism: false
           TableScan
             alias: b
@@ -158,37 +149,41 @@ STAGE PLANS:
          filter mappings:
            0 [1, 1, 2, 1]
          filter predicates:
-           0 {(VALUE._col0 = 50)} {(VALUE._col0 = 60)}
+           0 {VALUE._col2} {VALUE._col1}
            1 
            2 
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
            2 _col0 (type: int)
-         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+         outputColumnNames: _col0, _col1, _col4, _col5, _col6, _col7
          Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-         File Output Operator
-           compressed: false
-           GlobalTableId: 0
-#### A masked pattern was here ####
-           NumFilesPerFileSink: 1
+         Select Operator
+           expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int)
+           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
            Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-#### A masked pattern was here ####
-           table:
-               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-               properties:
-                 columns _col0,_col1,_col2,_col3,_col4,_col5
-                 columns.types int:int:int:int:int:int
-                 escape.delim \
-                 hive.serialization.extend.additional.nesting.levels true
-                 serialization.escape.crlf true
-                 serialization.format 1
-                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-           TotalFiles: 1
-           GatherStats: false
-           MultiFileSpray: false
+           File Output Operator
+             compressed: false
+             GlobalTableId: 0
+#### A masked pattern was here ####
+             NumFilesPerFileSink: 1
+             Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+             table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2,_col3,_col4,_col5
+                   columns.types int:int:int:int:int:int
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels
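All of the golden-file churn in these partial commits comes from one planner rewrite: an expression the join needs (here value = 50 and value = 60) is projected as a boolean column on the table-scan side, and the join's filter predicate then reads the precomputed column ({VALUE._col2}) instead of re-evaluating the comparison per joined row. A standalone sketch of that evaluate-once idea in plain Java (hypothetical Row type, not Hive's operator classes):

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical illustration of the HIVE-20850 rewrite: push the filter
    // expression into the projection so the join only reads a boolean flag.
    class PushdownSketch {
      static final class Row {
        final int key;
        final int value;
        final boolean valueIs50;   // extra projected column, like _col3 in the plan
        Row(int key, int value) {
          this.key = key;
          this.value = value;
          this.valueIs50 = (value == 50);   // evaluated once, at "scan" time
        }
      }

      // The join-side filter is now a column read, not an expression evaluation.
      static List<Row> filterForJoin(List<Row> probeSide) {
        List<Row> out = new ArrayList<>();
        for (Row r : probeSide) {
          if (r.valueIs50) {       // was: (r.value == 50) re-evaluated per probe
            out.add(r);
          }
        }
        return out;
      }
    }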
[07/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query75.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
index 9968ade..f4bd046 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query75.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query75.q.out
@@ -244,7 +244,7 @@ Stage-0
                 Select Operator [SEL_539] (rows=170474971 width=131)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                   Merge Join Operator [MERGEJOIN_538] (rows=170474971 width=234)
-                    Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+                    Conds:RS_103._col1, _col2=RS_625._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
                   <-Map 44 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_625]
                       PartitionCols:_col0, _col1
@@ -258,12 +258,12 @@ Stage-0
                     SHUFFLE [RS_103]
                       PartitionCols:_col1, _col2
                       Merge Join Operator [MERGEJOIN_510] (rows=96821196 width=138)
-                        Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col8","_col9","_col10","_col12"]
+                        Conds:RS_100._col1=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col10"]
                       <-Map 37 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_599]
                           PartitionCols:_col0
-                          Select Operator [SEL_592] (rows=45745 width=109)
-                            Output:["_col0","_col1","_col2","_col3","_col5"]
+                          Select Operator [SEL_592] (rows=45745 width=19)
+                            Output:["_col0","_col1","_col2","_col3","_col4"]
                             Filter Operator [FIL_591] (rows=45745 width=109)
                               predicate:((i_category = 'Sports') and i_brand_id is not null and i_category_id is not null and i_class_id is not null and i_item_sk is not null and i_manufact_id is not null)
                               TableScan [TS_6] (rows=462000 width=109)
@@ -276,7 +276,7 @@ Stage-0
                       <-Map 11 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_571]
                           PartitionCols:_col0
-                          Select Operator [SEL_562] (rows=652 width=8)
+                          Select Operator [SEL_562] (rows=652 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_558] (rows=652 width=8)
                               predicate:((d_year = 2002) and d_date_sk is not null)
@@ -321,7 +321,7 @@ Stage-0
                 Select Operator [SEL_548] (rows=450703984 width=131)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
                   Merge Join Operator [MERGEJOIN_547] (rows=450703984 width=204)
-                    Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col8","_col9","_col10","_col12","_col15","_col16"]
+                    Conds:RS_125._col1, _col2=RS_649._col0, _col1(Left Outer),Output:["_col3","_col4","_col7","_col8","_col9","_col10","_col13","_col14"]
                   <-Map 46 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_649]
                       PartitionCols:_col0, _col1
@@ -335,7 +335,7 @@ Stage-0
                     SHUFFLE [RS_125]
                       PartitionCols:_col1, _col2
[48/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join45.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join45.q.out b/ql/src/test/results/clientpositive/join45.q.out
index 6cf6c33..7865e0e 100644
--- a/ql/src/test/results/clientpositive/join45.q.out
+++ b/ql/src/test/results/clientpositive/join45.q.out
@@ -363,24 +363,24 @@ STAGE PLANS:
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+                value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+                value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean)
      Reduce Operator Tree:
        Join Operator
          condition map:
@@ -388,21 +388,25 @@ STAGE PLANS:
          keys:
            0 
            1 
-         outputColumnNames: _col0, _col1, _col2, _col3
+         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
          Filter Operator
-           predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D) (type: boolean)
-           Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
-           Limit
-             Number of rows: 10
-             Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
-             File Output Operator
-               compressed: false
+           predicate: ((_col0 = _col3) or _col2 or _col5) (type: boolean)
+           Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+           Select Operator
+             expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string)
+             outputColumnNames: _col0, _col1, _col2, _col3
+             Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
+             Limit
+               Number of rows: 10
                Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
-               table:
-                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -474,24 +478,24 @@ STAGE PLANS:
             alias: src1
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(key)
[13/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query27.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
index 59cca4f..d7fd2ed 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out
@@ -94,7 +94,7 @@ Stage-0
             Select Operator [SEL_27] (rows=1427275 width=186)
               Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
               Merge Join Operator [MERGEJOIN_100] (rows=1427275 width=186)
-                Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col15","_col17"]
+                Conds:RS_24._col1=RS_130._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"]
              <-Map 14 [SIMPLE_EDGE] vectorized
                 SHUFFLE [RS_130]
                   PartitionCols:_col0
@@ -108,7 +108,7 @@ Stage-0
                 SHUFFLE [RS_24]
                   PartitionCols:_col1
                   Merge Join Operator [MERGEJOIN_99] (rows=1427275 width=90)
-                    Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col15"]
+                    Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"]
                  <-Map 12 [SIMPLE_EDGE] vectorized
                     SHUFFLE [RS_119]
                       PartitionCols:_col0
@@ -126,7 +126,7 @@ Stage-0
                      <-Map 10 [SIMPLE_EDGE] vectorized
                         SHUFFLE [RS_111]
                           PartitionCols:_col0
-                          Select Operator [SEL_110] (rows=652 width=8)
+                          Select Operator [SEL_110] (rows=652 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_109] (rows=652 width=8)
                               predicate:((d_year = 2001) and d_date_sk is not null)
@@ -140,7 +140,7 @@ Stage-0
                      <-Map 8 [SIMPLE_EDGE] vectorized
                         PARTITION_ONLY_SHUFFLE [RS_103]
                           PartitionCols:_col0
-                          Select Operator [SEL_102] (rows=14776 width=269)
+                          Select Operator [SEL_102] (rows=14776 width=4)
                             Output:["_col0"]
                             Filter Operator [FIL_101] (rows=14776 width=268)
                               predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null)

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query29.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
index a21c3c7..19f121e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out
@@ -144,20 +144,20 @@ Stage-0
                 SHUFFLE [RS_49]
                   PartitionCols:_col0, _col1, _col2, _col3
                   Group By Operator [GBY_48] (rows=21091879 width=496)
-                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23
                     Top N Key Operator [TNK_93] (rows=4156223234 width=483)
-                      keys:_col7, _col8, _col27, _col28,sort order:,top n:100
+                      keys:_col6, _col7, _col22, _col23,sort order:,top n:100
                       Merge Join Operator [MERGEJOIN_205] (rows=4156223234 width=483)
-                        Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"]
+                        Conds:RS_44._col2, _col1=RS_45._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"]
                       <-Reducer 2 [SIMPLE_EDGE]
                         PARTITION_ONLY_SHUFFLE [RS_44]
-                          PartitionCols:_col1,
[03/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
index 98789d7..ddb436b 100644
--- a/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
+++ b/ql/src/test/results/clientpositive/smb_mapjoin_46.q.out
@@ -187,25 +187,28 @@ STAGE PLANS:
             alias: test1_n5
             Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: int), col_1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
               Sorted Merge Bucket Map Join Operator
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {_col0 BETWEEN 100 AND 102}
+                  0 {_col3}
                   1 
                 keys:
                   0 _col1 (type: int)
                   1 _col1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -283,7 +286,7 @@ STAGE PLANS:
             Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
             HashTable Sink Operator
               filter predicates:
-                0 {_col0 BETWEEN 100 AND 102}
+                0 {_col3}
                 1 
               keys:
                 0 
@@ -296,27 +299,31 @@ STAGE PLANS:
             alias: test1_n5
             Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: int), col_1 (type: string)
-              outputColumnNames: _col0, _col1, _col2
+              expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+              outputColumnNames: _col0, _col1, _col2, _col3
               Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
              Map Join Operator
                condition map:
                     Left Outer Join 0 to 1
                filter predicates:
-                 0 {_col0 BETWEEN 100 AND 102}
+                 0 {_col3}
                  1 
                keys:
                  0 
                  1 
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
                Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
+               Select Operator
+                 expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
[21/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
index ace7cf5..b55f2c1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query23.q.out
@@ -1,7 +1,7 @@
-Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
-Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
-Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
-Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
+Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product
+Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product
+Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product
+Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product
 PREHOOK: query: explain cbo
 with frequent_ss_items as
  (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
@@ -128,16 +128,15 @@ HiveSortLimit(fetch=[100])
                 HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
                   HiveProject($f0=[$0])
                     HiveJoin(condition=[>($1, *(0.95, $3))], joinType=[inner], algorithm=[none], cost=[not available])
-                      HiveProject($f0=[$0], $f1=[$1])
-                        HiveAggregate(group=[{0}], agg#0=[sum($1)])
-                          HiveProject($f0=[$3], $f1=[*(CAST($1):DECIMAL(10, 0), $2)])
-                            HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
-                              HiveProject(ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13])
-                                HiveFilter(condition=[IS NOT NULL($3)])
-                                  HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
-                              HiveProject(c_customer_sk=[$0])
-                                HiveFilter(condition=[IS NOT NULL($0)])
-                                  HiveTableScan(table=[[default, customer]], table:alias=[customer])
+                      HiveProject(c_customer_sk=[$0], $f1=[$1])
+                        HiveAggregate(group=[{2}], agg#0=[sum($1)])
+                          HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+                            HiveProject(ss_customer_sk=[$3], *=[*(CAST($10):DECIMAL(10, 0), $13)])
+                              HiveFilter(condition=[IS NOT NULL($3)])
+                                HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+                            HiveProject(c_customer_sk=[$0])
+                              HiveFilter(condition=[IS NOT NULL($0)])
+                                HiveTableScan(table=[[default, customer]], table:alias=[customer])
                       HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
                         HiveProject(cnt=[$0])
                           HiveFilter(condition=[<=(sq_count_check($0), 1)])
@@ -146,75 +145,71 @@ HiveSortLimit(fetch=[100])
                             HiveProject
                               HiveProject($f0=[$0])
                                 HiveAggregate(group=[{}], agg#0=[count($0)])
-                                  HiveProject($f0=[$0], $f1=[$1])
-                                    HiveAggregate(group=[{0}], agg#0=[sum($1)])
-                                      HiveProject($f0=[$0], $f1=[*(CAST($3):DECIMAL(10, 0), $4)])
-                                        HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
-                                          HiveProject(c_customer_sk=[$0])
-                                            HiveFilter(condition=[IS NOT NULL($0)])
-                                              HiveTableScan(table=[[default, customer]], table:alias=[customer])
-                                          HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
-                                            HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_quantity=[$10], ss_sales_price=[$13])
-                                              HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))])
-                                                HiveTableScan(table=[[default,
[02/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 3165970..2fc9a3d 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -2662,7 +2662,7 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
+                      expressions: key (type: int), substr(value, 5) (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -2703,22 +2703,18 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col1 (type: int), substr(_col2, 5) (type: string)
+                        Group By Operator
+                          aggregations: sum(_col2)
+                          keys: _col1 (type: int)
+                          mode: hash
                           outputColumnNames: _col0, _col1
                           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: sum(_col1)
-                            keys: _col0 (type: int)
-                            mode: hash
-                            outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: int)
-                              sort order: +
-                              Map-reduce partition columns: _col0 (type: int)
-                              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col1 (type: double)
+                            value expressions: _col1 (type: double)
         Reducer 5 
            Execution mode: vectorized
            Reduce Operator Tree:
@@ -2827,7 +2823,7 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
+                      expressions: key (type: int), substr(value, 5) (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -2868,22 +2864,18 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         outputColumnNames: _col1, _col2
                         Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Select Operator
-                          expressions: _col1 (type: int), substr(_col2, 5) (type: string)
+                        Group By Operator
+                          aggregations: sum(_col2)
+                          keys: _col1 (type: int)
+                          mode: hash
                           outputColumnNames: _col0, _col1
                           Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: sum(_col1)
-                            keys: _col0 (type: int)
-                            mode: hash
-                            outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions: _col0 (type: int)
+                            sort order: +
+                            Map-reduce partition columns: _col0 (type: int)
                             Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              key expressions: _col0 (type: int)
-                              sort order: +
-                              Map-reduce partition columns: _col0 (type: int)
-                              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col1 (type:
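Here the pushed expression is a function rather than a predicate: substr(value, 5) moves from a post-join Select into the table scan's projection, which lets the Group By consume the join output directly, with one fewer operator on the hot path. A small sketch of that reordering (hypothetical method, not Hive code):

    import java.util.List;
    import java.util.stream.Collectors;

    // Hypothetical sketch of the operator reordering: the substring is applied
    // in the scan projection (map side), not in a Select after the join.
    // Old shape: scan -> join -> substr -> group-by.
    // New shape: scan+substr -> join -> group-by.
    class EarlyProjection {
      static List<String> scanProjection(List<String> rawValues) {
        return rawValues.stream()
            .map(v -> v.substring(4))   // substr(value, 5) done once at scan time
            .collect(Collectors.toList());
      }
    }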
[19/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
index 2504d78..b4410ff 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query58.q.out
@@ -141,10 +141,10 @@ POSTHOOK: Input: default@web_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
-  HiveProject(item_id=[$0], ss_item_rev=[$3], ss_dev=[*(/(/($3, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$5], ws_dev=[*(/(/($5, +(+($3, $1), $5)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($3, $1), $5), CAST(3):DECIMAL(10, 0))])
-    HiveJoin(condition=[AND(AND(AND(AND(=($0, $4), BETWEEN(false, $3, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $1, *(0.9, $5), *(1.1, $5))), BETWEEN(false, $5, *(0.9, $3), *(1.1, $3))), BETWEEN(false, $5, *(0.9, $1), *(1.1, $1)))], joinType=[inner], algorithm=[none], cost=[not available])
-      HiveJoin(condition=[AND(AND(=($2, $0), BETWEEN(false, $3, *(0.9, $1), *(1.1, $1))), BETWEEN(false, $1, *(0.9, $3), *(1.1, $3)))], joinType=[inner], algorithm=[none], cost=[not available])
-        HiveProject(i_item_id=[$0], $f1=[$1])
+  HiveProject(item_id=[$0], ss_item_rev=[$5], ss_dev=[*(/(/($5, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], cs_item_rev=[$1], cs_dev=[*(/(/($1, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], ws_item_rev=[$9], ws_dev=[*(/(/($9, +(+($5, $1), $9)), CAST(3):DECIMAL(10, 0)), CAST(100):DECIMAL(10, 0))], average=[/(+(+($5, $1), $9), CAST(3):DECIMAL(10, 0))])
+    HiveJoin(condition=[AND(AND(AND(AND(=($0, $8), BETWEEN(false, $5, $10, $11)), BETWEEN(false, $1, $10, $11)), BETWEEN(false, $9, $6, $7)), BETWEEN(false, $9, $2, $3))], joinType=[inner], algorithm=[none], cost=[not available])
+      HiveJoin(condition=[AND(AND(=($4, $0), BETWEEN(false, $5, $2, $3)), BETWEEN(false, $1, $6, $7))], joinType=[inner], algorithm=[none], cost=[not available])
+        HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
          HiveAggregate(group=[{4}], agg#0=[sum($2)])
            HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
              HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -175,7 +175,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
                 HiveProject(d_week_seq=[$4])
                   HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))])
                     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-        HiveProject(i_item_id=[$0], $f1=[$1])
+        HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
           HiveAggregate(group=[{4}], agg#0=[sum($2)])
             HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
               HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -206,7 +206,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
                 HiveProject(d_week_seq=[$4])
                   HiveFilter(condition=[AND(=($2, _UTF-16LE'1998-02-19'), IS NOT NULL($4))])
                     HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
-      HiveProject(i_item_id=[$0], $f1=[$1])
+      HiveProject($f0=[$0], $f1=[$1], *=[*(0.9, $1)], *3=[*(1.1, $1)])
        HiveAggregate(group=[{4}], agg#0=[sum($2)])
          HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
            HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])

http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
index bb92a1f..8674a8a 100644
--- a/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query59.q.out
@@ -94,24 +94,24 @@ POSTHOOK: Input: default@store_sales
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 CBO PLAN:
 HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100])
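In the CBO plans the rewrite also hoists constant arithmetic: the old join condition recomputed 0.9 * rev and 1.1 * rev inside every BETWEEN, while the new HiveProject emits them once as extra columns (the *=[*(0.9, $1)] and *3=[*(1.1, $1)] fields) and the join compares against those column references. Sketch of the same idea (hypothetical record, not Calcite/Hive classes):

    // Hypothetical sketch: materialize the BETWEEN bounds with the aggregate,
    // as HiveProject($f0, $f1, *(0.9,$1), *(1.1,$1)) does in the new plan.
    class PrecomputedBounds {
      static final class Revenue {
        final String itemId;
        final double rev;
        final double lo;   // 0.9 * rev, computed once per aggregated row
        final double hi;   // 1.1 * rev, computed once per aggregated row
        Revenue(String itemId, double rev) {
          this.itemId = itemId;
          this.rev = rev;
          this.lo = 0.9 * rev;
          this.hi = 1.1 * rev;
        }
      }

      // The join condition reads precomputed columns instead of multiplying per pair.
      static boolean within(Revenue a, Revenue b) {
        return a.rev >= b.lo && a.rev <= b.hi;
      }
    }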
[41/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/join46.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out
index fcd0d83..ec58429 100644
--- a/ql/src/test/results/clientpositive/llap/join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/join46.q.out
@@ -187,15 +187,15 @@ STAGE PLANS:
                   alias: test1_n2
                   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: int), value (type: int), col_1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col1 (type: int)
                       sort order: +
                       Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: int), _col2 (type: string)
+                      Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 3 
@@ -226,20 +226,24 @@ STAGE PLANS:
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {VALUE._col0 BETWEEN 100 AND 102}
+                  0 {VALUE._col2}
                   1 
                 keys:
                   0 _col1 (type: int)
                   1 _col1 (type: int)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6
                 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-0
     Fetch Operator
@@ -308,13 +312,13 @@ STAGE PLANS:
                   alias: test1_n2
                   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: int), value (type: int), col_1 (type: string)
-                    outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                      Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE
[04/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/pointlookup3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out
index a5fa5e8..a3056a5 100644
--- a/ql/src/test/results/clientpositive/pointlookup3.q.out
+++ b/ql/src/test/results/clientpositive/pointlookup3.q.out
@@ -391,15 +391,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@pcr_t1_n1
 POSTHOOK: Input: default@pcr_t1_n1@ds1=2000-04-08/ds2=2001-04-08
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT `t4`.`key`, `t4`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t4`.`ds2`, `t4`.`key1`, `t4`.`value1`, `t4`.`ds11`, CAST('2001-04-08' AS STRING) AS `ds21`
-FROM (SELECT `t0`.`key`, `t0`.`value`, `t0`.`ds2`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`, `t2`.`ds1` AS `ds11`
-FROM (SELECT `key`, `value`, CAST('2000-04-08' AS STRING) AS `ds1`, `ds2`
+OPTIMIZED SQL: SELECT `t3`.`key`, `t3`.`value`, CAST('2000-04-08' AS STRING) AS `ds1`, `t3`.`ds2`, `t3`.`key0` AS `key1`, `t3`.`value0` AS `value1`, `t3`.`ds1` AS `ds11`, CAST('2001-04-08' AS STRING) AS `ds21`
+FROM (SELECT *
+FROM (SELECT `key`, `value`, `ds2`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds1` = '2000-04-08' AND `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`, `ds1`, CAST('2001-04-08' AS STRING) AS `ds2`
+INNER JOIN (SELECT `key`, `value`, `ds1`
 FROM `default`.`pcr_t1_n1`
 WHERE `ds2` = '2001-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key`
-ORDER BY `t2`.`key`, `t2`.`value`) AS `t4`
+ORDER BY `t2`.`key`, `t2`.`value`) AS `t3`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
@@ -420,7 +420,7 @@ STAGE PLANS:
             Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: key (type: int), value (type: string), ds2 (type: string)
-              outputColumnNames: _col0, _col1, _col3
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -429,7 +429,7 @@ STAGE PLANS:
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
-                value expressions: _col1 (type: string), _col3 (type: string)
+                value expressions: _col1 (type: string), _col2 (type: string)
                 auto parallelism: false
           TableScan
             alias: t2
@@ -515,30 +515,26 @@ STAGE PLANS:
          keys:
            0 _col0 (type: int)
            1 _col0 (type: int)
-         outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
+         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
-         Select Operator
-           expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: string)
-           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-           Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
-           File Output Operator
-             compressed: false
-             GlobalTableId: 0
+         File Output Operator
+           compressed: false
+           GlobalTableId: 0
 #### A masked pattern was here ####
-             NumFilesPerFileSink: 1
-             table:
-                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                 properties:
-                   column.name.delimiter ,
-                   columns _col0,_col1,_col2,_col3,_col4,_col5
-                   columns.types int,string,string,int,string,string
-                   escape.delim \
-                   serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-             serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-             TotalFiles: 1
-             GatherStats: false
-             MultiFileSpray: false
+           NumFilesPerFileSink: 1
+           table:
+               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+               properties:
+                 column.name.delimiter ,
+                 columns _col0,_col1,_col2,_col3,_col4,_col5
+                 columns.types int,string,string,int,string,string
+                 escape.delim \
[36/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
index 37970ab..7e09d5e 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out
@@ -51,14 +51,14 @@ STAGE PLANS:
                     predicate: (csmallint < 100S) (type: boolean)
                     Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                      Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: UDFToInteger(_col1) (type: int)
+                        key expressions: _col12 (type: int)
                         sort order: +
-                        Map-reduce partition columns: UDFToInteger(_col1) (type: int)
-                        Statistics: Num rows: 4096 Data size: 1031250 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map-reduce partition columns: _col12 (type: int)
+                        Statistics: Num rows: 4096 Data size: 1043486 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -72,14 +72,14 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: UDFToInteger(key) (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
-                        key expressions: UDFToInteger(_col0) (type: int)
+                        key expressions: _col0 (type: int)
                         sort order: +
-                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 5 
@@ -92,14 +92,14 @@ STAGE PLANS:
                     predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: (UDFToInteger(key) + 0) (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats:
[49/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
index 1e195bc..b6d726e 100644
--- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out
@@ -542,13 +542,13 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@encrypted_table
 POSTHOOK: Input: default@src
 POSTHOOK: Output: hdfs://### HDFS PATH ###
-OPTIMIZED SQL: SELECT *
-FROM (SELECT `key`, `value`
+OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`key` AS `key1`, `t2`.`value` AS `value1`
+FROM (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`src`
 WHERE `key` IS NOT NULL) AS `t0`
-INNER JOIN (SELECT `key`, `value`
+INNER JOIN (SELECT `key`, `value`, CAST(`key` AS DOUBLE) AS `CAST`
 FROM `default`.`encrypted_table`
-WHERE `key` IS NOT NULL) AS `t2` ON CAST(`t0`.`key` AS DOUBLE) = CAST(`t2`.`key` AS DOUBLE)
+WHERE `key` IS NOT NULL) AS `t2` ON `t0`.`CAST` = `t2`.`CAST`
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -567,14 +567,14 @@ STAGE PLANS:
             predicate: key is not null (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: string), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: UDFToDouble(_col0) (type: double)
+                key expressions: _col2 (type: double)
                 null sort order: a
                 sort order: +
-                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                Map-reduce partition columns: _col2 (type: double)
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 tag: 0
                 value expressions: _col0 (type: string), _col1 (type: string)
@@ -589,14 +589,14 @@ STAGE PLANS:
             predicate: key is not null (type: boolean)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: key (type: int), value (type: string)
-              outputColumnNames: _col0, _col1
+              expressions: key (type: int), value (type: string), UDFToDouble(key) (type: double)
+              outputColumnNames: _col0, _col1, _col2
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: UDFToDouble(_col0) (type: double)
+                key expressions: _col2 (type: double)
                 null sort order: a
                 sort order: +
-                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                Map-reduce partition columns: _col2 (type: double)
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 tag: 1
                 value expressions: _col0 (type: int), _col1 (type: string)
@@ -714,32 +714,36 @@ STAGE PLANS:
          condition map:
               Inner Join 0 to 1
          keys:
-           0 UDFToDouble(_col0) (type: double)
-           1 UDFToDouble(_col0) (type: double)
-         outputColumnNames: _col0, _col1, _col2, _col3
+           0 _col2 (type: double)
+           1 _col2 (type: double)
+         outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-         File Output Operator
-           compressed: false
-           GlobalTableId: 0
-           directory: hdfs://### HDFS PATH ###
-           NumFilesPerFileSink: 1
+         Select Operator
+           expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: string)
+           outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-           Stats Publishing Key Prefix: hdfs://### HDFS PATH ###
-           table:
-               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-               output format:
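The encrypted-table plan shows the same rewrite applied to join keys: CAST(key AS DOUBLE) becomes a projected column (_col2) that both the shuffle and the join reference, so the cast runs once per row instead of once wherever the key expression appears. A small sketch of precomputing a derived join key (hypothetical types, not Hive operators):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Hypothetical sketch: derive the join key once per row up front,
    // like projecting UDFToDouble(key) as _col2 before the shuffle.
    class PrecomputedJoinKey {
      static Map<Double, String> buildSide(List<String> keys) {
        Map<Double, String> index = new HashMap<>();
        for (String k : keys) {
          index.put(Double.parseDouble(k), k);   // cast computed once here
        }
        return index;
      }

      static String probe(Map<Double, String> index, double precomputedKey) {
        return index.get(precomputedKey);        // no per-probe re-cast
      }
    }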
[10/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query56.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
index 18f64cc..17458f4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query56.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query56.q.out
@@ -204,9 +204,9 @@ Stage-0
                 SHUFFLE [RS_71]
                   PartitionCols:_col0
                   Group By Operator [GBY_70] (rows=355 width=212)
-                    Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
                     Merge Join Operator [MERGEJOIN_303] (rows=339151 width=100)
-                      Conds:RS_66._col0=RS_67._col4(Inner),Output:["_col1","_col8"]
+                      Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"]
                     <-Reducer 2 [SIMPLE_EDGE]
                       SHUFFLE [RS_66]
                         PartitionCols:_col0
@@ -239,15 +239,15 @@ Stage-0
                             default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_color"]
                     <-Reducer 23 [SIMPLE_EDGE]
                       SHUFFLE [RS_67]
-                        PartitionCols:_col4
+                        PartitionCols:_col3
                         Select Operator [SEL_62] (rows=1550375 width=13)
-                          Output:["_col4","_col5"]
+                          Output:["_col3","_col4"]
                           Merge Join Operator [MERGEJOIN_298] (rows=1550375 width=13)
                             Conds:RS_59._col1=RS_346._col0(Inner),Output:["_col2","_col3"]
                           <-Map 28 [SIMPLE_EDGE] vectorized
                             PARTITION_ONLY_SHUFFLE [RS_346]
                               PartitionCols:_col0
-                              Select Operator [SEL_343] (rows=800 width=116)
+                              Select Operator [SEL_343] (rows=800 width=4)
                                 Output:["_col0"]
                                 Filter Operator [FIL_342] (rows=800 width=112)
                                   predicate:((ca_gmt_offset = -8) and ca_address_sk is not null)
@@ -261,7 +261,7 @@ Stage-0
                           <-Map 20 [SIMPLE_EDGE] vectorized
                             PARTITION_ONLY_SHUFFLE [RS_330]
                               PartitionCols:_col0
-                              Select Operator [SEL_327] (rows=50 width=12)
+                              Select Operator [SEL_327] (rows=50 width=4)
                                 Output:["_col0"]
                                 Filter Operator [FIL_326] (rows=50 width=12)
                                   predicate:((d_moy = 1) and (d_year = 2000) and d_date_sk is not null)
@@ -320,18 +320,18 @@ Stage-0
                 SHUFFLE [RS_109]
                   PartitionCols:_col0
                   Group By Operator [GBY_108] (rows=355 width=212)
-                    Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col1
+                    Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1
                     Merge Join Operator [MERGEJOIN_304] (rows=172427 width=188)
-                      Conds:RS_104._col0=RS_105._col3(Inner),Output:["_col1","_col8"]
+                      Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"]
                     <-Reducer 2 [SIMPLE_EDGE]
                       SHUFFLE [RS_104]
                         PartitionCols:_col0
                         Please refer to the previous Merge Join Operator [MERGEJOIN_293]
                     <-Reducer 26 [SIMPLE_EDGE]
                       SHUFFLE [RS_105]
-                        PartitionCols:_col3
+                        PartitionCols:_col2
                         Select Operator [SEL_100] (rows=788222 width=110)
-                          Output:["_col3","_col5"]
+                          Output:["_col2","_col4"]
                           Merge Join Operator [MERGEJOIN_301] (rows=788222 width=110)
                             Conds:RS_97._col2=RS_348._col0(Inner),Output:["_col1","_col3"]
                           <-Map 28 [SIMPLE_EDGE] vectorized
@@ -400,18 +400,18 @@
[26/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index 07d14b5..354c178 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -304,7 +304,7 @@ STAGE PLANS: predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)) + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), CASE WHEN (ws_quantity is not null) THEN (ws_quantity) ELSE (0) END (type: int), CASE WHEN (ws_net_paid is not null) THEN (ws_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -312,7 +312,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(12,2)) Execution mode: vectorized Map 10 Map Operator Tree: @@ -343,7 +343,7 @@ STAGE PLANS: predicate: ((wr_return_amt > 1) and wr_item_sk is not null and wr_order_number is not null) (type: boolean) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wr_item_sk (type: int), wr_order_number (type: int), wr_return_quantity (type: int), wr_return_amt (type: decimal(7,2)) + expressions: wr_item_sk (type: int), wr_order_number (type: int), CASE WHEN (wr_return_quantity is not null) THEN (wr_return_quantity) ELSE (0) END (type: int), CASE WHEN (wr_return_amt is not null) THEN (wr_return_amt) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -351,7 +351,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 4799489 Data size: 441731394 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: int), _col3 (type: decimal(7,2)) +value expressions: _col2 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 12 Map Operator Tree: @@ -363,7 +363,7 @@ STAGE PLANS: predicate: ((cs_net_paid > 0) and (cs_net_profit > 1) and (cs_quantity > 0) and cs_item_sk is not null and cs_order_number is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_net_paid (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), 
CASE WHEN (cs_quantity is not null) THEN (cs_quantity) ELSE (0) END (type: int), CASE WHEN (cs_net_paid is not null) THEN (cs_net_paid) ELSE (0) END (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10666290 Data size: 129931 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -371,7 +371,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
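For reference, the shape of query this golden-file change reflects: the null guards that used to run above the join are now materialized in the scan-side Select Operators, widened to decimal(12,2) before the shuffle. A minimal sketch in the spirit of TPC-DS query49 (illustrative, not the exact test query):

  -- COALESCE(x, 0) is planned as CASE WHEN (x is not null) THEN (x) ELSE (0) END,
  -- which after HIVE-20850 is evaluated at the web_sales/web_returns scans
  -- so only the guarded value crosses the shuffle.
  SELECT ws.ws_item_sk,
         SUM(COALESCE(wr.wr_return_quantity, 0)) AS returned_qty,
         SUM(COALESCE(ws.ws_quantity, 0))        AS sold_qty
  FROM web_sales ws
  LEFT JOIN web_returns wr
    ON ws.ws_order_number = wr.wr_order_number
   AND ws.ws_item_sk = wr.wr_item_sk
  GROUP BY ws.ws_item_sk;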
[50/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/auto_join_stats.q.out -- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index 43a248b..42e165d 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -63,8 +63,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -82,8 +82,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -91,7 +91,7 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -119,13 +119,13 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) Stage: Stage-5 Map Reduce @@ -135,17 +135,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col2 +0 (_col1 + _col3) (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Local Work: Map Reduce Local Work @@ -166,8 +170,8 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -
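The auto_join_stats change above shows the same rewrite applied to join keys: UDFToDouble(key) is projected at each scan, so the second join compares the precomputed (_col1 + _col3) instead of casting after the first join. A hedged sketch of the test's join shape (smalltable and the column list are approximations):

  -- key is a string, so (a.key + b.key) implies double casts; those casts
  -- now happen once in the scan-side Select Operators.
  SELECT a.key, b.key, c.key
  FROM src a
  JOIN src b ON a.key = b.key
  JOIN smalltable c ON (a.key + b.key) = c.key;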
[37/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_select.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 6870ad1..0435530 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -32,14 +32,15 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_size (type: int) -outputColumnNames: _col0 -Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_size (type: int), p_size is null (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean) Select Operator expressions: p_size (type: int) outputColumnNames: p_size @@ -77,12 +78,12 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2 -Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3 +Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 27 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: boolean) + Statistics: Num rows: 27 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -92,10 +93,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col2, _col3, _col4 -Statistics: Num rows: 27 Data size: 548 Basic stats: COMPLETE Column stats: COMPLETE +outputColumnNames: _col0, _col1, _col3, _col4, _col5 +Statistics: Num rows: 27 Data size: 440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), CASE WHEN ((_col3 = 0L)) THEN (false) WHEN (_col2 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (null) ELSE (false) END (type: boolean) + expressions: _col0 (type: int), CASE WHEN (_col4) THEN (false) WHEN (_col3 is not null) THEN (true) WHEN (_col1) THEN (null) WHEN (_col5) THEN (null) ELSE (false) END (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -136,10 +137,14 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator -sort order: -Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE -value expressions: _col0 (type: bigint), _col1 (type: bigint) + Select Operator +expressions: (_col0 = 0L) (type: boolean), (_col1 < _col0) (type: boolean) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: boolean), _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -219,15 +224,15 @@ STAGE
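The subquery_select change follows the same pattern for IN subqueries: the null flag (p_size is null) and the count tests (_col0 = 0L) and (_col1 < _col0) are computed once as booleans before the shuffle, so the final CASE consults flags instead of repeating the comparisons per row. A sketch of the query shape, inferred from the plan above (part is the test's standard fixture):

  SELECT p_size,
         p_size IN (SELECT p_size FROM part) AS p_size_in_subq
  FROM part;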
[54/59] [abbrv] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)

Signed-off-by: Ashutosh Chauhan

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af401702
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af401702
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af401702
Branch: refs/heads/master-tez092
Commit: af401702847391ab41fcf2ef8216a94a1b7bfc76
Parents: bc39c49
Author: Miklos Gergely
Authored: Thu Oct 25 13:03:00 2018 -0700
Committer: Ashutosh Chauhan
Committed: Mon Nov 12 15:28:18 2018 -0800
--
 bin/ext/llapstatus.sh                           |   4 +-
 .../hadoop/hive/llap/cli/LlapSliderUtils.java   |  55 +-
 .../llap/cli/LlapStatusOptionsProcessor.java    | 278 ---
 .../hive/llap/cli/LlapStatusServiceDriver.java  | 811 ---
 .../hadoop/hive/llap/cli/status/AmInfo.java     |  93 +++
 .../hive/llap/cli/status/AppStatusBuilder.java  | 231 ++
 .../hadoop/hive/llap/cli/status/ExitCode.java   |  44 +
 .../hive/llap/cli/status/LlapInstance.java      | 134 +++
 .../llap/cli/status/LlapStatusCliException.java |  40 +
 .../hive/llap/cli/status/LlapStatusHelpers.java | 449 --
 .../status/LlapStatusServiceCommandLine.java    | 302 +++
 .../cli/status/LlapStatusServiceDriver.java     | 775 ++
 .../hadoop/hive/llap/cli/status/State.java      |  31 +
 .../hive/llap/cli/status/package-info.java      |  24 +
 .../llap/cli/TestLlapStatusServiceDriver.java   |  98 ---
 .../TestLlapStatusServiceCommandLine.java       |  91 +++
 .../hive/llap/cli/status/package-info.java      |  23 +
 .../java/org/apache/hive/http/LlapServlet.java  |   9 +-
 18 files changed, 1799 insertions(+), 1693 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/bin/ext/llapstatus.sh
--

diff --git a/bin/ext/llapstatus.sh b/bin/ext/llapstatus.sh
index 2d2c8f4..23e6be6 100644
--- a/bin/ext/llapstatus.sh
+++ b/bin/ext/llapstatus.sh
@@ -17,7 +17,7 @@ THISSERVICE=llapstatus
 export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "

 llapstatus () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
     echo "Missing Hive CLI Jar"
     exit 3;
@@ -36,7 +36,7 @@ llapstatus () {
 }

 llapstatus_help () {
-  CLASS=org.apache.hadoop.hive.llap.cli.LlapStatusServiceDriver;
+  CLASS=org.apache.hadoop.hive.llap.cli.status.LlapStatusServiceDriver;
   execHiveCmd $CLASS "--help"
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
--

diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
index af47b26..5ec9e1d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/LlapSliderUtils.java
@@ -24,69 +24,24 @@ import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ApplicationReport;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.service.api.records.Service;
 import org.apache.hadoop.yarn.service.client.ServiceClient;
 import org.apache.hadoop.yarn.service.utils.CoreFileSystem;
-import org.apache.hadoop.yarn.util.Clock;
-import org.apache.hadoop.yarn.util.SystemClock;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 public class LlapSliderUtils {
-  private static final Logger LOG = LoggerFactory
-      .getLogger(LlapSliderUtils.class);
+  private static final Logger LOG = LoggerFactory.getLogger(LlapSliderUtils.class);
   private static final String LLAP_PACKAGE_DIR = ".yarn/package/LLAP/";

-  public static ServiceClient createServiceClient(
-      Configuration conf) throws Exception {
+  public static ServiceClient createServiceClient(Configuration conf) throws Exception {
     ServiceClient serviceClient = new ServiceClient();
     serviceClient.init(conf);
     serviceClient.start();
     return serviceClient;
   }

-  public static ApplicationReport getAppReport(String appName, ServiceClient serviceClient,
-      long timeoutMs) throws
-      LlapStatusServiceDriver.LlapStatusCliException {
-    Clock clock = SystemClock.getInstance();
-    long startTime = clock.getTime();
-    long timeoutTime =
[06/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index f5800b9..1ada394 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -183,15 +183,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 15 (BROADCAST_EDGE) +Map 11 <- Reducer 13 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 17 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -200,134 +200,138 @@ Stage-0 limit:-1 Stage-1 Reducer 10 vectorized - File Output Operator [FS_239] -Limit [LIM_238] (rows=72 width=832) + File Output Operator [FS_240] +Limit [LIM_239] (rows=7 width=832) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=832) + Select Operator [SEL_238] (rows=7 width=832) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=832) +SHUFFLE [RS_237] + Select Operator [SEL_236] (rows=7 width=832) Output:["_col4","_col5","_col6","_col7"] -Group By Operator [GBY_234] (rows=72 width=353) +Group By Operator [GBY_235] (rows=7 width=353) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 -Group By Operator [GBY_48] (rows=72 width=353) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 - Merge Join Operator [MERGEJOIN_206] (rows=8055 width=100) -Conds:RS_44._col3, _col24, _col25=RS_232._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_232] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_231] (rows=265971 width=183) -Output:["_col0","_col1","_col2"] -Filter Operator [FIL_230] (rows=265971 width=183) - predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) - TableScan [TS_21] (rows=1861800 width=183) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_44] - PartitionCols:_col3, _col24, _col25 - Filter Operator [FIL_43] (rows=8055 width=390) -predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') 
and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) -Merge Join Operator [MERGEJOIN_205] (rows=24166 width=390) - Conds:RS_40._col1=RS_233._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] -<-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] -PartitionCols:_col0 - Please refer to the previous Select Operator
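In the query85 plan above, the marital-status and education tests move onto the customer_demographics scan as boolean columns, and the join ORs those flags with the per-band price checks instead of re-evaluating the string comparisons per joined row. A reduced sketch of the predicate, assuming the standard TPC-DS query85 shape:

  SELECT AVG(ws.ws_quantity)
  FROM web_sales ws
  JOIN customer_demographics cd1
    ON ws.ws_bill_cdemo_sk = cd1.cd_demo_sk
   AND ((cd1.cd_marital_status = 'M' AND cd1.cd_education_status = '4 yr Degree'
         AND ws.ws_sales_price BETWEEN 100.00 AND 150.00)
     OR (cd1.cd_marital_status = 'D' AND cd1.cd_education_status = 'Primary'
         AND ws.ws_sales_price BETWEEN 50.00 AND 100.00)
     OR (cd1.cd_marital_status = 'U' AND cd1.cd_education_status = 'Advanced Degree'
         AND ws.ws_sales_price BETWEEN 150.00 AND 200.00));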
[25/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query66.q.out b/ql/src/test/results/clientpositive/perf/spark/query66.q.out index e8ef1dc..80723d8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query66.q.out @@ -624,8 +624,8 @@ STAGE PLANS: predicate: (ws_ship_mode_sk is not null and ws_sold_date_sk is not null and ws_sold_time_sk is not null and ws_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_paid_inc_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: ws_sold_date_sk (type: int), ws_sold_time_sk (type: int), ws_ship_mode_sk (type: int), ws_warehouse_sk (type: int), (ws_sales_price * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)), (ws_net_paid_inc_tax * CAST( ws_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -633,7 +633,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 6 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE @@ -642,7 +642,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(18,2)), _col5 (type: decimal(18,2)) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -656,8 +656,8 @@ STAGE PLANS: predicate: (cs_ship_mode_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_warehouse_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)), cs_net_paid_inc_ship_tax (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + expressions: cs_sold_date_sk (type: int), cs_sold_time_sk (type: int), cs_ship_mode_sk (type: int), cs_warehouse_sk (type: int), (cs_ext_sales_price * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)), (cs_net_paid_inc_ship_tax * CAST( cs_quantity AS decimal(10,0))) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -665,7 +665,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col2, _col3, _col4, _col5, 
_col6 +outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 13 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE @@ -674,7 +674,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
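query66 shows the rewrite extending to arithmetic: (price * quantity) is now computed in the scan-side Select as a decimal(18,2), so the shuffle carries one derived value per measure instead of both source columns. A minimal sketch over the TPC-DS schema (the aggregation is simplified relative to the test query):

  SELECT w.w_warehouse_sk,
         SUM(ws.ws_sales_price * ws.ws_quantity) AS sales_amt
  FROM web_sales ws
  JOIN warehouse w ON ws.ws_warehouse_sk = w.w_warehouse_sk
  GROUP BY w.w_warehouse_sk;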
[42/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/explainuser_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 067a43c..c86450a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -457,16 +457,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col1 -Select Operator [SEL_21] (rows=2 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=2 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 0))} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col5, _col1 +Select Operator [SEL_21] (rows=1 width=24) + Output:["_col1","_col5"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=24) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col7"],residual filter predicates:{((_col4 > 0) or _col2)} {((_col4 + _col7) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=88) +Output:["_col0","_col1","_col2"] Filter Operator [FIL_36] (rows=18 width=84) predicate:key is not null TableScan [TS_0] (rows=20 width=84) @@ -547,16 +547,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 -Select Operator [SEL_21] (rows=1 width=20) - Output:["_col1","_col4"] - Merge Join Operator [MERGEJOIN_57] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or _col1 is not null)} {((_col1 >= 1) or (_col4 >= 1L))} {((UDFToLong(_col1) + _col4) >= 0)} {((_col3 + _col6) >= 0)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col7 +Select Operator [SEL_21] (rows=1 width=36) + Output:["_col1","_col7"] + Merge Join Operator [MERGEJOIN_57] (rows=1 width=36) + Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col9"],residual filter predicates:{((_col6 > 0) or _col2)} {(_col3 or (_col7 >= 1L))} {((_col4 + _col7) >= 0)} {((_col6 + _col9) >= 0)} <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_17] PartitionCols:_col0 - Select Operator [SEL_2] (rows=18 width=84) -Output:["_col0","_col1"] + Select Operator [SEL_2] (rows=18 width=99) +Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_36] (rows=18 width=84) predicate:((c_int > 0) and key is not null) TableScan [TS_0] (rows=20 width=84) @@ -630,16 +630,16 @@ Stage-0 SHUFFLE [RS_23] PartitionCols:_col0, _col1 Group By Operator [GBY_22] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col4 - Select Operator [SEL_21] (rows=1 width=20) -Output:["_col1","_col4"] -Merge Join Operator [MERGEJOIN_54] (rows=1 width=20) - Conds:RS_17._col0=RS_18._col0(Inner),RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col6"],residual filter predicates:{((_col3 > 0) or (_col1 >= 
0))} {((_col3 + _col6) >= 2)} + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col5 + Select Operator [SEL_21] (rows=1
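The explainuser_1 plans show the residual filter predicates of a multi-way join rewritten against precomputed booleans. A hedged sketch of the join shape (t1/t2/t3 and the constants are placeholders mirroring the residual predicates visible in the plan, not the test's actual tables):

  -- Comparisons such as (c_int >= 0) become scan-side boolean columns, so the
  -- residual filter after the merge join tests flags like _col2 instead of
  -- re-evaluating the comparison on each joined row.
  SELECT t1.c_int, t3.c_int, COUNT(*)
  FROM t1
  JOIN t2 ON t1.key = t2.key
  JOIN t3 ON t2.key = t3.key
  WHERE (t2.c_int > 0 OR t1.c_int >= 0)
    AND (t2.c_int + t3.c_int) >= 0
  GROUP BY t1.c_int, t3.c_int;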
[47/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/join47.q.out -- diff --git a/ql/src/test/results/clientpositive/join47.q.out b/ql/src/test/results/clientpositive/join47.q.out index 2892b8b..169244e 100644 --- a/ql/src/test/results/clientpositive/join47.q.out +++ b/ql/src/test/results/clientpositive/join47.q.out @@ -363,24 +363,24 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: string), _col1 (type: string) +value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: boolean) Reduce Operator Tree: Join Operator condition map: @@ -388,19 +388,23 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3 - residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} - Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE - Limit -Number of rows: 10 -Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Limit + Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator +compressed: false +Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -472,24 +476,24 @@ STAGE PLANS: alias: src1 Statistics: 
Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
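join47 exercises a cross product with a disjunctive residual predicate; after the change the two BETWEEN tests are evaluated once per scan row, so the residual OR reduces to {((_col0 = _col3) or _col2 or _col5)}. The test query is roughly of this form:

  SELECT *
  FROM src1 JOIN src
    ON (src1.key = src.key
        OR src1.value BETWEEN 100 AND 102
        OR src.value BETWEEN 100 AND 102)
  LIMIT 10;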
[58/59] [abbrv] hive git commit: HIVE-20905 : querying streaming table fails with out of memory exception (mahesh kumar behera via Thejas Nair)
HIVE-20905 : querying streaming table fails with out of memory exception (mahesh kumar behera via Thejas Nair)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/148e7acb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/148e7acb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/148e7acb
Branch: refs/heads/master-tez092
Commit: 148e7acba46da997a023b57794c7f1f209097320
Parents: 52f94b8
Author: Mahesh Kumar Behera
Authored: Tue Nov 13 14:03:23 2018 -0700
Committer: Thejas M Nair
Committed: Tue Nov 13 14:03:23 2018 -0700
--
 .../apache/hadoop/hive/ql/txn/compactor/Cleaner.java    | 12 +++-
 .../org/apache/hadoop/hive/metastore/HiveMetaStore.java |  3 +++
 2 files changed, 10 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/148e7acb/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
--

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
index 3565616..3bc1f8a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
@@ -20,8 +20,7 @@ package org.apache.hadoop.hive.ql.txn.compactor;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.metastore.ReplChangeManager;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileStatus;
@@ -58,6 +57,8 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;

+import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
+
 /**
  * A class to clean directories after compactions. This will run in a separate thread.
  */
@@ -323,7 +324,7 @@ public class Cleaner extends CompactorThread {
     return " id=" + ci.id;
   }
   private void removeFiles(String location, ValidWriteIdList writeIdList, CompactionInfo ci)
-      throws IOException, HiveException {
+      throws IOException, NoSuchObjectException {
     Path locPath = new Path(location);
     AcidUtils.Directory dir = AcidUtils.getAcidState(locPath, conf, writeIdList);
     List obsoleteDirs = dir.getObsolete();
@@ -349,11 +350,12 @@ public class Cleaner extends CompactorThread {
     }

     FileSystem fs = filesToDelete.get(0).getFileSystem(conf);
-    Database db = Hive.get().getDatabase(ci.dbname);
+    Database db = rs.getDatabase(getDefaultCatalog(conf), ci.dbname);
+    Boolean isSourceOfRepl = ReplChangeManager.isSourceOfReplication(db);

     for (Path dead : filesToDelete) {
       LOG.debug("Going to delete path " + dead.toString());
-      if (ReplChangeManager.isSourceOfReplication(db)) {
+      if (isSourceOfRepl) {
         replChangeManager.recycle(dead, ReplChangeManager.RecycleType.MOVE, true);
       }
       fs.delete(dead, true);

http://git-wip-us.apache.org/repos/asf/hive/blob/148e7acb/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
--

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 0485184..23a78ca 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -9085,6 +9085,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         conf.set((String) item.getKey(), (String) item.getValue());
       }

+      //for metastore process, all metastore call should be embedded metastore call.
+      conf.set(ConfVars.THRIFT_URIS.getHiveName(), "");
+
       // Add shutdown hook.
       shutdownHookMgr.addShutdownHook(() -> {
         String shutdownMsg = "Shutting down hive metastore.";
[17/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out index 50474bc..f5a71b4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -184,36 +184,36 @@ CBO PLAN: HiveProject(_o__c0=[$0], _o__c1=[$1], _o__c2=[$2], _o__c3=[$3]) HiveSortLimit(sort0=[$7], sort1=[$4], sort2=[$5], sort3=[$6], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(_o__c0=[substr($0, 1, 20)], _o__c1=[/(CAST($1):DOUBLE, $2)], _o__c2=[/($3, $4)], _o__c3=[/($5, $6)], (tok_function avg (tok_table_or_col ws_quantity))=[/(CAST($1):DOUBLE, $2)], (tok_function avg (tok_table_or_col wr_refunded_cash))=[/($3, $4)], (tok_function avg (tok_table_or_col wr_fee))=[/($5, $6)], (tok_function substr (tok_table_or_col r_reason_desc) 1 20)=[substr($0, 1, 20)]) - HiveAggregate(group=[{7}], agg#0=[sum($26)], agg#1=[count($26)], agg#2=[sum($21)], agg#3=[count($21)], agg#4=[sum($20)], agg#5=[count($20)]) -HiveJoin(condition=[AND(AND(=($0, $17), =($4, $1)), =($5, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{14}], agg#0=[sum($32)], agg#1=[count($32)], agg#2=[sum($27)], agg#3=[count($27)], agg#4=[sum($26)], agg#5=[count($26)]) +HiveJoin(condition=[AND(AND(AND(=($1, $18), =($2, $19)), =($0, $21)), OR(AND($3, $4, $36), AND($5, $6, $37), AND($7, $8, $38)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3], ==[=($2, _UTF-16LE'M')], =4=[=($3, _UTF-16LE'4 yr Degree')], =5=[=($2, _UTF-16LE'D')], =6=[=($3, _UTF-16LE'Primary')], =7=[=($2, _UTF-16LE'U')], =8=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $24, 100, 150)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $24, 50, 100)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $24, 150, 200], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) - HiveFilter(condition=[AND(IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IS NOT NULL($0))]) -HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) -HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[AND(=($0, $13), OR(AND($1, $24), AND($2, $25), AND($3, $26)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) + HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', 
_UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) +HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) +HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(r_reason_sk=[$0], r_reason_desc=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, reason]], table:alias=[reason]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(1998):INTEGER]) - HiveFilter(condition=[AND(=($6, 1998), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wp_web_page_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) -
[28/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query26.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index b6ee41e..48c0e11 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -221,11 +221,11 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col18 +outputColumnNames: _col4, _col5, _col6, _col7, _col12 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col18 (type: string) + keys: _col12 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query27.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out index 4063c4f..6c64664 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out @@ -207,7 +207,7 @@ STAGE PLANS: keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col15 + outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col11 input vertices: 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE @@ -216,7 +216,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE -value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col15 (type: string) +value expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col11 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -225,10 +225,10 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col4, _col5, _col6, _col7, _col15, _col17 +outputColumnNames: _col4, _col5, _col6, _col7, _col11, _col13 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col17 (type: string), _col15 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + expressions: _col13 (type: string), _col11 (type: string), _col4 (type: int), _col5 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query29.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query29.q.out b/ql/src/test/results/clientpositive/perf/spark/query29.q.out index 2e5c0f3..f4a4524 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query29.q.out +++ 
b/ql/src/test/results/clientpositive/perf/spark/query29.q.out @@ -295,7 +295,7 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col3, _col5, _col10, _col11, _col13, _col18, _col19 +outputColumnNames: _col3, _col5, _col8, _col9, _col11, _col14, _col15 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE
[34/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index a8f06eb..def06a5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -122,18 +122,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), ((key > 40) and (value > 50) and (key = value)) (type: boolean) +outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: VectorUDFAdaptor(((key > 40) and (value > 50) and (key = value)))(children: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean) -> 6:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} keys: 0 1 @@ -368,18 +369,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join 0 to 1 filter predicates: 0 -1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} +1 {_col2} {_col3} {_col4} keys: 0 _col0 (type: int) 1 _col1 (type: int) @@ -397,7 +399,7 @@ STAGE PLANS: Select Vectorization: className: VectorSelectOperator native: true -selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int +selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 8:int Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) @@ -541,18 +543,19 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int) -outputColumnNames: _col0, _col1 +expressions: key (type: int), value (type: int), (key > 40) (type: boolean), (value > 50) (type: boolean), (key = value) (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true +selectExpressions: LongColGreaterLongScalar(col 0:int, val 40) -> 3:boolean, LongColGreaterLongScalar(col 1:int, val 50) -> 4:boolean, LongColEqualLongColumn(col 0:int, col 1:int) -> 5:boolean Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map:
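vector_join_filters shows the vectorized variant: the ON-clause filters of the outer join become scan-side projections (LongColGreaterLongScalar and friends), and the Map Join Operator's filter predicates shrink to the precomputed boolean columns {_col2} {_col3} {_col4}. The queries take roughly this form (myinput1 and the constants appear in the plan; the hash-sum is the test suite's usual checksum convention):

  SELECT SUM(HASH(a.key, a.value, b.key, b.value))
  FROM myinput1 a
  RIGHT OUTER JOIN myinput1 b
    ON a.key = b.value
   AND a.key > 40 AND a.value > 50 AND a.key = a.value;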
[20/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out index 9633df1..cbf9bca 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query38.q.out @@ -75,7 +75,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -90,7 +90,7 @@ HiveSortLimit(fetch=[100]) HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) @@ -105,7 +105,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4]) HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) -HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3]) +HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $3, 1212, 1223), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out index fd3038e..51bb901 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query39.q.out @@ -68,8 +68,8 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(w_warehouse_sk=[$1], i_item_sk=[$2], mean=[/(CAST($6):DOUBLE, $5)], cov=[CASE(=(/(CAST($6):DOUBLE, $5), 0), null, /(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)))]) HiveFilter(condition=[CASE(=(/(CAST($6):DOUBLE, $5), 0), false, >(/(POWER(/(-($3, /(*($4, $4), $5)), CASE(=($5, 1), null, -($5, 1))), 0.5), /(CAST($6):DOUBLE, $5)), 1))]) HiveAggregate(group=[{0, 1, 2}], agg#0=[sum($5)], agg#1=[sum($4)], agg#2=[count($3)], agg#3=[sum($3)]) - HiveProject($f0=[$9], $f1=[$8], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) -HiveJoin(condition=[=($3, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$0], $f4=[$4], $f40=[CAST($4):DOUBLE], $f6=[*(CAST($4):DOUBLE, CAST($4):DOUBLE)]) +HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], 
cost=[not available]) HiveProject(i_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) @@ -78,7 +78,7 @@ HiveProject(w_warehouse_sk=[$0], i_item_sk=[$1], d_moy=[CAST(4):INTEGER], mean=[ HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2), IS NOT NULL($0))])
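The cbo_query38/39 changes above are a downstream effect of the same rewrite: once the BETWEEN on d_month_seq is fully absorbed by the HiveFilter, the HiveProject above the date_dim scan can drop the column, exactly as the CBO plan diff shows. Schematically (the query is an illustrative reduction, not the TPC-DS original):

  -- before: HiveProject(d_date_sk=[$0], d_date=[$2], d_month_seq=[$3])
  -- after:  HiveProject(d_date_sk=[$0], d_date=[$2])
  SELECT ss.ss_customer_sk, d.d_date
  FROM store_sales ss
  JOIN date_dim d ON ss.ss_sold_date_sk = d.d_date_sk
  WHERE d.d_month_seq BETWEEN 1212 AND 1223;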
[12/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/query4.q.out index 27ce7b5..bb0d7ba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query4.q.out @@ -271,367 +271,355 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_575] -Limit [LIM_574] (rows=100 width=85) + File Output Operator [FS_557] +Limit [LIM_556] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_573] (rows=7323197 width=85) + Select Operator [SEL_555] (rows=7323197 width=85) Output:["_col0"] <-Reducer 9 [SIMPLE_EDGE] -SHUFFLE [RS_147] - Select Operator [SEL_146] (rows=7323197 width=85) +SHUFFLE [RS_141] + Select Operator [SEL_140] (rows=7323197 width=85) Output:["_col0"] -Filter Operator [FIL_145] (rows=7323197 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > (_col12 / _col3))) ELSE ((null > (_col12 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col9 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_478] (rows=14646395 width=533) - Conds:RS_142._col2=RS_572._col0(Inner),Output:["_col3","_col5","_col9","_col11","_col12"] +Filter Operator [FIL_139] (rows=7323197 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col9) THEN (((_col11 / _col8) > (_col14 / _col3))) ELSE ((null > (_col14 / _col3))) END) ELSE (CASE WHEN (_col9) THEN (((_col11 / _col8) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_472] (rows=14646395 width=537) + Conds:RS_136._col2=RS_554._col0(Inner),Output:["_col3","_col8","_col9","_col11","_col13","_col14"] <-Reducer 30 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_572] +SHUFFLE [RS_554] PartitionCols:_col0 - Select Operator [SEL_571] (rows=8000 width=297) + Select Operator [SEL_553] (rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_570] (rows=8000 width=764) +Group By Operator [GBY_552] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 29 [SIMPLE_EDGE] - SHUFFLE [RS_126] + SHUFFLE [RS_120] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_125] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_123] (rows=187573258 width=1043) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_473] (rows=187573258 width=1043) - Conds:RS_120._col1=RS_518._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] -<-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_518] -PartitionCols:_col0 -Select Operator [SEL_517] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_516] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_114] (rows=8000 width=656) - 
default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] -<-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_120] -
[29/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query18.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index e77a918..1d73576 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -106,15 +106,15 @@ STAGE PLANS: predicate: ((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), c_birth_year (type: int) - outputColumnNames: _col0, _col1, _col2, _col4 + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int), CAST( c_birth_year AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE -value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int) +value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: decimal(12,2)) Execution mode: vectorized Map 13 Map Operator Tree: @@ -145,15 +145,15 @@ STAGE PLANS: predicate: ((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cd_demo_sk (type: int), cd_dep_count (type: int) - outputColumnNames: _col0, _col3 + expressions: cd_demo_sk (type: int), CAST( cd_dep_count AS decimal(12,2)) (type: decimal(12,2)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE -value expressions: _col3 (type: int) +value expressions: _col1 (type: decimal(12,2)) Execution mode: vectorized Map 15 Map Operator Tree: @@ -224,7 +224,7 @@ STAGE PLANS: predicate: (cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)), cs_sales_price (type: decimal(7,2)), cs_coupon_amt (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) + expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), CAST( cs_quantity AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_list_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_coupon_amt AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_sales_price AS decimal(12,2)) (type: decimal(12,2)), CAST( cs_net_profit AS 
decimal(12,2)) (type: decimal(12,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -232,7 +232,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int)
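The recurring change in the query18 plan above is that integer columns feeding a decimal aggregation are now cast to decimal(12,2) inside the map-side Select Operator, so every downstream operator sees one uniform type. A minimal sketch of the same idea in plain Java (illustrative only, not Hive's vectorized code; class and variable names are invented):

import java.math.BigDecimal;
import java.util.List;

// Widen int -> decimal once in the "projection" so every consumer works on
// one uniform type, mirroring CAST(... AS decimal(12,2)) in the Select Operator.
public class WidenOnceSketch {
  public static void main(String[] args) {
    List<Integer> quantities = List.of(3, 7, 12);

    // "Projection": convert each value exactly once at the source.
    List<BigDecimal> widened = quantities.stream()
        .map(q -> new BigDecimal(q).setScale(2)) // decimal(12,2)-style scale
        .toList();

    // Downstream consumers (sum here) need no per-use conversions.
    BigDecimal sum = widened.stream().reduce(BigDecimal.ZERO, BigDecimal::add);
    System.out.println("sum = " + sum); // prints 22.00
  }
}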
[51/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55887646 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55887646 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55887646 Branch: refs/heads/master-tez092 Commit: 558876462d2589423d7131b51c24dbf61b8a22b9 Parents: dca389b Author: Zoltan Haindrich Authored: Mon Nov 12 10:03:28 2018 -0800 Committer: Jesus Camacho Rodriguez Committed: Mon Nov 12 10:03:57 2018 -0800 -- .../results/positive/accumulo_queries.q.out | 34 +- .../test/results/positive/hbase_queries.q.out | 34 +- .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +- .../jdbc/TestTriggersMoveWorkloadManager.java |9 +- .../jdbc/TestTriggersTezSessionPoolManager.java | 21 +- .../hadoop/hive/ql/parse/CalcitePlanner.java|2 +- .../bucket_mapjoin_mismatch1.q.out |4 +- .../clientpositive/allcolref_in_udf.q.out | 28 +- .../annotate_stats_join_pkfk.q.out | 40 +- .../results/clientpositive/auto_join13.q.out|6 +- .../results/clientpositive/auto_join19.q.out|4 +- .../clientpositive/auto_join19_inclause.q.out |4 +- .../results/clientpositive/auto_join2.q.out | 24 +- .../results/clientpositive/auto_join32.q.out|6 +- .../results/clientpositive/auto_join9.q.out |4 +- .../clientpositive/auto_join_stats.q.out| 122 +- .../clientpositive/auto_join_stats2.q.out | 90 +- .../materialized_view_create_rewrite.q.out |8 +- .../clientpositive/bucket_map_join_spark1.q.out |4 +- .../clientpositive/bucket_map_join_spark2.q.out |4 +- .../clientpositive/bucket_map_join_spark3.q.out |4 +- .../clientpositive/bucket_map_join_spark4.q.out |4 +- .../bucketsortoptimize_insert_4.q.out |4 +- .../bucketsortoptimize_insert_5.q.out |4 +- .../bucketsortoptimize_insert_8.q.out |8 +- .../test/results/clientpositive/cbo_const.q.out | 82 +- .../results/clientpositive/cbo_rp_join1.q.out | 80 +- .../clientpositive/cbo_rp_outer_join_ppr.q.out |8 +- .../constantPropagateForSubQuery.q.out | 10 +- .../results/clientpositive/constprog2.q.out | 16 +- .../clientpositive/constprog_partitioner.q.out |8 +- .../clientpositive/correlationoptimizer8.q.out | 58 +- .../test/results/clientpositive/cte_mat_5.q.out | 18 +- .../results/clientpositive/deleteAnalyze.q.out | 18 +- .../clientpositive/druid/druidmini_mv.q.out | 16 +- .../encryption_join_unencrypted_tbl.q.out | 76 +- .../clientpositive/filter_cond_pushdown.q.out | 90 +- .../clientpositive/filter_join_breaktask.q.out | 24 +- .../infer_bucket_sort_map_operators.q.out | 24 +- .../clientpositive/infer_join_preds.q.out | 22 +- .../results/clientpositive/innerjoin1.q.out | 76 +- .../test/results/clientpositive/input23.q.out |8 +- .../results/clientpositive/interval_3.q.out | 16 +- ql/src/test/results/clientpositive/join13.q.out |8 +- ql/src/test/results/clientpositive/join2.q.out | 32 +- ql/src/test/results/clientpositive/join26.q.out |2 +- ql/src/test/results/clientpositive/join32.q.out |2 +- ql/src/test/results/clientpositive/join33.q.out |2 +- ql/src/test/results/clientpositive/join42.q.out | 28 +- ql/src/test/results/clientpositive/join45.q.out | 392 +- ql/src/test/results/clientpositive/join46.q.out | 290 +- ql/src/test/results/clientpositive/join47.q.out | 392 +- ql/src/test/results/clientpositive/join9.q.out |6 +- .../clientpositive/join_cond_pushdown_1.q.out | 18 +- .../clientpositive/join_cond_pushdown_3.q.out | 18 +- .../join_cond_pushdown_unqual1.q.out| 18 +- 
.../join_cond_pushdown_unqual3.q.out| 18 +- .../clientpositive/join_emit_interval.q.out | 52 +- .../clientpositive/join_filters_overlap.q.out | 338 +- .../results/clientpositive/join_merging.q.out | 152 +- .../test/results/clientpositive/join_view.q.out |4 +- .../clientpositive/llap/auto_join_filters.q.out | 60 +- .../llap/auto_sortmerge_join_6.q.out| 174 +- .../llap/bucket_map_join_tez2.q.out | 116 +- .../clientpositive/llap/bucketmapjoin1.q.out| 20 +- .../clientpositive/llap/bucketmapjoin2.q.out|4 +- .../clientpositive/llap/bucketmapjoin3.q.out| 16 +- .../llap/bucketsortoptimize_insert_2.q.out | 24 +- .../llap/bucketsortoptimize_insert_6.q.out | 301 +- .../llap/bucketsortoptimize_insert_7.q.out |8 +- .../clientpositive/llap/check_constraint.q.out | 43 +- .../llap/constprog_semijoin.q.out | 54 +- .../llap/constraints_optimization.q.out
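For readers skimming the plan diffs in these [partial] messages: the rewrite moves a conditional or derived expression out of projections above a join and into the projection on the dimension-table side, so it is evaluated once per dimension row rather than once per joined fact row. A toy sketch of the effect (plain Java, not Hive's operator code; all names are invented for illustration):

import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Toy hash join: the conditional runs |dim| times on the build side instead
// of |fact| times after the join, which is the intent of HIVE-20850.
public class PushConditionalSketch {
  record DimRow(int key, String status) {}
  record FactRow(int dimKey, double amount) {}

  public static void main(String[] args) {
    List<DimRow> dim = List.of(new DimRow(1, "College"), new DimRow(2, "HighSchool"));
    List<FactRow> fact = List.of(new FactRow(1, 10.0), new FactRow(1, 20.0), new FactRow(2, 5.0));

    // Build side: CASE WHEN status = 'College' THEN 1 ELSE 0 END, once per dim row.
    Map<Integer, Integer> dimFlag = new HashMap<>();
    for (DimRow d : dim) {
      dimFlag.put(d.key(), "College".equals(d.status()) ? 1 : 0);
    }

    // Probe side: fact rows only look the precomputed flag up.
    double sum = 0;
    for (FactRow f : fact) {
      Integer flag = dimFlag.get(f.dimKey());
      if (flag != null && flag == 1) {
        sum += f.amount();
      }
    }
    System.out.println("sum over 'College' rows = " + sum); // prints 30.0
  }
}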
[01/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
Repository: hive Updated Branches: refs/heads/master-tez092 c55347d52 -> 750daa4a6 http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index 44f9b5d..4190a21 100644 --- a/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ b/ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -396,14 +396,14 @@ STAGE PLANS: Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: _col1 (type: string) +key expressions: _col0 (type: string) sort order: + -Map-reduce partition columns: _col1 (type: string) +Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE -value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) +value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: double), _col7 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: @@ -426,13 +426,13 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col1 (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + 1 _col0 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: int), _col14 (type: string), _col15 (type: double), _col16 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -441,10 +441,10 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 +outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), 1 (type: int), _col10 (type:
[08/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/query66.q.out index 225b62f..767d47b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -511,10 +511,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_62] (rows=5559759 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Select Operator [SEL_60] (rows=5559759 width=680) + Select Operator [SEL_60] (rows=5559759 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] -Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=680) - Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col6","_col11","_col15","_col16","_col17","_col18","_col19","_col20"] +Merge Join Operator [MERGEJOIN_204] (rows=5559759 width=750) + Conds:RS_57._col3=RS_259._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 24 [SIMPLE_EDGE] vectorized SHUFFLE [RS_259] PartitionCols:_col0 @@ -527,12 +527,12 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col3 -Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=205) - Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_203] (rows=5559759 width=274) + Conds:RS_54._col2=RS_245._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_245] PartitionCols:_col0 -Select Operator [SEL_242] (rows=1 width=88) +Select Operator [SEL_242] (rows=1 width=4) Output:["_col0"] Filter Operator [FIL_241] (rows=1 width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) @@ -541,13 +541,13 @@ Stage-0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_202] (rows=9518 width=224) - Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col11"] +Merge Join Operator [MERGEJOIN_202] (rows=9518 width=278) + Conds:RS_51._col0=RS_233._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 18
[33/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index bd40d8f..1b4d343 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -234,17 +234,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +selectExpressions: LongColumnBetween(col 0:int, left 100, right 102) -> 4:boolean +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) @@ -253,20 +254,27 @@ STAGE PLANS: className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false -File Sink Vectorization: -className: VectorFileSinkOperator -native: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -396,17 +404,18 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN
[05/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query91.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index 5b4952d..98e8adf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -104,13 +104,13 @@ Stage-0 SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_41] (rows=1 width=585) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19 + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col11)"],keys:_col5, _col6, _col14, _col15, _col16 Merge Join Operator [MERGEJOIN_144] (rows=10438 width=473) - Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col11","_col14","_col15","_col16"] <-Map 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_165] PartitionCols:_col0 -Select Operator [SEL_164] (rows=3600 width=96) +Select Operator [SEL_164] (rows=3600 width=4) Output:["_col0"] Filter Operator [FIL_163] (rows=3600 width=96) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) @@ -120,12 +120,12 @@ Stage-0 SHUFFLE [RS_37] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_143] (rows=20876 width=473) - Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"] + Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col11","_col14","_col15","_col16"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_142] (rows=657590 width=312) - Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] + Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_162] PartitionCols:_col0 @@ -152,7 +152,7 @@ Stage-0 <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_159] PartitionCols:_col0 -Select Operator [SEL_158] (rows=50 width=12) +Select Operator [SEL_158] (rows=50 width=4) Output:["_col0"] Filter Operator [FIL_157] (rows=50 width=12) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) @@ -166,7 +166,7 @@ Stage-0 <-Map 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_153] PartitionCols:_col0 -Select Operator [SEL_152] (rows=800 width=116) +Select Operator [SEL_152] (rows=800 width=4) Output:["_col0"] Filter Operator [FIL_151] (rows=800 width=112) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query92.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index 50918f0..1f837dd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -104,9 +104,9 @@ Stage-0 Select Operator [SEL_34] (rows=2478 width=112) Output:["_col2"] Filter Operator [FIL_33] (rows=2478 width=112) - predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) + predicate:(_col2 > _col5) Merge Join
[52/59] [abbrv] hive git commit: HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran)
HIVE-20899: Keytab URI for LLAP YARN Service is restrictive to support HDFS only (Gour Saha reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bc39c499 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bc39c499 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bc39c499 Branch: refs/heads/master-tez092 Commit: bc39c49988c8a5d881a23ed7dd5d4adba0509ee9 Parents: 5588764 Author: Gour Saha Authored: Mon Nov 12 13:04:21 2018 -0800 Committer: Prasanth Jayachandran Committed: Mon Nov 12 13:04:24 2018 -0800 -- llap-server/src/main/resources/package.py | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/bc39c499/llap-server/src/main/resources/package.py -- diff --git a/llap-server/src/main/resources/package.py b/llap-server/src/main/resources/package.py index 9eb3fd7..c48ff79 100644 --- a/llap-server/src/main/resources/package.py +++ b/llap-server/src/main/resources/package.py @@ -130,8 +130,6 @@ def main(args): service_keytab_path += "/" + service_keytab else: service_keytab_path = service_keytab - if service_keytab_path: - service_keytab_path = "hdfs:///user/hive/" + service_keytab_path if not input: print "Cannot find input files"
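The two deleted lines above had unconditionally rewritten any keytab path to hdfs:///user/hive/..., which is what made the keytab URI HDFS-only. A sketch of the scheme-aware behavior the fix permits (plain Java with java.net.URI; resolveKeytab is a hypothetical helper, not an actual Hive or YARN API):

import java.net.URI;

// A fully qualified keytab URI (s3a://..., file:///..., hdfs://...) passes
// through untouched; only a bare relative path gets a default prefix.
public class KeytabUriSketch {
  static String resolveKeytab(String keytab, String defaultDir) {
    URI uri = URI.create(keytab);
    if (uri.getScheme() != null) {
      return keytab; // already qualified; do not force hdfs://
    }
    return defaultDir + "/" + keytab;
  }

  public static void main(String[] args) {
    System.out.println(resolveKeytab("s3a://bucket/keytabs/hive.keytab", "hdfs:///user/hive"));
    System.out.println(resolveKeytab("hive.keytab", "hdfs:///user/hive"));
  }
}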
[27/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index 67e8b4f..93e83ef 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -237,13 +237,13 @@ STAGE PLANS: Reducer 10 <- Map 13 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) Reducer 11 <- Map 14 (PARTITION-LEVEL SORT, 975), Reducer 10 (PARTITION-LEVEL SORT, 975) Reducer 12 <- Reducer 11 (GROUP, 481) -Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 306), Map 19 (PARTITION-LEVEL SORT, 306) -Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 873), Reducer 16 (PARTITION-LEVEL SORT, 873) -Reducer 18 <- Reducer 17 (GROUP, 369) +Reducer 16 <- Map 15 (PARTITION-LEVEL SORT, 154), Map 19 (PARTITION-LEVEL SORT, 154) +Reducer 17 <- Map 20 (PARTITION-LEVEL SORT, 706), Reducer 16 (PARTITION-LEVEL SORT, 706) +Reducer 18 <- Reducer 17 (GROUP, 186) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 154), Map 7 (PARTITION-LEVEL SORT, 154) -Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 154), Map 25 (PARTITION-LEVEL SORT, 154) -Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 706), Reducer 22 (PARTITION-LEVEL SORT, 706) -Reducer 24 <- Reducer 23 (GROUP, 186) +Reducer 22 <- Map 21 (PARTITION-LEVEL SORT, 306), Map 25 (PARTITION-LEVEL SORT, 306) +Reducer 23 <- Map 26 (PARTITION-LEVEL SORT, 873), Reducer 22 (PARTITION-LEVEL SORT, 873) +Reducer 24 <- Reducer 23 (GROUP, 369) Reducer 28 <- Map 27 (PARTITION-LEVEL SORT, 306), Map 31 (PARTITION-LEVEL SORT, 306) Reducer 29 <- Map 32 (PARTITION-LEVEL SORT, 873), Reducer 28 (PARTITION-LEVEL SORT, 873) Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 706), Reducer 2 (PARTITION-LEVEL SORT, 706) @@ -266,16 +266,15 @@ STAGE PLANS: predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price (type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ((((ws_ext_list_price - ws_ext_wholesale_cost) - ws_ext_discount_amt) + ws_ext_sales_price) / 2) (type: decimal(14,6)) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) -Execution mode: vectorized +value expressions: _col1 (type: int), _col2 (type: decimal(14,6)) Map 13 Map Operator Tree: TableScan @@ -318,23 +317,22 @@ STAGE PLANS: Map 15 Map Operator Tree: TableScan - alias: catalog_sales - filterExpr: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + filterExpr: 
(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: (cs_bill_customer_sk is not null and cs_sold_date_sk is not null) (type: boolean) -Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE +predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) +Statistics: Num rows: 144002668 Data size: 19580198212 Basic
[55/59] [abbrv] hive git commit: HIVE-20682: Async query execution can potentially fail if shared sessionHive is closed by master thread (Sankar Hariappan, reviewed by Mahesh Kumar Behera, Anishek Agarwal)
HIVE-20682: Async query execution can potentially fail if shared sessionHive is closed by master thread (Sankar Hariappan, reviewed by Mahesh Kumar Behera, Anishek Agarwal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99d25f02 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99d25f02 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99d25f02 Branch: refs/heads/master-tez092 Commit: 99d25f02421a84bf0f96660f9248fd6518dc7c8a Parents: af40170 Author: Sankar Hariappan Authored: Tue Nov 13 16:26:04 2018 +0530 Committer: Sankar Hariappan Committed: Tue Nov 13 16:26:04 2018 +0530 -- .../hive/ql/parse/TestReplicationScenarios.java | 2 +- .../apache/hadoop/hive/ql/metadata/Hive.java| 129 +++ .../service/cli/operation/SQLOperation.java | 20 +-- .../service/cli/session/HiveSessionImpl.java| 56 +++- .../cli/session/TestSessionManagerMetrics.java | 2 + 5 files changed, 138 insertions(+), 71 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/99d25f02/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java index 75cd68a..5a88550 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenarios.java @@ -389,7 +389,7 @@ public class TestReplicationScenarios { Task replLoadTask = TaskFactory.get(replLoadWork, confTemp); replLoadTask.initialize(null, null, new DriverContext(driver.getContext()), null); replLoadTask.executeTask(null); -Hive.getThreadLocal().closeCurrent(); +Hive.closeCurrent(); return replLoadWork.getRootTask(); } http://git-wip-us.apache.org/repos/asf/hive/blob/99d25f02/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 180b41e..e185bf4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -167,35 +167,36 @@ public class Hive { private IMetaStoreClient metaStoreClient; private SynchronizedMetaStoreClient syncMetaStoreClient; private UserGroupInformation owner; + private boolean isAllowClose = true; // metastore calls timing information private final ConcurrentHashMap metaCallTimeMap = new ConcurrentHashMap<>(); - // Static class to store thread local Hive object and allowClose flag. + // Static class to store thread local Hive object. private static class ThreadLocalHive extends ThreadLocal { -private ThreadLocal allowClose = ThreadLocal.withInitial(() -> true); - @Override protected Hive initialValue() { return null; } @Override -public synchronized void remove() { - if (allowClose() && (this.get() != null)) { -this.get().close(); +public synchronized void set(Hive hiveObj) { + Hive currentHive = this.get(); + if (currentHive != hiveObj) { +// Remove/close current thread-local Hive object before overwriting with new Hive object. 
+remove(); +super.set(hiveObj); } - super.remove(); - this.allowClose.set(true); -} - -public synchronized void set(Hive hiveObj, boolean allowClose) { - super.set(hiveObj); - this.allowClose.set(allowClose); } -boolean allowClose() { - return this.allowClose.get(); +@Override +public synchronized void remove() { + Hive currentHive = this.get(); + if (currentHive != null) { +// Close the metastore connections before removing it from thread local hiveDB. +currentHive.close(false); +super.remove(); + } } } @@ -317,7 +318,12 @@ public class Hive { Hive db = hiveDB.get(); if (db == null || !db.isCurrentUserOwner() || needsRefresh || (c != null && !isCompatible(db, c, isFastCheck))) { - db = create(c, false, db, doRegisterAllFns); + if (db != null) { +LOG.debug("Creating new db. db = " + db + ", needsRefresh = " + needsRefresh + +", db.isCurrentUserOwner = " + db.isCurrentUserOwner()); +closeCurrent(); + } + db = create(c, doRegisterAllFns); } if (c != null) { db.conf = c; @@ -325,26 +331,16 @@ public class Hive { return db; }
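The restructured ThreadLocalHive above folds the old allowClose flag away: set() closes any previous thread-local instance before overwriting it, and remove() closes before clearing. A minimal sketch of the same pattern for a generic resource (illustrative only, not the actual Hive class):

// Overwriting a thread-local resource first closes the value being replaced,
// so nothing leaks when a thread swaps its per-thread handle.
public class ClosingThreadLocalSketch {
  static class Resource implements AutoCloseable {
    final String name;
    Resource(String name) { this.name = name; }
    @Override public void close() { System.out.println("closed " + name); }
  }

  static class ClosingThreadLocal extends ThreadLocal<Resource> {
    @Override public synchronized void set(Resource next) {
      if (get() != next) {
        remove();        // close-then-clear the old value first
        super.set(next);
      }
    }
    @Override public synchronized void remove() {
      Resource current = get();
      if (current != null) {
        current.close(); // release before detaching from the thread
        super.remove();
      }
    }
  }

  public static void main(String[] args) {
    ClosingThreadLocal tl = new ClosingThreadLocal();
    tl.set(new Resource("A"));
    tl.set(new Resource("B")); // prints: closed A
    tl.remove();               // prints: closed B
  }
}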
[09/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query64.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index f670c4f..7c77e9f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -265,9 +265,9 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE) -Map 44 <- Reducer 40 (BROADCAST_EDGE) -Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 51 (BROADCAST_EDGE) +Map 37 <- Reducer 24 (BROADCAST_EDGE), Reducer 40 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) +Map 43 <- Reducer 40 (BROADCAST_EDGE) +Map 55 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE), Reducer 50 (BROADCAST_EDGE) Map 56 <- Reducer 42 (BROADCAST_EDGE) Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) @@ -276,18 +276,18 @@ Reducer 13 <- Reducer 31 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 14 <- Map 54 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 38 (SIMPLE_EDGE) -Reducer 18 <- Map 43 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) +Reducer 19 <- Map 51 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 46 (ONE_TO_ONE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 21 <- Map 52 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) Reducer 22 <- Map 36 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Map 53 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) Reducer 24 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 25 <- Map 16 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) -Reducer 26 <- Map 43 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 50 (ONE_TO_ONE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 49 (ONE_TO_ONE_EDGE) +Reducer 27 <- Map 51 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 29 <- Map 52 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Map 36 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) @@ -299,13 +299,13 @@ Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) Reducer 40 <- Map 39 (CUSTOM_SIMPLE_EDGE) Reducer 41 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) Reducer 42 <- Map 39 (CUSTOM_SIMPLE_EDGE) -Reducer 45 <- Map 44 (SIMPLE_EDGE), Map 48 (SIMPLE_EDGE) -Reducer 46 <- Reducer 45 (SIMPLE_EDGE) -Reducer 47 <- Reducer 46 (CUSTOM_SIMPLE_EDGE) -Reducer 49 <- Map 48 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) +Reducer 45 <- Reducer 44 (SIMPLE_EDGE) +Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE) +Reducer 48 <- Map 47 (SIMPLE_EDGE), Map 56 (SIMPLE_EDGE) +Reducer 49 <- Reducer 48 (SIMPLE_EDGE) Reducer 5 <- Map 36 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Reducer 49 (SIMPLE_EDGE) -Reducer 51 <- Reducer 50 
(CUSTOM_SIMPLE_EDGE) +Reducer 50 <- Reducer 49 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 54 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 54 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) @@ -320,10 +320,10 @@ Stage-0 Select Operator [SEL_1200] (rows=2169965329 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_259] -Select Operator [SEL_258] (rows=2169965329 width=1694) + SHUFFLE [RS_257] +Select Operator [SEL_256] (rows=2169965329 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_257] (rows=2169965329 width=1694) + Filter Operator [FIL_255] (rows=2169965329 width=1694) predicate:(_col19 <= _col12) Merge Join Operator [MERGEJOIN_1087] (rows=6509895988 width=1694) Conds:RS_1171._col2, _col1, _col3=RS_1199._col1, _col0,
[35/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index b1eec43..93791ac 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -667,238 +667,184 @@ POSTHOOK: query: select unionsrc.key, unionsrc.value FROM (select s1.key as key, POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### +100val_100 104val_104 105val_105 113val_113 118val_118 +119val_119 12 val_12 120val_120 +128val_128 +129val_129 133val_133 136val_136 +145val_145 155val_155 158val_158 160val_160 162val_162 +167val_167 168val_168 17 val_17 170val_170 +174val_174 175val_175 +178val_178 18 val_18 180val_180 +181val_181 186val_186 19 val_19 +193val_193 197val_197 +199val_199 +20 val_20 200val_200 +201val_201 +213val_213 +214val_214 216val_216 218val_218 +219val_219 +221val_221 222val_222 +223val_223 224val_224 +226val_226 228val_228 +233val_233 235val_235 237val_237 239val_239 +241val_241 244val_244 247val_247 +249val_249 256val_256 +260val_260 +262val_262 263val_263 273val_273 +277val_277 28 val_28 +281val_281 283val_283 286val_286 +287val_287 +288val_288 292val_292 +298val_298 +302val_302 306val_306 308val_308 +310val_310 +323val_323 327val_327 33 val_33 +336val_336 +341val_341 +344val_344 348val_348 +351val_351 353val_353 362val_362 366val_366 +375val_375 +382val_382 +384val_384 +393val_393 +395val_395 396val_396 397val_397 +399val_399 401val_401 +403val_403 +406val_406 409val_409 411val_411 +418val_418 419val_419 427val_427 43 val_43 432val_432 +435val_435 436val_436 439val_439 443val_443 453val_453 +455val_455 +459val_459 460val_460 462val_462 47 val_47 472val_472 -485val_485 -496val_496 -54 val_54 -64 val_64 -70 val_70 -8 val_8 -83 val_83 -84 val_84 -85 val_85 -90 val_90 -0 val_0 -103val_103 -114val_114 -125val_125 -138val_138 -146val_146 -150val_150 -152val_152 -153val_153 -156val_156 -157val_157 -165val_165 -172val_172 -177val_177 -179val_179 -187val_187 -195val_195 -196val_196 -217val_217 -242val_242 -248val_248 -252val_252 -265val_265 -27 val_27 -272val_272 -280val_280 -291val_291 -305val_305 -309val_309 -311val_311 -315val_315 -317val_317 -322val_322 -333val_333 -34 val_34 -345val_345 -35 val_35 -356val_356 -364val_364 -368val_368 -369val_369 -37 val_37 -373val_373 -377val_377 -4 val_4 -402val_402 -404val_404 -413val_413 -42 val_42 -430val_430 -431val_431 -444val_444 -449val_449 -452val_452 -454val_454 -457val_457 -463val_463 -466val_466 -470val_470 -475val_475 -481val_481 -489val_489 -491val_491 -57 val_57 -65 val_65 -66 val_66 -74 val_74 -76 val_76 -78 val_78 -9 val_9 -92 val_92 -95 val_95 -100val_100 -119val_119 -128val_128 -129val_129 -145val_145 -167val_167 -174val_174 -178val_178 -181val_181 -193val_193 -199val_199 -20 val_20 -201val_201 -213val_213 -214val_214 -219val_219 -221val_221 -223val_223 -226val_226 -233val_233 -241val_241 -249val_249 -260val_260 -262val_262 -277val_277 -281val_281 -287val_287 -288val_288 -298val_298 -302val_302 -310val_310 -323val_323 -336val_336 -341val_341 -344val_344 -351val_351 -375val_375 -382val_382 -384val_384 -393val_393 -395val_395 -399val_399 -403val_403 -406val_406 -418val_418 -435val_435 -455val_455 -459val_459 477val_477 478val_478 479val_479 482val_482 +485val_485 493val_493 494val_494 495val_495 +496val_496 497val_497 5 val_5 +54 
val_54 58 val_58 +64 val_64 67 val_67 +70 val_70 77 val_77 +8 val_8 80 val_80 +83 val_83 +84 val_84 +85 val_85 86 val_86 +90 val_90 97 val_97 98 val_98 +0 val_0 10 val_10 +103val_103 11 val_11 111
[43/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index accb3a7..6954647 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -535,14 +535,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -555,16 +555,16 @@ STAGE PLANS: predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) + expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator -key expressions: day(CAST( _col0 AS DATE)) (type: int) +key expressions: _col0 (type: int) sort order: + -Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) +Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: day(CAST( _col0 AS DATE)) (type: int) +expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -587,9 +587,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 day(CAST( _col0 AS DATE)) (type: int) - 1 day(CAST( _col0 AS DATE)) (type: int) -Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) +Statistics: Num rows: 2200 Data size: 8800 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -678,14 +678,14 @@ STAGE PLANS: filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: ds (type: string) +expressions: day(CAST( ds AS DATE)) (type: int) outputColumnNames: _col0 -Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: day(CAST( _col0 AS DATE)) (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: day(CAST( _col0 AS DATE)) (type: int) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE +
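The pattern in the dynamic_partition_pruning diff above: day(CAST(ds AS DATE)) is now computed inside the map-side Select Operator, so a 4-byte int is shuffled instead of the full date string, which is why Data size drops from 368000 to 8000. A rough sketch of the effect (plain Java, invented names, not Hive code):

import java.time.LocalDate;
import java.util.List;

// Deriving the small join key before the shuffle means only ints cross the
// network; the wider source strings stay on the map side.
public class ComputeBeforeShuffleSketch {
  public static void main(String[] args) {
    List<String> ds = List.of("2008-04-08", "2008-04-09");

    // Map-side "Select Operator": compute day(CAST(ds AS DATE)) eagerly.
    int[] dayKeys = ds.stream()
        .mapToInt(s -> LocalDate.parse(s).getDayOfMonth())
        .toArray();

    for (int k : dayKeys) {
      System.out.println("shuffle key = " + k); // 8, then 9
    }
  }
}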
[18/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out index fca31ef..e49b44b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query72.q.out @@ -82,10 +82,10 @@ CBO PLAN: HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) - HiveProject($f0=[$15], $f1=[$13], $f2=[$22], $f3=[CASE(IS NULL($28), 1, 0)], $f4=[CASE(IS NOT NULL($28), 1, 0)]) -HiveJoin(condition=[AND(=($29, $4), =($30, $6))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$22], cd_marital_status=[$23], hd_demo_sk=[$24], hd_buy_potential=[$25], d_date_sk=[$18], d_date=[$19], d_week_seq=[$20], d_year=[$21], d_date_sk0=[$27], d_week_seq0=[$28], d_date_sk1=[$6], d_date0=[$7], p_promo_sk=[$26]) -HiveJoin(condition=[AND(=($0, $27), =($20, $28))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 0)], $f4=[CASE(IS NOT NULL($25), 1, 0)]) +HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$6], CAST=[$7], p_promo_sk=[$23]) +HiveJoin(condition=[AND(=($0, $24), =($19, $25))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) @@ -94,29 +94,29 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) -HiveProject(d_date_sk=[$0], d_date=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_date0=[$13], d_week_seq=[$14], d_year=[$15], cd_demo_sk=[$16], cd_marital_status=[$17], hd_demo_sk=[$18], hd_buy_potential=[$19], p_promo_sk=[$20]) - HiveJoin(condition=[AND(=($5, $0), >(CAST($1):DOUBLE, 
+(CAST($13):DOUBLE, 5)))], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_date=[$2]) +HiveProject(d_date_sk=[$0], CAST=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], hd_demo_sk=[$16], p_promo_sk=[$17]) + HiveJoin(condition=[AND(=($5, $0), >($1, $14))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_desc=[$4]) HiveFilter(condition=[IS NOT NULL($0)])
[32/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/mapjoin47.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index d3e61f8..dadac0d 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -385,8 +385,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -400,8 +400,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(value) BETWEEN 100.0D AND 102.0D (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -409,19 +409,23 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3 -residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0D AND 102.0D or UDFToDouble(_col3) BETWEEN 100.0D AND 102.0D)} -Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE -Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 +residual filter predicates: {((_col0 = _col3) or _col2 or _col5)} +Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE + Limit +Number of rows: 10 Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work @@ -501,8 +505,8 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: @@ -516,8 +520,8 @@ STAGE PLANS: alias: src Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string),
[38/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index c72e4b2..c43ad91 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -109,13 +109,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 -Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE +expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), UDFToDouble(p_size) (type: double) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 +Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Map 3 @@ -147,16 +147,16 @@ STAGE PLANS: keys: 0 1 -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 -residual filter predicates: {(UDFToDouble(_col5) > _col9)} -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 +residual filter predicates: {(_col9 > _col10)} +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false -Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE +Statistics: Num rows: 8 Data size: 5184 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,12 +397,12 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 619 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4,
[56/59] [abbrv] hive git commit: HIVE-19701: getDelegationTokenFromMetaStore doesn't need to be synchronized (Sankar Hariappan, reviewed by Thejas M Nair)
HIVE-19701: getDelegationTokenFromMetaStore doesn't need to be synchronized (Sankar Hariappan, reviewed by Thejas M Nair) Signed-off-by: Sankar Hariappan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ceb4eb6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ceb4eb6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ceb4eb6 Branch: refs/heads/master-tez092 Commit: 1ceb4eb6a8329d858241f0aee8880ef68787802a Parents: 99d25f0 Author: Sankar Hariappan Authored: Tue Nov 13 23:18:09 2018 +0530 Committer: Sankar Hariappan Committed: Tue Nov 13 23:18:09 2018 +0530 -- service/src/java/org/apache/hive/service/cli/CLIService.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ceb4eb6/service/src/java/org/apache/hive/service/cli/CLIService.java -- diff --git a/service/src/java/org/apache/hive/service/cli/CLIService.java b/service/src/java/org/apache/hive/service/cli/CLIService.java index 9cbe7e1..22c4026 100644 --- a/service/src/java/org/apache/hive/service/cli/CLIService.java +++ b/service/src/java/org/apache/hive/service/cli/CLIService.java @@ -569,8 +569,7 @@ public class CLIService extends CompositeService implements ICLIService { } // obtain delegation token for the give user from metastore - // TODO: why is this synchronized? - public synchronized String getDelegationTokenFromMetaStore(String owner) + public String getDelegationTokenFromMetaStore(String owner) throws HiveSQLException, UnsupportedOperationException, LoginException, IOException { HiveConf hiveConf = getHiveConf(); if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL) ||
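The diff answers the TODO it deletes: every variable involved in fetching the token is local to the call, so concurrent callers cannot interfere, and the lock only serialized independent metastore round trips. A toy illustration of the distinction (not the real CLIService; names invented):

import java.util.concurrent.atomic.AtomicLong;

// A method whose state is all local variables (plus thread-safe helpers)
// can run concurrently without synchronized; only shared mutable fields
// would need the lock.
public class StatelessMethodSketch {
  private final AtomicLong calls = new AtomicLong();

  public String getTokenFor(String owner) {
    calls.incrementAndGet();      // thread-safe counter, no lock needed
    return "token-for-" + owner;  // stands in for the per-call metastore fetch
  }

  public static void main(String[] args) throws InterruptedException {
    StatelessMethodSketch s = new StatelessMethodSketch();
    Thread t1 = new Thread(() -> System.out.println(s.getTokenFor("alice")));
    Thread t2 = new Thread(() -> System.out.println(s.getTokenFor("bob")));
    t1.start(); t2.start();
    t1.join(); t2.join();
  }
}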
[14/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
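The pattern visible throughout the plan diffs that follow: deterministic CASE/IN conditions that reference only dimension-table columns are now evaluated once in the dimension side's projection (emitted as extra boolean columns such as =($2, _UTF-16LE'M')), and the join combines the precomputed booleans instead of re-evaluating the expressions against every joined fact row. A minimal Java sketch of that trade-off, using plain arrays rather than Hive operators (names and data are illustrative only):

import java.util.ArrayList;
import java.util.List;

public class PushConditionalSketch {
  public static void main(String[] args) {
    // Hypothetical dimension table: a few demographic rows.
    String[] maritalStatus = {"M", "D", "U", "M"};
    // Fact table: many rows, each referencing a dimension row by index.
    int[] factToDim = {0, 1, 2, 3, 0, 1, 2, 3};

    // Before: the predicate is evaluated once per joined fact row.
    List<Integer> before = new ArrayList<>();
    for (int i = 0; i < factToDim.length; i++) {
      if ("M".equals(maritalStatus[factToDim[i]])) {
        before.add(i);
      }
    }

    // After: push the conditional into the dimension projection, so it is
    // evaluated once per dimension row and carried as a boolean column.
    boolean[] isMarried = new boolean[maritalStatus.length];
    for (int d = 0; d < maritalStatus.length; d++) {
      isMarried[d] = "M".equals(maritalStatus[d]);
    }
    List<Integer> after = new ArrayList<>();
    for (int i = 0; i < factToDim.length; i++) {
      if (isMarried[factToDim[i]]) {
        after.add(i);
      }
    }
    System.out.println(before.equals(after)); // true: same rows, fewer comparisons
  }
}

The result set is identical; the per-row expression work moves from the large fact side to the small dimension side.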
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 7784792..059195a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[593][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product -Warning: Shuffle Join MERGEJOIN[594][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product -Warning: Shuffle Join MERGEJOIN[596][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[597][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product +Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 29' is a cross product +Warning: Shuffle Join MERGEJOIN[584][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[586][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 34' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -166,399 +166,391 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_699] -Limit [LIM_698] (rows=1 width=112) + File Output Operator [FS_689] +Limit [LIM_688] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_697] (rows=1 width=112) + Group By Operator [GBY_687] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 12 [CONTAINS] - Reduce Output Operator [RS_608] -Group By Operator [GBY_607] (rows=1 width=112) + Reduce Output Operator [RS_598] +Group By Operator [GBY_597] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_605] (rows=1 width=112) + Select Operator [SEL_595] (rows=1 width=112) Output:["_col0"] -Merge Join Operator [MERGEJOIN_604] (rows=1 width=116) - Conds:RS_248._col2=RS_249._col0(Inner),Output:["_col3","_col4"] +Merge Join Operator [MERGEJOIN_594] (rows=1 width=116) + Conds:RS_240._col2=RS_241._col0(Inner),Output:["_col3","_col4"] <-Reducer 11 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_248] + PARTITION_ONLY_SHUFFLE [RS_240] PartitionCols:_col2 -Merge Join Operator [MERGEJOIN_592] (rows=155 width=0) - Conds:RS_245._col1=RS_642._col0(Inner),Output:["_col2","_col3","_col4"] +Merge Join Operator [MERGEJOIN_582] (rows=155 width=0) + Conds:RS_237._col1=RS_632._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_642] + SHUFFLE [RS_632] PartitionCols:_col0 -Group By Operator [GBY_639] (rows=2235 width=4) +Group By Operator [GBY_629] (rows=2235 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_638] (rows=6548799 width=12) + Select Operator [SEL_628] (rows=6548799 width=290) Output:["_col1"] -Filter Operator [FIL_637] (rows=6548799 width=12) +Filter Operator [FIL_627] (rows=6548799 width=290) predicate:(_col3 > 4L) - Select Operator [SEL_636] (rows=19646398 width=12) -Output:["_col0","_col3"] -Group By Operator [GBY_635] (rows=19646398 width=290) + Select Operator [SEL_626] (rows=19646398 width=290) +Output:["_col1","_col3"] +Group By Operator [GBY_625] 
(rows=19646398 width=290) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 17 [SIMPLE_EDGE] -
[31/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_3.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 15a8963..725d905 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -54,12 +54,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -72,20 +76,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -93,21 +97,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1
[53/59] [abbrv] hive git commit: HIVE-20807 : Refactor LlapStatusServiceDriver (Miklos Gergely via Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/af401702/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java new file mode 100644 index 000..bee5079 --- /dev/null +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cli/status/LlapStatusServiceCommandLine.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.llap.cli.status; + +import java.util.Arrays; +import java.util.Properties; + +import jline.TerminalFactory; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Parses, verifies, prints and provides the command line arguments of the Llap Status program. + */ +public class LlapStatusServiceCommandLine { + private static final Logger LOGGER = LoggerFactory.getLogger("LlapStatusServiceDriverConsole"); + + @VisibleForTesting + static final long DEFAULT_FIND_YARN_APP_TIMEOUT_MS = 20 * 1000L; + @VisibleForTesting + static final long DEFAULT_STATUS_REFRESH_INTERVAL_MS = 1 * 1000L; + @VisibleForTesting + static final long DEFAULT_WATCH_MODE_TIMEOUT_MS = 5 * 60 * 1000L; + @VisibleForTesting + static final float DEFAULT_RUNNING_NODES_THRESHOLD = 1.0f; + + @SuppressWarnings("static-access") + private static final Option NAME = OptionBuilder + .withLongOpt("name") + .withDescription("LLAP cluster name") + .withArgName("name") + .hasArg() + .create('n'); + + @SuppressWarnings("static-access") + private static final Option FIND_APP_TIMEOUT = OptionBuilder + .withLongOpt("findAppTimeout") + .withDescription("Amount of time(s) that the tool will sleep to wait for the YARN application to start." + + "negative values=wait forever, 0=Do not wait. 
default=" + (DEFAULT_FIND_YARN_APP_TIMEOUT_MS / 1000) + "s") + .withArgName("findAppTimeout") + .hasArg() + .create('f'); + + @SuppressWarnings("static-access") + private static final Option OUTPUT_FILE = OptionBuilder + .withLongOpt("outputFile") + .withDescription("File to which output should be written (Default stdout)") + .withArgName("outputFile") + .hasArg() + .create('o'); + + @SuppressWarnings("static-access") + private static final Option WATCH_MODE = OptionBuilder + .withLongOpt("watch") + .withDescription("Watch mode waits until all LLAP daemons are running or subset of the nodes are running " + + "(threshold can be specified via -r option) (Default wait until all nodes are running)") + .withArgName("watch") + .create('w'); + + @SuppressWarnings("static-access") + private static final Option NOT_LAUNCHED = OptionBuilder + .withLongOpt("notLaunched") + .withDescription("In watch mode, do not assume that the application was already launched if there's doubt " + + "(e.g. if the last application instance has failed).") + .withArgName("notLaunched") + .create('l'); + + @SuppressWarnings("static-access") + private static final Option RUNNING_NODES_THRESHOLD = OptionBuilder + .withLongOpt("runningNodesThreshold") + .withDescription("When watch mode is enabled (-w), wait until the specified threshold of nodes are running " + + "(Default 1.0 which means 100% nodes are running)") + .withArgName("runningNodesThreshold") + .hasArg() + .create('r'); + + @SuppressWarnings("static-access") + private static final Option REFRESH_INTERVAL = OptionBuilder + .withLongOpt("refreshInterval") + .withDescription("Amount of time in seconds to wait until subsequent status
[22/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out index ccad088..19f3039 100644 --- a/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query13.q.out @@ -114,28 +114,28 @@ POSTHOOK: Input: default@store_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveProject($f0=[/(CAST($0):DOUBLE, $1)], $f1=[/($2, $3)], $f2=[/($4, $5)], $f3=[CAST($4):DECIMAL(17, 2)]) - HiveAggregate(group=[{}], agg#0=[sum($16)], agg#1=[count($16)], agg#2=[sum($18)], agg#3=[count($18)], agg#4=[sum($19)], agg#5=[count($19)]) -HiveJoin(condition=[AND(=($0, $12), OR(AND(=($1, _UTF-16LE'M'), =($2, _UTF-16LE'4 yr Degree'), BETWEEN(false, $17, 100, 150), =($7, 3)), AND(=($1, _UTF-16LE'D'), =($2, _UTF-16LE'Primary'), BETWEEN(false, $17, 50, 100), =($7, 1)), AND(=($1, _UTF-16LE'U'), =($2, _UTF-16LE'Advanced Degree'), BETWEEN(false, $17, 150, 200), =($7, 1], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3]) + HiveAggregate(group=[{}], agg#0=[sum($21)], agg#1=[count($21)], agg#2=[sum($22)], agg#3=[count($22)], agg#4=[sum($23)], agg#5=[count($23)]) +HiveJoin(condition=[AND(=($0, $17), OR(AND($1, $2, $27, $12), AND($3, $4, $28, $13), AND($5, $6, $29, $13)))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], ==[=($2, _UTF-16LE'M')], =2=[=($3, _UTF-16LE'4 yr Degree')], =3=[=($2, _UTF-16LE'D')], =4=[=($3, _UTF-16LE'Primary')], =5=[=($2, _UTF-16LE'U')], =6=[=($3, _UTF-16LE'Advanced Degree')]) HiveFilter(condition=[AND(IN($2, _UTF-16LE'M', _UTF-16LE'D', _UTF-16LE'U'), IN($3, _UTF-16LE'4 yr Degree', _UTF-16LE'Primary', _UTF-16LE'Advanced Degree'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveJoin(condition=[AND(=($11, $0), OR(AND(IN($1, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM'), BETWEEN(false, $17, 100, 200)), AND(IN($1, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN'), BETWEEN(false, $17, 150, 300)), AND(IN($1, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), BETWEEN(false, $17, 50, 250], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_country=[CAST(_UTF-16LE'United States'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveJoin(condition=[AND(=($12, $0), OR(AND($1, $17), AND($2, $18), AND($3, $19)))], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(ca_address_sk=[$0], IN=[IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM')], IN2=[IN($8, _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN')], IN3=[IN($8, _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV')]) HiveFilter(condition=[AND(IN($8, _UTF-16LE'KY', _UTF-16LE'GA', _UTF-16LE'NM', _UTF-16LE'MT', _UTF-16LE'OR', _UTF-16LE'IN', _UTF-16LE'WI', _UTF-16LE'MO', _UTF-16LE'WV'), =($10, _UTF-16LE'United States'), IS NOT NULL($0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_dep_count=[$3]) + HiveProject(hd_demo_sk=[$0], ==[=($3, 3)], =2=[=($3, 1)]) HiveFilter(condition=[AND(IN($3, 3, 1), IS NOT NULL($0))]) HiveTableScan(table=[[default, 
household_demographics]], table:alias=[household_demographics]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) -HiveProject(d_date_sk=[$0], d_year=[CAST(2001):INTEGER]) - HiveFilter(condition=[AND(=($6, 2001), IS NOT NULL($0))]) -HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0]) -HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(ss_sold_date_sk=[$0], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_quantity=[$10], ss_sales_price=[$13], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_net_profit=[$22]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(s_store_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) +HiveTableScan(table=[[default, store]], table:alias=[store]) +
[59/59] [abbrv] hive git commit: HIVE-20605 : Merge branch 'master' into master-tez092
HIVE-20605 : Merge branch 'master' into master-tez092 Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/750daa4a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/750daa4a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/750daa4a Branch: refs/heads/master-tez092 Commit: 750daa4a639c8704a14b6970741f8600cbf5a863 Parents: c55347d 148e7ac Author: sergey Authored: Tue Nov 13 13:34:22 2018 -0800 Committer: sergey Committed: Tue Nov 13 13:34:22 2018 -0800 -- .../results/positive/accumulo_queries.q.out | 34 +- bin/ext/llapstatus.sh |4 +- .../apache/hadoop/hive/common/FileUtils.java|5 + .../hadoop/hive/common/type/HiveChar.java |7 + .../hadoop/hive/common/type/TimestampUtils.java | 23 + .../org/apache/hadoop/hive/conf/Constants.java | 17 - .../org/apache/hadoop/hive/conf/HiveConf.java | 18 +- .../hadoop/hive/common/TestFileUtils.java | 12 +- .../hadoop/hive/druid/DruidKafkaUtils.java | 167 + .../hadoop/hive/druid/DruidStorageHandler.java | 879 ++--- .../hive/druid/DruidStorageHandlerInfo.java | 53 +- .../hive/druid/DruidStorageHandlerUtils.java| 883 +++-- .../hadoop/hive/druid/io/DruidOutputFormat.java | 32 +- .../druid/io/DruidQueryBasedInputFormat.java| 63 +- .../hadoop/hive/druid/io/DruidRecordWriter.java | 212 +- .../hadoop/hive/druid/io/HiveDruidSplit.java| 19 +- .../druid/json/KafkaSupervisorIOConfig.java | 199 +- .../hive/druid/json/KafkaSupervisorReport.java | 157 +- .../hive/druid/json/KafkaSupervisorSpec.java| 119 +- .../druid/json/KafkaSupervisorTuningConfig.java | 152 +- .../hive/druid/json/KafkaTuningConfig.java | 175 +- .../hadoop/hive/druid/json/TaskReportData.java | 68 +- .../hive/druid/security/DruidKerberosUtil.java | 58 +- .../hive/druid/security/KerberosHttpClient.java | 86 +- .../druid/security/ResponseCookieHandler.java | 44 +- .../RetryIfUnauthorizedResponseHandler.java | 62 +- .../druid/security/RetryResponseHolder.java | 23 +- .../serde/DruidGroupByQueryRecordReader.java| 19 +- .../druid/serde/DruidQueryRecordReader.java | 171 +- .../druid/serde/DruidScanQueryRecordReader.java | 35 +- .../serde/DruidSelectQueryRecordReader.java | 34 +- .../hadoop/hive/druid/serde/DruidSerDe.java | 185 +- .../hive/druid/serde/DruidSerDeUtils.java | 48 +- .../serde/DruidTimeseriesQueryRecordReader.java |3 +- .../druid/serde/DruidTopNQueryRecordReader.java | 46 +- .../hadoop/hive/druid/serde/DruidWritable.java | 60 +- .../hive/druid/DerbyConnectorTestUtility.java | 13 +- .../hadoop/hive/druid/QTestDruidSerDe.java | 73 +- .../hive/druid/TestDruidStorageHandler.java | 107 +- .../TestHiveDruidQueryBasedInputFormat.java |4 +- .../hadoop/hive/druid/serde/TestDruidSerDe.java | 1234 +++ .../hive/ql/io/TestDruidRecordWriter.java | 238 +- .../results/positive/external_table_ppd.q.out |1 + .../positive/hbase_binary_storage_queries.q.out |2 + .../src/test/results/positive/hbase_ddl.q.out |2 + .../test/results/positive/hbase_queries.q.out | 35 +- .../src/test/results/positive/hbasestats.q.out |5 + .../listener/DummyRawStoreFailEvent.java| 44 +- .../hive/ql/parse/TestReplicationScenarios.java |2 +- .../hive/ql/txn/compactor/TestCompactor.java|1 - .../hive/jdbc/AbstractJdbcTriggersTest.java |5 +- .../org/apache/hive/jdbc/TestJdbcDriver2.java | 30 +- .../jdbc/TestTriggersMoveWorkloadManager.java |9 +- .../jdbc/TestTriggersTezSessionPoolManager.java | 21 +- .../test/resources/testconfiguration.properties |6 +- .../hadoop/hive/cli/control/CliConfigs.java |6 + .../org/apache/hadoop/hive/ql/QTestUtil.java|4 +- 
.../hive/storage/jdbc/JdbcInputFormat.java |2 +- .../hive/storage/jdbc/JdbcInputSplit.java | 27 +- .../hive/storage/jdbc/JdbcRecordReader.java |3 +- .../jdbc/dao/GenericJdbcDatabaseAccessor.java |4 +- .../hadoop/hive/kafka/KafkaInputFormat.java | 13 +- .../hadoop/hive/kafka/KafkaRecordIterator.java | 15 +- .../hadoop/hive/kafka/KafkaRecordReader.java|2 +- .../apache/hadoop/hive/kafka/KafkaSerDe.java| 49 +- .../hadoop/hive/kafka/SimpleKafkaWriter.java|5 +- .../hive/kafka/VectorizedKafkaRecordReader.java | 186 + .../hive/kafka/SimpleKafkaWriterTest.java | 14 +- .../hive/llap/tez/LlapProtocolClientProxy.java | 22 + .../daemon/rpc/LlapDaemonProtocolProtos.java| 1433 +++- .../org/apache/hadoop/hive/llap/LlapUtil.java | 17 + .../hive/llap/impl/LlapProtocolClientImpl.java | 13 + .../src/protobuf/LlapDaemonProtocol.proto |9 + .../hadoop/hive/llap/cli/LlapSliderUtils.java | 55 +-
[15/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query17.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index bb18527..642a67f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -147,7 +147,7 @@ Stage-0 Select Operator [SEL_47] (rows=8581091759 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] Merge Join Operator [MERGEJOIN_213] (rows=8581091759 width=381) - Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + Conds:RS_44._col3=RS_257._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_257] PartitionCols:_col0 @@ -161,12 +161,12 @@ Stage-0 SHUFFLE [RS_44] PartitionCols:_col3 Merge Join Operator [MERGEJOIN_212] (rows=8581091759 width=299) -Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] +Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] - PartitionCols:_col7, _col8, _col9 + PartitionCols:_col6, _col7, _col8 Merge Join Operator [MERGEJOIN_211] (rows=1640229377 width=19) -Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] +Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] PartitionCols:_col2, _col1 @@ -175,7 +175,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_218] (rows=304 width=94) + Select Operator [SEL_218] (rows=304 width=4) Output:["_col0"] Filter Operator [FIL_215] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -205,7 +205,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_219] (rows=304 width=94) + Select Operator [SEL_219] (rows=304 width=4) Output:["_col0"] Filter Operator [FIL_216] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) @@ -249,7 +249,7 @@ Stage-0 SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 Merge Join Operator [MERGEJOIN_208] (rows=27749405 width=294) - Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + Conds:RS_38._col1=RS_254._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_254] PartitionCols:_col0 @@ -267,7 +267,7 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] vectorized
[45/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 68fc903..4ed9b60 100644 --- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -135,10 +135,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -326,11 +326,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -584,10 +584,10 @@ POSTHOOK: Input: default@srcbucket_mapjoin_part_n13 POSTHOOK: Input: default@srcbucket_mapjoin_part_n13@ds=2008-04-08 POSTHOOK: Output: default@bucketmapjoin_tmp_result_n6 OPTIMIZED SQL: SELECT `t0`.`key`, `t0`.`value`, `t2`.`value` AS `value1` -FROM (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +FROM (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_2_n11` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t0` -INNER JOIN (SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` +INNER JOIN (SELECT `key`, `value` FROM `default`.`srcbucket_mapjoin_part_n13` WHERE `ds` = '2008-04-08' AND `key` IS NOT NULL) AS `t2` ON `t0`.`key` = `t2`.`key` STAGE DEPENDENCIES: @@ -775,11 +775,11 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col4 +outputColumnNames: _col0, _col1, _col3 Position of Big Table: 1 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col4 (type: string) + expressions: CAST( _col0 AS STRING) (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 156 Data size: 89111 Basic stats: PARTIAL Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out index acb1e87..8039d0f 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out @@ -125,7 +125,7 @@ STAGE PLANS: Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -137,17 +137,17 @@ STAGE PLANS: Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 1780
[11/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query48.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/query48.q.out index 76b4ce1..1f63e95 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -143,15 +143,15 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -165,103 +165,103 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=25203 width=86) + Select Operator [SEL_28] (rows=20247 width=24) Output:["_col5"] -Filter Operator [FIL_27] (rows=25203 width=86) - predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_96] (rows=75613 width=86) - Conds:RS_24._col3=RS_118._col0(Inner),Output:["_col5","_col7","_col14"] +Filter Operator [FIL_27] (rows=20247 width=24) + predicate:((_col12 and _col6) or (_col13 and _col7) or (_col14 and _col8)) + Merge Join Operator [MERGEJOIN_96] (rows=26999 width=24) + Conds:RS_24._col3=RS_115._col0(Inner),Output:["_col5","_col6","_col7","_col8","_col12","_col13","_col14"] <-Map 12 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_118] +SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_117] (rows=3529412 width=187) -Output:["_col0","_col1"] -Filter Operator [FIL_116] (rows=3529412 width=187) + Select Operator [SEL_114] (rows=3529412 width=16) +Output:["_col0","_col1","_col2","_col3"] +Filter Operator [FIL_113] (rows=3529412 width=187) predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) TableScan [TS_12] (rows=4000 width=187) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=856941 width=0) - Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 10 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_110] + Merge Join Operator [MERGEJOIN_95] (rows=305980 width=12) + Conds:RS_21._col4=RS_126._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col8"] + <-Map 11 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_109] (rows=29552 width=184) + Select 
Operator [SEL_125] (rows=1704 width=4) Output:["_col0"] -Filter Operator [FIL_108] (rows=29552 width=183) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=183) -
[39/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out index ba9e81d..f84d13f 100644 --- a/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out +++ b/ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out @@ -98,12 +98,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map 5 @@ -118,12 +118,12 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -152,10 +152,10 @@ STAGE PLANS: keys: 0 _col9 (type: int) 1 _col0 (type: int) -outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col11 +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 100 Data size: 62700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), 'bar' (type: string) + expressions: _col9 (type: int), 'foo_n1' (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: int), 'bar' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 100 Data size: 80400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -240,17 +240,17 @@ STAGE PLANS: Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition 
columns: _col0 (type: int) -Statistics: Num rows: 10 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE +Statistics: Num rows: 10 Data size: 40
[16/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/tez/query11.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/query11.q.out index 2f453f3..da1c349 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query11.q.out @@ -189,249 +189,241 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_358] -Limit [LIM_357] (rows=100 width=85) + File Output Operator [FS_354] +Limit [LIM_353] (rows=100 width=85) Number of rows:100 - Select Operator [SEL_356] (rows=12248093 width=85) + Select Operator [SEL_352] (rows=12248093 width=85) Output:["_col0"] <-Reducer 7 [SIMPLE_EDGE] -SHUFFLE [RS_97] - Select Operator [SEL_96] (rows=12248093 width=85) +SHUFFLE [RS_93] + Select Operator [SEL_92] (rows=12248093 width=85) Output:["_col0"] -Filter Operator [FIL_95] (rows=12248093 width=533) - predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > (_col8 / _col3))) ELSE ((null > (_col8 / _col3))) END) ELSE (CASE WHEN (_col5 is not null) THEN (((_col1 / _col5) > null)) ELSE (null) END) END - Merge Join Operator [MERGEJOIN_291] (rows=24496186 width=533) - Conds:RS_92._col2=RS_355._col0(Inner),Output:["_col1","_col3","_col5","_col7","_col8"] +Filter Operator [FIL_91] (rows=12248093 width=537) + predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col6) THEN (((_col1 / _col5) > (_col9 / _col3))) ELSE ((null > (_col9 / _col3))) END) ELSE (CASE WHEN (_col6) THEN (((_col1 / _col5) > null)) ELSE (null) END) END + Merge Join Operator [MERGEJOIN_287] (rows=24496186 width=537) + Conds:RS_88._col2=RS_351._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col8","_col9"] <-Reducer 20 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_355] +SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_354] (rows=8000 width=297) + Select Operator [SEL_350] (rows=8000 width=297) Output:["_col0","_col1","_col2"] -Group By Operator [GBY_353] (rows=8000 width=764) +Group By Operator [GBY_349] (rows=8000 width=764) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6 <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_83] + SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6 -Group By Operator [GBY_82] (rows=8000 width=764) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col7)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Operator [SEL_80] (rows=187573258 width=847) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] -Merge Join Operator [MERGEJOIN_288] (rows=187573258 width=847) - Conds:RS_77._col1=RS_321._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] -<-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] -PartitionCols:_col0 -Select Operator [SEL_320] (rows=8000 width=656) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_319] (rows=8000 width=656) -predicate:(c_customer_id is not null and c_customer_sk is not null) -TableScan [TS_71] (rows=8000 width=656) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_preferred_cust_flag","c_birth_country","c_login","c_email_address"] 
-<-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_77] -PartitionCols:_col1 -
[24/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index 85e6dca..553d11a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -219,14 +219,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 11 Map Operator Tree: @@ -397,14 +397,14 @@ STAGE PLANS: Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_brand_id (type: int), i_class_id (type: int), i_category_id (type: int), i_manufact_id (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE -value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int) +value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) Execution mode: vectorized Map 34 Map Operator Tree: @@ -530,23 +530,23 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) -outputColumnNames: _col1, _col2, _col3, _col4, _col8, _col9, _col10, _col12 +outputColumnNames: _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col2 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: int), _col2 (type: int) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: int) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) Reducer 15 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 -outputColumnNames: _col3, _col4, _col8, _col9, _col10, _col12, _col15, _col16 +outputColumnNames: _col3, _col4, _col7, _col8, _col9, _col10, _col13, _col14 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col8 (type: int), _col9 
(type: int), _col10 (type: int), _col12 (type: int), (_col3 - CASE WHEN (_col15 is not null) THEN (_col15) ELSE (0) END) (type: int), (_col4 - CASE WHEN (_col16 is not null) THEN (_col16) ELSE (0) END) (type: decimal(8,2)) + expressions: _col7 (type: int), _col8
[23/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/perf/spark/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 6bdbf7e..a7bf288 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -182,8 +182,7 @@ POSTHOOK: Input: default@web_sales A masked pattern was here STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -191,47 +190,42 @@ STAGE PLANS: Spark A masked pattern was here Vertices: -Map 13 +Map 12 Map Operator Tree: TableScan - alias: reason - filterExpr: r_reason_sk is not null (type: boolean) - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: r_reason_sk is not null (type: boolean) -Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE +predicate: wp_web_page_sk is not null (type: boolean) +Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: r_reason_sk (type: int), r_reason_desc (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col10 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 -Spark - A masked pattern was here - Vertices: -Map 11 +Map 13 Map Operator Tree: TableScan - alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: reason + filterExpr: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Filter Operator -predicate: wp_web_page_sk is not null (type: boolean) -Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE +predicate: r_reason_sk is not null (type: boolean) +Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + expressions: r_reason_sk (type: int), r_reason_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -241,11 +235,11 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28) -Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 178), Reducer 2 (PARTITION-LEVEL SORT, 178) -Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 65), Reducer 3 (PARTITION-LEVEL SORT, 65) -Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 83), Reducer 4 
(PARTITION-LEVEL SORT, 83) -Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 13), Reducer 5 (PARTITION-LEVEL SORT, 13) -Reducer 7
[40/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index a1fe936..d9d2396 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -180,29 +180,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false + Select Operator +expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col4 (type: int), _col5 (type: int), _col6 (type: string) +outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 1049 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs Map 2 @@ -294,29 +298,33 @@ STAGE PLANS: alias: test1_n4 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator -expressions: key (type: int), value (type: int), col_1 (type: string) -outputColumnNames: _col0, _col1, _col2 -Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE +expressions: key (type: int), value (type: int), col_1 (type: string), key BETWEEN 100 AND 102 (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 6 Data size: 596 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 filter predicates: -0 {_col0 BETWEEN 100 AND 102} +0 {_col3} 1 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6 input vertices: 1 Map 2 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator -compressed: false +
[30/59] [abbrv] [partial] hive git commit: HIVE-20850: Push case conditional from projections to dimension tables if possible (Zoltan Haindrich via Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/55887646/ql/src/test/results/clientpositive/masking_4.q.out -- diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 60cbd0f..54861b0 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ b/ql/src/test/results/clientpositive/masking_4.q.out @@ -210,12 +210,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator -compressed: false -table: -input format: org.apache.hadoop.mapred.SequenceFileInputFormat -output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat -serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator +expressions: _col0 (type: int), UDFToDouble(_col0) (type: double) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -228,20 +232,20 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator -expressions: key (type: string) -outputColumnNames: _col0 +expressions: key (type: string), UDFToDouble(key) (type: double) +outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) TableScan Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) + key expressions: _col1 (type: double) sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map-reduce partition columns: _col1 (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: @@ -249,21 +253,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: -0 UDFToDouble(_col0) (type: double) -1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1 +0 _col1 (type: double) +1 _col1 (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator -keys: _col0 (type: string), _col1 (type: int) -mode: hash + Select Operator +expressions: _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE -File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe +Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator +compressed: false +table: +input format: org.apache.hadoop.mapred.SequenceFileInputFormat +output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage:
[2/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java -- diff --git a/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java b/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java index 8fecc1e..7659140 100644 --- a/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java +++ b/llap-common/src/gen/protobuf/gen-java/org/apache/hadoop/hive/llap/daemon/rpc/LlapDaemonProtocolProtos.java @@ -11465,6 +11465,1155 @@ public final class LlapDaemonProtocolProtos { // @@protoc_insertion_point(class_scope:SubmitWorkRequestProto) } + public interface RegisterDagRequestProtoOrBuilder + extends com.google.protobuf.MessageOrBuilder { + +// optional string user = 1; +/** + * optional string user = 1; + */ +boolean hasUser(); +/** + * optional string user = 1; + */ +java.lang.String getUser(); +/** + * optional string user = 1; + */ +com.google.protobuf.ByteString +getUserBytes(); + +// required .QueryIdentifierProto query_identifier = 2; +/** + * required .QueryIdentifierProto query_identifier = 2; + */ +boolean hasQueryIdentifier(); +/** + * required .QueryIdentifierProto query_identifier = 2; + */ + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto getQueryIdentifier(); +/** + * required .QueryIdentifierProto query_identifier = 2; + */ + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProtoOrBuilder getQueryIdentifierOrBuilder(); + +// optional bytes credentials_binary = 3; +/** + * optional bytes credentials_binary = 3; + */ +boolean hasCredentialsBinary(); +/** + * optional bytes credentials_binary = 3; + */ +com.google.protobuf.ByteString getCredentialsBinary(); + } + /** + * Protobuf type {@code RegisterDagRequestProto} + */ + public static final class RegisterDagRequestProto extends + com.google.protobuf.GeneratedMessage + implements RegisterDagRequestProtoOrBuilder { +// Use RegisterDagRequestProto.newBuilder() to construct. 
+private RegisterDagRequestProto(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); +} +private RegisterDagRequestProto(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + +private static final RegisterDagRequestProto defaultInstance; +public static RegisterDagRequestProto getDefaultInstance() { + return defaultInstance; +} + +public RegisterDagRequestProto getDefaultInstanceForType() { + return defaultInstance; +} + +private final com.google.protobuf.UnknownFieldSet unknownFields; +@java.lang.Override +public final com.google.protobuf.UnknownFieldSet +getUnknownFields() { + return this.unknownFields; +} +private RegisterDagRequestProto( +com.google.protobuf.CodedInputStream input, +com.google.protobuf.ExtensionRegistryLite extensionRegistry) +throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { +boolean done = false; +while (!done) { + int tag = input.readTag(); + switch (tag) { +case 0: + done = true; + break; +default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { +done = true; + } + break; +} +case 10: { + bitField0_ |= 0x0001; + user_ = input.readBytes(); + break; +} +case 18: { + org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto.Builder subBuilder = null; + if (((bitField0_ & 0x0002) == 0x0002)) { +subBuilder = queryIdentifier_.toBuilder(); + } + queryIdentifier_ = input.readMessage(org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto.PARSER, extensionRegistry); + if (subBuilder != null) { +subBuilder.mergeFrom(queryIdentifier_); +queryIdentifier_ = subBuilder.buildPartial(); + } + bitField0_ |= 0x0002; + break; +} +case 26: { +
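For readers skimming the generated code above, a minimal sketch of constructing the new message via its builder; the QueryIdentifierProto field names (application_id_string, dag_index) are assumptions based on other LLAP call sites and are not shown in this excerpt:

    import com.google.protobuf.ByteString;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;

    public class RegisterDagRequestSketch {
      public static RegisterDagRequestProto build() {
        // query_identifier is the only required field in the .proto definition.
        return RegisterDagRequestProto.newBuilder()
            .setUser("hive")
            .setQueryIdentifier(QueryIdentifierProto.newBuilder()
                .setApplicationIdString("application_1542000000000_0001") // assumed field name
                .setDagIndex(1)                                           // assumed field name
                .build())
            .setCredentialsBinary(ByteString.EMPTY) // optional; real callers pass serialized tokens
            .build();
      }
    }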
[1/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 01cef9230 -> 10f4eadd5 http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-common/src/protobuf/LlapDaemonProtocol.proto -- diff --git a/llap-common/src/protobuf/LlapDaemonProtocol.proto b/llap-common/src/protobuf/LlapDaemonProtocol.proto index d70dd41..3aeacb2 100644 --- a/llap-common/src/protobuf/LlapDaemonProtocol.proto +++ b/llap-common/src/protobuf/LlapDaemonProtocol.proto @@ -134,6 +134,14 @@ message SubmitWorkRequestProto { optional bool is_guaranteed = 12 [default = false]; } +message RegisterDagRequestProto { + optional string user = 1; + required QueryIdentifierProto query_identifier = 2; + optional bytes credentials_binary = 3; +} + +message RegisterDagResponseProto { +} enum SubmissionStateProto { ACCEPTED = 1; @@ -204,6 +212,7 @@ message PurgeCacheResponseProto { } service LlapDaemonProtocol { + rpc registerDag(RegisterDagRequestProto) returns (RegisterDagResponseProto); rpc submitWork(SubmitWorkRequestProto) returns (SubmitWorkResponseProto); rpc sourceStateUpdated(SourceStateUpdatedRequestProto) returns (SourceStateUpdatedResponseProto); rpc queryComplete(QueryCompleteRequestProto) returns (QueryCompleteResponseProto); http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java index 035960e..582f518 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/ContainerRunner.java @@ -16,6 +16,7 @@ package org.apache.hadoop.hive.llap.daemon; import java.io.IOException; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedRequestProto; @@ -29,6 +30,10 @@ import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.UpdateFra public interface ContainerRunner { + LlapDaemonProtocolProtos.RegisterDagResponseProto registerDag( + LlapDaemonProtocolProtos.RegisterDagRequestProto request) + throws IOException; + SubmitWorkResponseProto submitWork(SubmitWorkRequestProto request) throws IOException; SourceStateUpdatedResponseProto sourceStateUpdated( http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java -- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java index ef5922e..7a3ca2f 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/daemon/impl/ContainerRunnerImpl.java @@ -27,10 +27,9 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.UgiFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.DaemonId; import org.apache.hadoop.hive.llap.LlapNodeId; +import 
org.apache.hadoop.hive.llap.LlapUtil; import org.apache.hadoop.hive.llap.NotTezEventHelper; import org.apache.hadoop.hive.llap.counters.FragmentCountersMap; import org.apache.hadoop.hive.llap.counters.LlapWmCounters; @@ -55,6 +54,8 @@ import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceSta import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmissionStateProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkResponseProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.TerminateFragmentResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.UpdateFragmentRequestProto; @@ -65,7 +66,6 @@ import
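A minimal sketch of the shape a ContainerRunner implementation of the new method takes; the body here is an assumption for illustration, not the committed ContainerRunnerImpl code, which wires the request into the daemon's QueryTracker and ShuffleHandler:

    import java.io.IOException;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto;

    class RegisterDagHandlerSketch {
      RegisterDagResponseProto registerDag(RegisterDagRequestProto request) throws IOException {
        if (!request.hasQueryIdentifier()) {
          throw new IOException("query_identifier is required");
        }
        // The real implementation registers the DAG (and its credentials, when
        // present) with the shuffle handler before any fragment for the DAG
        // runs; that wiring is not shown in this excerpt.
        return RegisterDagResponseProto.getDefaultInstance();
      }
    }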
[3/3] hive git commit: HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin)
HIVE-20853 : Expose ShuffleHandler.registerDag in the llap daemon API (Jaume Marhuenda, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/10f4eadd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/10f4eadd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/10f4eadd Branch: refs/heads/master Commit: 10f4eadd5934c4bfa575c40c98abfe6b6e148d0f Parents: 01cef92 Author: Jaume Marhuenda Authored: Fri Nov 9 12:41:56 2018 -0800 Committer: sergey Committed: Fri Nov 9 12:41:56 2018 -0800 -- .../hive/llap/tez/LlapProtocolClientProxy.java | 22 + .../daemon/rpc/LlapDaemonProtocolProtos.java| 1433 -- .../org/apache/hadoop/hive/llap/LlapUtil.java | 17 + .../hive/llap/impl/LlapProtocolClientImpl.java | 13 + .../src/protobuf/LlapDaemonProtocol.proto |9 + .../hive/llap/daemon/ContainerRunner.java |5 + .../llap/daemon/impl/ContainerRunnerImpl.java | 48 +- .../hive/llap/daemon/impl/LlapDaemon.java | 27 +- .../daemon/impl/LlapProtocolServerImpl.java | 12 + .../hive/llap/daemon/impl/QueryTracker.java | 17 + .../llap/shufflehandler/ShuffleHandler.java | 26 +- .../hive/llap/daemon/LlapDaemonTestUtils.java | 73 + .../daemon/impl/TestContainerRunnerImpl.java| 180 +++ .../TestFirstInFirstOutComparator.java | 63 +- .../llap/tezplugins/LlapTaskCommunicator.java | 74 +- .../tezplugins/LlapTaskSchedulerService.java| 49 +- .../hive/llap/tezplugins/LlapTezUtils.java | 14 + 17 files changed, 1894 insertions(+), 188 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/10f4eadd/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java -- diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java index 211696a..bc74c55 100644 --- a/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java +++ b/llap-client/src/java/org/apache/hadoop/hive/llap/tez/LlapProtocolClientProxy.java @@ -26,6 +26,8 @@ import org.apache.hadoop.hive.llap.AsyncPbRpcProxy; import org.apache.hadoop.hive.llap.LlapNodeId; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryCompleteResponseProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto; +import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedRequestProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SourceStateUpdatedResponseProto; import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SubmitWorkRequestProto; @@ -55,6 +57,12 @@ public class LlapProtocolClientProxy TimeUnit.MILLISECONDS), -1, 1); } + public void registerDag(RegisterDagRequestProto request, String host, int port, + final ExecuteRequestCallback callback) { +LlapNodeId nodeId = LlapNodeId.getInstance(host, port); +queueRequest(new RegisterDagCallable(nodeId, request, callback)); + } + public void sendSubmitWork(SubmitWorkRequestProto request, String host, int port, final ExecuteRequestCallback callback) { LlapNodeId nodeId = LlapNodeId.getInstance(host, port); @@ -86,6 +94,20 @@ public class LlapProtocolClientProxy queueRequest(new SendUpdateFragmentCallable(nodeId, request, callback)); } + private class RegisterDagCallable 
extends + NodeCallableRequest { +protected RegisterDagCallable(LlapNodeId nodeId, +RegisterDagRequestProto registerDagRequestProto, +ExecuteRequestCallback callback) { + super(nodeId, registerDagRequestProto, callback); +} + +@Override public +RegisterDagResponseProto call() throws Exception { + return getProxy(nodeId, null).registerDag(null, request); +} + } + private class SubmitWorkCallable extends NodeCallableRequest { protected SubmitWorkCallable(LlapNodeId nodeId,
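A hedged usage sketch of the new client entry point above; the ExecuteRequestCallback method names (setResponse, indicateError) are assumptions based on the callback type referenced in this diff:

    import org.apache.hadoop.hive.llap.AsyncPbRpcProxy;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagRequestProto;
    import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.RegisterDagResponseProto;
    import org.apache.hadoop.hive.llap.tez.LlapProtocolClientProxy;

    class RegisterDagClientSketch {
      static void register(LlapProtocolClientProxy proxy, RegisterDagRequestProto request) {
        proxy.registerDag(request, "llap-host-1", 15001, // placeholder host and port
            new AsyncPbRpcProxy.ExecuteRequestCallback<RegisterDagResponseProto>() {
              @Override
              public void setResponse(RegisterDagResponseProto response) {
                // DAG registered; fragments can now be submitted to this daemon.
              }
              @Override
              public void indicateError(Throwable t) {
                // Assumed handling: surface the failure to the caller.
              }
            });
      }
    }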
[4/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java index 114cdde..555bc5d 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMDropPoolRequest.java @@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory; private static final org.apache.thrift.protocol.TField RESOURCE_PLAN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("resourcePlanName", org.apache.thrift.protocol.TType.STRING, (short)1); private static final org.apache.thrift.protocol.TField POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("poolPath", org.apache.thrift.protocol.TType.STRING, (short)2); + private static final org.apache.thrift.protocol.TField NS_FIELD_DESC = new org.apache.thrift.protocol.TField("ns", org.apache.thrift.protocol.TType.STRING, (short)3); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -49,11 +50,13 @@ import org.slf4j.LoggerFactory; private String resourcePlanName; // optional private String poolPath; // optional + private String ns; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { RESOURCE_PLAN_NAME((short)1, "resourcePlanName"), -POOL_PATH((short)2, "poolPath"); +POOL_PATH((short)2, "poolPath"), +NS((short)3, "ns"); private static final Map byName = new HashMap(); @@ -72,6 +75,8 @@ import org.slf4j.LoggerFactory; return RESOURCE_PLAN_NAME; case 2: // POOL_PATH return POOL_PATH; +case 3: // NS + return NS; default: return null; } @@ -112,7 +117,7 @@ import org.slf4j.LoggerFactory; } // isset id assignments - private static final _Fields optionals[] = {_Fields.RESOURCE_PLAN_NAME,_Fields.POOL_PATH}; + private static final _Fields optionals[] = {_Fields.RESOURCE_PLAN_NAME,_Fields.POOL_PATH,_Fields.NS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -120,6 +125,8 @@ import org.slf4j.LoggerFactory; new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.POOL_PATH, new org.apache.thrift.meta_data.FieldMetaData("poolPath", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); +tmpMap.put(_Fields.NS, new org.apache.thrift.meta_data.FieldMetaData("ns", org.apache.thrift.TFieldRequirementType.OPTIONAL, +new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(WMDropPoolRequest.class, metaDataMap); } @@ -137,6 +144,9 @@ import org.slf4j.LoggerFactory; if (other.isSetPoolPath()) { this.poolPath = other.poolPath; } +if (other.isSetNs()) { + 
this.ns = other.ns; +} } public WMDropPoolRequest deepCopy() { @@ -147,6 +157,7 @@ import org.slf4j.LoggerFactory; public void clear() { this.resourcePlanName = null; this.poolPath = null; +this.ns = null; } public String getResourcePlanName() { @@ -195,6 +206,29 @@ import org.slf4j.LoggerFactory; } } + public String getNs() { +return this.ns; + } + + public void setNs(String ns) { +this.ns = ns; + } + + public void unsetNs() { +this.ns = null; + } + + /** Returns true if field ns is set (has been assigned a value) and false otherwise */ + public boolean isSetNs() { +return this.ns != null; + } + + public void setNsIsSet(boolean value) { +if (!value) { + this.ns = null; +} + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case RESOURCE_PLAN_NAME: @@ -213,6 +247,14 @@ import org.slf4j.LoggerFactory; } break; +case NS: + if (value == null) { +unsetNs(); + } else { +setNs((String)value); + } + break; + } } @@
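A minimal sketch of populating the new optional field through the generated Thrift setters shown above; the plan and pool names are placeholders:

    import org.apache.hadoop.hive.metastore.api.WMDropPoolRequest;

    public class WMDropPoolNsSketch {
      public static void main(String[] args) {
        WMDropPoolRequest req = new WMDropPoolRequest();
        req.setResourcePlanName("daily_plan"); // placeholder name
        req.setPoolPath("etl.high");           // placeholder path
        req.setNs("cluster_a");                // the new optional namespace field
        System.out.println(req.isSetNs());     // true; old clients simply leave it unset
      }
    }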
[6/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5258c67e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5258c67e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5258c67e Branch: refs/heads/master Commit: 5258c67e9558bd2d98e4887d3dd8e3eb8aa5d763 Parents: b701720 Author: sergey Authored: Tue Oct 30 12:44:14 2018 -0700 Committer: sergey Committed: Tue Oct 30 12:57:17 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../listener/DummyRawStoreFailEvent.java| 44 +- .../upgrade/hive/hive-schema-4.0.0.hive.sql | 11 + .../hive/upgrade-3.1.0-to-4.0.0.hive.sql| 137 .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 5 + .../apache/hadoop/hive/ql/metadata/Hive.java| 75 +- .../hadoop/hive/ql/metadata/TestHive.java | 68 +- .../test/queries/clientpositive/resourceplan.q | 6 +- .../clientpositive/llap/resourceplan.q.out | 732 ++- .../api/WMAlterResourcePlanRequest.java | 114 ++- ...CreateOrDropTriggerToPoolMappingRequest.java | 114 ++- .../hive/metastore/api/WMDropPoolRequest.java | 114 ++- .../api/WMDropResourcePlanRequest.java | 114 ++- .../metastore/api/WMDropTriggerRequest.java | 114 ++- .../api/WMGetActiveResourcePlanRequest.java | 112 ++- .../api/WMGetAllResourcePlanRequest.java| 112 ++- .../metastore/api/WMGetResourcePlanRequest.java | 114 ++- .../api/WMGetTriggersForResourePlanRequest.java | 114 ++- .../hadoop/hive/metastore/api/WMMapping.java| 114 ++- .../hive/metastore/api/WMNullablePool.java | 114 ++- .../metastore/api/WMNullableResourcePlan.java | 114 ++- .../hadoop/hive/metastore/api/WMPool.java | 114 ++- .../hive/metastore/api/WMPoolTrigger.java | 112 ++- .../hive/metastore/api/WMResourcePlan.java | 114 ++- .../hadoop/hive/metastore/api/WMTrigger.java| 114 ++- .../api/WMValidateResourcePlanRequest.java | 114 ++- .../src/gen/thrift/gen-php/metastore/Types.php | 399 +- .../gen/thrift/gen-py/hive_metastore/ttypes.py | 263 ++- .../gen/thrift/gen-rb/hive_metastore_types.rb | 66 +- .../hive/metastore/HiveMetaStoreClient.java | 35 +- .../hadoop/hive/metastore/IMetaStoreClient.java | 20 +- .../src/main/thrift/hive_metastore.thrift | 17 + .../hadoop/hive/metastore/HiveMetaStore.java| 26 +- .../hadoop/hive/metastore/ObjectStore.java | 204 -- .../apache/hadoop/hive/metastore/RawStore.java | 28 +- .../hive/metastore/cache/CachedStore.java | 46 +- .../hive/metastore/model/MWMResourcePlan.java | 9 + .../src/main/resources/package.jdo | 4 + .../main/sql/derby/hive-schema-4.0.0.derby.sql | 4 +- .../sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql | 7 + .../main/sql/mssql/hive-schema-4.0.0.mssql.sql | 3 +- .../sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql | 6 + .../main/sql/mysql/hive-schema-4.0.0.mysql.sql | 3 +- .../sql/mysql/upgrade-3.2.0-to-4.0.0.mysql.sql | 7 + .../sql/oracle/hive-schema-4.0.0.oracle.sql | 3 +- .../oracle/upgrade-3.2.0-to-4.0.0.oracle.sql| 6 + .../sql/postgres/hive-schema-4.0.0.postgres.sql | 3 +- .../upgrade-3.2.0-to-4.0.0.postgres.sql | 7 + .../DummyRawStoreControlledCommit.java | 44 +- .../DummyRawStoreForJdoConnection.java | 22 +- .../HiveMetaStoreClientPreCatalog.java | 36 +- 51 files changed, 3603 insertions(+), 679 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 
917aaeb..102e6c6 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3206,6 +3206,10 @@ public class HiveConf extends Configuration { HIVE_SERVER2_TEZ_INTERACTIVE_QUEUE("hive.server2.tez.interactive.queue", "", "A single YARN queues to use for Hive Interactive sessions. When this is specified,\n" + "workload management is enabled and used for these sessions."), +HIVE_SERVER2_WM_NAMESPACE("hive.server2.wm.namespace", "default", +"The WM namespace to use when one metastore is used by multiple compute clusters each \n" + +"with their own workload management. The special value 'default' (the default) will \n" + +"also include any resource plans created before the namespaces were introduced."),
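A minimal sketch of reading the new setting, assuming the standard HiveConf accessor pattern:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class WmNamespaceSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Resolves to "default" unless the cluster overrides
        // hive.server2.wm.namespace, per the ConfVar added above.
        String ns = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_SERVER2_WM_NAMESPACE);
        System.out.println("WM namespace: " + ns);
      }
    }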
[2/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py index 03c2a4e..bdfb480 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -18352,6 +18352,7 @@ class WMResourcePlan: - status - queryParallelism - defaultPoolPath + - ns """ thrift_spec = ( @@ -18360,13 +18361,15 @@ class WMResourcePlan: (2, TType.I32, 'status', None, None, ), # 2 (3, TType.I32, 'queryParallelism', None, None, ), # 3 (4, TType.STRING, 'defaultPoolPath', None, None, ), # 4 +(5, TType.STRING, 'ns', None, None, ), # 5 ) - def __init__(self, name=None, status=None, queryParallelism=None, defaultPoolPath=None,): + def __init__(self, name=None, status=None, queryParallelism=None, defaultPoolPath=None, ns=None,): self.name = name self.status = status self.queryParallelism = queryParallelism self.defaultPoolPath = defaultPoolPath +self.ns = ns def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -18397,6 +18400,11 @@ class WMResourcePlan: self.defaultPoolPath = iprot.readString() else: iprot.skip(ftype) + elif fid == 5: +if ftype == TType.STRING: + self.ns = iprot.readString() +else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -18423,6 +18431,10 @@ class WMResourcePlan: oprot.writeFieldBegin('defaultPoolPath', TType.STRING, 4) oprot.writeString(self.defaultPoolPath) oprot.writeFieldEnd() +if self.ns is not None: + oprot.writeFieldBegin('ns', TType.STRING, 5) + oprot.writeString(self.ns) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -18438,6 +18450,7 @@ class WMResourcePlan: value = (value * 31) ^ hash(self.status) value = (value * 31) ^ hash(self.queryParallelism) value = (value * 31) ^ hash(self.defaultPoolPath) +value = (value * 31) ^ hash(self.ns) return value def __repr__(self): @@ -18460,6 +18473,7 @@ class WMNullableResourcePlan: - isSetQueryParallelism - defaultPoolPath - isSetDefaultPoolPath + - ns """ thrift_spec = ( @@ -18471,15 +18485,17 @@ class WMNullableResourcePlan: (5, TType.BOOL, 'isSetQueryParallelism', None, None, ), # 5 (6, TType.STRING, 'defaultPoolPath', None, None, ), # 6 (7, TType.BOOL, 'isSetDefaultPoolPath', None, None, ), # 7 +(8, TType.STRING, 'ns', None, None, ), # 8 ) - def __init__(self, name=None, status=None, queryParallelism=None, isSetQueryParallelism=None, defaultPoolPath=None, isSetDefaultPoolPath=None,): + def __init__(self, name=None, status=None, queryParallelism=None, isSetQueryParallelism=None, defaultPoolPath=None, isSetDefaultPoolPath=None, ns=None,): self.name = name self.status = status self.queryParallelism = queryParallelism self.isSetQueryParallelism = isSetQueryParallelism self.defaultPoolPath = defaultPoolPath self.isSetDefaultPoolPath = isSetDefaultPoolPath +self.ns = ns def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -18520,6 +18536,11 @@ class WMNullableResourcePlan: self.isSetDefaultPoolPath = 
iprot.readBool() else: iprot.skip(ftype) + elif fid == 8: +if ftype == TType.STRING: + self.ns = iprot.readString() +else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -18554,6 +18575,10 @@ class WMNullableResourcePlan: oprot.writeFieldBegin('isSetDefaultPoolPath', TType.BOOL, 7) oprot.writeBool(self.isSetDefaultPoolPath) oprot.writeFieldEnd() +if self.ns is not None: + oprot.writeFieldBegin('ns', TType.STRING, 8) + oprot.writeString(self.ns) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -18569,6 +18594,7 @@ class WMNullableResourcePlan: value = (value * 31) ^ hash(self.isSetQueryParallelism) value = (value * 31) ^ hash(self.defaultPoolPath) value = (value * 31) ^ hash(self.isSetDefaultPoolPath) +value = (value * 31) ^ hash(self.ns) return value def __repr__(self): @@ -18590,6
[3/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java -- diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java index 4621e10..15bb764 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/WMNullableResourcePlan.java @@ -44,6 +44,7 @@ import org.slf4j.LoggerFactory; private static final org.apache.thrift.protocol.TField IS_SET_QUERY_PARALLELISM_FIELD_DESC = new org.apache.thrift.protocol.TField("isSetQueryParallelism", org.apache.thrift.protocol.TType.BOOL, (short)5); private static final org.apache.thrift.protocol.TField DEFAULT_POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("defaultPoolPath", org.apache.thrift.protocol.TType.STRING, (short)6); private static final org.apache.thrift.protocol.TField IS_SET_DEFAULT_POOL_PATH_FIELD_DESC = new org.apache.thrift.protocol.TField("isSetDefaultPoolPath", org.apache.thrift.protocol.TType.BOOL, (short)7); + private static final org.apache.thrift.protocol.TField NS_FIELD_DESC = new org.apache.thrift.protocol.TField("ns", org.apache.thrift.protocol.TType.STRING, (short)8); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -57,6 +58,7 @@ import org.slf4j.LoggerFactory; private boolean isSetQueryParallelism; // optional private String defaultPoolPath; // optional private boolean isSetDefaultPoolPath; // optional + private String ns; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -69,7 +71,8 @@ import org.slf4j.LoggerFactory; QUERY_PARALLELISM((short)4, "queryParallelism"), IS_SET_QUERY_PARALLELISM((short)5, "isSetQueryParallelism"), DEFAULT_POOL_PATH((short)6, "defaultPoolPath"), -IS_SET_DEFAULT_POOL_PATH((short)7, "isSetDefaultPoolPath"); +IS_SET_DEFAULT_POOL_PATH((short)7, "isSetDefaultPoolPath"), +NS((short)8, "ns"); private static final Map byName = new HashMap(); @@ -96,6 +99,8 @@ import org.slf4j.LoggerFactory; return DEFAULT_POOL_PATH; case 7: // IS_SET_DEFAULT_POOL_PATH return IS_SET_DEFAULT_POOL_PATH; +case 8: // NS + return NS; default: return null; } @@ -140,7 +145,7 @@ import org.slf4j.LoggerFactory; private static final int __ISSETQUERYPARALLELISM_ISSET_ID = 1; private static final int __ISSETDEFAULTPOOLPATH_ISSET_ID = 2; private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.NAME,_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH}; + private static final _Fields optionals[] = {_Fields.NAME,_Fields.STATUS,_Fields.QUERY_PARALLELISM,_Fields.IS_SET_QUERY_PARALLELISM,_Fields.DEFAULT_POOL_PATH,_Fields.IS_SET_DEFAULT_POOL_PATH,_Fields.NS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); @@ -156,6 +161,8 @@ import org.slf4j.LoggerFactory; new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); tmpMap.put(_Fields.IS_SET_DEFAULT_POOL_PATH, new org.apache.thrift.meta_data.FieldMetaData("isSetDefaultPoolPath", org.apache.thrift.TFieldRequirementType.OPTIONAL, new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); +tmpMap.put(_Fields.NS, new org.apache.thrift.meta_data.FieldMetaData("ns", org.apache.thrift.TFieldRequirementType.OPTIONAL, +new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(WMNullableResourcePlan.class, metaDataMap); } @@ -180,6 +187,9 @@ import org.slf4j.LoggerFactory; this.defaultPoolPath = other.defaultPoolPath; } this.isSetDefaultPoolPath = other.isSetDefaultPoolPath; +if (other.isSetNs()) { + this.ns = other.ns; +} } public WMNullableResourcePlan deepCopy() { @@ -197,6 +207,7 @@ import org.slf4j.LoggerFactory;
[1/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Repository: hive Updated Branches: refs/heads/master b701720f5 -> 5258c67e9 http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java -- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 0755483..03e3a2d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -11593,13 +11593,15 @@ public class ObjectStore implements RawStore, Configurable { } else { rp = new MWMResourcePlan(rpName, null, Status.DISABLED); } +rp.setNs(resourcePlan.getNs()); try { openTransaction(); pm.makePersistent(rp); if (copyFromName != null) { -MWMResourcePlan copyFrom = getMWMResourcePlan(copyFromName, false); +String ns = getNsOrDefault(resourcePlan.getNs()); +MWMResourcePlan copyFrom = getMWMResourcePlan(copyFromName, ns, false); if (copyFrom == null) { - throw new NoSuchObjectException(copyFromName); + throw new NoSuchObjectException(copyFromName + " in " + ns); } copyRpContents(rp, copyFrom); } else { @@ -11627,6 +11629,7 @@ public class ObjectStore implements RawStore, Configurable { private void copyRpContents(MWMResourcePlan dest, MWMResourcePlan src) { dest.setQueryParallelism(src.getQueryParallelism()); +dest.setNs(src.getNs()); Map pools = new HashMap<>(); Map> triggersToPools = new HashMap<>(); for (MWMPool copyPool : src.getPools()) { @@ -11686,6 +11689,7 @@ public class ObjectStore implements RawStore, Configurable { } WMResourcePlan rp = new WMResourcePlan(); rp.setName(mplan.getName()); +rp.setNs(mplan.getNs()); rp.setStatus(WMResourcePlanStatus.valueOf(mplan.getStatus().name())); if (mplan.getQueryParallelism() != null) { rp.setQueryParallelism(mplan.getQueryParallelism()); @@ -11724,6 +11728,7 @@ public class ObjectStore implements RawStore, Configurable { assert mPool.getQueryParallelism() != null; result.setQueryParallelism(mPool.getQueryParallelism()); result.setSchedulingPolicy(mPool.getSchedulingPolicy()); +result.setNs(mPool.getResourcePlan().getNs()); return result; } @@ -11736,15 +11741,24 @@ public class ObjectStore implements RawStore, Configurable { if (mMapping.getOrdering() != null) { result.setOrdering(mMapping.getOrdering()); } +result.setNs(mMapping.getResourcePlan().getNs()); return result; } + private final String getNsOrDefault(String ns) { +// This is only needed for old clients not setting NS in requests. +// Not clear how to handle this... this is properly a HS2 config but metastore needs its default +// value for backward compat, and we don't want it configurable separately because it's also +// used in upgrade scripts, were it cannot be configured. + return normalizeIdentifier(ns == null ? 
"default" : ns); + } + @Override - public WMFullResourcePlan getResourcePlan(String name) throws NoSuchObjectException { + public WMFullResourcePlan getResourcePlan(String name, String ns) throws NoSuchObjectException { boolean commited = false; try { openTransaction(); - WMFullResourcePlan fullRp = fullFromMResourcePlan(getMWMResourcePlan(name, false)); + WMFullResourcePlan fullRp = fullFromMResourcePlan(getMWMResourcePlan(name, ns, false)); commited = commitTransaction(); return fullRp; } catch (InvalidOperationException e) { @@ -11755,12 +11769,12 @@ public class ObjectStore implements RawStore, Configurable { } } - private MWMResourcePlan getMWMResourcePlan(String name, boolean editCheck) + private MWMResourcePlan getMWMResourcePlan(String name, String ns, boolean editCheck) throws NoSuchObjectException, InvalidOperationException { -return getMWMResourcePlan(name, editCheck, true); +return getMWMResourcePlan(name, ns, editCheck, true); } - private MWMResourcePlan getMWMResourcePlan(String name, boolean editCheck, boolean mustExist) + private MWMResourcePlan getMWMResourcePlan(String name, String ns, boolean editCheck, boolean mustExist) throws NoSuchObjectException, InvalidOperationException { MWMResourcePlan resourcePlan; boolean commited = false; @@ -11768,18 +11782,16 @@ public class ObjectStore implements RawStore, Configurable { name = normalizeIdentifier(name); try { - openTransaction(); - query = pm.newQuery(MWMResourcePlan.class, "name == rpname"); -
[5/6] hive git commit: HIVE-20793 : add RP namespacing to workload management (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
http://git-wip-us.apache.org/repos/asf/hive/blob/5258c67e/ql/src/test/results/clientpositive/llap/resourceplan.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/resourceplan.q.out b/ql/src/test/results/clientpositive/llap/resourceplan.q.out index c11daf7..7fd7278 100644 --- a/ql/src/test/results/clientpositive/llap/resourceplan.q.out +++ b/ql/src/test/results/clientpositive/llap/resourceplan.q.out @@ -1838,14 +1838,14 @@ FROM POSTHOOK: type: CREATETABLE POSTHOOK: Output: SYS@PART_COL_STATS POSTHOOK: Output: database:sys -PREHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '3.1.0' AS `SCHEMA_VERSION`, - 'Hive release version 3.1.0' AS `VERSION_COMMENT` +PREHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '4.0.0' AS `SCHEMA_VERSION`, + 'Hive release version 4.0.0' AS `VERSION_COMMENT` PREHOOK: type: CREATEVIEW PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: SYS@VERSION PREHOOK: Output: database:sys -POSTHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '3.1.0' AS `SCHEMA_VERSION`, - 'Hive release version 3.1.0' AS `VERSION_COMMENT` +POSTHOOK: query: CREATE OR REPLACE VIEW `VERSION` AS SELECT 1 AS `VER_ID`, '4.0.0' AS `SCHEMA_VERSION`, + 'Hive release version 4.0.0' AS `VERSION_COMMENT` POSTHOOK: type: CREATEVIEW POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: SYS@VERSION @@ -2107,6 +2107,7 @@ POSTHOOK: Lineage: PARTITION_STATS_VIEW.total_size EXPRESSION [(partition_params POSTHOOK: Lineage: PARTITION_STATS_VIEW.transient_last_ddl_time EXPRESSION [(partition_params)partition_params.FieldSchema(name:param_key, type:string, comment:from deserializer), (partition_params)partition_params.FieldSchema(name:param_value, type:string, comment:from deserializer), ] PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_RESOURCEPLANS` ( `NAME` string, + `NS` string, `STATUS` string, `QUERY_PARALLELISM` int, `DEFAULT_POOL_PATH` string @@ -2117,6 +2118,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"STATUS\", \"WM_RESOURCEPLAN\".\"QUERY_PARALLELISM\", \"WM_POOL\".\"PATH\" @@ -2128,6 +2130,7 @@ PREHOOK: Output: SYS@WM_RESOURCEPLANS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_RESOURCEPLANS` ( `NAME` string, + `NS` string, `STATUS` string, `QUERY_PARALLELISM` int, `DEFAULT_POOL_PATH` string @@ -2138,6 +2141,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"STATUS\", \"WM_RESOURCEPLAN\".\"QUERY_PARALLELISM\", \"WM_POOL\".\"PATH\" @@ -2149,6 +2153,7 @@ POSTHOOK: Output: SYS@WM_RESOURCEPLANS POSTHOOK: Output: database:sys PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_TRIGGERS` ( `RP_NAME` string, + `NS` string, `NAME` string, `TRIGGER_EXPRESSION` string, `ACTION_EXPRESSION` string @@ -2159,6 +2164,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT r.\"NAME\" AS RP_NAME, + case when r.\"NS\" is null then 'default' else r.\"NS\" end, t.\"NAME\" AS NAME, \"TRIGGER_EXPRESSION\", \"ACTION_EXPRESSION\" @@ -2174,6 +2180,7 @@ PREHOOK: Output: SYS@WM_TRIGGERS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_TRIGGERS` ( `RP_NAME` string, + `NS` string, `NAME` string, `TRIGGER_EXPRESSION` string, `ACTION_EXPRESSION` string @@ -2184,6 +2191,7 @@ TBLPROPERTIES ( 
"hive.sql.query" = "SELECT r.\"NAME\" AS RP_NAME, + case when r.\"NS\" is null then 'default' else r.\"NS\" end, t.\"NAME\" AS NAME, \"TRIGGER_EXPRESSION\", \"ACTION_EXPRESSION\" @@ -2199,6 +2207,7 @@ POSTHOOK: Output: SYS@WM_TRIGGERS POSTHOOK: Output: database:sys PREHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_POOLS` ( `RP_NAME` string, + `NS` string, `PATH` string, `ALLOC_FRACTION` double, `QUERY_PARALLELISM` int, @@ -2210,6 +2219,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS, \"WM_POOL\".\"PATH\", \"WM_POOL\".\"ALLOC_FRACTION\", \"WM_POOL\".\"QUERY_PARALLELISM\", @@ -2226,6 +2236,7 @@ PREHOOK: Output: SYS@WM_POOLS PREHOOK: Output: database:sys POSTHOOK: query: CREATE EXTERNAL TABLE IF NOT EXISTS `WM_POOLS` ( `RP_NAME` string, + `NS` string, `PATH` string, `ALLOC_FRACTION` double, `QUERY_PARALLELISM` int, @@ -2237,6 +2248,7 @@ TBLPROPERTIES ( "hive.sql.query" = "SELECT \"WM_RESOURCEPLAN\".\"NAME\", + case when \"WM_RESOURCEPLAN\".\"NS\" is null then 'default' else \"WM_RESOURCEPLAN\".\"NS\" end AS NS,
[55/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b4302bb7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b4302bb7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b4302bb7 Branch: refs/heads/master-tez092 Commit: b4302bb7ad967f15ca1b708685b2ac669e3cf037 Parents: b829955 Author: Anishek Agarwal Authored: Mon Oct 22 13:51:43 2018 +0530 Committer: Anishek Agarwal Committed: Mon Oct 22 13:51:43 2018 +0530 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../listener/DbNotificationListener.java| 182 +--- .../json/JSONCreateFunctionMessage.java | 3 +- .../messaging/json/JSONDropFunctionMessage.java | 3 +- .../messaging/json/JSONMessageFactory.java | 39 +- .../listener/TestDbNotificationListener.java| 14 +- .../TestReplAcidTablesWithJsonMessage.java | 43 ++ ...eplAcrossInstancesWithJsonMessageFormat.java | 45 ++ ...ncrementalLoadAcidTablesWithJsonMessage.java | 46 ++ .../ql/parse/TestReplWithJsonMessageFormat.java | 39 ++ .../hive/ql/parse/TestReplicationScenarios.java | 82 ++-- .../TestReplicationScenariosAcidTables.java | 61 +-- ...TestReplicationScenariosAcrossInstances.java | 103 +++-- ...ationScenariosIncrementalLoadAcidTables.java | 55 ++- .../hadoop/hive/ql/parse/WarehouseInstance.java | 2 +- .../ql/cache/results/QueryResultsCache.java | 14 +- .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 5 +- .../repl/bootstrap/load/LoadConstraint.java | 4 +- .../parse/repl/dump/events/AbortTxnHandler.java | 12 +- .../events/AbstractConstraintEventHandler.java | 3 +- .../repl/dump/events/AbstractEventHandler.java | 32 +- .../repl/dump/events/AddForeignKeyHandler.java | 12 +- .../events/AddNotNullConstraintHandler.java | 12 +- .../repl/dump/events/AddPartitionHandler.java | 10 +- .../repl/dump/events/AddPrimaryKeyHandler.java | 12 +- .../dump/events/AddUniqueConstraintHandler.java | 13 +- .../repl/dump/events/AllocWriteIdHandler.java | 12 +- .../repl/dump/events/AlterDatabaseHandler.java | 12 +- .../repl/dump/events/AlterPartitionHandler.java | 21 +- .../repl/dump/events/AlterTableHandler.java | 18 +- .../repl/dump/events/CommitTxnHandler.java | 28 +- .../repl/dump/events/CreateDatabaseHandler.java | 13 +- .../repl/dump/events/CreateFunctionHandler.java | 13 +- .../repl/dump/events/CreateTableHandler.java| 15 +- .../parse/repl/dump/events/DefaultHandler.java | 9 + .../repl/dump/events/DropConstraintHandler.java | 13 +- .../repl/dump/events/DropDatabaseHandler.java | 12 +- .../repl/dump/events/DropFunctionHandler.java | 12 +- .../repl/dump/events/DropPartitionHandler.java | 12 +- .../repl/dump/events/DropTableHandler.java | 12 +- .../repl/dump/events/EventHandlerFactory.java | 44 +- .../parse/repl/dump/events/InsertHandler.java | 22 +- .../parse/repl/dump/events/OpenTxnHandler.java | 12 +- .../repl/dump/io/ConstraintsSerializer.java | 10 +- .../load/message/AbstractMessageHandler.java| 4 +- .../dump/events/TestEventHandlerFactory.java| 7 +- .../hive/metastore/conf/MetastoreConf.java | 2 +- .../hive/metastore/messaging/EventMessage.java | 64 +-- .../metastore/messaging/MessageBuilder.java | 425 ++ .../metastore/messaging/MessageEncoder.java | 27 ++ .../metastore/messaging/MessageFactory.java | 367 +++- .../metastore/messaging/MessageSerializer.java | 24 ++ .../event/filters/DatabaseAndTableFilter.java | 8 +- .../messaging/json/JSONAcidWriteMessage.java| 9 +- 
.../json/JSONAddForeignKeyMessage.java | 5 +- .../json/JSONAddNotNullConstraintMessage.java | 5 +- .../messaging/json/JSONAddPartitionMessage.java | 11 +- .../json/JSONAddPrimaryKeyMessage.java | 5 +- .../json/JSONAddUniqueConstraintMessage.java| 5 +- .../messaging/json/JSONAlterCatalogMessage.java | 9 +- .../json/JSONAlterDatabaseMessage.java | 9 +- .../json/JSONAlterPartitionMessage.java | 15 +- .../messaging/json/JSONAlterTableMessage.java | 9 +- .../messaging/json/JSONCommitTxnMessage.java| 5 +- .../json/JSONCreateDatabaseMessage.java | 5 +- .../json/JSONCreateFunctionMessage.java | 5 +- .../messaging/json/JSONCreateTableMessage.java | 5 +- .../json/JSONDropPartitionMessage.java | 5 +- .../messaging/json/JSONDropTableMessage.java| 5 +- .../messaging/json/JSONInsertMessage.java | 9 +- .../messaging/json/JSONMessageEncoder.java | 70 +++ .../messaging/json/JSONMessageFactory.java | 432 --- .../messaging/json/gzip/DeSerializer.java | 181
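The file list above introduces MessageEncoder/MessageSerializer plus a gzip codec for notification messages. A minimal sketch of the compress-then-encode idea, assuming the serializer's goal is keeping large JSON payloads storable in a string column; this is not the committed gzip Serializer:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.Base64;
    import java.util.zip.GZIPOutputStream;

    public class GzipMessageSketch {
      static String encode(String jsonMessage) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (GZIPOutputStream gz = new GZIPOutputStream(bos)) {
          gz.write(jsonMessage.getBytes(StandardCharsets.UTF_8));
        }
        // Base64 keeps the compressed bytes representable as a string value.
        return Base64.getEncoder().encodeToString(bos.toByteArray());
      }

      public static void main(String[] args) throws IOException {
        System.out.println(encode("{\"eventType\":\"CREATE_TABLE\"}"));
      }
    }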
[40/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out new file mode 100644 index 000..aa04df8 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query70.q.out @@ -0,0 +1,119 @@ +PREHOOK: query: explain cbo +select +sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from +store_sales + ,date_dim d1 + ,store + where +d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select +sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from +store_sales + ,date_dim d1 + ,store + where +d1.d_month_seq between 1212 and 1212+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1212 and 1212+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank_within_parent=[$4]) + HiveSortLimit(sort0=[$3], sort1=[$5], sort2=[$4], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], fetch=[100]) +HiveProject(total_sum=[$2], s_state=[$0], s_county=[$1], lochierarchy=[+(grouping($3, 1), grouping($3, 0))], rank_within_parent=[rank() OVER (PARTITION BY +(grouping($3, 1), grouping($3, 0)), CASE(=(grouping($3, 0), 0), $0, null) ORDER BY $2 DESC NULLS LAST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING)], (tok_function when (= (tok_table_or_col lochierarchy) 0) (tok_table_or_col s_state))=[CASE(=(+(grouping($3, 1), grouping($3, 0)), 0), $0, null)]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) +HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], 
GROUPING__ID=[GROUPING__ID()]) + HiveProject($f0=[$7], $f1=[$6], $f2=[$2]) +HiveJoin(condition=[=($7, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_store_sk=[$7], ss_net_profit=[$22]) +HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) +
[16/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out new file mode 100644 index 000..324eef2 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -0,0 +1,555 @@ +PREHOOK: query: explain +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from +web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 1 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) +,date_dim + where + cr.cr_return_amount > 1 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) +,date_dim + where + sr.sr_return_amt > 1 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 
10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input:
[21/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out new file mode 100644 index 000..c7fd970 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query28.q.out @@ -0,0 +1,296 @@ +Warning: Shuffle Join MERGEJOIN[102][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[103][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[104][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[105][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[106][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 8' is a cross product +PREHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP +,count(ss_list_price) B1_CNT +,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 +and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP +,count(ss_list_price) B2_CNT +,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 +and (ss_list_price between 91 and 91+10 + or ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP +,count(ss_list_price) B3_CNT +,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 +and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP +,count(ss_list_price) B4_CNT +,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 +and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP +,count(ss_list_price) B5_CNT +,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 +and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 14180 and 14180+1000 + or ss_wholesale_cost between 38 and 38+20)) B5, + (select avg(ss_list_price) B6_LP +,count(ss_list_price) B6_CNT +,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 +and (ss_list_price between 28 and 28+10 + or ss_coupon_amt between 2513 and 2513+1000 + or ss_wholesale_cost between 42 and 42+20)) B6 +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from (select avg(ss_list_price) B1_LP +,count(ss_list_price) B1_CNT +,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 +and (ss_list_price between 11 and 11+10 + or ss_coupon_amt between 460 and 460+1000 + or ss_wholesale_cost between 14 and 14+20)) B1, + (select avg(ss_list_price) B2_LP +,count(ss_list_price) B2_CNT +,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 +and (ss_list_price between 91 and 91+10 + or 
ss_coupon_amt between 1430 and 1430+1000 + or ss_wholesale_cost between 32 and 32+20)) B2, + (select avg(ss_list_price) B3_LP +,count(ss_list_price) B3_CNT +,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 +and (ss_list_price between 66 and 66+10 + or ss_coupon_amt between 920 and 920+1000 + or ss_wholesale_cost between 4 and 4+20)) B3, + (select avg(ss_list_price) B4_LP +,count(ss_list_price) B4_CNT +,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 +and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 3054 and 3054+1000 + or ss_wholesale_cost between 80 and 80+20)) B4, + (select avg(ss_list_price) B5_LP +
[33/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out new file mode 100644 index 000..8f2f79f --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query42.q.out @@ -0,0 +1,68 @@ +PREHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=12 + and dt.d_year=1998 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(fetch=[100]) + HiveProject(d_year=[CAST(1998):INTEGER], i_category_id=[$0], i_category=[$1], _o__c3=[$2]) +HiveSortLimit(sort0=[$3], sort1=[$0], sort2=[$1], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC]) + HiveProject(i_category_id=[$0], i_category=[$1], _o__c3=[$2], (tok_function sum (tok_table_or_col ss_ext_sales_price))=[$2]) +HiveAggregate(group=[{5, 6}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15]) +HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) +HiveFilter(condition=[AND(=($8, 12), =($6, 1998))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[dt]) +HiveProject(i_item_sk=[$0], i_category_id=[$11], i_category=[$12]) + HiveFilter(condition=[=($20, 1)]) +HiveTableScan(table=[[default, item]], table:alias=[item]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out new file mode 100644 index 000..6b21ee4 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query43.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: explain cbo +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when 
(d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK:
[58/75] [abbrv] hive git commit: HIVE-20542: Incremental REPL DUMP progress information log message is incorrect (Ashutosh Bapat, reviewed by Sankar Hariappan)
HIVE-20542: Incremental REPL DUMP progress information log message is incorrect (Ashutosh Bapat, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0d4d03fd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0d4d03fd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0d4d03fd Branch: refs/heads/master-tez092 Commit: 0d4d03fd1daeb3b75182b73f7b40de7a3b7d48ea Parents: 7765e90 Author: Ashutosh Bapat Authored: Tue Oct 23 17:56:47 2018 +0530 Committer: Sankar Hariappan Committed: Tue Oct 23 17:56:47 2018 +0530 -- .../listener/DbNotificationListener.java| 75 ++- .../listener/TestDbNotificationListener.java| 59 ++ .../TestReplicationScenariosAcidTables.java | 6 +- .../hadoop/hive/ql/parse/WarehouseInstance.java | 17 ++ .../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 9 +- .../hive/ql/metadata/events/EventUtils.java | 16 +- .../api/NotificationEventsCountRequest.java | 206 ++- .../src/gen/thrift/gen-php/metastore/Types.php | 46 + .../gen/thrift/gen-py/hive_metastore/ttypes.py | 28 ++- .../gen/thrift/gen-rb/hive_metastore_types.rb | 6 +- .../src/main/thrift/hive_metastore.thrift | 4 +- .../hadoop/hive/metastore/ObjectStore.java | 58 +- .../hadoop/hive/metastore/txn/TxnHandler.java | 2 +- 13 files changed, 503 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/0d4d03fd/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java -- diff --git a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java index c23aab2..fe101d3 100644 --- a/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java +++ b/hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/listener/DbNotificationListener.java @@ -24,6 +24,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Arrays; +import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -939,19 +940,71 @@ public class DbNotificationListener extends TransactionalMetaStoreEventListener long nextNLId = getNextNLId(stmt, sqlGenerator, "org.apache.hadoop.hive.metastore.model.MNotificationLog"); - String insertVal = "(" + nextNLId + "," + nextEventId + "," + now() + ", ?, ?," + - quoteString(" ") + ",?, ?)"; + String insertVal; + String columns; + List params = new ArrayList(); + + // Construct the values string, parameters and column string step by step simultaneously so + // that the positions of columns and of their corresponding values do not go out of sync. 
+ + // Notification log id + columns = "\"NL_ID\""; + insertVal = "" + nextNLId; + + // Event id + columns = columns + ", \"EVENT_ID\""; + insertVal = insertVal + "," + nextEventId; + + // Event time + columns = columns + ", \"EVENT_TIME\""; + insertVal = insertVal + "," + now(); + + // Event type + columns = columns + ", \"EVENT_TYPE\""; + insertVal = insertVal + ", ?"; + params.add(event.getEventType()); + + // Message + columns = columns + ", \"MESSAGE\""; + insertVal = insertVal + ", ?"; + params.add(event.getMessage()); + + // Message format + columns = columns + ", \"MESSAGE_FORMAT\""; + insertVal = insertVal + ", ?"; + params.add(event.getMessageFormat()); + + // Database name, optional + String dbName = event.getDbName(); + if (dbName != null) { +assert dbName.equals(dbName.toLowerCase()); +columns = columns + ", \"DB_NAME\""; +insertVal = insertVal + ", ?"; +params.add(dbName); + } - s = "insert into \"NOTIFICATION_LOG\" (\"NL_ID\", \"EVENT_ID\", \"EVENT_TIME\", " + - " \"EVENT_TYPE\", \"DB_NAME\", " + - " \"TBL_NAME\", \"MESSAGE\", \"MESSAGE_FORMAT\") VALUES " + insertVal; - List params = Arrays.asList( - event.getEventType(), event.getDbName(), event.getMessage(), event.getMessageFormat()); - pst = sqlGenerator.prepareStmtWithParameters(dbConn, s, params); + // Table name, optional + String tableName = event.getTableName(); + if (tableName != null) { +assert tableName.equals(tableName.toLowerCase()); +columns = columns + ", \"TBL_NAME\""; +insertVal = insertVal +
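The comment in the hunk above states the invariant the rewrite protects: every field appends its column name, its VALUES entry, and (for placeholders) its bind parameter in a single step, so the three can never drift out of position when an optional column such as DB_NAME or TBL_NAME is skipped. A minimal, self-contained sketch of that idiom, assuming illustrative class and method names rather than the actual metastore code:

import java.util.ArrayList;
import java.util.List;

// Sketch of the "grow columns, values and parameters together" pattern from
// the hunk above; only the structure mirrors the actual change.
public class InsertStatementBuilder {
    private final StringBuilder columns = new StringBuilder();
    private final StringBuilder values = new StringBuilder();
    private final List<String> params = new ArrayList<>();

    // Numeric/literal fields are inlined directly into the VALUES clause.
    public void addLiteral(String column, String literal) {
        appendColumn(column);
        appendValue(literal);
    }

    // String fields become '?' placeholders with a matching bind parameter.
    // Optional fields are skipped when null, which removes the column and its
    // placeholder at the same time.
    public void addParameter(String column, String value) {
        if (value == null) {
            return;
        }
        appendColumn(column);
        appendValue("?");
        params.add(value);
    }

    public String toSql(String table) {
        return "insert into \"" + table + "\" (" + columns + ") values (" + values + ")";
    }

    public List<String> getParams() {
        return params;
    }

    private void appendColumn(String column) {
        if (columns.length() > 0) {
            columns.append(", ");
        }
        columns.append('"').append(column).append('"');
    }

    private void appendValue(String value) {
        if (values.length() > 0) {
            values.append(", ");
        }
        values.append(value);
    }
}

Built this way, dropping an optional column cannot leave a stray '?' behind, which is exactly the positional mismatch the original fixed-column insert statement was vulnerable to.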
[53/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b4302bb7/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java -- diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java new file mode 100644 index 000..787b9b2 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/MessageBuilder.java @@ -0,0 +1,425 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.messaging; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.regex.PatternSyntaxException; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.Iterables; +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Catalog; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Function; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SQLForeignKey; +import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint; +import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey; +import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TxnToWriteId; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.events.AcidWriteEvent; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAbortTxnMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAcidWriteMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddForeignKeyMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddNotNullConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddPrimaryKeyMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAddUniqueConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAllocWriteIdMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterCatalogMessage; +import 
org.apache.hadoop.hive.metastore.messaging.json.JSONAlterDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONAlterTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCommitTxnMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateCatalogMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateFunctionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONCreateTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropCatalogMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropConstraintMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropDatabaseMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropFunctionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropPartitionMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONDropTableMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONInsertMessage; +import org.apache.hadoop.hive.metastore.messaging.json.JSONOpenTxnMessage; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.thrift.TBase; +import
[63/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/query24.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 902358a..43ece85 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[301][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -23,7 +23,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -79,7 +80,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name @@ -114,234 +116,242 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Map 24 <- Reducer 20 (BROADCAST_EDGE) -Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) -Reducer 11 <- Map 9 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 12 <- Map 22 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 23 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) -Reducer 19 <- Map 21 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 24 <- Reducer 19 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) +Reducer 10 <- Map 23 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 15 <- Map 22 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 19 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 20 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 23 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- 
Map 7 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 - File Output Operator [FS_91] -Select Operator [SEL_90] (rows=78393744 width=380) + Reducer 6 + File Output Operator [FS_94] +Select Operator [SEL_93] (rows=1313165 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=78393744 width=492) + Filter Operator [FIL_92] (rows=1313165 width=492) predicate:(_col3 > _col4) -Merge Join Operator [MERGEJOIN_290] (rows=235181232 width=492) +Merge Join Operator [MERGEJOIN_301] (rows=3939496 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] -<-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_345] -Select Operator [SEL_344] (rows=1 width=112) +<-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_351] +Select Operator [SEL_350] (rows=1 width=112) Output:["_col0"] - Group By Operator [GBY_343] (rows=1 width=120) + Group By Operator [GBY_349] (rows=1 width=120)
[18/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out new file mode 100644 index 000..812928b --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query39.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy +,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk +and inv_warehouse_sk = w_warehouse_sk +and inv_date_sk = d_date_sk +and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov +,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov +,inv2.d_moy,inv2.mean, inv2.cov +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@inventory +PREHOOK: Input: default@item +PREHOOK: Input: default@warehouse +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy +,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk +and inv_warehouse_sk = w_warehouse_sk +and inv_date_sk = d_date_sk +and d_year =1999 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov +,inv2.w_warehouse_sk,inv2.i_item_sk,inv2.d_moy,inv2.mean, inv2.cov +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=4 + and inv2.d_moy=4+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov +,inv2.d_moy,inv2.mean, inv2.cov +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@inventory +POSTHOOK: Input: default@item +POSTHOOK: Input: default@warehouse +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (ONE_TO_ONE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 6 vectorized + File Output Operator [FS_166] +Select Operator [SEL_165] (rows=859 width=56) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] +<-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_44] +Select Operator [SEL_43] (rows=859 width=48) + Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=859 width=40) +Conds:RS_160._col0, _col1=RS_164._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col6","_col7"] + <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized +FORWARD [RS_160] + PartitionCols:_col0, _col1 + Select Operator [SEL_159] (rows=859 width=24) +Output:["_col0","_col1","_col2","_col3"] +Filter Operator [FIL_158] (rows=859 width=40) + predicate:CASE WHEN (((UDFToDouble(_col2) / _col3) = 0)) THEN (false) ELSE (((power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (UDFToDouble(_col2) / _col3)) > 1.0D)) END + Group By Operator [GBY_157] (rows=1719 width=40) +
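The filter predicate in this plan is Hive's expansion of the query's case mean when 0 then 0 else stdev/mean end > 1 condition into raw aggregates. Reading the columns by the shape of the expression (an assumption, since the plan does not label them: _col3 as the count n, _col5 as the running sum, _col4 as the running sum of squares), the power(..., 0.5) term is the usual sample-standard-deviation identity:

\[
\operatorname{stddev\_samp}(x)
  \;=\; \sqrt{\frac{\sum x^{2} \;-\; \bigl(\sum x\bigr)^{2}/n}{\,n-1\,}},
\qquad
\text{filter: } \frac{\operatorname{stddev\_samp}(x)}{\operatorname{avg}(x)} > 1,
\]

with the inner CASE guard mapping n = 1 to null so the division by n - 1 is never evaluated, exactly as the predicate above does.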
[05/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out new file mode 100644 index 000..93bce2e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -0,0 +1,397 @@ +PREHOOK: query: explain +select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + (SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '89436','30868','65085','22977','83927','77557', + '58429','40697','80614','10502','32779', + '91137','61265','98294','17921','18427', + '21203','59362','87291','84093','21505', + '17184','10866','67898','25797','28055', + '18377','80332','74535','21757','29742', + '90885','29898','17819','40811','25990', + '47513','89531','91068','10391','18846', + '99223','82637','41368','83658','86199', + '81625','26696','89338','88425','32200', + '81427','19053','77471','36610','99823', + '43276','41249','48584','83550','82276', + '18842','78890','14090','38123','40936', + '34425','19850','43286','80072','79188', + '54191','11395','50497','84861','90733', + '21068','57666','37119','25004','57835', + '70067','62878','95806','19303','18840', + '19124','29785','16737','16022','49613', + '89977','68310','60069','98360','48649', + '39050','41793','25002','27413','39736', + '47208','16515','94808','57648','15009', + '80015','42961','63982','21744','71853', + '81087','67468','34175','64008','20261', + '11201','51799','48043','45645','61163', + '48375','36447','57042','21218','41100', + '89951','22745','35851','83326','61125', + '78298','80752','49858','52940','96976', + '63792','11376','53582','18717','90226', + '50530','94203','99447','27670','96577', + '57856','56372','16165','23427','54561', + '28806','44439','22926','30123','61451', + '92397','56979','92309','70873','13355', + '21801','46346','37562','56458','28286', + '47306','99555','69399','26234','47546', + '49661','88601','35943','39936','25632', + '24611','44166','56648','30379','59785', + '0','14329','93815','52226','71381', + '13842','25612','63294','14664','21077', + '82626','18799','60915','81020','56447', + '76619','11433','13414','42548','92713', + '70467','30884','47484','16072','38936', + '13036','88376','45539','35901','19506', + '65690','73957','71850','49231','14276', + '20005','18384','76615','11635','38177', + '55607','41369','95447','58581','58149', + '91946','33790','76232','75692','95464', + '22246','51061','56692','53121','77209', + '15482','10688','14868','45907','73520', + '72666','25734','17959','24677','66446', + '94627','53535','15560','41967','69297', + '11929','59403','33283','52232','57350', + '43933','40921','36635','10827','71286', + '19736','80619','25251','95042','15526', + '36496','55854','49124','81980','35375', + '49157','63512','28944','14946','36503', + '54010','18767','23969','43905','66979', + '33113','21286','58471','59080','13395', + '79144','70373','67031','38360','26705', + '50906','52406','26066','73146','15884', + '31897','30045','61068','45550','92454', + '13376','14354','19770','22928','97790', + '50723','46081','30202','14410','20223', +
[12/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out new file mode 100644 index 000..d9543ad --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query61.q.out @@ -0,0 +1,388 @@ +Warning: Shuffle Join MERGEJOIN[266][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +PREHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales +,store +,promotion +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales +,store +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@promotion +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select sum(ss_ext_sales_price) promotions + from store_sales +,store +,promotion +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales +,store +,date_dim +,customer +,customer_address +,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -7 + and i_category = 'Electronics' + and s_gmt_offset = -7 + and d_year = 1999 + and d_moy = 11) all_sales +order by promotions, total +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@promotion +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 12 <- Reducer 18 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE), Reducer 27 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 30 <- Reducer 10 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE), Reducer 25 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE) +Reducer 10 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 26 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Map 29 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 17 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) +Reducer 20 <- Map 23 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 23 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Map 26 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 16
[17/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out new file mode 100644 index 000..495b6bd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: explain +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select s_store_name, s_store_id, +sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, +sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, +sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, +sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, +sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, +sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, +sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -6 and + d_year = 1998 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 5 vectorized + File Output Operator [FS_79] +Limit [LIM_78] (rows=100 width=972) + Number of rows:100 + Select Operator [SEL_77] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Reducer 4 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] +SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + Group By Operator [GBY_17] (rows=142538 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 +Top N Key Operator [TNK_33] (rows=37536846 width=257) + keys:_col0, _col1,sort order:++,top n:100 + Select Operator [SEL_15] (rows=37536846 width=257) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] +Merge Join Operator [MERGEJOIN_55] (rows=37536846 width=257) +
[48/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query67.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query67.q b/ql/src/test/queries/clientpositive/perf/cbo_query67.q new file mode 100644 index 000..5781aac --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query67.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query67.tpl and seed 1819994127 +explain cbo +select * +from (select i_category +,i_class +,i_brand +,i_product_name +,d_year +,d_qoy +,d_moy +,s_store_id +,sumsales +,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales +from store_sales +,date_dim +,store +,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1212 and 1212+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category +,i_class +,i_brand +,i_product_name +,d_year +,d_qoy +,d_moy +,s_store_id +,sumsales +,rk +limit 100; + +-- end query 1 in stream 0 using template query67.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query68.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query68.q b/ql/src/test/queries/clientpositive/perf/cbo_query68.q new file mode 100644 index 000..520b9d7 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query68.q @@ -0,0 +1,44 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query68.tpl and seed 803547492 +explain cbo +select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk +and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk +and store_sales.ss_addr_sk = customer_address.ca_address_sk +and date_dim.d_dom between 1 and 2 +and (household_demographics.hd_dep_count = 2 or + household_demographics.hd_vehicle_count= 1) +and date_dim.d_year in (1998,1998+1,1998+2) +and store.s_city in ('Cedar Grove','Wildwood') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100; + +-- end query 1 in stream 0 using template query68.tpl http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/queries/clientpositive/perf/cbo_query69.q -- diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query69.q b/ql/src/test/queries/clientpositive/perf/cbo_query69.q new file mode 100644 index 000..01183fb --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/cbo_query69.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +-- start query 1 in stream 0 using template query69.tpl and seed 797269820 +explain cbo +select + 
cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('CO','IL','MN') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and +ss_sold_date_sk = d_date_sk and +
[43/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out new file mode 100644 index 000..9c31d61 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query49.q.out @@ -0,0 +1,330 @@ +PREHOOK: query: explain cbo +select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as dec(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as dec(15,4) )) as currency_ratio + from +web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + wr.wr_return_amt > 1 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as dec(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as dec(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) +,date_dim + where + cr.cr_return_amount > 1 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select +item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as dec(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as dec(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as dec(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) +,date_dim + where + sr.sr_return_amt > 1 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2000 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank 
<= 10 + ) + order by 1,4,5 + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input:
[28/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out new file mode 100644 index 000..ee94ea3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query83.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, +sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, +sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, +sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@web_returns +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with sr_items as + (select i_item_id item_id, +sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, +sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, +sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('1998-01-02','1998-10-15','1998-11-10'))) + and wr_returned_date_sk = d_date_sk + group by
i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@web_returns
[24/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out new file mode 100644 index 000..b41b4e3 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2000 + group by ca_zip + order by ca_zip + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 7 <- Reducer 10 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 5 vectorized + File Output Operator [FS_97] +Limit [LIM_96] (rows=100 width=201) + Number of rows:100 + Select Operator [SEL_95] (rows=2555 width=201) +Output:["_col0","_col1"] + <-Reducer 4 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=2555 width=201) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] +SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=43435 width=201) + Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 +Top N Key Operator [TNK_43] (rows=20154874 width=205) + keys:_col3,sort order:+,top n:100 + Select Operator [SEL_22] (rows=20154874 width=205) +Output:["_col3","_col8"] +Filter Operator [FIL_21] (rows=20154874 width=205) + predicate:(_col4 or _col5 or _col9) + Merge Join Operator [MERGEJOIN_76] (rows=20154874 width=205) + Conds:RS_18._col0=RS_19._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] + <-Reducer 2 [SIMPLE_EDGE] +SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_74] (rows=8000 width=101) + Conds:RS_79._col1=RS_81._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_79] + PartitionCols:_col1 + Select Operator [SEL_78] (rows=8000 width=8) +Output:["_col0","_col1"] +Filter Operator [FIL_77] (rows=8000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=8000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] +
[01/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master-tez092 8151911b4 -> c55347d52 http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out new file mode 100644 index 000..673050e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query89.q.out @@ -0,0 +1,178 @@ +PREHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and +((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') +)) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (2000) and +((i_category in ('Home','Books','Electronics') and + i_class in ('wallpaper','parenting','musical') + ) + or (i_category in ('Shoes','Jewelry','Men') and + i_class in ('womens','birdal','pants') +)) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 7 vectorized + File Output Operator [FS_115] +Limit [LIM_114] (rows=100 width=801) + Number of rows:100 + Select Operator [SEL_113] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Reducer 6 [SIMPLE_EDGE] vectorized +SHUFFLE [RS_112] + Select Operator [SEL_111] (rows=4804228 width=801) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] +Filter Operator [FIL_110] (rows=4804228 width=689) + predicate:CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END + Select Operator [SEL_109] (rows=9608456 width=577) + Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] +PTF Operator [PTF_108] (rows=9608456 width=577) + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] + Select Operator [SEL_107] (rows=9608456 width=577) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] +
[07/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out new file mode 100644 index 000..fee4e83 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query75.q.out @@ -0,0 +1,662 @@ +PREHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number +AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk +JOIN date_dim ON d_date_sk=ss_sold_date_sk +LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number +AND ss_item_sk=sr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number +AND ws_item_sk=wr_item_sk) + WHERE i_category='Sports') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON 
i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number +AND cs_item_sk=cr_item_sk) + WHERE i_category='Sports' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id +
[50/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index ff9f758..da2091a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -1732,7 +1732,104 @@ spark.only.query.negative.files=spark_job_max_tasks.q,\ spark_submit_negative_executor_memory.q spark.perf.disabled.query.files=query14.q,\ - query64.q + query64.q,\ + cbo_query1.q,\ + cbo_query10.q,\ + cbo_query11.q,\ + cbo_query12.q,\ + cbo_query13.q,\ + cbo_query14.q,\ + cbo_query15.q,\ + cbo_query16.q,\ + cbo_query17.q,\ + cbo_query18.q,\ + cbo_query19.q,\ + cbo_query2.q,\ + cbo_query20.q,\ + cbo_query21.q,\ + cbo_query22.q,\ + cbo_query23.q,\ + cbo_query24.q,\ + cbo_query25.q,\ + cbo_query26.q,\ + cbo_query27.q,\ + cbo_query28.q,\ + cbo_query29.q,\ + cbo_query3.q,\ + cbo_query30.q,\ + cbo_query31.q,\ + cbo_query32.q,\ + cbo_query33.q,\ + cbo_query34.q,\ + cbo_query35.q,\ + cbo_query36.q,\ + cbo_query37.q,\ + cbo_query38.q,\ + cbo_query39.q,\ + cbo_query4.q,\ + cbo_query40.q,\ + cbo_query42.q,\ + cbo_query43.q,\ + cbo_query44.q,\ + cbo_query45.q,\ + cbo_query46.q,\ + cbo_query47.q,\ + cbo_query48.q,\ + cbo_query49.q,\ + cbo_query5.q,\ + cbo_query50.q,\ + cbo_query51.q,\ + cbo_query52.q,\ + cbo_query53.q,\ + cbo_query54.q,\ + cbo_query55.q,\ + cbo_query56.q,\ + cbo_query57.q,\ + cbo_query58.q,\ + cbo_query59.q,\ + cbo_query6.q,\ + cbo_query60.q,\ + cbo_query61.q,\ + cbo_query63.q,\ + cbo_query64.q,\ + cbo_query65.q,\ + cbo_query66.q,\ + cbo_query67.q,\ + cbo_query68.q,\ + cbo_query69.q,\ + cbo_query7.q,\ + cbo_query70.q,\ + cbo_query71.q,\ + cbo_query72.q,\ + cbo_query73.q,\ + cbo_query74.q,\ + cbo_query75.q,\ + cbo_query76.q,\ + cbo_query77.q,\ + cbo_query78.q,\ + cbo_query79.q,\ + cbo_query8.q,\ + cbo_query80.q,\ + cbo_query81.q,\ + cbo_query82.q,\ + cbo_query83.q,\ + cbo_query84.q,\ + cbo_query85.q,\ + cbo_query86.q,\ + cbo_query87.q,\ + cbo_query88.q,\ + cbo_query89.q,\ + cbo_query9.q,\ + cbo_query90.q,\ + cbo_query91.q,\ + cbo_query92.q,\ + cbo_query93.q,\ + cbo_query94.q,\ + cbo_query95.q,\ + cbo_query96.q,\ + cbo_query97.q,\ + cbo_query98.q,\ + cbo_query99.q druid.query.files=druidmini_test1.q,\ druidmini_test_ts.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java -- diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 5e1e88e..afff0df 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -280,7 +280,7 @@ public class CliConfigs { } public static class TezPerfCliConfig extends AbstractCliConfig { -public TezPerfCliConfig() { +public TezPerfCliConfig(boolean useConstraints) { super(CorePerfCliDriver.class); try { setQueryDir("ql/src/test/queries/clientpositive/perf"); @@ -290,10 +290,21 @@ public class CliConfigs { excludesFrom(testConfigProps, "encrypted.query.files"); excludesFrom(testConfigProps, "erasurecoding.only.query.files"); -setResultsDir("ql/src/test/results/clientpositive/perf/tez"); +excludeQuery("cbo_query44.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query45.q"); // TODO: Enable when we move to Calcite 1.18 
+excludeQuery("cbo_query67.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query70.q"); // TODO: Enable when we move to Calcite 1.18 +excludeQuery("cbo_query86.q"); // TODO: Enable when we move to Calcite 1.18 + setLogDir("itests/qtest/target/qfile-results/clientpositive/tez"); -setInitScript("q_perf_test_init.sql"); +if (useConstraints) { + setInitScript("q_perf_test_init_constraints.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez/constraints"); +} else { + setInitScript("q_perf_test_init.sql"); + setResultsDir("ql/src/test/results/clientpositive/perf/tez"); +} setCleanupScript("q_perf_test_cleanup.sql"); setHiveConfDir("data/conf/perf-reg/tez"); http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/java/org/apache/hadoop/hive/ql/Context.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
[31/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out new file mode 100644 index 000..ea098f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -0,0 +1,241 @@ +PREHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss +union all +select * from cs +union all +select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Children')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -6 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss +union all +select * from cs +union all +select * from 
ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_sales +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output:
[57/75] [abbrv] hive git commit: HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran)
HIVE-20701: Allow HiveStreaming to receive a key value to commit atomically together with the transaction (Jaume M reviewed by Prasanth Jayachandran)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7765e90a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7765e90a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7765e90a

Branch: refs/heads/master-tez092
Commit: 7765e90aad44747860b3c1adbe8a4857d864912d
Parents: cbe3228
Author: Jaume Marhuenda
Authored: Mon Oct 22 14:18:20 2018 -0700
Committer: Prasanth Jayachandran
Committed: Mon Oct 22 14:18:49 2018 -0700

--
 .../streaming/AbstractStreamingTransaction.java |  6 ++-
 .../hive/streaming/HiveStreamingConnection.java | 13 +--
 .../hive/streaming/StreamingConnection.java     | 23 ---
 .../hive/streaming/StreamingTransaction.java    | 14 ++-
 .../apache/hive/streaming/TransactionBatch.java | 26 +++--
 .../streaming/UnManagedSingleTransaction.java   |  3 +-
 .../apache/hive/streaming/TestStreaming.java    | 41 +++-
 7 files changed, 109 insertions(+), 17 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
index a99fdba..6ab3ffe 100644
--- a/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
+++ b/streaming/src/java/org/apache/hive/streaming/AbstractStreamingTransaction.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
 import java.io.InputStream;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicBoolean;
 /**
@@ -151,6 +152,9 @@ abstract class AbstractStreamingTransaction
   }
   public void commit() throws StreamingException {
-    commitWithPartitions(null);
+    commit(null);
+  }
+  public void commit(Set partitions) throws StreamingException {
+    commit(partitions, null, null);
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
index f79b844..74fc531 100644
--- a/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
+++ b/streaming/src/java/org/apache/hive/streaming/HiveStreamingConnection.java
@@ -146,6 +146,7 @@ public class HiveStreamingConnection implements StreamingConnection {
   private boolean manageTransactions;
   private int countTransactions = 0;
   private Set partitions;
+  private Long tableId;
   private HiveStreamingConnection(Builder builder) throws StreamingException {
     this.database = builder.database.toLowerCase();
@@ -574,12 +575,18 @@ public class HiveStreamingConnection implements StreamingConnection {
   @Override
   public void commitTransaction() throws StreamingException {
-    commitTransactionWithPartition(null);
+    commitTransaction(null);
   }
   @Override
-  public void commitTransactionWithPartition(Set partitions)
+  public void commitTransaction(Set partitions)
       throws StreamingException {
+    commitTransaction(partitions, null, null);
+  }
+
+  @Override
+  public void commitTransaction(Set partitions, String key,
+      String value) throws StreamingException {
     checkState();
     Set createdPartitions = new HashSet<>();
@@ -598,7 +605,7 @@ public class HiveStreamingConnection implements StreamingConnection {
       connectionStats.incrementTotalPartitions(partitions.size());
     }
-    currentTransactionBatch.commitWithPartitions(createdPartitions);
+    currentTransactionBatch.commit(createdPartitions, key, value);
     this.partitions.addAll(
         currentTransactionBatch.getPartitions());
     connectionStats.incrementCreatedPartitions(createdPartitions.size());

http://git-wip-us.apache.org/repos/asf/hive/blob/7765e90a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
--
diff --git a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java b/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
index 92016e5..ba4c6a5 100644
--- a/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
+++ b/streaming/src/java/org/apache/hive/streaming/StreamingConnection.java
@@ -66,13 +66,26 @@ public interface
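For context, a usage sketch of the new commit surface: a client writes records and then commits the transaction together with a key/value pair that is stored atomically with it. The table name, delimiter, and key/value below are invented for illustration and error handling is omitted; the builder calls are the existing streaming v2 API, and the three-argument commitTransaction is the overload this change adds.

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hive.streaming.HiveStreamingConnection;
    import org.apache.hive.streaming.StrictDelimitedInputWriter;

    // Assumes an unpartitioned ACID table default.alerts with matching columns.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("default")
        .withTable("alerts")
        .withAgentInfo("example-agent")
        .withRecordWriter(writer)
        .withHiveConf(new HiveConf())
        .connect();
    connection.beginTransaction();
    connection.write("1,hello".getBytes());
    // The key/value pair is committed atomically with the transaction;
    // a null partition set is fine for an unpartitioned table.
    connection.commitTransaction(null, "replication-checkpoint", "offset-42");
    connection.close();

The atomicity is the point: a consumer such as a replication offset tracker can never observe the committed rows without the matching key/value, or the key/value without the rows.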
[73/75] [abbrv] hive git commit: HIVE-20638 : Upgrade version of Jetty to 9.3.25.v20180904 (Laszlo Bodor via Thejas Nair)
HIVE-20638 : Upgrade version of Jetty to 9.3.25.v20180904 (Laszlo Bodor via Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1002e89b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1002e89b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1002e89b Branch: refs/heads/master-tez092 Commit: 1002e89b6501afca7f886323e96f2f37b4b9ac60 Parents: 0d70154 Author: Thejas M Nair Authored: Fri Oct 26 09:37:34 2018 -0700 Committer: Thejas M Nair Committed: Fri Oct 26 09:37:34 2018 -0700 -- hbase-handler/pom.xml| 12 +++- hcatalog/webhcat/svr/pom.xml | 10 +- pom.xml | 8 +++- serde/pom.xml| 20 4 files changed, 43 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/hbase-handler/pom.xml -- diff --git a/hbase-handler/pom.xml b/hbase-handler/pom.xml index 06939a4..58666f8 100644 --- a/hbase-handler/pom.xml +++ b/hbase-handler/pom.xml @@ -51,7 +51,7 @@ ${hadoop.version} true - + org.slf4j slf4j-log4j12 @@ -59,6 +59,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -130,6 +134,12 @@ ${hadoop.version} tests test + + + org.eclipse.jetty + jetty-util + + org.apache.hbase http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/hcatalog/webhcat/svr/pom.xml -- diff --git a/hcatalog/webhcat/svr/pom.xml b/hcatalog/webhcat/svr/pom.xml index d19e99a..4dfade5 100644 --- a/hcatalog/webhcat/svr/pom.xml +++ b/hcatalog/webhcat/svr/pom.xml @@ -164,13 +164,13 @@ ${hadoop.version} - org.mortbay.jetty + org.eclipse.jetty jetty - org.mortbay.jetty + org.eclipse.jetty jetty-util - + @@ -179,11 +179,11 @@ ${hadoop.version} - org.mortbay.jetty + org.eclipse.jetty jetty - org.mortbay.jetty + org.eclipse.jetty jetty-util http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/pom.xml -- diff --git a/pom.xml b/pom.xml index 716db28..842a143 100644 --- a/pom.xml +++ b/pom.xml @@ -171,7 +171,7 @@ 5.5.1 3.0.1 1.1 -9.3.20.v20170531 +9.3.25.v20180904 1.19 2.22.2 @@ -1003,6 +1003,12 @@ org.mockito mockito-all test + + + org.eclipse.jetty + jetty-util + + http://git-wip-us.apache.org/repos/asf/hive/blob/1002e89b/serde/pom.xml -- diff --git a/serde/pom.xml b/serde/pom.xml index 8c37414..3756582 100644 --- a/serde/pom.xml +++ b/serde/pom.xml @@ -114,6 +114,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -163,6 +167,10 @@ commons-logging commons-logging + +org.eclipse.jetty +jetty-util + @@ -176,6 +184,12 @@ hadoop-hdfs ${hadoop.version} test + + + org.eclipse.jetty + jetty-util + + org.apache.hadoop @@ -183,6 +197,12 @@ ${hadoop.version} tests test + + + org.eclipse.jetty + jetty-util + +
[15/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out new file mode 100644 index 000..9862559 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query51.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales +,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales +,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk +,d_date +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store_sales +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales +,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales +,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1212 and 1212+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative 
> store_cumulative +order by item_sk +,d_date +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store_sales +POSTHOOK: Input: default@web_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Reducer 8 (BROADCAST_EDGE) +Map 12 <- Reducer 11 (BROADCAST_EDGE) +Reducer 10 <- Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:100 +Stage-1 + Reducer 6 vectorized + File Output Operator [FS_117] +Limit [LIM_116] (rows=100 width=636) +
[67/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out index ff4c05f..b7f9778 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query18.q.out @@ -99,56 +99,56 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_179] -Limit [LIM_178] (rows=100 width=1165) + File Output Operator [FS_177] +Limit [LIM_176] (rows=100 width=1165) Number of rows:100 - Select Operator [SEL_177] (rows=10969055 width=1165) + Select Operator [SEL_175] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 5 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_176] - Select Operator [SEL_175] (rows=10969055 width=1165) +SHUFFLE [RS_174] + Select Operator [SEL_173] (rows=10969055 width=1165) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] -Group By Operator [GBY_174] (rows=10969055 width=1229) +Group By Operator [GBY_172] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_39] (rows=10969055 width=1229) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col15)","count(_col15)","sum(_col16)","count(_col16)","sum(_col17)","count(_col17)","sum(_col18)","count(_col18)","sum(_col19)","count(_col19)","sum(_col3)","count(_col3)","sum(_col22)","count(_col22)"],keys:_col5, _col6, _col7, _col10, 0L - Merge Join Operator [MERGEJOIN_142] (rows=2193811 width=811) + Merge Join Operator [MERGEJOIN_140] (rows=2193811 width=811) Conds:RS_35._col0=RS_36._col3(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col15","_col16","_col17","_col18","_col19","_col22"] <-Reducer 3 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_35] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_138] (rows=4959744 width=368) - Conds:RS_32._col1=RS_150._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_136] (rows=4959744 width=368) + Conds:RS_32._col1=RS_148._col0(Inner),Output:["_col0","_col3","_col5","_col6","_col7"] <-Map 9 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_150] +SHUFFLE [RS_148] PartitionCols:_col0 - Select Operator [SEL_149] (rows=1861800 width=4) + Select Operator [SEL_147] (rows=1861800 width=4) Output:["_col0"] TableScan [TS_6] (rows=1861800 width=4) default@customer_demographics,cd2,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_137] (rows=4890586 width=371) - 
Conds:RS_145._col2=RS_148._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_135] (rows=4890586 width=371) + Conds:RS_143._col2=RS_146._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_145] +
[37/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out new file mode 100644 index 000..60b7557 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query93.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: explain cbo +select ss_customer_sk +,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price +else (ss_quantity*ss_sales_price) end act_sales +from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) +,reason +where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@reason +PREHOOK: Input: default@store_returns +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select ss_customer_sk +,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price +else (ss_quantity*ss_sales_price) end act_sales +from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) +,reason +where sr_reason_sk = r_reason_sk + and r_reason_desc = 'Did not like the warranty') t + group by ss_customer_sk + order by sumsales, ss_customer_sk +limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@reason +POSTHOOK: Input: default@store_returns +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +CBO PLAN: +HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) +HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))]) +HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13]) +HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($9))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($8), IS NOT NULL($2), IS NOT NULL($9))]) +HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) +HiveProject(r_reason_sk=[$0], r_reason_desc=[CAST(_UTF-16LE'Did not like the warranty'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"]) + HiveFilter(condition=[AND(=($2, _UTF-16LE'Did not like the warranty'), IS NOT NULL($0))]) +HiveTableScan(table=[[default, reason]], table:alias=[reason]) + http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out new file mode 100644 index 
000..4f97a67 --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query94.q.out @@ -0,0 +1,100 @@ +PREHOOK: query: explain cbo +select + count(distinct ws_order_number) as `order count` + ,sum(ws_ext_ship_cost) as `total shipping cost` + ,sum(ws_net_profit) as `total net profit` +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where +d_date between '1999-5-01' and + (cast('1999-5-01' as date) + 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'TX' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name =
[25/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out new file mode 100644 index 000..e8a6eaa --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -0,0 +1,1400 @@ +Warning: Shuffle Join MERGEJOIN[1431][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[1443][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1433][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 13' is a cross product +Warning: Shuffle Join MERGEJOIN[1456][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[1435][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 18' is a cross product +Warning: Shuffle Join MERGEJOIN[1469][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 19' is a cross product +PREHOOK: query: explain +with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 2001 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1998 and 1998 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having 
sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_sales +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store_sales +PREHOOK:
[38/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out new file mode 100644 index 000..50474bc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/cbo_query85.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( +( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 +) + or +( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 +) + or +( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 +) + ) + and + ( +( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 +) +or +( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 +) +or +( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 +) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) +,avg(ws_quantity) +,avg(wr_refunded_cash) +,avg(wr_fee) +limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@reason +PREHOOK: Input: default@web_page +PREHOOK: Input: default@web_returns +PREHOOK: Input: default@web_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain cbo +select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 1998 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( +( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '4 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 +) + or +( + cd1.cd_marital_status = 'D' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Primary' + and + 
cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 +) + or +( + cd1.cd_marital_status = 'U' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 +) + ) + and + ( +( + ca_country = 'United States' + and + ca_state in ('KY', 'GA', 'NM') + and ws_net_profit between 100 and 200 +) +or +( + ca_country = 'United States' + and + ca_state in ('MT', 'OR', 'IN') + and ws_net_profit between 150 and 300 +) +or +( + ca_country = 'United States' + and + ca_state in ('WI', 'MO', 'WV') + and ws_net_profit between 50 and 250 +) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) +
[54/75] [abbrv] hive git commit: HIVE-20679: DDL operations on hive might create large messages for DBNotification (Anishek Agarwal, reviewed by Sankar Hariappan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b4302bb7/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java -- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java index 3fe8b58..314ca48 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosIncrementalLoadAcidTables.java @@ -17,32 +17,19 @@ */ package org.apache.hadoop.hive.ql.parse; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsRequest; -import org.apache.hadoop.hive.metastore.api.AllocateTableWriteIdsResponse; -import org.apache.hadoop.hive.metastore.api.OpenTxnRequest; -import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse; -import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; -import org.apache.hadoop.hive.metastore.txn.TxnStore; -import org.apache.hadoop.hive.metastore.txn.TxnUtils; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder; import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.CallerArguments; -import org.apache.hadoop.hive.metastore.InjectableBehaviourObjectStore.BehaviourInjection; + import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLICATION; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; + import org.junit.rules.TestName; -import org.junit.rules.TestRule; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -50,13 +37,11 @@ import org.junit.BeforeClass; import org.junit.AfterClass; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; -import javax.annotation.Nullable; -import java.util.Collections; +import java.util.Map; + import com.google.common.collect.Lists; -import org.junit.Ignore; /** * TestReplicationScenariosAcidTables - test replication for ACID tables @@ -65,11 +50,9 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { @Rule public final TestName testName = new TestName(); - @Rule - public TestRule replV1BackwardCompat; - protected static final Logger LOG = LoggerFactory.getLogger(TestReplicationScenariosIncrementalLoadAcidTables.class); - private static WarehouseInstance primary, replica, replicaNonAcid; + static WarehouseInstance primary; + private static WarehouseInstance replica, replicaNonAcid; private static HiveConf conf; private String primaryDbName, replicatedDbName, primaryDbNameExtra; private enum OperationType { @@ -80,12 +63,21 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { @BeforeClass public static void classLevelSetup() throws Exception { -conf = new 
HiveConf(TestReplicationScenariosAcidTables.class); +HashMap overrides = new HashMap<>(); +overrides.put(MetastoreConf.ConfVars.EVENT_MESSAGE_FACTORY.getHiveName(), +GzipJSONMessageEncoder.class.getCanonicalName()); + +internalBeforeClassSetup(overrides, TestReplicationScenariosAcidTables.class); + } + + static void internalBeforeClassSetup(Map overrides, Class clazz) + throws Exception { +conf = new HiveConf(clazz); conf.set("dfs.client.use.datanode.hostname", "true"); conf.set("hadoop.proxyuser." + Utils.getUGI().getShortUserName() + ".hosts", "*"); MiniDFSCluster miniDFSCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true).build(); -HashMap overridesForHiveConf = new HashMap() {{ +HashMap acidConfs = new HashMap() {{ put("fs.defaultFS", miniDFSCluster.getFileSystem().getUri().toString()); put("hive.support.concurrency", "true"); put("hive.txn.manager", "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager"); @@ -97,9 +89,11 @@ public class TestReplicationScenariosIncrementalLoadAcidTables { put("mapred.input.dir.recursive", "true"); put("hive.metastore.disallow.incompatible.col.type.changes", "false"); }}; -primary = new WarehouseInstance(LOG, miniDFSCluster,
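The refactoring above threads a config-override map through the test setup so subclasses can choose the notification-message encoder. The interesting override is the metastore event message factory; below is a sketch of just that piece, where the config key and encoder class are taken from the diff and everything else is simplified:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.apache.hadoop.hive.metastore.messaging.json.gzip.GzipJSONMessageEncoder;

    // Route DBNotification messages through the gzip JSON encoder so large
    // DDL events produce compact entries in the notification log.
    Map<String, String> overrides = new HashMap<>();
    overrides.put(MetastoreConf.ConfVars.EVENT_MESSAGE_FACTORY.getHiveName(),
        GzipJSONMessageEncoder.class.getCanonicalName());
    // The map is then passed to internalBeforeClassSetup(overrides, clazz),
    // which applies it when constructing the warehouse instances.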
[10/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out new file mode 100644 index 000..f82272c --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out @@ -0,0 +1,702 @@ +PREHOOK: query: explain +select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country +,ship_carriers +,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( +(select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,'DIAMOND' || ',' || 'AIRBORNE' as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then ws_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid_inc_tax * 
ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid_inc_tax * ws_quantity else 0 end) as oct_net +
[02/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out new file mode 100644 index 000..08079cd --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query88.q.out @@ -0,0 +1,946 @@ +Warning: Shuffle Join MERGEJOIN[599][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[600][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[601][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[602][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 9' is a cross product +Warning: Shuffle Join MERGEJOIN[603][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 10' is a cross product +Warning: Shuffle Join MERGEJOIN[604][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6]] in Stage 'Reducer 11' is a cross product +Warning: Shuffle Join MERGEJOIN[605][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7]] in Stage 'Reducer 12' is a cross product +PREHOOK: query: explain +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and 
((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or + (household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk =
[66/75] [abbrv] hive git commit: HIVE-20788: Extended SJ reduction may backtrack columns incorrectly when creating filters (Jesus Camacho Rodriguez, reviewed by Deepak Jaiswal)
http://git-wip-us.apache.org/repos/asf/hive/blob/3cbc13e9/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out index c82c415..6d7c620 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query33.q.out @@ -194,57 +194,57 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_372] -Limit [LIM_371] (rows=59 width=115) + File Output Operator [FS_368] +Limit [LIM_367] (rows=59 width=115) Number of rows:100 - Select Operator [SEL_370] (rows=59 width=115) + Select Operator [SEL_366] (rows=59 width=115) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized -SHUFFLE [RS_369] - Group By Operator [GBY_368] (rows=59 width=115) +SHUFFLE [RS_365] + Group By Operator [GBY_364] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Union 5 [SIMPLE_EDGE] <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_392] + Reduce Output Operator [RS_388] PartitionCols:_col0 -Group By Operator [GBY_391] (rows=59 width=115) +Group By Operator [GBY_387] (rows=59 width=115) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Group By Operator [GBY_390] (rows=19 width=115) + Group By Operator [GBY_386] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col0 Group By Operator [GBY_108] (rows=19 width=115) Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 -Merge Join Operator [MERGEJOIN_308] (rows=11364 width=3) +Merge Join Operator [MERGEJOIN_304] (rows=11364 width=3) Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_104] PartitionCols:_col0 -Merge Join Operator [MERGEJOIN_297] (rows=461514 width=7) - Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] +Merge Join Operator [MERGEJOIN_293] (rows=461514 width=7) + Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] + SHUFFLE [RS_319] PartitionCols:_col1 -Select Operator [SEL_322] (rows=460848 width=7) +Select Operator [SEL_318] (rows=460848 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=460848 width=7) + Filter Operator [FIL_317] (rows=460848 width=7) predicate:i_manufact_id is not null TableScan [TS_0] (rows=462000 width=7) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_manufact_id"] <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_329] + FORWARD [RS_325] PartitionCols:_col0 -Group By Operator [GBY_328] (rows=692 width=3) +Group By Operator [GBY_324] (rows=692 width=3) Output:["_col0"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_323] PartitionCols:_col0 -Group By Operator [GBY_326] (rows=692 width=3) +Group By Operator [GBY_322] (rows=692 width=3)
[04/75] [abbrv] [partial] hive git commit: HIVE-20718: Add perf cli driver with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out new file mode 100644 index 000..bcfe19e --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query81.q.out @@ -0,0 +1,220 @@ +PREHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk +,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@catalog_returns +PREHOOK: Input: default@customer +PREHOOK: Input: default@customer_address +PREHOOK: Input: default@date_dim +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk +,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =1998 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'IL' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@catalog_returns +POSTHOOK: Input: default@customer +POSTHOOK: Input: default@customer_address +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 10 <- Reducer 13 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 7 <- Map 14 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 15 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator +limit:-1 +Stage-1 + Reducer 4 vectorized + File Output Operator [FS_210] +Select Operator [SEL_209] (rows=100 width=1506) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] + Limit [LIM_208] (rows=100 width=1420) +Number of rows:100 +Select Operator [SEL_207] (rows=1577696 width=1418) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] +<-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_63] +Select Operator [SEL_62] (rows=1577696