http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin46.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index febb6c7..b6f8b19 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -234,12 +234,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -340,12 +340,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -430,10 +430,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product @@ -528,10 +528,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -540,6 +536,10 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -635,11 +635,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -648,6 +643,11 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -739,11 +739,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -752,6 +747,11 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -843,14 +843,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -944,13 +944,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1046,19 +1046,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1150,16 +1150,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1251,16 +1251,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -100 1 Bob 102 2 Del -101 2 Car 102 2 Del 99 2 Mat 103 2 Ema -101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -1354,9 +1354,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product @@ -1407,7 +1407,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1447,31 +1447,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1506,12 +1508,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1531,7 +1533,8 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1539,36 +1542,37 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test1_n4.key between 100 and 102) + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -99 0 Alice NULL NULL NULL -98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1603,12 +1607,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1628,7 +1632,7 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1636,34 +1640,36 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - OR test2_n2.key between 100 and 102) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -NULL NULL None 102 2 Del -NULL NULL NULL 105 NULL None -NULL NULL NULL 104 3 Fli +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1681,11 +1687,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2_n2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE @@ -1694,24 +1698,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1723,11 +1725,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1_n4 PREHOOK: Input: default@test2_n2 @@ -1735,37 +1737,426 @@ PREHOOK: Input: default@test2_n2 POSTHOOK: query: SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) + OR test1_n4.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, - test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 - FROM test1_n4 RIGHT OUTER JOIN test2_n2 - ON (test1_n4.value=test2_n2.value - AND (test1_n4.key between 100 and 102 - OR test2_n2.key between 100 and 102)) - ) sq1 -FULL OUTER JOIN ( - SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, - test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 - FROM test1_n4 LEFT OUTER JOIN test2_n2 - ON (test1_n4.value=test2_n2.value +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test2_n2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value AND (test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102)) ) sq2 @@ -1880,7 +2271,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1946,23 +2337,239 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del -NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL -NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1_n4 + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_3:test2_n2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1_n4 + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_2:$hdt$_3:test2_n2 + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: test2_n2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + TableScan + alias: test1_n4 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1_n4 +PREHOOK: Input: default@test2_n2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1_n4.key AS key1, test1_n4.value AS value1, test1_n4.col_1 AS col_1, + test2_n2.key AS key2, test2_n2.value AS value2, test2_n2.col_2 AS col_2 + FROM test1_n4 RIGHT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1_n4.key AS key3, test1_n4.value AS value3, test1_n4.col_1 AS col_3, + test2_n2.key AS key4, test2_n2.value AS value4, test2_n2.col_2 AS col_4 + FROM test1_n4 LEFT OUTER JOIN test2_n2 + ON (test1_n4.value=test2_n2.value + AND (test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1_n4 +POSTHOOK: Input: default@test2_n2 +#### A masked pattern was here #### 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin47.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mapjoin47.q.out b/ql/src/test/results/clientpositive/mapjoin47.q.out index 172d160..c42094d 100644 --- a/ql/src/test/results/clientpositive/mapjoin47.q.out +++ b/ql/src/test/results/clientpositive/mapjoin47.q.out @@ -1415,7 +1415,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1521,7 +1521,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mergejoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 7cbcbbe..95b961f 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -1706,7 +1706,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mergejoins_mixed.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out index 9ac6d59..4d94085 100644 --- a/ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ b/ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -820,7 +820,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -859,7 +859,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1161,7 +1161,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1463,7 +1463,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/optional_outer.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/optional_outer.q.out b/ql/src/test/results/clientpositive/optional_outer.q.out index 9ec1af7..efc952c 100644 --- a/ql/src/test/results/clientpositive/optional_outer.q.out +++ b/ql/src/test/results/clientpositive/optional_outer.q.out @@ -283,7 +283,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -344,7 +344,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index 6b91a7f..44253c6 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1, FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -32,7 +32,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint @@ -72,12 +71,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -101,7 +94,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:tinyint, 1:_col1:tinyint, 2:_col2:bigint, 3:_col3:bigint] Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -122,12 +114,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:tinyint, _col1:tinyint, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -170,12 +156,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(ctinyint) as c1 FROM alltypesparquet ORDER BY c1 @@ -198,7 +184,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: ctinyint (type: tinyint) outputColumnNames: ctinyint @@ -238,12 +223,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -267,7 +246,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:bigint] Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -287,12 +265,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -486,7 +458,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -4.344925324321378 1158.3003004768175 1158.3003004768175 1158.426587033782 34.03381113652741 34.03381113652741 34.03381113652741 34.03566639620535 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -494,7 +466,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -520,7 +492,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint @@ -560,12 +531,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [3] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -589,7 +554,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:bigint, 1:_col1:bigint, 2:_col2:bigint, 3:_col3:bigint] Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -610,12 +574,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:bigint, _col1:bigint, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -658,12 +616,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cbigint) as c1 FROM alltypesparquet ORDER BY c1 @@ -686,7 +644,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: cbigint (type: bigint) outputColumnNames: cbigint @@ -726,12 +683,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [3] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -755,7 +706,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:bigint] Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -775,12 +725,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -974,7 +918,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543066035E18 2.07689300543066035E18 2.07711944383072922E18 1.441142951074133E9 1.441142951074133E9 1.441142951074133E9 1.4412215110213728E9 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -982,7 +926,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -1008,7 +952,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat @@ -1048,12 +991,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [4] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -1077,7 +1014,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:float, 1:_col1:float, 2:_col2:bigint, 3:_col3:bigint] Reduce Output Operator key expressions: _col0 (type: float) sort order: + @@ -1098,12 +1034,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 4 - includeColumns: [0, 1, 2, 3] - dataColumns: _col0:float, _col1:float, _col2:bigint, _col3:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -1146,12 +1076,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesparquet ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT SUM(cfloat) as c1 FROM alltypesparquet ORDER BY c1 @@ -1174,7 +1104,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Select Operator expressions: cfloat (type: float) outputColumnNames: cfloat @@ -1214,12 +1143,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [4] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -1243,7 +1166,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:double] Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -1263,12 +1185,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 1 - includeColumns: [0] - dataColumns: _col0:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -1463,7 +1379,7 @@ POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -4.303895780321011 1163.8972588605056 1163.8972588605056 1164.0241556397098 34.11593848717203 34.11593848717203 34.11593848717203 34.11779822379677 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -1490,7 +1406,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -1535,7 +1451,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -1583,12 +1498,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out index b72982c..3a22da6 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), @@ -19,7 +19,7 @@ WHERE (((cdouble > ctinyint) OR ((cint > cbigint) OR (cboolean1 < 0)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), @@ -58,7 +58,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -106,12 +105,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 2, 3, 4, 5, 10, 11] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out index d174a00..c6f2e23 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, ctimestamp1, ctinyint, @@ -22,7 +22,7 @@ WHERE (((cstring2 <= '10') AND ((csmallint = 9763215.5639) OR (cstring1 LIKE '%a')))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdouble, ctimestamp1, ctinyint, @@ -64,7 +64,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -101,12 +100,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(6,2), decimal(11,4), double, double, double, double, double, bigint, bigint, bigint, double, double, double] Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out index 5048ad7..cd60179 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cstring1, cboolean1, cdouble, @@ -13,7 +13,7 @@ WHERE ((cstring2 = cstring1) OR ((ctimestamp1 IS NULL) AND (cstring1 LIKE '%a'))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cstring1, cboolean1, cdouble, @@ -46,7 +46,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -83,12 +82,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [1, 5, 6, 7, 8, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, double, double, double, double] Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out index 83ca333..cbf7c25 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cbigint, cboolean1, cstring1, @@ -30,7 +30,7 @@ WHERE (((ctimestamp1 IS NULL) GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble ORDER BY ctimestamp1, cdouble, cbigint, cstring1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cbigint, cboolean1, cstring1, @@ -81,7 +81,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -133,12 +132,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -167,7 +160,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:bigint, 1:_col1:boolean, 2:_col2:string, 3:_col3:double, 4:_col4:double, 5:_col5:bigint, 6:_col6:bigint, 7:_col7:bigint, 8:_col8:double, 9:_col9:double, 10:_col10:double, 11:_col11:double, 12:_col12:double, 13:_col13:decimal(22,2), 14:_col14:bigint, 15:_col15:double, 16:_col17:double, 17:_col18:double, 18:_col19:double] Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ @@ -188,12 +180,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 19 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18] - dataColumns: _col0:bigint, _col1:boolean, _col2:string, _col3:double, _col4:double, _col5:bigint, _col6:bigint, _col7:bigint, _col8:double, _col9:double, _col10:double, _col11:double, _col12:double, _col13:decimal(22,2), _col14:bigint, _col15:double, _col17:double, _col18:double, _col19:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out index 66af40c..e60548c 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cboolean1, ctinyint, ctimestamp1, @@ -83,7 +83,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -135,12 +134,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 4, 5, 6, 8, 9, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -169,7 +162,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:_col0:boolean, 1:_col1:tinyint, 2:_col2:timestamp, 3:_col3:float, 4:_col4:string, 5:_col5:tinyint, 6:_col6:tinyint, 7:_col7:tinyint, 8:_col8:double, 9:_col9:double, 10:_col10:double, 11:_col11:float, 12:_col12:double, 13:_col13:double, 14:_col14:double, 15:_col15:decimal(7,3), 16:_col16:double, 17:_col17:double, 18:_col18:float, 19:_col19:double, 20:_col20:tinyint] Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -190,12 +182,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 21 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - dataColumns: _col0:boolean, _col1:tinyint, _col2:timestamp, _col3:float, _col4:string, _col5:tinyint, _col6:tinyint, _col7:tinyint, _col8:double, _col9:double, _col10:double, _col11:float, _col12:double, _col13:double, _col14:double, _col15:decimal(7,3), _col16:double, _col17:double, _col18:float, _col19:double, _col20:tinyint - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true