http://git-wip-us.apache.org/repos/asf/hive/blob/5f01dc42/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out index c237025..3e4f408 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_7.q.out @@ -122,38 +122,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_small - a - TOK_TABREF - TOK_TABNAME - bucket_big - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -161,6 +129,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) @@ -170,20 +139,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -195,7 +170,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -243,7 +217,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -290,36 +263,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 2 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 4 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 59 Data size: 6525 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -331,7 +309,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -379,7 +356,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -423,7 +399,7 @@ STAGE PLANS: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] Reducer 3 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -439,13 +415,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -482,38 +459,6 @@ PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_s PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_big - a - TOK_TABREF - TOK_TABNAME - bucket_small - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -521,6 +466,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -530,36 +476,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 4 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 59 Data size: 6525 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -571,7 +522,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -619,7 +569,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -666,20 +615,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -691,7 +646,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -739,7 +693,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -783,7 +736,7 @@ STAGE PLANS: /bucket_small/ds=2008-04-08 [b] /bucket_small/ds=2008-04-09 [b] Reducer 2 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -799,13 +752,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -842,38 +796,6 @@ PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_s PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_big - a - TOK_TABREF - TOK_TABNAME - bucket_small - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -881,6 +803,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -890,36 +813,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 2 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 5932 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 4 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 59 Data size: 6525 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -931,7 +859,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -979,7 +906,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -1026,20 +952,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1051,7 +983,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -1099,7 +1030,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -1143,7 +1073,7 @@ STAGE PLANS: /bucket_small/ds=2008-04-08 [b] /bucket_small/ds=2008-04-09 [b] Reducer 2 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1159,13 +1089,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
http://git-wip-us.apache.org/repos/asf/hive/blob/5f01dc42/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out index 7c51746..dae32a5 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_8.q.out @@ -122,38 +122,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_small - a - TOK_TABREF - TOK_TABNAME - bucket_big - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -161,6 +129,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) @@ -170,20 +139,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -195,7 +170,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -243,7 +217,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -290,36 +263,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 1 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 127 Data size: 13807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -331,7 +309,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -379,7 +356,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -423,7 +399,7 @@ STAGE PLANS: /bucket_big/ds=2008-04-08 [b] /bucket_big/ds=2008-04-09 [b] Reducer 3 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -439,13 +415,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -482,38 +459,6 @@ PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_s PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_big - a - TOK_TABREF - TOK_TABNAME - bucket_small - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -521,6 +466,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -530,36 +476,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 13807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -571,7 +522,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -619,7 +569,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -666,20 +615,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -691,7 +646,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -739,7 +693,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -783,7 +736,7 @@ STAGE PLANS: /bucket_small/ds=2008-04-08 [b] /bucket_small/ds=2008-04-09 [b] Reducer 2 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -799,13 +752,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -844,38 +798,6 @@ PREHOOK: type: QUERY POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - bucket_big - a - TOK_TABREF - TOK_TABNAME - bucket_small - b - = - . - TOK_TABLE_OR_COL - a - key - . - TOK_TABLE_OR_COL - b - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - TOK_FUNCTIONSTAR - count - - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -883,6 +805,7 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) @@ -892,36 +815,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 3 => 1 - keys: - 0 key (type: string) - 1 key (type: string) - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 116 Data size: 12552 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 127 Data size: 13807 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -933,7 +861,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -981,7 +908,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 4 bucket_field_name key columns key,value @@ -1028,20 +954,26 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE - tag: 1 - auto parallelism: true + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 244 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Execution mode: llap + LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1053,7 +985,6 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -1101,7 +1032,6 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value @@ -1145,7 +1075,7 @@ STAGE PLANS: /bucket_small/ds=2008-04-08 [b] /bucket_small/ds=2008-04-09 [b] Reducer 2 - Execution mode: uber + Execution mode: llap Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -1161,13 +1091,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0 columns.types bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
