Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out?rev=1627210&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out Wed Sep 24 03:16:25 2014 @@ -0,0 +1,595 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 11 1 +1 NULL 1 +2 12 1 +2 NULL 1 +3 13 1 +3 NULL 1 +7 17 1 +7 NULL 1 +8 18 1 +8 28 1 +8 NULL 2 +NULL 11 1 +NULL 12 1 +NULL 13 1 +NULL 17 1 +NULL 18 1 +NULL 28 1 +NULL NULL 6 +PREHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(DISTINCT val) + keys: key (type: string), '0' (type: string), val (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 1 +2 1 +3 1 +7 1 +8 2 +NULL 6 +PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +POSTHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +POSTHOOK: query: CREATE TABLE T3(key1 STRING, key2 STRING, val INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T3 +PREHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4, Stage-5 + Stage-0 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Spark + Edges: + Reducer 4 <- Map 1 (GROUP SORT) + Reducer 5 <- Reducer 4 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-5 + Spark + Edges: + Reducer 6 <- Map 2 (GROUP SORT) + Reducer 7 <- Reducer 6 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t3 + +PREHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube +INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t2 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t2.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t2.val EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: t3.key1 SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t3.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t3.val EXPRESSION [(t1)t1.null, ]
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out?rev=1627210&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out Wed Sep 24 03:16:25 2014 @@ -0,0 +1,758 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g3 +POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g3 +PREHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g4 +POSTHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g4 +PREHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_h2 +POSTHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_h2 +PREHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_h3 +POSTHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_h3 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +PREHOOK: Output: default@dest_g3 +PREHOOK: Output: default@dest_g4 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Output: default@dest_g3 +POSTHOOK: Output: default@dest_g4 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: SELECT * FROM dest_g3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +PREHOOK: query: SELECT * FROM dest_g4 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g4 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 +INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 +INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-7 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-7, Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-9 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-6 + Stage-10 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-6 + Stage-11 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-6 + Stage-12 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-6 + Stage-13 depends on stages: Stage-4 + Stage-8 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Spark + Edges: + Reducer 4 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 + + Stage: Stage-6 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-9 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + + Stage: Stage-10 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g4 + + Stage: Stage-11 + Stats-Aggr Operator + + Stage: Stage-3 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h2 + + Stage: Stage-12 + Stats-Aggr Operator + + Stage: Stage-4 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h3 + + Stage: Stage-13 + Stats-Aggr Operator + + Stage: Stage-8 + Spark + Edges: + Reducer 5 <- Map 2 (GROUP SORT) + Reducer 6 <- Reducer 5 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string) + sort order: +++ + Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h3 + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h2 + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 +INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +PREHOOK: Output: default@dest_g3 +PREHOOK: Output: default@dest_g4 +PREHOOK: Output: default@dest_h2 +PREHOOK: Output: default@dest_h3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 +INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Output: default@dest_g3 +POSTHOOK: Output: default@dest_g4 +POSTHOOK: Output: default@dest_h2 +POSTHOOK: Output: default@dest_h3 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] +POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: SELECT * FROM dest_g3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +PREHOOK: query: SELECT * FROM dest_g4 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g4 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 +PREHOOK: query: SELECT * FROM dest_h2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_h2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_h2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_h2 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 4 1878.0 878 6 +1 5 1729.0 729 8 +1 6 11282.0 1282 12 +1 6 11494.0 1494 11 +1 7 11171.0 1171 11 +1 7 11516.0 1516 10 +1 8 11263.0 1263 10 +1 9 12294.0 2294 14 +1 9 12654.0 2654 16 +PREHOOK: query: SELECT * FROM dest_h3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_h3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_h3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_h3 +#### A masked pattern was here #### +5 1 5102.0 102 2 +5 1 5116.0 116 2 +5 1 515.0 15 3 +5 1 553.0 53 1 +5 1 554.0 54 1 +5 1 557.0 57 1 +6 1 6134.0 134 2 +6 1 664.0 64 1 +6 1 665.0 65 1 +6 1 666.0 66 1 +6 1 669.0 69 1 +7 1 7144.0 144 2 +7 1 7152.0 152 2 +7 1 7210.0 210 3 +7 1 774.0 74 1 +7 1 777.0 77 1 +7 1 778.0 78 1 +8 1 8166.0 166 2 +8 1 8168.0 168 2 +8 1 88.0 8 1 +8 1 880.0 80 1 +8 1 882.0 82 1 +8 1 885.0 85 1 +8 1 886.0 86 1 +8 1 887.0 87 1 +9 1 9190.0 190 2 +9 1 9194.0 194 2 +9 1 9196.0 196 2 +9 1 9270.0 270 3 +9 1 99.0 9 1 +9 1 992.0 92 1 +9 1 996.0 96 1 +PREHOOK: query: DROP TABLE dest_g2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g2 +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: DROP TABLE dest_g2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g2 +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: DROP TABLE dest_g3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g3 +PREHOOK: Output: default@dest_g3 +POSTHOOK: query: DROP TABLE dest_g3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g3 +POSTHOOK: Output: default@dest_g3 +PREHOOK: query: DROP TABLE dest_g4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g4 +PREHOOK: Output: default@dest_g4 +POSTHOOK: query: DROP TABLE dest_g4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g4 +POSTHOOK: Output: default@dest_g4 +PREHOOK: query: DROP TABLE dest_h2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_h2 +PREHOOK: Output: default@dest_h2 +POSTHOOK: query: DROP TABLE dest_h2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_h2 +POSTHOOK: Output: default@dest_h2 +PREHOOK: query: DROP TABLE dest_h3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_h3 +PREHOOK: Output: default@dest_h3 +POSTHOOK: query: DROP TABLE dest_h3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_h3 +POSTHOOK: Output: default@dest_h3 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out?rev=1627210&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out Wed Sep 24 03:16:25 2014 @@ -0,0 +1,198 @@ +PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest_g3 +POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest_g3 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((substr(key, 1, 1) >= 5) or (substr(key, 1, 1) < 5)) (type: boolean) + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(key, 1, 1) (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g3 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +PREHOOK: Output: default@dest_g3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) +INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Output: default@dest_g3 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +5 6 +6 5 +7 6 +8 8 +9 7 +PREHOOK: query: SELECT * FROM dest_g3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM dest_g3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g3 +#### A masked pattern was here #### +0 1 3 +1 71 115 +2 69 111 +3 62 99 +4 74 124 +PREHOOK: query: DROP TABLE dest_g2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g2 +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: DROP TABLE dest_g2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g2 +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: DROP TABLE dest_g3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest_g3 +PREHOOK: Output: default@dest_g3 +POSTHOOK: query: DROP TABLE dest_g3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest_g3 +POSTHOOK: Output: default@dest_g3
