Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out?rev=1627210&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out Wed Sep 24 03:16:25 2014 @@ -0,0 +1,746 @@ +PREHOOK: query: -- HIVE-3849 Aliased column in where clause for multi-groupby single reducer cannot be resolved +create table e1 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e1 +POSTHOOK: query: -- HIVE-3849 Aliased column in where clause for multi-groupby single reducer cannot be resolved +create table e1 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e1 +PREHOOK: query: create table e2 (key string, count int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@e2 +POSTHOOK: query: create table e2 (key string, count int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e2 +PREHOOK: query: explain +from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) + Statistics: 
Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +100 2 +200 2 +PREHOOK: query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here #### +400 1 +PREHOOK: query: explain +from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), 
UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +val_100 2 +val_200 2 +PREHOOK: query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here #### +val_200 2 +PREHOOK: query: explain +from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((value) IN ('val_100', 'val_200', 'val_300') and (key) IN (100, 150, 200)) or ((value) IN ('val_400', 'val_500') and (key) IN (400, 450))) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_100', 'val_200', 'val_300') and (KEY._col0) IN (100, 150, 200)) (type: boolean) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table e1 +select key, count(*) +where src.value in ('val_100', 'val_200', 'val_300') AND key in (100, 150, 200) +group by key +insert overwrite table e2 +select key, count(*) +where src.value in ('val_400', 'val_500') AND key in (400, 450) +group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +100 2 +200 2 +PREHOOK: 
query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here #### +400 1 +PREHOOK: query: explain +from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((key + key) = 200) or ((key - 100) = 100)) or ((key = 300) and value is not null)) or (((key + key) = 400) or (((key - 100) = 500) and value is not null))) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((VALUE._col0 + VALUE._col0) = 200) or ((VALUE._col0 - 100) = 100)) or ((VALUE._col0 = 300) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 375 Data size: 3984 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 187 Data size: 1986 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.e2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@e1 +PREHOOK: Output: default@e2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table e1 +select value, count(*) +where src.key + src.key = 200 or src.key - 100 = 100 or src.key = 300 AND VALUE IS NOT NULL +group by value +insert overwrite table e2 +select value, count(*) +where src.key + src.key = 400 or src.key - 100 = 500 AND VALUE IS NOT NULL +group by value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@e1 +POSTHOOK: Output: default@e2 +POSTHOOK: Lineage: e1.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e1.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: e2.count EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: e2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from e1 +PREHOOK: type: QUERY +PREHOOK: Input: default@e1 +#### A masked pattern was here #### +POSTHOOK: query: select * from e1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e1 +#### A masked pattern was here #### +val_100 2 +val_200 2 +PREHOOK: query: select * from e2 +PREHOOK: type: QUERY +PREHOOK: Input: default@e2 +#### A masked pattern was here #### +POSTHOOK: query: select * from e2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@e2 +#### A masked pattern was here 
#### +val_200 2
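
The golden file above locks down HIVE-3849: in a multi-group-by query compiled to a single reducer, a column referenced through the source alias in an INSERT branch's WHERE clause could not be resolved. The sketch below shows the pattern under test; the multi-insert is taken verbatim from the file, while the SET line is an assumption naming the flag that controls the single-reducer rewrite (hive.multigroupby.singlereducer, true by default, and not shown in the .q.out above).

    -- Assumed default; the .q.out above does not show this SET.
    SET hive.multigroupby.singlereducer=true;

    -- Both INSERT branches group on the same key, so Hive compiles them
    -- into the single Map 1 -> Reducer 2 pipeline seen in the plans:
    -- one shuffle on key, then a Forward operator fanning each row into
    -- a per-insert Filter -> Group By -> File Sink branch.
    FROM src
    INSERT OVERWRITE TABLE e1
      SELECT key, count(*)
      WHERE src.value IN ('val_100', 'val_200', 'val_300') AND key IN (100, 150, 200)
      GROUP BY key
    INSERT OVERWRITE TABLE e2
      SELECT key, count(*)
      WHERE src.value IN ('val_400', 'val_500') AND key IN (400, 450)
      GROUP BY key;

Because the rewrite re-evaluates each branch's WHERE on the reducer side, the predicates in the Filter Operators above reference KEY._col0 and VALUE._col0 rather than the original column names; resolving the aliased column into those internal names is exactly what HIVE-3849 fixed.
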
Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out?rev=1627210&view=auto ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out (added) +++ hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_position.q.out Wed Sep 24 03:16:25 2014 @@ -0,0 +1,783 @@ +PREHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testTable1 +POSTHOOK: query: CREATE TABLE testTable1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testTable1 +PREHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testTable2 +POSTHOOK: query: CREATE TABLE testTable2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testTable2 +PREHOOK: query: -- Position Alias in GROUP BY and ORDER BY + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +PREHOOK: type: QUERY +POSTHOOK: query: -- Position Alias in GROUP BY and ORDER BY + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-4, Stage-5 + Stage-0 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Spark + Edges: + Reducer 4 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + Filter Operator + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-5 + Spark + Edges: + Reducer 5 <- Map 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + Filter Operator + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testtable1 +PREHOOK: Output: default@testtable2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testtable1 +POSTHOOK: Output: default@testtable2 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable1 +#### A masked pattern was here #### +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +10 1 +11 1 +12 1 +15 1 +17 1 +18 1 +19 1 +PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable2 +#### A masked pattern was here #### +0 val_0 1 +2 val_2 1 +4 val_4 1 +5 val_5 1 +8 val_8 1 +9 val_9 1 +10 val_10 1 +11 val_11 1 +12 val_12 1 +15 val_15 1 +17 val_17 1 +18 val_18 1 +19 val_19 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-4 depends on stages: Stage-2 + Stage-3 depends on stages: 
Stage-4, Stage-5 + Stage-0 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-7 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Spark + Edges: + Reducer 4 <- Map 1 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + Filter Operator + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable1 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-5 + Spark + Edges: + Reducer 5 <- Map 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + Filter Operator + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator 
+ expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: value (type: string), key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testtable1 +PREHOOK: Output: default@testtable2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 +INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testtable1 +POSTHOOK: Output: default@testtable2 +POSTHOOK: Lineage: testtable1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: testtable2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key, value FROM testTable1 ORDER BY 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
key, value FROM testTable1 ORDER BY 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable1 +#### A masked pattern was here #### +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +10 1 +11 1 +12 1 +15 1 +17 1 +18 1 +19 1 +PREHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@testtable2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val1, val2 FROM testTable2 ORDER BY 1, 2, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testtable2 +#### A masked pattern was here #### +0 val_0 1 +2 val_2 1 +4 val_4 1 +5 val_5 1 +8 val_8 1 +9 val_9 1 +10 val_10 1 +11 val_11 1 +12 val_12 1 +15 val_15 1 +17 val_17 1 +18 val_18 1 +19 val_19 1 +PREHOOK: query: -- Position Alias in subquery + +EXPLAIN +SELECT t.key, t.value +FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t +ORDER BY 2 DESC, 1 ASC +PREHOOK: type: QUERY +POSTHOOK: query: -- Position Alias in subquery + +EXPLAIN +SELECT t.key, t.value +FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t +ORDER BY 2 DESC, 1 ASC +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP) + Reducer 3 <- Reducer 2 (GROUP SORT) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key <= 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint), _col0 (type: string) + sort order: -+ + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t.key, t.value +FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t +ORDER BY 2 DESC, 1 ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT t.key, t.value +FROM (SELECT b.key as key, count(1) as value FROM src b WHERE b.key <= 20 GROUP BY 1) t +ORDER BY 2 DESC, 1 ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +5 3 +12 2 +15 2 +18 2 +10 1 +11 1 +17 1 +19 1 +2 1 +20 1 +4 1 +8 1 +9 1 +PREHOOK: query: EXPLAIN +SELECT c1, c2, c3, c4 +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2 + ) a + JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT c1, c2, c3, c4 +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2 + ) a + JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT) + Reducer 3 <- Reducer 2 (GROUP SORT), Reducer 6 (GROUP SORT) + Reducer 4 <- Reducer 3 (GROUP SORT) + Reducer 6 <- Map 5 (GROUP) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col4 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: --++ + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT c1, c2, c3, c4 +FROM ( + FROM + ( + FROM src src1 SELECT 
src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2 + ) a + JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1, c2, c3, c4 +FROM ( + FROM + ( + FROM src src1 SELECT src1.key AS c1, src1.value AS c2, COUNT(DISTINCT SUBSTR(src1.value,5)) AS c3 WHERE src1.key > 10 and src1.key < 20 GROUP BY 1, 2 + ) a + JOIN + ( + FROM src src2 SELECT src2.key AS c3, src2.value AS c4 WHERE src2.key > 15 and src2.key < 25 GROUP BY 1, 2 + ) b + ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 +) c +ORDER BY 1 DESC, 2 DESC, 3 ASC, 4 ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +19 val_19 19 val_19 +18 val_18 18 val_18 +17 val_17 17 val_17 Added: hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out?rev=1627210&view=auto ============================================================================== Files hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out (added) and hive/branches/spark/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out Wed Sep 24 03:16:25 2014 differ
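
The groupby_position golden file exercises positional aliases in GROUP BY and ORDER BY. Below is a sketch of the rule it verifies; the query is taken from the file, and the SET line is an assumption about the flag that gated positional references at the time (hive.groupby.orderby.position.alias, off by default in this era of Hive).

    -- Assumed; positional aliases were disabled by default.
    SET hive.groupby.orderby.position.alias=true;

    -- Positions resolve against the SELECT list, so GROUP BY 2, 1
    -- means GROUP BY (value, key). ORDER BY positions (e.g. the
    -- ORDER BY 2 DESC, 1 ASC subquery above) resolve the same way.
    FROM src
    INSERT OVERWRITE TABLE testTable2
    SELECT src.key, src.value, COUNT(DISTINCT SUBSTR(src.value, 5))
    WHERE src.key < 20
    GROUP BY 2, 1;

This resolution order is visible in the second plan: Map 2's Select and Group By Operators emit value before key, and the reducer-side Select swaps them back so testTable2 still receives (key, val1, val2).
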