Author: xuefu
Date: Wed Dec 18 18:49:15 2013
New Revision: 1552057
URL: http://svn.apache.org/r1552057
Log:
HIVE-6021: Problem in GroupByOperator for handling distinct aggregations (Sun
Rui via Xuefu)
Added:
hive/trunk/ql/src/test/queries/clientpositive/groupby12.q
hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1552057&r1=1552056&r2=1552057&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
Wed Dec 18 18:49:15 2013
@@ -290,7 +290,7 @@ public class GroupByOperator extends Ope
if (unionExprEval != null) {
String[] names = parameters.get(j).getExprString().split("\\.");
// parameters of the form : KEY.colx:t.coly
- if (Utilities.ReduceField.KEY.name().equals(names[0])) {
+ if (Utilities.ReduceField.KEY.name().equals(names[0]) &&
names.length > 2) {
String name = names[names.length - 2];
int tag = Integer.parseInt(name.split("\\:")[1]);
if (aggr.getDistinct()) {
@@ -314,7 +314,7 @@ public class GroupByOperator extends Ope
}
}
} else {
- // will be VALUE._COLx
+ // will be KEY._COLx or VALUE._COLx
if (!nonDistinctAggrs.contains(i)) {
nonDistinctAggrs.add(i);
}
@@ -691,7 +691,7 @@ public class GroupByOperator extends Ope
}
}
- // update non-distinct value aggregations: 'VALUE._colx'
+ // update non-distinct groupby key or value aggregations: 'KEY._COLx or
VALUE._colx'
// these aggregations should be updated only once.
if (unionTag == 0) {
for (int pos : nonDistinctAggrs) {
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby12.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby12.q?rev=1552057&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/groupby12.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/groupby12.q Wed Dec 18
18:49:15 2013
@@ -0,0 +1,13 @@
+set hive.map.aggr=false;
+
+CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key;
+
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key;
+
+SELECT dest1.* FROM dest1;
+
Added: hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out?rev=1552057&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby12.q.out Wed Dec 18
18:49:15 2013
@@ -0,0 +1,417 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT
(TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR
(TOK_FUNCTION COUNT (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR
(TOK_FUNCTIONDI COUNT (TOK_TABLE_OR_COL value)))) (TOK_GROUPBY (.
(TOK_TABLE_OR_COL src) key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: -1
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(KEY._col0)
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-2
+ Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value)
GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+3 1
+1 1
+2 1
+2 1
+2 1
+1 1
+1 1
+1 1
+2 1
+1 1
+1 1
+2 1
+3 1
+2 1
+2 1
+2 1
+1 1
+3 1
+2 1
+1 1
+1 1
+2 1
+1 1
+2 1
+4 1
+1 1
+1 1
+2 1
+2 1
+2 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+2 1
+1 1
+3 1
+1 1
+4 1
+1 1
+1 1
+2 1
+2 1
+2 1
+2 1
+1 1
+1 1
+2 1
+2 1
+1 1
+1 1
+1 1
+1 1
+3 1
+1 1
+1 1
+1 1
+2 1
+1 1
+3 1
+1 1
+2 1
+1 1
+2 1
+3 1
+1 1
+1 1
+2 1
+1 1
+1 1
+2 1
+2 1
+2 1
+3 1
+2 1
+2 1
+1 1
+2 1
+2 1
+1 1
+2 1
+2 1
+1 1
+2 1
+2 1
+1 1
+1 1
+2 1
+5 1
+2 1
+1 1
+2 1
+2 1
+2 1
+2 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+2 1
+1 1
+1 1
+2 1
+1 1
+1 1
+1 1
+2 1
+1 1
+1 1
+2 1
+3 1
+1 1
+1 1
+4 1
+2 1
+1 1
+2 1
+2 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+3 1
+1 1
+1 1
+1 1
+1 1
+2 1
+1 1
+2 1
+1 1
+3 1
+1 1
+3 1
+2 1
+3 1
+2 1
+2 1
+1 1
+2 1
+3 1
+1 1
+2 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+2 1
+1 1
+5 1
+3 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+1 1
+3 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+3 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+3 1
+2 1
+2 1
+1 1
+1 1
+5 1
+1 1
+3 1
+2 1
+4 1
+1 1
+3 1
+1 1
+1 1
+2 1
+2 1
+3 1
+1 1
+1 1
+2 1
+1 1
+2 1
+1 1
+2 1
+1 1
+3 1
+3 1
+1 1
+1 1
+1 1
+1 1
+3 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+3 1
+1 1
+1 1
+2 1
+2 1
+1 1
+2 1
+2 1
+3 1
+1 1
+4 1
+5 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+1 1
+3 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+4 1
+1 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+3 1
+3 1
+2 1
+1 1
+1 1
+1 1
+2 1
+1 1
+1 1
+1 1
+2 1
+1 1
+3 1
+2 1
+1 1
+2 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 1
+2 1
+1 1
+1 1
+1 1
+1 1
+3 1
+1 1
+2 1
+1 1
+2 1
+2 1