Author: hashutosh
Date: Fri Sep 27 17:34:31 2013
New Revision: 1526990

URL: http://svn.apache.org/r1526990
Log:
HIVE-5357 : ReduceSinkDeDuplication optimizer picks the wrong keys in the
pRS-cGBYm-cRS-cGBYr scenario when there are distinct keys in the child GBY
(Chun Chen via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
    hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
    hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1526990&r1=1526989&r2=1526990&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Fri Sep 27 17:34:31 2013
@@ -378,7 +378,8 @@ public final class CorrelationUtilities 
       // copies desc of cGBYm to cGBYr and remove cGBYm and cRS
       GroupByOperator cGBYm = (GroupByOperator) parent;
 
-      cGBYr.getConf().setKeys(cGBYm.getConf().getKeys());
+      cGBYr.getConf().setKeys(ExprNodeDescUtils.backtrack(ExprNodeDescUtils.backtrack(cGBYr
+              .getConf().getKeys(), cGBYr, cRS), cRS, cGBYm));
       cGBYr.getConf().setAggregators(cGBYm.getConf().getAggregators());
       for (AggregationDesc aggr : cGBYm.getConf().getAggregators()) {
         aggr.setMode(GenericUDAFEvaluator.Mode.COMPLETE);

Modified: 
hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1526990&r1=1526989&r2=1526990&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q 
(original)
+++ hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q 
Fri Sep 27 17:34:31 2013
@@ -18,6 +18,7 @@ explain select src.key, sum(src.key) FRO
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key 
order by src.key, src.value;
 -- mGBY-RS-rGBY-mGBY-RS-rGBY
 explain from (select key, value from src group by key, value) s select s.key 
group by s.key;
+explain select key, count(distinct value) from (select key, value from src 
group by key, value) t group by key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, 
value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 
group by key, lower(value);
@@ -26,6 +27,7 @@ select key, sum(key) as value from src g
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by 
src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by 
src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by 
s.key;
+select key, count(distinct value) from (select key, value from src group by 
key, value) t group by key;
 
 set hive.map.aggr=false;
 
@@ -41,6 +43,7 @@ explain select src.key, sum(src.key) FRO
 explain select src.key, src.value FROM src JOIN src1 ON src.key = src1.key 
order by src.key, src.value;
 -- RS-GBY-RS-GBY
 explain from (select key, value from src group by key, value) s select s.key 
group by s.key;
+explain select key, count(distinct value) from (select key, value from src 
group by key, value) t group by key;
 
 select key, sum(key) from (select * from src distribute by key sort by key, 
value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 
group by key, lower(value);
@@ -49,3 +52,4 @@ select key, sum(key) as value from src g
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by 
src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by 
src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by 
s.key;
+select key, count(distinct value) from (select key, value from src group by 
key, value) t group by key;

Modified: 
hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1526990&r1=1526989&r2=1526990&view=diff
==============================================================================
--- 
hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out 
(original)
+++ 
hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out 
Fri Sep 27 17:34:31 2013
@@ -659,6 +659,97 @@ STAGE PLANS:
       limit: -1
 
 
+PREHOOK: query: explain select key, count(distinct value) from (select key, 
value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, 
value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF 
(TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL 
value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) 
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
(TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL 
value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            Group By Operator
+              aggregations:
+                    expr: count(DISTINCT _col1)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: string
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
 PREHOOK: query: select key, sum(key) from (select * from src distribute by key 
sort by key, value) Q1 group by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -2316,6 +2407,323 @@ POSTHOOK: Input: default@src
 96
 97
 98
+PREHOOK: query: select key, count(distinct value) from (select key, value from 
src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value 
from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      1
+10     1
+100    1
+103    1
+104    1
+105    1
+11     1
+111    1
+113    1
+114    1
+116    1
+118    1
+119    1
+12     1
+120    1
+125    1
+126    1
+128    1
+129    1
+131    1
+133    1
+134    1
+136    1
+137    1
+138    1
+143    1
+145    1
+146    1
+149    1
+15     1
+150    1
+152    1
+153    1
+155    1
+156    1
+157    1
+158    1
+160    1
+162    1
+163    1
+164    1
+165    1
+166    1
+167    1
+168    1
+169    1
+17     1
+170    1
+172    1
+174    1
+175    1
+176    1
+177    1
+178    1
+179    1
+18     1
+180    1
+181    1
+183    1
+186    1
+187    1
+189    1
+19     1
+190    1
+191    1
+192    1
+193    1
+194    1
+195    1
+196    1
+197    1
+199    1
+2      1
+20     1
+200    1
+201    1
+202    1
+203    1
+205    1
+207    1
+208    1
+209    1
+213    1
+214    1
+216    1
+217    1
+218    1
+219    1
+221    1
+222    1
+223    1
+224    1
+226    1
+228    1
+229    1
+230    1
+233    1
+235    1
+237    1
+238    1
+239    1
+24     1
+241    1
+242    1
+244    1
+247    1
+248    1
+249    1
+252    1
+255    1
+256    1
+257    1
+258    1
+26     1
+260    1
+262    1
+263    1
+265    1
+266    1
+27     1
+272    1
+273    1
+274    1
+275    1
+277    1
+278    1
+28     1
+280    1
+281    1
+282    1
+283    1
+284    1
+285    1
+286    1
+287    1
+288    1
+289    1
+291    1
+292    1
+296    1
+298    1
+30     1
+302    1
+305    1
+306    1
+307    1
+308    1
+309    1
+310    1
+311    1
+315    1
+316    1
+317    1
+318    1
+321    1
+322    1
+323    1
+325    1
+327    1
+33     1
+331    1
+332    1
+333    1
+335    1
+336    1
+338    1
+339    1
+34     1
+341    1
+342    1
+344    1
+345    1
+348    1
+35     1
+351    1
+353    1
+356    1
+360    1
+362    1
+364    1
+365    1
+366    1
+367    1
+368    1
+369    1
+37     1
+373    1
+374    1
+375    1
+377    1
+378    1
+379    1
+382    1
+384    1
+386    1
+389    1
+392    1
+393    1
+394    1
+395    1
+396    1
+397    1
+399    1
+4      1
+400    1
+401    1
+402    1
+403    1
+404    1
+406    1
+407    1
+409    1
+41     1
+411    1
+413    1
+414    1
+417    1
+418    1
+419    1
+42     1
+421    1
+424    1
+427    1
+429    1
+43     1
+430    1
+431    1
+432    1
+435    1
+436    1
+437    1
+438    1
+439    1
+44     1
+443    1
+444    1
+446    1
+448    1
+449    1
+452    1
+453    1
+454    1
+455    1
+457    1
+458    1
+459    1
+460    1
+462    1
+463    1
+466    1
+467    1
+468    1
+469    1
+47     1
+470    1
+472    1
+475    1
+477    1
+478    1
+479    1
+480    1
+481    1
+482    1
+483    1
+484    1
+485    1
+487    1
+489    1
+490    1
+491    1
+492    1
+493    1
+494    1
+495    1
+496    1
+497    1
+498    1
+5      1
+51     1
+53     1
+54     1
+57     1
+58     1
+64     1
+65     1
+66     1
+67     1
+69     1
+70     1
+72     1
+74     1
+76     1
+77     1
+78     1
+8      1
+80     1
+82     1
+83     1
+84     1
+85     1
+86     1
+87     1
+9      1
+90     1
+92     1
+95     1
+96     1
+97     1
+98     1
 PREHOOK: query: -- RS-RS-GBY
 explain select key, sum(key) from (select * from src distribute by key sort by 
key, value) Q1 group by key
 PREHOOK: type: QUERY
@@ -2934,6 +3342,88 @@ STAGE PLANS:
       limit: -1
 
 
+PREHOOK: query: explain select key, count(distinct value) from (select key, 
value from src group by key, value) t group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, count(distinct value) from (select key, 
value from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF 
(TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL 
value))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)))) t)) 
(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 
(TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL 
value)))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        t:src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                sort order: ++
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: -1
+      Reduce Operator Tree:
+        Group By Operator
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            Group By Operator
+              aggregations:
+                    expr: count(DISTINCT _col1)
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: string
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: bigint
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
 PREHOOK: query: select key, sum(key) from (select * from src distribute by key 
sort by key, value) Q1 group by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
@@ -4591,3 +5081,320 @@ POSTHOOK: Input: default@src
 96
 97
 98
+PREHOOK: query: select key, count(distinct value) from (select key, value from 
src group by key, value) t group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct value) from (select key, value 
from src group by key, value) t group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0      1
+10     1
+100    1
+103    1
+104    1
+105    1
+11     1
+111    1
+113    1
+114    1
+116    1
+118    1
+119    1
+12     1
+120    1
+125    1
+126    1
+128    1
+129    1
+131    1
+133    1
+134    1
+136    1
+137    1
+138    1
+143    1
+145    1
+146    1
+149    1
+15     1
+150    1
+152    1
+153    1
+155    1
+156    1
+157    1
+158    1
+160    1
+162    1
+163    1
+164    1
+165    1
+166    1
+167    1
+168    1
+169    1
+17     1
+170    1
+172    1
+174    1
+175    1
+176    1
+177    1
+178    1
+179    1
+18     1
+180    1
+181    1
+183    1
+186    1
+187    1
+189    1
+19     1
+190    1
+191    1
+192    1
+193    1
+194    1
+195    1
+196    1
+197    1
+199    1
+2      1
+20     1
+200    1
+201    1
+202    1
+203    1
+205    1
+207    1
+208    1
+209    1
+213    1
+214    1
+216    1
+217    1
+218    1
+219    1
+221    1
+222    1
+223    1
+224    1
+226    1
+228    1
+229    1
+230    1
+233    1
+235    1
+237    1
+238    1
+239    1
+24     1
+241    1
+242    1
+244    1
+247    1
+248    1
+249    1
+252    1
+255    1
+256    1
+257    1
+258    1
+26     1
+260    1
+262    1
+263    1
+265    1
+266    1
+27     1
+272    1
+273    1
+274    1
+275    1
+277    1
+278    1
+28     1
+280    1
+281    1
+282    1
+283    1
+284    1
+285    1
+286    1
+287    1
+288    1
+289    1
+291    1
+292    1
+296    1
+298    1
+30     1
+302    1
+305    1
+306    1
+307    1
+308    1
+309    1
+310    1
+311    1
+315    1
+316    1
+317    1
+318    1
+321    1
+322    1
+323    1
+325    1
+327    1
+33     1
+331    1
+332    1
+333    1
+335    1
+336    1
+338    1
+339    1
+34     1
+341    1
+342    1
+344    1
+345    1
+348    1
+35     1
+351    1
+353    1
+356    1
+360    1
+362    1
+364    1
+365    1
+366    1
+367    1
+368    1
+369    1
+37     1
+373    1
+374    1
+375    1
+377    1
+378    1
+379    1
+382    1
+384    1
+386    1
+389    1
+392    1
+393    1
+394    1
+395    1
+396    1
+397    1
+399    1
+4      1
+400    1
+401    1
+402    1
+403    1
+404    1
+406    1
+407    1
+409    1
+41     1
+411    1
+413    1
+414    1
+417    1
+418    1
+419    1
+42     1
+421    1
+424    1
+427    1
+429    1
+43     1
+430    1
+431    1
+432    1
+435    1
+436    1
+437    1
+438    1
+439    1
+44     1
+443    1
+444    1
+446    1
+448    1
+449    1
+452    1
+453    1
+454    1
+455    1
+457    1
+458    1
+459    1
+460    1
+462    1
+463    1
+466    1
+467    1
+468    1
+469    1
+47     1
+470    1
+472    1
+475    1
+477    1
+478    1
+479    1
+480    1
+481    1
+482    1
+483    1
+484    1
+485    1
+487    1
+489    1
+490    1
+491    1
+492    1
+493    1
+494    1
+495    1
+496    1
+497    1
+498    1
+5      1
+51     1
+53     1
+54     1
+57     1
+58     1
+64     1
+65     1
+66     1
+67     1
+69     1
+70     1
+72     1
+74     1
+76     1
+77     1
+78     1
+8      1
+80     1
+82     1
+83     1
+84     1
+85     1
+86     1
+87     1
+9      1
+90     1
+92     1
+95     1
+96     1
+97     1
+98     1


Reply via email to