Author: olga
Date: Tue Dec  1 20:07:53 2009
New Revision: 885902

URL: http://svn.apache.org/viewvc?rev=885902&view=rev
Log:
PIG-1108: Incorrect map output key type in MultiQuery optimization (rding via
olgan)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Dec  1 20:07:53 2009
@@ -41,6 +41,9 @@
 
 BUG FIXES
 
+PIG-1108: Incorrect map output key type in MultiQuery optimiza (rding via
+olgan)
+
 PIG-1022:  optimizer pushes filter before the foreach that generates column
 used by filter (daijy via gates)
 

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java Tue Dec  1 20:07:53 2009
@@ -107,6 +107,11 @@
                     
         List<MapReduceOper> successors = getPlan().getSuccessors(mr);
         for (MapReduceOper successor : successors) {
+            if (successor.getUseSecondaryKey()) {
+                log.debug("Splittee " + successor.getOperatorKey().getId()
+                        + " uses secondary key, do not merge it");
+                continue;
+            }
             if (isMapOnly(successor)) {
                 if (isSingleLoadMapperPlan(successor.mapPlan)) {               
     
                     mappers.add(successor);                
@@ -121,10 +126,11 @@
                 }
             }                
         }
-                      
+                  
+        int numSplittees = successors.size();
+        
         // case 1: exactly one splittee and it's map-only
-        if (mappers.size() == 1 && mapReducers.size() == 0 
-                && multiLoadMROpers.size() == 0 ) {            
+        if (mappers.size() == 1 && numSplittees == 1) {    
             mergeOnlyMapperSplittee(mappers.get(0), mr);
             
             log.info("Merged the only map-only splittee.");
@@ -133,16 +139,14 @@
         }
         
         // case 2: exactly one splittee and it has reducer
-        if (isMapOnly(mr) && mapReducers.size() == 1 
-                && mappers.size() == 0 && multiLoadMROpers.size() == 0) {      
      
+        if (isMapOnly(mr) && mapReducers.size() == 1 && numSplittees == 1) {   
         
             mergeOnlyMapReduceSplittee(mapReducers.get(0), mr);
             
             log.info("Merged the only map-reduce splittee.");
             
             return;
         } 
-        
-        int numSplittees = successors.size();
+                
         int numMerges = 0;
         
         PhysicalPlan splitterPl = isMapOnly(mr) ? mr.mapPlan : mr.reducePlan;  
                          

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java Tue Dec  1 20:07:53 2009
@@ -89,6 +89,33 @@
     }
     
     @Test
+    public void testMultiQueryJiraPig1108() {
+        
+        try {
+            myPig.setBatchOn();
+
+            myPig.registerQuery("a = load 
'file:test/org/apache/pig/test/data/passwd' " 
+                    + "using PigStorage(':') as (uname:chararray, 
passwd:chararray, uid:int, gid:int);");
+            myPig.registerQuery("split a into plan1 if (uid > 5), plan2 if ( 
uid < 5);");
+            myPig.registerQuery("b = group plan1 by uname;");
+            myPig.registerQuery("c = foreach b { tmp = order plan1 by uid 
desc; " 
+                    + "generate flatten(group) as foo, tmp; };");
+            myPig.registerQuery("d = filter c BY foo is not null;");
+            myPig.registerQuery("store d into '/tmp/output1';");
+            myPig.registerQuery("store plan2 into '/tmp/output2';");
+             
+            List<ExecJob> jobs = myPig.executeBatch();
+            for (ExecJob job : jobs) {
+                assertTrue(job.getStatus() == ExecJob.JOB_STATUS.COMPLETED);
+            }
+            
+        } catch (Exception e) {
+            e.printStackTrace();
+            Assert.fail();
+        } 
+    }    
+    
+    @Test
     public void testMultiQueryJiraPig1060() {
 
         // test case: 


Reply via email to