Author: olga
Date: Tue Dec 1 20:21:52 2009
New Revision: 885907

URL: http://svn.apache.org/viewvc?rev=885907&view=rev
Log:
PIG-1108: Incorrect map output key type in MultiQuery optimiza (rding via olgan)

Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestMultiQuery.java Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=885907&r1=885906&r2=885907&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Tue Dec 1 20:21:52 2009 @@ -125,6 +125,8 @@ BUG FIXES +PIG-1108: Incorrect map output key type in MultiQuery optimiza (rding via olgan) + PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via ) PIG-1080: PigStorage may miss records when loading a file (rding via olgan) Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java?rev=885907&r1=885906&r2=885907&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java Tue Dec 1 20:21:52 2009 @@ -107,6 +107,11 @@ List<MapReduceOper> successors = getPlan().getSuccessors(mr); for (MapReduceOper successor : successors) { + if (successor.getUseSecondaryKey()) { + log.debug("Splittee " + successor.getOperatorKey().getId() + + " uses secondary key, do not merge it"); + continue; + } if (isMapOnly(successor)) { if (isSingleLoadMapperPlan(successor.mapPlan)) { mappers.add(successor); @@ 
-121,10 +126,11 @@ } } } - + + int numSplittees = successors.size(); + // case 1: exactly one splittee and it's map-only - if (mappers.size() == 1 && mapReducers.size() == 0 - && multiLoadMROpers.size() == 0 ) { + if (mappers.size() == 1 && numSplittees == 1) { mergeOnlyMapperSplittee(mappers.get(0), mr); log.info("Merged the only map-only splittee."); @@ -133,16 +139,14 @@ } // case 2: exactly one splittee and it has reducer - if (isMapOnly(mr) && mapReducers.size() == 1 - && mappers.size() == 0 && multiLoadMROpers.size() == 0) { + if (isMapOnly(mr) && mapReducers.size() == 1 && numSplittees == 1) { mergeOnlyMapReduceSplittee(mapReducers.get(0), mr); log.info("Merged the only map-reduce splittee."); return; } - - int numSplittees = successors.size(); + int numMerges = 0; PhysicalPlan splitterPl = isMapOnly(mr) ? mr.mapPlan : mr.reducePlan; Modified: hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestMultiQuery.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestMultiQuery.java?rev=885907&r1=885906&r2=885907&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestMultiQuery.java (original) +++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestMultiQuery.java Tue Dec 1 20:21:52 2009 @@ -89,6 +89,33 @@ } @Test + public void testMultiQueryJiraPig1108() { + + try { + myPig.setBatchOn(); + + myPig.registerQuery("a = load 'file:test/org/apache/pig/test/data/passwd' " + + "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);"); + myPig.registerQuery("split a into plan1 if (uid > 5), plan2 if ( uid < 5);"); + myPig.registerQuery("b = group plan1 by uname;"); + myPig.registerQuery("c = foreach b { tmp = order plan1 by uid desc; " + + "generate flatten(group) as foo, tmp; };"); + myPig.registerQuery("d = filter c BY foo is not null;"); + myPig.registerQuery("store d into 
'/tmp/output1';"); + myPig.registerQuery("store plan2 into '/tmp/output2';"); + + List<ExecJob> jobs = myPig.executeBatch(); + for (ExecJob job : jobs) { + assertTrue(job.getStatus() == ExecJob.JOB_STATUS.COMPLETED); + } + + } catch (Exception e) { + e.printStackTrace(); + Assert.fail(); + } + } + + @Test public void testMultiQueryJiraPig1060() { // test case: