Author: gates Date: Wed Dec 2 01:47:44 2009 New Revision: 886030 URL: http://svn.apache.org/viewvc?rev=886030&view=rev Log: PIG-1022: optimizer pushes filter before the foreach that generates column used by filter.
Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=886030&r1=886029&r2=886030&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Wed Dec 2 01:47:44 2009 @@ -127,6 +127,9 @@ BUG FIXES +PIG-1022: optimizer pushes filter before the foreach that generates column +used by filter (daijy via gates) + PIG-1108: Incorrect map output key type in MultiQuery optimiza (rding via olgan) PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via ) Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java?rev=886030&r1=886029&r2=886030&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java Wed Dec 2 01:47:44 2009 @@ -36,6 +36,7 @@ import org.apache.pig.impl.logicalLayer.LOForEach; import org.apache.pig.impl.logicalLayer.LOLimit; import org.apache.pig.impl.logicalLayer.LOLoad; +import org.apache.pig.impl.logicalLayer.LOProject; import org.apache.pig.impl.logicalLayer.LOSplit; import org.apache.pig.impl.logicalLayer.LOStore; import org.apache.pig.impl.logicalLayer.LOStream; @@ -43,10 +44,12 @@ import org.apache.pig.impl.logicalLayer.LOUnion; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.logicalLayer.TopLevelProjectFinder; import org.apache.pig.impl.logicalLayer.UDFFinder; import org.apache.pig.impl.plan.DepthFirstWalker; import org.apache.pig.impl.plan.ProjectionMap; import org.apache.pig.impl.plan.RequiredFields; +import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.plan.optimizer.OptimizerException; import org.apache.pig.PigException; import org.apache.pig.impl.util.MultiMap; @@ -256,6 +259,18 @@ Pair<Boolean, Set<Integer>> mappingResult = isRequiredFieldMapped(requiredField, predecessor.getProjectionMap()); boolean mapped = mappingResult.first; + + // Check if it is a direct mapping, that is, project optionally followed by cast, so if project->project, it is not + // considered as a mapping + for (Pair<Integer, Integer> pair : requiredField.getFields()) + { + if (!isFieldSimple(loForEach.getForEachPlans().get(pair.second))) + { + mapped = false; + break; + } + } + if (!mapped) { return false; } @@ -420,4 +435,44 @@ return new Pair<Boolean, Set<Integer>>(true, grandParentIndexes); } + + /** + * Check if the inner plan is simple + * + * @param lp + * logical plan to check + * @return Whether if the logical plan is a simple project optionally followed by cast + */ + boolean isFieldSimple(LogicalPlan lp) throws OptimizerException + { + TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(lp); + + try { + projectFinder.visit(); + } catch (VisitorException ve) { + throw new OptimizerException(); + } + if (projectFinder.getProjectSet()!=null && projectFinder.getProjectSet().size()==1) + { + LOProject project = projectFinder.getProjectSet().iterator().next(); + if (lp.getPredecessors(project)==null) + { + LogicalOperator pred = project; + while (lp.getSuccessors(pred)!=null) + { + if (lp.getSuccessors(pred).size()!=1) + return false; + if (!(lp.getSuccessors(pred).get(0) instanceof LOCast)) + { + return false; + } + pred = lp.getSuccessors(pred).get(0); + } + return true; + } + return false; + } + else + return true; + } } Modified: hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java?rev=886030&r1=886029&r2=886030&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java (original) +++ hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java Wed Dec 2 01:47:44 2009 @@ -1050,6 +1050,24 @@ assertTrue(pushUpFilter.getPushBeforeInput() == -1); } + + @Test + public void testFilterNestedForEach() throws Exception { + planTester.buildPlan("A = load 'myfile' as (name, age, gpa);"); + planTester.buildPlan("B = group A by name;"); + planTester.buildPlan("C = foreach B generate A.age as age;"); + LogicalPlan lp = planTester.buildPlan("D = filter C by age == 20;"); + + planTester.setPlan(lp); + planTester.setProjectionMap(lp); + + PushUpFilter pushUpFilter = new PushUpFilter(lp); + + assertTrue(!pushUpFilter.check(lp.getLeaves())); + assertTrue(pushUpFilter.getSwap() == false); + assertTrue(pushUpFilter.getPushBefore() == false); + assertTrue(pushUpFilter.getPushBeforeInput() == -1); + } }