Author: daijy Date: Fri Apr 16 17:18:47 2010 New Revision: 935001 URL: http://svn.apache.org/viewvc?rev=935001&view=rev Log: PIG-1374: PushDownForeachFlatten shall not push ForEach below Join if the flattened fields are used in the next statement
Modified: hadoop/pig/branches/branch-0.7/CHANGES.txt hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestPushDownForeachFlatten.java Modified: hadoop/pig/branches/branch-0.7/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/CHANGES.txt?rev=935001&r1=935000&r2=935001&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.7/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.7/CHANGES.txt Fri Apr 16 17:18:47 2010 @@ -183,6 +183,8 @@ OPTIMIZATIONS BUG FIXES +PIG-1374: PushDownForeachFlatten shall not push ForEach below Join if the flattened fields is used in the next statement (daijy) + PIG-1372: Restore PigInputFormat.sJob for backward compatibility (pradeepkth) PIG-1369: POProject does not handle null tuples and non existent fields in Modified: hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java?rev=935001&r1=935000&r2=935001&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java (original) +++ hadoop/pig/branches/branch-0.7/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java Fri Apr 16 17:18:47 2010 @@ -35,6 +35,7 @@ import org.apache.pig.impl.logicalLayer. 
import org.apache.pig.impl.logicalLayer.LOSort; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.logicalLayer.RelationalOperator; import org.apache.pig.impl.logicalLayer.UDFFinder; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.plan.DepthFirstWalker; @@ -181,6 +182,23 @@ public class PushDownForeachFlatten exte IndexHelper<LogicalOperator> indexHelper = new IndexHelper<LogicalOperator>(peers); Integer foreachPosition = indexHelper.getIndex(foreach); + // Check if flattened fields is required by successor, if so, don't optimize + List<RequiredFields> requiredFieldsList = ((RelationalOperator)successor).getRequiredFields(); + RequiredFields requiredFields = requiredFieldsList.get(foreachPosition.intValue()); + + MultiMap<Integer, Column> foreachMappedFields = foreachProjectionMap.getMappedFields(); + + if (requiredFields.getFields()!=null) { + for (Pair<Integer, Integer> pair : requiredFields.getFields()) { + Collection<Column> columns = foreachMappedFields.get(pair.second); + for (Column column : columns) { + Pair<Integer, Integer> foreachInputColumn = column.getInputColumn(); + if (foreach.isInputFlattened(foreachInputColumn.second)) + return false; + } + } + } + // the foreach with flatten can be swapped with an order by // as the order by will have lesser number of records to sort // also the sort does not alter the records that are processed @@ -285,23 +303,6 @@ public class PushDownForeachFlatten exte } } - // Check if flattened fields is required by LOJoin, if so, don't optimize - if (successor instanceof LOJoin) { - List<RequiredFields> requiredFieldsList = ((LOJoin)successor).getRequiredFields(); - RequiredFields requiredFields = requiredFieldsList.get(foreachPosition.intValue()); - - MultiMap<Integer, Column> foreachMappedFields = foreachProjectionMap.getMappedFields(); - - for (Pair<Integer, Integer> pair : requiredFields.getFields()) 
{ - Collection<Column> columns = foreachMappedFields.get(pair.second); - for (Column column : columns) { - Pair<Integer, Integer> foreachInputColumn = column.getInputColumn(); - if (foreach.isInputFlattened(foreachInputColumn.second)) - return false; - } - } - } - mInsertBetween = true; return true; } Modified: hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestPushDownForeachFlatten.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestPushDownForeachFlatten.java?rev=935001&r1=935000&r2=935001&view=diff ============================================================================== --- hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestPushDownForeachFlatten.java (original) +++ hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestPushDownForeachFlatten.java Fri Apr 16 17:18:47 2010 @@ -998,6 +998,25 @@ public class TestPushDownForeachFlatten assertTrue(pushDownForeach.getSwap() == false); assertTrue(pushDownForeach.getInsertBetween() == false); } + + // See PIG-1374 + @Test + public void testForeachRequiredField() throws Exception { + planTester.buildPlan("A = load 'myfile' as (b{t(a0:chararray,a1:int)});"); + planTester.buildPlan("B = foreach A generate flatten($0);"); + LogicalPlan lp = planTester.buildPlan("C = order B by $1 desc;"); + + planTester.setPlan(lp); + planTester.setProjectionMap(lp); + planTester.rebuildSchema(lp); + + PushDownForeachFlatten pushDownForeach = new PushDownForeachFlatten(lp); + LOLoad loada = (LOLoad) lp.getRoots().get(0); + + assertTrue(!pushDownForeach.check(lp.getSuccessors(loada))); + assertTrue(pushDownForeach.getSwap() == false); + assertTrue(pushDownForeach.getInsertBetween() == false); + } }