Author: rding
Date: Wed Mar 10 21:00:45 2010
New Revision: 921567

URL: http://svn.apache.org/viewvc?rev=921567&view=rev
Log:
PIG-1252: Diamond splitter does not generate correct results when using Multi-query optimization
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=921567&r1=921566&r2=921567&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Mar 10 21:00:45 2010 @@ -145,6 +145,9 @@ OPTIMIZATIONS BUG FIXES +PIG-1252: Diamond splitter does not generate correct results when using +Multi-query optimization (rding) + PIG-1260: Param Subsitution results in parser error if there is no EOL after last line in script (rding) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java?rev=921567&r1=921566&r2=921567&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java Wed Mar 10 21:00:45 2010 @@ -30,6 +30,10 @@ import java.util.Map; import org.apache.pig.data.Tuple; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.BinaryExpressionOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ExpressionOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POBinCond; +import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.UnaryComparisonOperator; import org.apache.pig.impl.plan.OperatorPlan; import org.apache.pig.impl.plan.PlanException; import org.apache.pig.impl.plan.VisitorException; @@ -264,6 +268,26 @@ public class PhysicalPlan extends Operat } cloneOp.setInputs(newInputs); } + + for (PhysicalOperator op : mOps.keySet()) { + if (op instanceof UnaryComparisonOperator) { + UnaryComparisonOperator orig = (UnaryComparisonOperator)op; + UnaryComparisonOperator cloneOp = (UnaryComparisonOperator)matches.get(op); + cloneOp.setExpr((ExpressionOperator)matches.get(orig.getExpr())); + cloneOp.setOperandType(orig.getOperandType()); + } else if (op instanceof BinaryExpressionOperator) { + BinaryExpressionOperator orig = (BinaryExpressionOperator)op; + BinaryExpressionOperator cloneOp = (BinaryExpressionOperator)matches.get(op); + cloneOp.setRhs((ExpressionOperator)matches.get(orig.getRhs())); + cloneOp.setLhs((ExpressionOperator)matches.get(orig.getLhs())); + } else if (op instanceof POBinCond) { + POBinCond orig = (POBinCond)op; + POBinCond cloneOp = (POBinCond)matches.get(op); + cloneOp.setRhs((ExpressionOperator)matches.get(orig.getRhs())); + cloneOp.setLhs((ExpressionOperator)matches.get(orig.getLhs())); + cloneOp.setCond((ExpressionOperator)matches.get(orig.getCond())); + } + } return clone; } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java?rev=921567&r1=921566&r2=921567&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java Wed Mar 10 21:00:45 2010 @@ -108,6 +108,68 @@ public class TestMultiQuery { myPig = null; } + @Test + public void testMultiQueryJiraPig1252() { + + // test case: Problems with secondary key 
optimization and multiquery + // diamond optimization + + String INPUT_FILE = "abc"; + + try { + + PrintWriter w = new PrintWriter(new FileWriter(INPUT_FILE)); + w.println("1\t2\t3"); + w.println("2\t3\t4"); + w.println("3\t\t5"); + w.println("5\t6\t6"); + w.println("6\t\t7"); + w.close(); + + Util.copyFromLocalToCluster(cluster, INPUT_FILE, INPUT_FILE); + + myPig.setBatchOn(); + + myPig.registerQuery("A = load '" + INPUT_FILE + "' as (col1, col2, col3);"); + myPig.registerQuery("B = foreach A generate (chararray) col1, " + + "(chararray) ((col2 is not null) ? " + + "col2 : (col3 < 6 ? col3 : '')) as splitcond;"); + myPig.registerQuery("split B into C if splitcond != '', D if splitcond == '';"); + myPig.registerQuery("E = group C by splitcond;"); + myPig.registerQuery("F = foreach E { orderedData = order C by $1, $0; generate flatten(orderedData); };"); + + Iterator<Tuple> iter = myPig.openIterator("F"); + + List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings( + new String[] { + "(1,2)", + "(2,3)", + "(3,5)", + "(5,6)" + }); + + int counter = 0; + while (iter.hasNext()) { + assertEquals(expectedResults.get(counter++).toString(), iter.next().toString()); + } + + assertEquals(expectedResults.size(), counter); + + } catch (Exception e) { + e.printStackTrace(); + Assert.fail(); + } finally { + new File(INPUT_FILE).delete(); + try { + Util.deleteFile(cluster, INPUT_FILE); + } catch (IOException e) { + e.printStackTrace(); + Assert.fail(); + } + } + } + + @Test public void testMultiQueryJiraPig1169() { // test case: Problems with some top N queries @@ -168,6 +230,7 @@ public class TestMultiQuery { } } + @Test public void testMultiQueryJiraPig1171() { // test case: Problems with some top N queries @@ -225,6 +288,7 @@ public class TestMultiQuery { } } + @Test public void testMultiQueryJiraPig1157() { // test case: Sucessive replicated joins do not generate Map Reduce plan and fails due to OOM @@ -285,6 +349,7 @@ public class TestMultiQuery { } } + @Test 
public void testMultiQueryJiraPig1068() { // test case: COGROUP fails with 'Type mismatch in key from map: