Author: rohini Date: Fri Mar 3 20:14:02 2017 New Revision: 1785370 URL: http://svn.apache.org/viewvc?rev=1785370&view=rev Log: PIG-5173: Script with multiple splits fails with Invalid dag containing 0 vertices (rohini)
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld Modified: pig/trunk/CHANGES.txt pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1785370&r1=1785369&r2=1785370&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Mar 3 20:14:02 2017 @@ -89,6 +89,8 @@ OPTIMIZATIONS BUG FIXES +PIG-5173: Script with multiple splits fails with Invalid dag containing 0 vertices (rohini) + PIG-5159: Fix Pig not saving grunt history (szita via rohini) PIG-5127: Test fail when running test-core-mrtez (daijy) Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java?rev=1785370&r1=1785369&r2=1785370&view=diff ============================================================================== --- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java (original) +++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPlanContainer.java Fri Mar 3 20:14:02 2017 @@ -291,7 +291,7 @@ public class TezPlanContainer extends Op Set<TezOperator> splitters2 = new HashSet<>(); Set<TezOperator> processedPredecessors = new HashSet<>(); // Find predecessors which are splitters - fetchSplitterPredecessors(plan, operToSegment, processedPredecessors, splitters1); + fetchSplitterPredecessors(plan, operToSegment, processedPredecessors, splitters1, false); if (!splitters1.isEmpty()) { // For the successor, traverse rest of the plan below it and // search the predecessors of its successors to find any predecessor that might be a splitter. 
@@ -300,7 +300,7 @@ public class TezPlanContainer extends Op processedPredecessors.clear(); processedPredecessors.add(successor); for (TezOperator succ : allSuccs) { - fetchSplitterPredecessors(plan, succ, processedPredecessors, splitters2); + fetchSplitterPredecessors(plan, succ, processedPredecessors, splitters2, true); } // Find the common ones splitters1.retainAll(splitters2); @@ -309,7 +309,7 @@ public class TezPlanContainer extends Op } private void fetchSplitterPredecessors(TezOperPlan plan, TezOperator tezOp, - Set<TezOperator> processedPredecessors, Set<TezOperator> splitters) { + Set<TezOperator> processedPredecessors, Set<TezOperator> splitters, boolean stopAtSplit) { List<TezOperator> predecessors = plan.getPredecessors(tezOp); if (predecessors != null) { for (TezOperator pred : predecessors) { @@ -319,9 +319,13 @@ public class TezPlanContainer extends Op } if (pred.isSplitter()) { splitters.add(pred); + if (!stopAtSplit) { + processedPredecessors.add(pred); + fetchSplitterPredecessors(plan, pred, processedPredecessors, splitters, stopAtSplit); + } } else if (!pred.needSegmentBelow()) { processedPredecessors.add(pred); - fetchSplitterPredecessors(plan, pred, processedPredecessors, splitters); + fetchSplitterPredecessors(plan, pred, processedPredecessors, splitters, stopAtSplit); } } } Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld?rev=1785370&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld (added) +++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld Fri Mar 3 20:14:02 2017 @@ -0,0 +1,105 @@ +#-------------------------------------------------- +# There are 3 DAGs in the session +#-------------------------------------------------- +Tez DAG pig-0_scope-0 -> Tez 
DAG pig-1_scope-1, +Tez DAG pig-1_scope-1 -> Tez DAG pig-2_scope-2, +Tez DAG pig-2_scope-2 + +#-------------------------------------------------- +# TEZ DAG plan: pig-0_scope-0 +#-------------------------------------------------- +Tez vertex scope-45 + +Tez vertex scope-45 +# Plan on vertex +a: Split - scope-76 +| | +| a: Store(file:///tmp/pigoutput/Dir0:org.apache.pig.builtin.PigStorage) - scope-4 +| | +| Store(file:/tmp/temp-1456742965/tmp774375955:org.apache.pig.impl.io.InterStorage) - scope-72 +| | +| |---a1: Filter[bag] - scope-7 +| | | +| | Equal To[boolean] - scope-11 +| | | +| | |---Cast[int] - scope-9 +| | | | +| | | |---Project[bytearray][0] - scope-8 +| | | +| | |---Constant(5) - scope-10 +| +|---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0 +#-------------------------------------------------- +# TEZ DAG plan: pig-1_scope-1 +#-------------------------------------------------- +Tez vertex scope-52 +Tez vertex scope-54 -> Tez vertex scope-58, +Tez vertex scope-58 -> Tez vertex scope-67, +Tez vertex scope-67 + +Tez vertex scope-52 +# Plan on vertex +a1: Store(file:///tmp/pigoutput/Dir1:org.apache.pig.builtin.PigStorage) - scope-15 +| +|---Load(file:/tmp/temp-1456742965/tmp774375955:org.apache.pig.impl.io.InterStorage) - scope-73 +Tez vertex scope-54 +# Plan on vertex +a2: Local Rearrange[tuple]{tuple}(true) - scope-57 -> scope-58 +| | +| Project[tuple][*] - scope-56 +| +|---Load(file:/tmp/temp-1456742965/tmp774375955:org.apache.pig.impl.io.InterStorage) - scope-74 +Tez vertex scope-58 +# Combine plan on edge <scope-54> +org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.DistinctCombiner$Combine +# Plan on vertex +a2: Split - scope-77 +| | +| a2: Store(file:///tmp/pigoutput/Dir2:org.apache.pig.builtin.PigStorage) - scope-22 +| | +| a3: Local Rearrange[tuple]{bytearray}(false) - scope-29 -> scope-67 +| | | +| | Project[bytearray][0] - scope-30 +| +|---New For Each(true)[bag] - scope-61 + | | + | Project[tuple][0] - scope-60 
+ | + |---Package(Packager)[tuple]{tuple} - scope-59 +Tez vertex scope-67 +# Plan on vertex +a3: Store(file:///tmp/pigoutput/Dir3:org.apache.pig.builtin.PigStorage) - scope-31 +| +|---a3: Package(Packager)[tuple]{bytearray} - scope-28 +#-------------------------------------------------- +# TEZ DAG plan: pig-2_scope-2 +#-------------------------------------------------- +Tez vertex scope-68 -> Tez vertex scope-71, +Tez vertex scope-70 -> Tez vertex scope-71, +Tez vertex scope-71 + +Tez vertex scope-68 +# Plan on vertex +c: Local Rearrange[tuple]{bytearray}(false) - scope-37 -> scope-71 +| | +| Project[bytearray][0] - scope-38 +| +|---Load(file:/tmp/temp-1456742965/tmp774375955:org.apache.pig.impl.io.InterStorage) - scope-75 +Tez vertex scope-70 +# Plan on vertex +c: Local Rearrange[tuple]{bytearray}(false) - scope-39 -> scope-71 +| | +| Project[bytearray][0] - scope-40 +| +|---b: Load(file:///tmp/pigoutput/Dir3:org.apache.pig.builtin.PigStorage) - scope-32 +Tez vertex scope-71 +# Plan on vertex +c: Store(file:///tmp/pigoutput/Dir4:org.apache.pig.builtin.PigStorage) - scope-44 +| +|---c: New For Each(true,true)[tuple] - scope-43 + | | + | Project[bag][1] - scope-41 + | | + | Project[bag][2] - scope-42 + | + |---c: Package(Packager)[tuple]{bytearray} - scope-36 Modified: pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java?rev=1785370&r1=1785369&r2=1785370&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java (original) +++ pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Fri Mar 3 20:14:02 2017 @@ -203,6 +203,25 @@ public class TestTezCompiler { resetScope(); resetFileLocalizer(); run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-6.gld"); + + // Three levels of splits - a, a1 and a2. 
+ // One split above and one split below a1 which is the split to be replaced with tmp store. + query = + "a = load 'file:///tmp/input';" + + "store a into 'file:///tmp/pigoutput/Dir0';" + + "a1 = filter a by $0 == 5;" + + "store a1 into 'file:///tmp/pigoutput/Dir1';" + + "a2 = distinct a1;" + + "store a2 into 'file:///tmp/pigoutput/Dir2';" + + "a3 = group a2 by $0;" + + "store a3 into 'file:///tmp/pigoutput/Dir3';" + + "b = load 'file:///tmp/pigoutput/Dir3';" + + "c = join a1 by $0, b by $0;" + + "store c into 'file:///tmp/pigoutput/Dir4';"; + + resetScope(); + resetFileLocalizer(); + run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-7.gld"); } @Test