Author: rohini
Date: Wed Feb 10 21:20:15 2016
New Revision: 1729739
URL: http://svn.apache.org/viewvc?rev=1729739&view=rev
Log:
PIG-4759: Fix Classresolution_1 e2e failure (rohini)
Added:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Feb 10 21:20:15 2016
@@ -89,6 +89,8 @@ PIG-4639: Add better parser for Apache H
BUG FIXES
+PIG-4759: Fix Classresolution_1 e2e failure (rohini)
+
PIG-4800: EvalFunc.getCacheFiles() fails for different namenode (rohini)
PIG-4790: Join after union fail due to UnionOptimizer (rohini)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml
URL:
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/perf.xml Wed Feb 10 21:20:15 2016
@@ -493,7 +493,7 @@ Gtab = .... aggregation function
STORE Gtab INTO '/user/vxj/finalresult2';
</source>
-<p>To make the script works, add the exec statement. </p>
+<p>To make the script work, add the exec statement. </p>
<source>
A = LOAD '/user/xxx/firstinput' USING PigStorage();
@@ -518,6 +518,11 @@ Ftab = group ....
Gtab = .... aggregation function
STORE Gtab INTO '/user/vxj/finalresult2';
</source>
+
+<p>If the STORE and LOAD both had exact matching file paths, Pig will recognize the implicit dependency
+and launch two different mapreduce jobs/Tez DAGs with the second job depending on the output of the first one.
+exec is not required to be specified in that case.</p>
+
</section>
</section>
</section>
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezCompiler.java Wed Feb 10 21:20:15 2016
@@ -363,52 +363,18 @@ public class TezCompiler extends PhyPlan
String msg = "Predecessor of load should be a store or native oper. Got " + p.getClass();
throw new PlanException(msg, errCode, PigException.BUG);
}
- if (p instanceof POStore) {
- PhysicalOperator store = oper.plan.getOperator(p.getOperatorKey());
- // replace POStore to POValueOutputTez, convert the tezOperator to splitter
- oper.plan.disconnect(oper.plan.getPredecessors(store).get(0), store);
- oper.plan.remove(store);
- POValueOutputTez valueOutput = new POValueOutputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
- oper.plan.addAsLeaf(valueOutput);
- oper.setSplitter(true);
-
- // Create a splittee of store only
- TezOperator storeOnlyTezOperator = getTezOp();
- PhysicalPlan storeOnlyPhyPlan = new PhysicalPlan();
- POValueInputTez valueInput = new POValueInputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
- valueInput.setInputKey(oper.getOperatorKey().toString());
- storeOnlyPhyPlan.addAsLeaf(valueInput);
- storeOnlyPhyPlan.addAsLeaf(store);
- storeOnlyTezOperator.plan = storeOnlyPhyPlan;
- tezPlan.add(storeOnlyTezOperator);
- phyToTezOpMap.put(p, storeOnlyTezOperator);
-
- // Create new operator as second splittee
- curTezOp = getTezOp();
- POValueInputTez valueInput2 = new POValueInputTez(new OperatorKey(scope,nig.getNextNodeId(scope)));
- valueInput2.setInputKey(oper.getOperatorKey().toString());
- curTezOp.plan.add(valueInput2);
- tezPlan.add(curTezOp);
-
- // Connect splitter to splittee
- TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, oper, storeOnlyTezOperator);
- TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
- storeOnlyTezOperator.setRequestedParallelismByReference(oper);
-
- edge = TezCompilerUtil.connect(tezPlan, oper, curTezOp);
- TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
- curTezOp.setRequestedParallelismByReference(oper);
- } else if (p instanceof PONative) {
- // Need new operator
- curTezOp = getTezOp();
- curTezOp.plan.add(op);
- tezPlan.add(curTezOp);
-
- plan.disconnect(op, p);
- TezCompilerUtil.connect(tezPlan, oper, curTezOp);
- phyToTezOpMap.put(op, curTezOp);
- return;
+ curTezOp = getTezOp();
+ curTezOp.plan.add(op);
+ curTezOp.setUseMRMapSettings(true);
+ if (((POLoad) op).getLFile() != null
+ && ((POLoad) op).getLFile().getFuncSpec() != null) {
+ curTezOp.UDFs.add(((POLoad)op).getLFile().getFuncSpec().toString());
}
+ tezPlan.add(curTezOp);
+ phyToTezOpMap.put(op, curTezOp);
+ plan.disconnect(op, p);
+ TezCompilerUtil.connect(tezPlan, oper, curTezOp);
+ oper.segmentBelow = true;
return;
}
Modified:
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/plan/TezPrinter.java Wed Feb 10 21:20:15 2016
@@ -90,6 +90,9 @@ public class TezPrinter extends TezOpPla
printer.setVerbose(isVerbose);
printer.visit();
mStream.println();
+ } else if (!tezOper.isVertexGroup()) {
+ // For things like NativeTezOper
+ mStream.println("" + tezOper);
}
}
Added:
pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld?rev=1729739&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld Wed Feb 10 21:20:15 2016
@@ -0,0 +1,43 @@
+#--------------------------------------------------
+# There are 2 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-18
+
+Tez vertex scope-18
+# Plan on vertex
+a: Store(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-8
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
+#--------------------------------------------------
+# TEZ DAG plan: pig-1_scope-1
+#--------------------------------------------------
+Tez vertex scope-19
+
+Tez vertex scope-19
+# Plan on vertex
+b: Store(file:///tmp/output1:org.apache.pig.builtin.PigStorage) - scope-17
+|
+|---b: New For Each(false,false)[bag] - scope-16
+ | |
+ | Cast[int] - scope-11
+ | |
+ | |---Project[bytearray][0] - scope-10
+ | |
+ | Cast[int] - scope-14
+ | |
+ | |---Project[bytearray][1] - scope-13
+ |
+ |---b: Load(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-9
Added: pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld?rev=1729739&view=auto
==============================================================================
--- pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld (added)
+++ pig/trunk/test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld Wed Feb 10 21:20:15 2016
@@ -0,0 +1,42 @@
+#--------------------------------------------------
+# There are 3 DAGs in the session
+#--------------------------------------------------
+#--------------------------------------------------
+# TEZ DAG plan: pig-0_scope-0
+#--------------------------------------------------
+Tez vertex scope-12
+
+Tez vertex scope-12
+# Plan on vertex
+a: Store(/tmp/table_testNativeMRJobSimple_input:org.apache.pig.builtin.PigStorage) - scope-8
+|
+|---a: New For Each(false,false)[bag] - scope-7
+ | |
+ | Cast[int] - scope-2
+ | |
+ | |---Project[bytearray][0] - scope-1
+ | |
+ | Cast[int] - scope-5
+ | |
+ | |---Project[bytearray][1] - scope-4
+ |
+ |---a: Load(file:///tmp/input:org.apache.pig.builtin.PigStorage) - scope-0
+#--------------------------------------------------
+# TEZ DAG plan: pig-1_scope-1
+#--------------------------------------------------
+Tez vertex scope-13
+
+Tez vertex scope-13
+Tez - scope-13
+ Native Tez - jar : hadoop-examples.jar, params: [wordcount, /tmp/table_testNativeMRJobSimple_input, /tmp/table_testNativeMRJobSimple_output]:
+Plan Empty
+#--------------------------------------------------
+# TEZ DAG plan: pig-2_scope-2
+#--------------------------------------------------
+Tez vertex scope-14
+
+Tez vertex scope-14
+# Plan on vertex
+b: Store(file:///tmp/output:org.apache.pig.builtin.PigStorage) - scope-11
+|
+|---b: Load(/tmp/table_testNativeMRJobSimple_output:org.apache.pig.builtin.PigStorage) - scope-10
Modified: pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java?rev=1729739&r1=1729738&r2=1729739&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java (original)
+++ pig/trunk/test/org/apache/pig/tez/TestTezCompiler.java Wed Feb 10 21:20:15 2016
@@ -89,6 +89,27 @@ public class TestTezCompiler {
}
@Test
+ public void testStoreLoad() throws Exception {
+ String query =
+ "a = load 'file:///tmp/input' as (x:int, y:int);" +
+ "store a into 'file:///tmp/output';" +
+ "b = load 'file:///tmp/output' as (x:int, y:int);" +
+ "store b into 'file:///tmp/output1';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-LoadStore-1.gld");
+ }
+
+ @Test
+ public void testNative() throws Exception {
+ String query =
+ "a = load 'file:///tmp/input' as (x:int, y:int);" +
+ "b = native 'hadoop-examples.jar' Store a into '/tmp/table_testNativeMRJobSimple_input' Load '/tmp/table_testNativeMRJobSimple_output' `wordcount /tmp/table_testNativeMRJobSimple_input /tmp/table_testNativeMRJobSimple_output`;" +
+ "store b into 'file:///tmp/output';";
+
+ run(query, "test/org/apache/pig/test/data/GoldenFiles/tez/TEZC-Native-1.gld");
+ }
+
+ @Test
public void testFilter() throws Exception {
String query =
"a = load 'file:///tmp/input' as (x:int, y:int);" +