Author: xuefu
Date: Fri Dec 9 02:52:31 2016
New Revision: 1773318
URL: http://svn.apache.org/viewvc?rev=1773318&view=rev
Log:
PIG-4815: Add xml format support for 'explain' in spark engine (Adam via Xuefu)
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
pig/branches/spark/test/org/apache/pig/test/TestPigServer.java
Modified:
pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java?rev=1773318&r1=1773317&r2=1773318&view=diff
==============================================================================
--- pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java (original)
+++ pig/branches/spark/src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java Fri Dec 9 02:52:31 2016
@@ -31,6 +31,8 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -108,6 +110,7 @@ import org.apache.pig.backend.hadoop.exe
import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkOperator;
import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkPOPackageAnnotator;
import org.apache.pig.backend.hadoop.executionengine.spark.plan.SparkPrinter;
+import org.apache.pig.backend.hadoop.executionengine.spark.plan.XMLSparkPrinter;
import org.apache.pig.data.SchemaTupleBackend;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.plan.OperatorKey;
@@ -589,20 +592,20 @@ public class SparkLauncher extends Launc
Map<OperatorKey, SparkOperator> allOperKeys = sparkPlan.getKeys();
List<OperatorKey> operKeyList = new ArrayList(allOperKeys.keySet());
Collections.sort(operKeyList);
- for (OperatorKey operatorKey : operKeyList) {
- SparkOperator op = sparkPlan.getOperator(operatorKey);
- ps.print(op.getOperatorKey());
- List<SparkOperator> successors = sparkPlan.getSuccessors(op);
- if (successors != null) {
- ps.print("->");
- for (SparkOperator suc : successors) {
- ps.print(suc.getOperatorKey() + " ");
- }
- }
- ps.println();
- }
if (format.equals("text")) {
+ for (OperatorKey operatorKey : operKeyList) {
+ SparkOperator op = sparkPlan.getOperator(operatorKey);
+ ps.print(op.getOperatorKey());
+ List<SparkOperator> successors = sparkPlan.getSuccessors(op);
+ if (successors != null) {
+ ps.print("->");
+ for (SparkOperator suc : successors) {
+ ps.print(suc.getOperatorKey() + " ");
+ }
+ }
+ ps.println();
+ }
SparkPrinter printer = new SparkPrinter(ps, sparkPlan);
printer.setVerbose(verbose);
printer.visit();
@@ -615,9 +618,20 @@ public class SparkLauncher extends Launc
printer.setVerbose(verbose);
printer.dump();
ps.println("");
- } else { // TODO: add support for other file format
+ } else if (format.equals("xml")) {
+ try {
+ XMLSparkPrinter printer = new XMLSparkPrinter(ps, sparkPlan);
+ printer.visit();
+ printer.closePlan();
+ } catch (ParserConfigurationException e) {
+ e.printStackTrace();
+ } catch (TransformerException e) {
+ e.printStackTrace();
+ }
+ }
+ else {
throw new IOException(
- "Non-text and non-dot output of explain is not supported.");
+ "Unsupported explain format. Supported formats are: text, dot, xml");
}
}
Modified: pig/branches/spark/test/org/apache/pig/test/TestPigServer.java
URL:
http://svn.apache.org/viewvc/pig/branches/spark/test/org/apache/pig/test/TestPigServer.java?rev=1773318&r1=1773317&r2=1773318&view=diff
==============================================================================
--- pig/branches/spark/test/org/apache/pig/test/TestPigServer.java (original)
+++ pig/branches/spark/test/org/apache/pig/test/TestPigServer.java Fri Dec 9 02:52:31 2016
@@ -547,7 +547,8 @@ public class TestPigServer {
public void testExplainXmlComplex() throws Throwable {
// TODO: Explain XML output is not supported in non-MR mode. Remove the
// following condition once it's implemented in Tez.
- if (cluster.getExecType() != ExecType.MAPREDUCE) {
+ String execType = cluster.getExecType().toString().toLowerCase();
+ if (!execType.equals(ExecType.MAPREDUCE.name().toLowerCase()) && !execType.equals(MiniGenericCluster.EXECTYPE_SPARK)) {
return;
}
PigServer pig = new PigServer(cluster.getExecType(), properties);
@@ -574,6 +575,55 @@ public class TestPigServer {
assertEquals(1, physicalPlan.getLength());
assertTrue(physicalPlan.item(0).getTextContent().contains("Not Supported"));
+
+ if (execType.equals(ExecType.MAPREDUCE.name().toLowerCase())){
+ verifyExplainXmlComplexMR(doc);
+ } else if (execType.equals(MiniGenericCluster.EXECTYPE_SPARK)){
+ verifyExplainXmlComplexSpark(doc);
+ }
+
+
+ }
+
+ private void verifyExplainXmlComplexSpark(Document doc) {
+ NodeList stores = doc.getElementsByTagName("POStore");
+ assertEquals(1, stores.getLength());
+
+ NodeList groups = doc.getElementsByTagName("POJoinGroupSpark");
+ assertEquals(2, groups.getLength());
+
+ Node innerGroup = groups.item(1);
+
+ NodeList groupChildren = innerGroup.getChildNodes();
+
+ int foreachCount = 0;
+ int castCount = 0;
+ int loadCount = 0;
+
+ for (int i = 0; i < groupChildren.getLength(); i++) {
+ Node node = groupChildren.item(i);
+ if (node.getNodeName().equals("POForEach")){
+ ++foreachCount;
+ NodeList foreachNodes = node.getChildNodes();
+ for (int j = 0; j < foreachNodes.getLength(); j++) {
+ Node innerNode = foreachNodes.item(j);
+ if (innerNode.getNodeName().equals("alias")){
+ assertEquals("b",innerNode.getTextContent());
+ }else if (innerNode.getNodeName().equals("POCast")){
+ ++castCount;
+ }else if (innerNode.getNodeName().equals("POLoad")) {
+ ++loadCount;
+ }
+ }
+ }
+ }
+
+ assertEquals(1,foreachCount);
+ assertEquals(3,castCount);
+ assertEquals(1,loadCount);
+ }
+
+ private void verifyExplainXmlComplexMR(Document doc) {
//Verify we have two loads and one is temporary
NodeList loads = doc.getElementsByTagName("POLoad");
assertEquals(2, loads.getLength());