OOZIE-2786 Pass Oozie workflow ID and settings to Spark application configuration (zhengxb2005 via rkanter)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/a8c33925
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/a8c33925
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/a8c33925

Branch: refs/heads/oya
Commit: a8c3392570ab46517c995660c922257059272f70
Parents: 4478d53
Author: Robert Kanter <[email protected]>
Authored: Tue Feb 7 13:50:42 2017 -0800
Committer: Robert Kanter <[email protected]>
Committed: Tue Feb 7 13:50:42 2017 -0800

----------------------------------------------------------------------
 release-log.txt | 1 +
 .../org/apache/oozie/action/hadoop/SparkMain.java | 17 +++++++++++++++++
 .../apache/oozie/action/hadoop/TestSparkMain.java | 17 +++++++++++++++++
 3 files changed, 35 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/oozie/blob/a8c33925/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index f693f70..4d2b4bd 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.4.0 release (trunk - unreleased)
 
+OOZIE-2786 Pass Oozie workflow ID and settings to Spark application configuration (zhengxb2005 via rkanter)
 OOZIE-2790 log4j configuration is not passed to spark executors (satishsaley)
 OOZIE-2787 Oozie distributes application jar twice making the spark job fail (satishsaley)
 OOZIE-2789 Maven complains about checkstyle error during build (xzheng via abhishekbafna)

http://git-wip-us.apache.org/repos/asf/oozie/blob/a8c33925/sharelib/spark/src/main/java/org/apache/oozie/action/hadoop/SparkMain.java
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/main/java/org/apache/oozie/action/hadoop/SparkMain.java b/sharelib/spark/src/main/java/org/apache/oozie/action/hadoop/SparkMain.java
index 911c99d..d37053d 100644
--- a/sharelib/spark/src/main/java/org/apache/oozie/action/hadoop/SparkMain.java
+++ b/sharelib/spark/src/main/java/org/apache/oozie/action/hadoop/SparkMain.java
@@ -29,6 +29,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 import java.util.jar.JarFile;
 import java.util.jar.Manifest;
@@ -43,6 +44,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.log4j.PropertyConfigurator;
 import org.apache.spark.deploy.SparkSubmit;
 
+import com.google.common.annotations.VisibleForTesting;
+
 public class SparkMain extends LauncherMain {
     private static final String MASTER_OPTION = "--master";
     private static final String MODE_OPTION = "--deploy-mode";
@@ -114,6 +117,8 @@ public class SparkMain extends LauncherMain {
             sparkArgs.add(className);
         }
 
+        appendOoziePropertiesToSparkConf(sparkArgs, actionConf);
+
         String jarPath = actionConf.get(SparkActionExecutor.SPARK_JAR);
         if(jarPath!=null && jarPath.endsWith(".py")){
             isPyspark = true;
@@ -466,6 +471,18 @@ public class SparkMain extends LauncherMain {
         return manifest.getMainAttributes().getValue("Specification-Version");
     }
 
+    /*
+     * Get properties that needs to be passed to Spark as Spark configuration from actionConf.
+     */
+    @VisibleForTesting
+    protected void appendOoziePropertiesToSparkConf(List<String> sparkArgs, Configuration actionConf) {
+        for (Map.Entry<String, String> oozieConfig : actionConf
+                .getValByRegex("^oozie\\.(?!launcher|spark).+").entrySet()) {
+            sparkArgs.add("--conf");
+            sparkArgs.add(String.format("spark.%s=%s", oozieConfig.getKey(), oozieConfig.getValue()));
+        }
+    }
+
     private void appendWithPathSeparator(String what, StringBuilder to){
         if(to.length() > 0){
             to.append(File.pathSeparator);

http://git-wip-us.apache.org/repos/asf/oozie/blob/a8c33925/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
----------------------------------------------------------------------
diff --git a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
index f044048..bf4adf9 100644
--- a/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
+++ b/sharelib/spark/src/test/java/org/apache/oozie/action/hadoop/TestSparkMain.java
@@ -26,13 +26,17 @@ import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.regex.Pattern;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.oozie.util.IOUtils;
 import org.apache.oozie.util.XConfiguration;
 
+import com.google.common.collect.Lists;
+
 public class TestSparkMain extends MainTestCase {
 
     private static final String INPUT = "input.txt";
@@ -120,4 +124,17 @@ public class TestSparkMain extends MainTestCase {
             assertFalse(pattern.matcher(s).find());
         }
     }
+
+    public void testAppendOoziePropertiesToSparkConf() throws Exception {
+        SparkMain instance = SparkMain.class.newInstance();
+        List<String> sparkArgs = new ArrayList<String>();
+        Configuration actionConf = new Configuration();
+        actionConf.set("foo", "foo-not-to-include");
+        actionConf.set("oozie.launcher", "launcher-not-to-include");
+        actionConf.set("oozie.spark", "spark-not-to-include");
+        actionConf.set("oozie.bar", "bar");
+
+        instance.appendOoziePropertiesToSparkConf(sparkArgs, actionConf);
+        assertEquals(Lists.newArrayList("--conf", "spark.oozie.bar=bar"), sparkArgs);
+    }
 }
\ No newline at end of file
