Hi,
I used to submit my Spark YARN applications using the
org.apache.spark.deploy.yarn.Client API so that I could get the application ID
after submitting. The code I have is included below. After upgrading to 1.3.1,
however, the YARN Client class was made into a private class. Is there a
particular reason why this Client class was made private?
I know that there’s a new SparkSubmit object that can be used, but it’s not
clear to me how I can use it to get the application id after submitting to the
cluster.
Thoughts?
Thanks,
Ron
/**
 * Submits the le-sparkdb assembly job to YARN through Spark's YARN `Client` and hands the
 * resulting application id back to the caller.
 *
 * NOTE(review): this relies on `Client` / `ClientArguments` being accessible; confirm that
 * against the Spark version on the classpath.
 */
class SparkLauncherServiceImpl extends SparkLauncherService {

  /**
   * Builds the YARN client arguments for the assembly job and submits it.
   *
   * @param conf    configuration passed straight to the YARN `Client`
   * @param appName value supplied for `--name`
   * @param queue   value supplied for `--queue`
   * @return whatever `Client.runApp()` returns for the submission
   */
  override def runApp(conf: Configuration, appName: String, queue: String): ApplicationId = {
    val ws = SparkLauncherServiceImpl.getWorkspace()
    val params = Array(
      "--class", "com.xyz.sparkdb.service.impl.AssemblyServiceImpl",
      "--name", appName,
      "--queue", queue,
      "--driver-memory", "1024m",
      "--addJars", getListOfDependencyJars(s"$ws/ledp/le-sparkdb/target/dependency"),
      "--jar", s"file:$ws/ledp/le-sparkdb/target/le-sparkdb-1.0.3-SNAPSHOT.jar"
    )

    // Must be a single space-separated string; a literal broken across lines does not compile.
    val driverJavaOptions = Seq(
      "-XX:PermSize=128m",
      "-XX:MaxPermSize=128m",
      "-Dsun.io.serialization.extendedDebugInfo=true"
    ).mkString(" ")

    // System properties are JVM-wide: these stay set for anything else running in this process.
    System.setProperty("SPARK_YARN_MODE", "true")
    System.setProperty("spark.driver.extraJavaOptions", driverJavaOptions)

    // SparkConf is created after the properties are set, matching the original ordering.
    val sparkConf = new SparkConf()
    val args = new ClientArguments(params, sparkConf)
    new Client(args, conf, sparkConf).runApp()
  }

  /**
   * Lists the files directly under `baseDir`, drops those whose name starts with
   * "spark-assembly", and joins the rest as `file:`-prefixed absolute paths.
   *
   * @param baseDir directory holding the dependency jars
   * @return the entries joined with ", "; an empty string when the directory is missing,
   *         unreadable, or holds no eligible files
   */
  private def getListOfDependencyJars(baseDir: String): String = {
    // File.listFiles() returns null (not an empty array) when baseDir is not a listable directory.
    val entries = Option(new File(baseDir).listFiles()).getOrElse(Array.empty[File])
    // NOTE(review): the separator keeps a space after the comma; confirm the consumer of
    // --addJars trims each entry.
    entries
      .filterNot(_.getName.startsWith("spark-assembly"))
      .map(jar => s"file:${jar.getAbsolutePath}")
      .mkString(", ")
  }
}