Github user tigerquoll commented on a diff in the pull request:
https://github.com/apache/spark/pull/2516#discussion_r19793754
--- Diff: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---
@@ -83,216 +79,163 @@ object SparkSubmit {
* (4) the main class for the child
*/
private[spark] def createLaunchEnv(args: SparkSubmitArguments)
- : (ArrayBuffer[String], ArrayBuffer[String], Map[String, String],
String) = {
+ : (mutable.ArrayBuffer[String], mutable.ArrayBuffer[String],
Map[String, String], String) = {
// Values to return
- val childArgs = new ArrayBuffer[String]()
- val childClasspath = new ArrayBuffer[String]()
- val sysProps = new HashMap[String, String]()
+ val childArgs = new mutable.ArrayBuffer[String]()
+ val childClasspath = new mutable.ArrayBuffer[String]()
+ val sysProps = new mutable.HashMap[String, String]()
var childMainClass = ""
- // Set the cluster manager
- val clusterManager: Int = args.master match {
- case m if m.startsWith("yarn") => YARN
- case m if m.startsWith("spark") => STANDALONE
- case m if m.startsWith("mesos") => MESOS
- case m if m.startsWith("local") => LOCAL
- case _ => printErrorAndExit("Master must start with yarn, spark,
mesos, or local"); -1
- }
-
- // Set the deploy mode; default is client mode
- var deployMode: Int = args.deployMode match {
- case "client" | null => CLIENT
- case "cluster" => CLUSTER
- case _ => printErrorAndExit("Deploy mode must be either client or
cluster"); -1
- }
-
- // Because "yarn-cluster" and "yarn-client" encapsulate both the master
- // and deploy mode, we have some logic to infer the master and deploy
mode
- // from each other if only one is specified, or exit early if they are
at odds.
- if (clusterManager == YARN) {
- if (args.master == "yarn-standalone") {
- printWarning("\"yarn-standalone\" is deprecated. Use
\"yarn-cluster\" instead.")
- args.master = "yarn-cluster"
- }
- (args.master, args.deployMode) match {
- case ("yarn-cluster", null) =>
- deployMode = CLUSTER
- case ("yarn-cluster", "client") =>
- printErrorAndExit("Client deploy mode is not compatible with
master \"yarn-cluster\"")
- case ("yarn-client", "cluster") =>
- printErrorAndExit("Cluster deploy mode is not compatible with
master \"yarn-client\"")
- case (_, mode) =>
- args.master = "yarn-" + Option(mode).getOrElse("client")
- }
-
+ if (args.clusterManagerFlag == CM_YARN) {
// Make sure YARN is included in our build if we're trying to use it
if (!Utils.classIsLoadable("org.apache.spark.deploy.yarn.Client") &&
!Utils.isTesting) {
printErrorAndExit(
"Could not load YARN classes. " +
"This copy of Spark may not have been compiled with YARN
support.")
}
- }
-
- // The following modes are not supported or applicable
- (clusterManager, deployMode) match {
- case (MESOS, CLUSTER) =>
- printErrorAndExit("Cluster deploy mode is currently not supported
for Mesos clusters.")
- case (_, CLUSTER) if args.isPython =>
- printErrorAndExit("Cluster deploy mode is currently not supported
for python applications.")
- case (_, CLUSTER) if isShell(args.primaryResource) =>
- printErrorAndExit("Cluster deploy mode is not applicable to Spark
shells.")
- case _ =>
+ val hasHadoopEnv = sys.env.contains("HADOOP_CONF_DIR") ||
sys.env.contains("YARN_CONF_DIR")
+ if (!hasHadoopEnv && !Utils.isTesting) {
+ throw new Exception("When running with master '" + args.master +
"'" +
+ "either HADOOP_CONF_DIR or YARN_CONF_DIR must be set in the
environment.")
+ }
}
// If we're running a python app, set the main class to our specific
python runner
if (args.isPython) {
if (args.primaryResource == PYSPARK_SHELL) {
- args.mainClass = "py4j.GatewayServer"
- args.childArgs = ArrayBuffer("--die-on-broken-pipe", "0")
+ args.mainClass = PY4J_GATEWAYSERVER
+ args.childArgs = mutable.ArrayBuffer("--die-on-broken-pipe", "0")
} else {
// If a python file is provided, add it to the child arguments and
list of files to deploy.
// Usage: PythonAppRunner <main python file> <extra python files>
[app arguments]
- args.mainClass = "org.apache.spark.deploy.PythonRunner"
- args.childArgs = ArrayBuffer(args.primaryResource, args.pyFiles)
++ args.childArgs
- args.files = mergeFileLists(args.files, args.primaryResource)
+ args.mainClass = PYTHON_RUNNER
+ args.childArgs = mutable.ArrayBuffer(args.primaryResource,
+ args.pyFiles.getOrElse("")) ++ args.childArgs
--- End diff --
Ok, changed it back to orNull.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]