Repository: spark
Updated Branches:
  refs/heads/master 57e9b13bf -> 49351c7f5


[SPARK-8646] PySpark does not run on YARN if master not provided in command line

andrewor14 davies vanzin can you take a look at this? thanks

Author: Lianhui Wang <[email protected]>

Closes #7438 from lianhuiwang/SPARK-8646 and squashes the following commits:

cb3f12d [Lianhui Wang] add whitespace
6d874a6 [Lianhui Wang] support pyspark for yarn-client


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/49351c7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/49351c7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/49351c7f

Branch: refs/heads/master
Commit: 49351c7f597c67950cc65e5014a89fad31b9a6f7
Parents: 57e9b13
Author: Lianhui Wang <[email protected]>
Authored: Thu Jul 16 19:31:14 2015 -0700
Committer: Andrew Or <[email protected]>
Committed: Thu Jul 16 19:31:45 2015 -0700

----------------------------------------------------------------------
 python/pyspark/context.py                                     | 5 +++++
 yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/49351c7f/python/pyspark/context.py
----------------------------------------------------------------------
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index d746672..43bde5a 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -152,6 +152,11 @@ class SparkContext(object):
         self.master = self._conf.get("spark.master")
         self.appName = self._conf.get("spark.app.name")
         self.sparkHome = self._conf.get("spark.home", None)
+
+        # Let YARN know it's a pyspark app, so it distributes needed libraries.
+        if self.master == "yarn-client":
+            self._conf.set("spark.yarn.isPython", "true")
+
         for (k, v) in self._conf.getAll():
             if k.startswith("spark.executorEnv."):
                 varName = k[len("spark.executorEnv."):]

http://git-wip-us.apache.org/repos/asf/spark/blob/49351c7f/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index f86b6d1..b74ea9a 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -616,7 +616,7 @@ private[spark] class Client(
     val appId = newAppResponse.getApplicationId
     val appStagingDir = getAppStagingDir(appId)
     val pySparkArchives =
-      if (sys.props.getOrElse("spark.yarn.isPython", "false").toBoolean) {
+      if (sparkConf.getBoolean("spark.yarn.isPython", false)) {
         findPySparkArchives()
       } else {
         Nil


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
