spark git commit: [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar'
Repository: spark Updated Branches: refs/heads/branch-1.5 2217f4f8b -> 47bc6c0fa [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar' when spark.yarn.user.classpath.first=true and using 'spark-submit --jars hdfs://user/foo.jar', it can not put foo.jar to system classpath. so we need to put yarn's linkNames of jars to the system classpath. vanzin tgravescs Author: Lianhui WangCloses #9045 from lianhuiwang/spark-11026. (cherry picked from commit 626aab79c9b4d4ac9d65bf5fa45b81dd9cbc609c) Signed-off-by: Tom Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47bc6c0f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47bc6c0f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47bc6c0f Branch: refs/heads/branch-1.5 Commit: 47bc6c0fa3cfbd92bb4470240b0c97040217f370 Parents: 2217f4f Author: Lianhui Wang Authored: Tue Oct 13 08:29:47 2015 -0500 Committer: Tom Graves Committed: Tue Oct 13 08:31:00 2015 -0500 -- .../org/apache/spark/deploy/yarn/Client.scala | 23 +--- 1 file changed, 15 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/47bc6c0f/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index f2e1c2b..f21f5ef 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1144,7 +1144,7 @@ object Client extends Logging { } else { getMainJarUri(sparkConf.getOption(CONF_SPARK_USER_JAR)) } - mainJar.foreach(addFileToClasspath(sparkConf, _, APP_JAR, env)) + mainJar.foreach(addFileToClasspath(sparkConf, conf, _, APP_JAR, env)) val secondaryJars = if (args != null) { @@ -1153,10 +1153,10 @@ object Client extends Logging { 
getSecondaryJarUris(sparkConf.getOption(CONF_SPARK_YARN_SECONDARY_JARS)) } secondaryJars.foreach { x => -addFileToClasspath(sparkConf, x, null, env) +addFileToClasspath(sparkConf, conf, x, null, env) } } -addFileToClasspath(sparkConf, new URI(sparkJar(sparkConf)), SPARK_JAR, env) +addFileToClasspath(sparkConf, conf, new URI(sparkJar(sparkConf)), SPARK_JAR, env) populateHadoopClasspath(conf, env) sys.env.get(ENV_DIST_CLASSPATH).foreach { cp => addClasspathEntry(getClusterPath(sparkConf, cp), env) @@ -1191,15 +1191,17 @@ object Client extends Logging { * If an alternate name for the file is given, and it's not a "local:" file, the alternate * name will be added to the classpath (relative to the job's work directory). * - * If not a "local:" file and no alternate name, the environment is not modified. + * If not a "local:" file and no alternate name, the linkName will be added to the classpath. * - * @param conf Spark configuration. - * @param uri URI to add to classpath (optional). - * @param fileName Alternate name for the file (optional). - * @param env Map holding the environment variables. + * @param confSpark configuration. + * @param hadoopConf Hadoop configuration. + * @param uri URI to add to classpath (optional). + * @param fileNameAlternate name for the file (optional). + * @param env Map holding the environment variables. 
*/ private def addFileToClasspath( conf: SparkConf, + hadoopConf: Configuration, uri: URI, fileName: String, env: HashMap[String, String]): Unit = { @@ -1208,6 +1210,11 @@ object Client extends Logging { } else if (fileName != null) { addClasspathEntry(buildPath( YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), fileName), env) +} else if (uri != null) { + val localPath = getQualifiedLocalPath(uri, hadoopConf) + val linkName = Option(uri.getFragment()).getOrElse(localPath.getName()) + addClasspathEntry(buildPath( +YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), linkName), env) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar'
Repository: spark Updated Branches: refs/heads/master c4da5345a -> 626aab79c [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar' when spark.yarn.user.classpath.first=true and using 'spark-submit --jars hdfs://user/foo.jar', it can not put foo.jar to system classpath. so we need to put yarn's linkNames of jars to the system classpath. vanzin tgravescs Author: Lianhui WangCloses #9045 from lianhuiwang/spark-11026. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/626aab79 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/626aab79 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/626aab79 Branch: refs/heads/master Commit: 626aab79c9b4d4ac9d65bf5fa45b81dd9cbc609c Parents: c4da534 Author: Lianhui Wang Authored: Tue Oct 13 08:29:47 2015 -0500 Committer: Tom Graves Committed: Tue Oct 13 08:29:47 2015 -0500 -- .../org/apache/spark/deploy/yarn/Client.scala | 23 +--- 1 file changed, 15 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/626aab79/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index d25d830..9fcfe36 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1212,7 +1212,7 @@ object Client extends Logging { } else { getMainJarUri(sparkConf.getOption(CONF_SPARK_USER_JAR)) } - mainJar.foreach(addFileToClasspath(sparkConf, _, APP_JAR, env)) + mainJar.foreach(addFileToClasspath(sparkConf, conf, _, APP_JAR, env)) val secondaryJars = if (args != null) { @@ -1221,10 +1221,10 @@ object Client extends Logging { getSecondaryJarUris(sparkConf.getOption(CONF_SPARK_YARN_SECONDARY_JARS)) } secondaryJars.foreach { x => -addFileToClasspath(sparkConf, x, null, env) 
+addFileToClasspath(sparkConf, conf, x, null, env) } } -addFileToClasspath(sparkConf, new URI(sparkJar(sparkConf)), SPARK_JAR, env) +addFileToClasspath(sparkConf, conf, new URI(sparkJar(sparkConf)), SPARK_JAR, env) populateHadoopClasspath(conf, env) sys.env.get(ENV_DIST_CLASSPATH).foreach { cp => addClasspathEntry(getClusterPath(sparkConf, cp), env) @@ -1259,15 +1259,17 @@ object Client extends Logging { * If an alternate name for the file is given, and it's not a "local:" file, the alternate * name will be added to the classpath (relative to the job's work directory). * - * If not a "local:" file and no alternate name, the environment is not modified. + * If not a "local:" file and no alternate name, the linkName will be added to the classpath. * - * @param conf Spark configuration. - * @param uri URI to add to classpath (optional). - * @param fileName Alternate name for the file (optional). - * @param env Map holding the environment variables. + * @param confSpark configuration. + * @param hadoopConf Hadoop configuration. + * @param uri URI to add to classpath (optional). + * @param fileNameAlternate name for the file (optional). + * @param env Map holding the environment variables. */ private def addFileToClasspath( conf: SparkConf, + hadoopConf: Configuration, uri: URI, fileName: String, env: HashMap[String, String]): Unit = { @@ -1276,6 +1278,11 @@ object Client extends Logging { } else if (fileName != null) { addClasspathEntry(buildPath( YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), fileName), env) +} else if (uri != null) { + val localPath = getQualifiedLocalPath(uri, hadoopConf) + val linkName = Option(uri.getFragment()).getOrElse(localPath.getName()) + addClasspathEntry(buildPath( +YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), linkName), env) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-9439] [YARN] External shuffle service robust to NM restarts using leveldb
Repository: spark Updated Branches: refs/heads/master bb220f657 -> 708036c1d [SPARK-9439] [YARN] External shuffle service robust to NM restarts using leveldb https://issues.apache.org/jira/browse/SPARK-9439 In general, Yarn apps should be robust to NodeManager restarts. However, if you run spark with the external shuffle service on, after a NM restart all shuffles fail, b/c the shuffle service has lost some state with info on each executor. (Note the shuffle data is perfectly fine on disk across a NM restart, the problem is we've lost the small bit of state that lets us *find* those files.) The solution proposed here is that the external shuffle service can write out its state to leveldb (backed by a local file) every time an executor is added. When running with yarn, that file is in the NM's local dir. Whenever the service is started, it looks for that file, and if it exists, it reads the file and re-registers all executors there. Nothing is changed in non-yarn modes with this patch. The service is not given a place to save the state to, so it operates the same as before. This should make it easy to update other cluster managers as well, by just supplying the right file and the equivalent of yarn's `initializeApplication` -- I'm not familiar enough with those modes to know how to do that. 
Author: Imran Rashid iras...@cloudera.com Closes #7943 from squito/leveldb_external_shuffle_service_NM_restart and squashes the following commits: 0d285d3 [Imran Rashid] review feedback 70951d6 [Imran Rashid] Merge branch 'master' into leveldb_external_shuffle_service_NM_restart 5c71c8c [Imran Rashid] save executor to db before registering; style 2499c8c [Imran Rashid] explicit dependency on jackson-annotations 795d28f [Imran Rashid] review feedback 81f80e2 [Imran Rashid] Merge branch 'master' into leveldb_external_shuffle_service_NM_restart 594d520 [Imran Rashid] use json to serialize application executor info 1a7980b [Imran Rashid] version 8267d2a [Imran Rashid] style e9f99e8 [Imran Rashid] cleanup the handling of bad dbs a little 9378ba3 [Imran Rashid] fail gracefully on corrupt leveldb files acedb62 [Imran Rashid] switch to writing out one record per executor 79922b7 [Imran Rashid] rely on yarn to call stopApplication; assorted cleanup 12b6a35 [Imran Rashid] save registered executors when apps are removed; add tests c878fbe [Imran Rashid] better explanation of shuffle service port handling 694934c [Imran Rashid] only open leveldb connection once per service d596410 [Imran Rashid] store executor data in leveldb 59800b7 [Imran Rashid] Files.move in case renaming is unsupported 32fe5ae [Imran Rashid] Merge branch 'master' into external_shuffle_service_NM_restart d7450f0 [Imran Rashid] style f729e2b [Imran Rashid] debugging 4492835 [Imran Rashid] lol, dont use a PrintWriter b/c of scalastyle checks 0a39b98 [Imran Rashid] Merge branch 'master' into external_shuffle_service_NM_restart 55f49fc [Imran Rashid] make sure the service doesnt die if the registered executor file is corrupt; add tests 245db19 [Imran Rashid] style 62586a6 [Imran Rashid] just serialize the whole executors map bdbbf0d [Imran Rashid] comments, remove some unnecessary changes 857331a [Imran Rashid] better tests comments bb9d1e6 [Imran Rashid] formatting bdc4b32 [Imran Rashid] rename 86e0cb9 [Imran 
Rashid] for tests, shuffle service finds an open port 23994ff [Imran Rashid] style 7504de8 [Imran Rashid] style a36729c [Imran Rashid] cleanup efb6195 [Imran Rashid] proper unit test, and no longer leak if apps stop during NM restart dd93dc0 [Imran Rashid] test for shuffle service w/ NM restarts d596969 [Imran Rashid] cleanup imports 0e9d69b [Imran Rashid] better names 9eae119 [Imran Rashid] cleanup lots of duplication 1136f44 [Imran Rashid] test needs to have an actual shuffle 0b588bd [Imran Rashid] more fixes ... ad122ef [Imran Rashid] more fixes 5e5a7c3 [Imran Rashid] fix build c69f46b [Imran Rashid] maybe working version, needs tests cleanup ... bb3ba49 [Imran Rashid] minor cleanup 36127d3 [Imran Rashid] wip b9d2ced [Imran Rashid] incomplete setup for external shuffle service tests Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/708036c1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/708036c1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/708036c1 Branch: refs/heads/master Commit: 708036c1de52d674ceff30ac465e1dcedeb8dde8 Parents: bb220f6 Author: Imran Rashid iras...@cloudera.com Authored: Fri Aug 21 08:41:36 2015 -0500 Committer: Tom Graves tgra...@yahoo-inc.com Committed: Fri Aug 21 08:41:36 2015 -0500 -- .../spark/deploy/ExternalShuffleService.scala | 2 +- .../mesos/MesosExternalShuffleService.scala | 2 +- .../org/apache/spark/storage/BlockManager.scala | 14 +- .../spark/ExternalShuffleServiceSuite.scala | 2 +- network/shuffle/pom.xml
spark git commit: [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled
Repository: spark Updated Branches: refs/heads/branch-1.4 2b1973dd2 - a671dad62 [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled Some users may not be aware that the logs are available on Web UI even if Yarn log aggregation is enabled. Update the doc to make this clear and what need to be configured. Author: Carson Wang carson.w...@intel.com Closes #7463 from carsonwang/YarnLogDoc and squashes the following commits: 274c054 [Carson Wang] Minor text fix 74df3a1 [Carson Wang] address comments 5a95046 [Carson Wang] Update the text in the doc e5775c1 [Carson Wang] Update doc about how to view the logs on Web UI when yarn log aggregation is enabled (cherry picked from commit 622838165756e9669cbf7af13eccbc719638f40b) Signed-off-by: Tom Graves tgra...@yahoo-inc.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a671dad6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a671dad6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a671dad6 Branch: refs/heads/branch-1.4 Commit: a671dad62362b129ae23c4c8947eaa6efa134e9f Parents: 2b1973d Author: Carson Wang carson.w...@intel.com Authored: Mon Jul 27 08:02:40 2015 -0500 Committer: Tom Graves tgra...@yahoo-inc.com Committed: Mon Jul 27 08:03:15 2015 -0500 -- docs/running-on-yarn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a671dad6/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 07b30bf..5290b21 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -68,9 +68,9 @@ In YARN terminology, executors and application masters run inside containers. yarn logs -applicationId app ID -will print out the contents of all log files from all containers from the given application. You can also view the container log files directly in HDFS using the HDFS shell or API. 
The directory where they are located can be found by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and `yarn.nodemanager.remote-app-log-dir-suffix`). +will print out the contents of all log files from all containers from the given application. You can also view the container log files directly in HDFS using the HDFS shell or API. The directory where they are located can be found by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and `yarn.nodemanager.remote-app-log-dir-suffix`). The logs are also available on the Spark Web UI under the Executors Tab. You need to have both the Spark history server and the MapReduce history server running and configure `yarn.log.server.url` in `yarn-site.xml` properly. The log URL on the Spark history server UI will redirect you to the MapReduce history server to show the aggregated logs. -When log aggregation isn't turned on, logs are retained locally on each machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and installation. Viewing logs for a container requires going to the host that contains them and looking in this directory. Subdirectories organize log files by application ID and container ID. +When log aggregation isn't turned on, logs are retained locally on each machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and installation. Viewing logs for a container requires going to the host that contains them and looking in this directory. Subdirectories organize log files by application ID and container ID. The logs are also available on the Spark Web UI under the Executors Tab and doesn't require running the MapReduce history server. To review per-container launch environment, increase `yarn.nodemanager.delete.debug-delay-sec` to a large value (e.g. 
36000), and then access the application cache through `yarn.nodemanager.local-dirs` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled
Repository: spark Updated Branches: refs/heads/master 72981bc8f - 622838165 [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled Some users may not be aware that the logs are available on Web UI even if Yarn log aggregation is enabled. Update the doc to make this clear and what need to be configured. Author: Carson Wang carson.w...@intel.com Closes #7463 from carsonwang/YarnLogDoc and squashes the following commits: 274c054 [Carson Wang] Minor text fix 74df3a1 [Carson Wang] address comments 5a95046 [Carson Wang] Update the text in the doc e5775c1 [Carson Wang] Update doc about how to view the logs on Web UI when yarn log aggregation is enabled Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62283816 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62283816 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62283816 Branch: refs/heads/master Commit: 622838165756e9669cbf7af13eccbc719638f40b Parents: 72981bc Author: Carson Wang carson.w...@intel.com Authored: Mon Jul 27 08:02:40 2015 -0500 Committer: Tom Graves tgra...@yahoo-inc.com Committed: Mon Jul 27 08:02:40 2015 -0500 -- docs/running-on-yarn.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/62283816/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index de22ab5..cac08a9 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -68,9 +68,9 @@ In YARN terminology, executors and application masters run inside containers. yarn logs -applicationId app ID -will print out the contents of all log files from all containers from the given application. You can also view the container log files directly in HDFS using the HDFS shell or API. 
The directory where they are located can be found by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and `yarn.nodemanager.remote-app-log-dir-suffix`). +will print out the contents of all log files from all containers from the given application. You can also view the container log files directly in HDFS using the HDFS shell or API. The directory where they are located can be found by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and `yarn.nodemanager.remote-app-log-dir-suffix`). The logs are also available on the Spark Web UI under the Executors Tab. You need to have both the Spark history server and the MapReduce history server running and configure `yarn.log.server.url` in `yarn-site.xml` properly. The log URL on the Spark history server UI will redirect you to the MapReduce history server to show the aggregated logs. -When log aggregation isn't turned on, logs are retained locally on each machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and installation. Viewing logs for a container requires going to the host that contains them and looking in this directory. Subdirectories organize log files by application ID and container ID. +When log aggregation isn't turned on, logs are retained locally on each machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and installation. Viewing logs for a container requires going to the host that contains them and looking in this directory. Subdirectories organize log files by application ID and container ID. The logs are also available on the Spark Web UI under the Executors Tab and doesn't require running the MapReduce history server. To review per-container launch environment, increase `yarn.nodemanager.delete.debug-delay-sec` to a large value (e.g. 
36000), and then access the application cache through `yarn.nodemanager.local-dirs` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8851] [YARN] In Client mode, make sure the client logs in and updates tokens
Repository: spark Updated Branches: refs/heads/master ec8973d12 -> c043a3e9d [SPARK-8851] [YARN] In Client mode, make sure the client logs in and updates tokens In client side, the flow is SparkSubmit -> SparkContext -> yarn/Client. Since the yarn client only gets a cloned config and the staging dir is set here, it is not really possible to do re-logins in the SparkContext. So, do the initial logins in Spark Submit and do re-logins as we do now in the AM, but the Client behaves like an executor in this specific context and reads the credentials file to update the tokens. This way, even if the streaming context is started up from checkpoint - it is fine since we have logged in from SparkSubmit itself. Author: Hari Shreedharan hshreedha...@apache.org Closes #7394 from harishreedharan/yarn-client-login and squashes the following commits: 9a2166f [Hari Shreedharan] make it possible to use command line args and config parameters together. de08f57 [Hari Shreedharan] Fix import order. 5c4fa63 [Hari Shreedharan] Add a comment explaining what is being done in YarnClientSchedulerBackend. c872caa [Hari Shreedharan] Fix typo in log message. 2c80540 [Hari Shreedharan] Move token renewal to YarnClientSchedulerBackend. 0c48ac2 [Hari Shreedharan] Remove direct use of ExecutorDelegationTokenUpdater in Client. 26f8bfa [Hari Shreedharan] [SPARK-8851][YARN] In Client mode, make sure the client logs in and updates tokens. 58b1969 [Hari Shreedharan] Simple attempt 1. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c043a3e9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c043a3e9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c043a3e9 Branch: refs/heads/master Commit: c043a3e9df55721f21332f7c44ff351832d20324 Parents: ec8973d Author: Hari Shreedharan hshreedha...@apache.org Authored: Fri Jul 17 09:38:08 2015 -0500 Committer: Tom Graves tgra...@yahoo-inc.com Committed: Fri Jul 17 09:38:08 2015 -0500 -- .../apache/spark/deploy/SparkHadoopUtil.scala | 29 +++--- .../org/apache/spark/deploy/SparkSubmit.scala | 10 -- .../org/apache/spark/deploy/yarn/Client.scala | 32 +--- .../cluster/YarnClientSchedulerBackend.scala| 11 +-- 4 files changed, 56 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c043a3e9/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 9f94118..6b14d40 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -25,6 +25,7 @@ import java.util.{Arrays, Comparator} import scala.collection.JavaConversions._ import scala.concurrent.duration._ import scala.language.postfixOps +import scala.util.control.NonFatal import com.google.common.primitives.Longs import org.apache.hadoop.conf.Configuration @@ -248,19 +249,25 @@ class SparkHadoopUtil extends Logging { dir: Path, prefix: String, exclusionSuffix: String): Array[FileStatus] = { -val fileStatuses = remoteFs.listStatus(dir, - new PathFilter { -override def accept(path: Path): Boolean = { - val name = path.getName - name.startsWith(prefix) !name.endsWith(exclusionSuffix) +try { + val fileStatuses = remoteFs.listStatus(dir, +new PathFilter { + override def accept(path: Path): Boolean = { +val name 
= path.getName +name.startsWith(prefix) !name.endsWith(exclusionSuffix) + } +}) + Arrays.sort(fileStatuses, new Comparator[FileStatus] { +override def compare(o1: FileStatus, o2: FileStatus): Int = { + Longs.compare(o1.getModificationTime, o2.getModificationTime) } }) -Arrays.sort(fileStatuses, new Comparator[FileStatus] { - override def compare(o1: FileStatus, o2: FileStatus): Int = { -Longs.compare(o1.getModificationTime, o2.getModificationTime) - } -}) -fileStatuses + fileStatuses +} catch { + case NonFatal(e) = +logWarning(Error while attempting to list files from application staging dir, e) +Array.empty +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/c043a3e9/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 036cb6e..0b39ee8 100644 ---
spark git commit: [SPARK-8574] org/apache/spark/unsafe doesn't honor the java source/ta…
Repository: spark Updated Branches: refs/heads/branch-1.4 74001db04 -> 13802163d [SPARK-8574] org/apache/spark/unsafe doesn't honor the java source/ta… …rget versions. I basically copied the compatibility rules from the top level pom.xml into here. Someone more familiar with all the options in the top level pom may want to make sure nothing else should be copied on down. With this it allows me to build with jdk8 and run with lower versions. Source shows compiled for jdk6 as it's supposed to. Author: Tom Graves tgra...@yahoo-inc.com Author: Thomas Graves tgra...@staydecay.corp.gq1.yahoo.com Closes #6989 from tgravescs/SPARK-8574 and squashes the following commits: e1ea2d4 [Thomas Graves] Change to use combine.children=append 150d645 [Tom Graves] [SPARK-8574] org/apache/spark/unsafe doesn't honor the java source/target versions (cherry picked from commit e988adb58f02d06065837f3d79eee220f6558def) Signed-off-by: Tom Graves tgra...@yahoo-inc.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/13802163 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/13802163 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/13802163 Branch: refs/heads/branch-1.4 Commit: 13802163deb39fbffa73d72aa15568b6f2223fa6 Parents: 74001db Author: Tom Graves tgra...@yahoo-inc.com Authored: Thu Jun 25 08:27:08 2015 -0500 Committer: Tom Graves tgra...@yahoo-inc.com Committed: Thu Jun 25 08:27:56 2015 -0500 -- unsafe/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/13802163/unsafe/pom.xml -- diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 7298765..fa5085e 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -80,7 +80,7 @@ groupIdnet.alchim31.maven/groupId artifactIdscala-maven-plugin/artifactId configuration -javacArgs +javacArgs combine.children=append !-- This option is needed to suppress warnings from sun.misc.Unsafe usage -- 
javacArg-XDignore.symbol.file/javacArg /javacArgs - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs with different way in different modes
Repository: spark Updated Branches: refs/heads/master 8db40f671 - a51b133de [SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs with different way in different modes * As spark now supports long running service by updating tokens for namenode, but only accept parameters passed with --k=v format which is not very convinient. This patch add spark.* configs in properties file and system property. * --principal and --keytabl options are passed to client but when we started thrift server or spark-shell these two are also passed into the Main class (org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 and org.apache.spark.repl.Main). In these two main class, arguments passed in will be processed with some 3rd libraries, which will lead to some error: Invalid option: --principal or Unrecgnised option: --principal. We should pass these command args in different forms, say system properties. Author: WangTaoTheTonic wangtao...@huawei.com Closes #6051 from WangTaoTheTonic/SPARK-7524 and squashes the following commits: e65699a [WangTaoTheTonic] change logic to loadEnvironments ebd9ea0 [WangTaoTheTonic] merge master ecfe43a [WangTaoTheTonic] pass keytab and principal seperately in different mode 33a7f40 [WangTaoTheTonic] expand the use of the current configs 08bb4e8 [WangTaoTheTonic] fix wrong cite 73afa64 [WangTaoTheTonic] add configs for keytab and principal, move originals to internal Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a51b133d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a51b133d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a51b133d Branch: refs/heads/master Commit: a51b133de3c65a991ab105b6f020082080121b4c Parents: 8db40f6 Author: WangTaoTheTonic wangtao...@huawei.com Authored: Fri May 29 11:06:11 2015 -0500 Committer: Thomas Graves tgra...@thatenemy-lm.champ.corp.yahoo.com Committed: Fri May 29 11:06:11 2015 -0500 -- 
.../scala/org/apache/spark/deploy/SparkSubmit.scala | 8 .../apache/spark/deploy/SparkSubmitArguments.scala | 2 ++ docs/running-on-yarn.md | 16 .../deploy/yarn/AMDelegationTokenRenewer.scala | 14 -- .../apache/spark/deploy/yarn/ClientArguments.scala | 6 ++ 5 files changed, 36 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 92bb505..d1b32ea 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -428,6 +428,8 @@ object SparkSubmit { OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = spark.executor.cores), OptionAssigner(args.files, YARN, CLIENT, sysProp = spark.yarn.dist.files), OptionAssigner(args.archives, YARN, CLIENT, sysProp = spark.yarn.dist.archives), + OptionAssigner(args.principal, YARN, CLIENT, sysProp = spark.yarn.principal), + OptionAssigner(args.keytab, YARN, CLIENT, sysProp = spark.yarn.keytab), // Yarn cluster only OptionAssigner(args.name, YARN, CLUSTER, clOption = --name), @@ -440,10 +442,8 @@ object SparkSubmit { OptionAssigner(args.files, YARN, CLUSTER, clOption = --files), OptionAssigner(args.archives, YARN, CLUSTER, clOption = --archives), OptionAssigner(args.jars, YARN, CLUSTER, clOption = --addJars), - - // Yarn client or cluster - OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, clOption = --principal), - OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, clOption = --keytab), + OptionAssigner(args.principal, YARN, CLUSTER, clOption = --principal), + OptionAssigner(args.keytab, YARN, CLUSTER, clOption = --keytab), // Other options OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES, 
http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index c0e4c77..cc6a7bd 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -169,6 +169,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S deployMode =
spark git commit: [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH
Repository: spark Updated Branches: refs/heads/master c2f0821aa - ebff7327a [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH Based on https://github.com/apache/spark/pull/5478 that provide a PYSPARK_ARCHIVES_PATH env. within this PR, we just should export PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip in conf/spark-env.sh when we don't install PySpark on each node of Yarn. i run python application successfully on yarn-client and yarn-cluster with this PR. andrewor14 sryza Sephiroth-Lin Can you take a look at this?thanks. Author: Lianhui Wang lianhuiwan...@gmail.com Closes #5580 from lianhuiwang/SPARK-6869 and squashes the following commits: 66ffa43 [Lianhui Wang] Update Client.scala c2ad0f9 [Lianhui Wang] Update Client.scala 1c8f664 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 008850a [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 f0b4ed8 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 150907b [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' into SPARK-6869 20402cd [Lianhui Wang] use ZipEntry 9d87c3f [Lianhui Wang] update scala style e7bd971 [Lianhui Wang] address vanzin's comments 4b8a3ed [Lianhui Wang] use pyArchivesEnvOpt e6b573b [Lianhui Wang] address vanzin's comments f11f84a [Lianhui Wang] zip pyspark archives 5192cca [Lianhui Wang] update import path 3b1e4c8 [Lianhui Wang] address tgravescs's comments 9396346 [Lianhui Wang] put zip to make-distribution.sh 0d2baf7 [Lianhui Wang] update import paths e0179be [Lianhui Wang] add zip pyspark archives in build or sparksubmit 31e8e06 [Lianhui Wang] update code style 9f31dac [Lianhui Wang] update code and add comments f72987c [Lianhui Wang] add archives path to PYTHONPATH Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebff7327 Tree: 
http://git-wip-us.apache.org/repos/asf/spark/tree/ebff7327 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebff7327 Branch: refs/heads/master Commit: ebff7327af5efa9f57c605284de4fae6b050ae0f Parents: c2f0821 Author: Lianhui Wang lianhuiwan...@gmail.com Authored: Fri May 8 08:44:46 2015 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri May 8 08:44:46 2015 -0500 -- assembly/pom.xml| 21 ++ .../org/apache/spark/deploy/SparkSubmit.scala | 41 project/SparkBuild.scala| 37 +- .../org/apache/spark/deploy/yarn/Client.scala | 23 --- 4 files changed, 114 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ebff7327/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 2b4d0a9..626c857 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -92,6 +92,27 @@ skiptrue/skip /configuration /plugin +!-- zip pyspark archives to run python application on yarn mode -- +plugin + groupIdorg.apache.maven.plugins/groupId +artifactIdmaven-antrun-plugin/artifactId +executions + execution +phasepackage/phase + goals +goalrun/goal + /goals + /execution +/executions +configuration + target +delete dir=${basedir}/../python/lib/pyspark.zip/ +zip destfile=${basedir}/../python/lib/pyspark.zip + fileset dir=${basedir}/../python/ includes=pyspark/**/*/ +/zip + /target +/configuration +/plugin !-- Use the shade plugin to create a big JAR with all the dependencies -- plugin groupIdorg.apache.maven.plugins/groupId http://git-wip-us.apache.org/repos/asf/spark/blob/ebff7327/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8a03279..329fa06 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -332,6 +332,47 @@ object SparkSubmit { } } +// In yarn mode for a python app, add pyspark 
archives to files +// that can be distributed with the job +if (args.isPython clusterManager == YARN) { + var pyArchives: String = null + val pyArchivesEnvOpt = sys.env.get(PYSPARK_ARCHIVES_PATH) + if (pyArchivesEnvOpt.isDefined) { +
hadoop git commit: YARN-3600. AM container link is broken (Naganarasimha G R via tgraves (cherry picked from commit 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0)
Repository: hadoop Updated Branches: refs/heads/branch-2 28e0593b9 - 547b06988 YARN-3600. AM container link is broken (Naganarasimha G R via tgraves (cherry picked from commit 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/547b0698 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/547b0698 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/547b0698 Branch: refs/heads/branch-2 Commit: 547b0698873334bf0bb50a54e41bd45b6c326d06 Parents: 28e0593 Author: Thomas Graves tgra...@apache.org Authored: Fri May 8 16:35:40 2015 + Committer: Thomas Graves tgra...@apache.org Committed: Fri May 8 16:37:20 2015 + -- hadoop-yarn-project/CHANGES.txt | 2 ++ .../yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/547b0698/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5ae87ef..25625b7 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -308,6 +308,8 @@ Release 2.8.0 - UNRELEASED YARN-3589. RM and AH web UI display DOCTYPE wrongly. (Rohith via ozawa) +YARN-3600. 
AM container link is broken (Naganarasimha G R via tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/547b0698/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java index 30f55be..34ad08a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java @@ -229,8 +229,9 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ AM Container:, appAttempt.getAmContainerId() == null || containers == null || !hasAMContainer(appAttemptReport.getAMContainerId(), containers) -? N/A : root_url(container, appAttempt.getAmContainerId()), -String.valueOf(appAttempt.getAmContainerId())) +? null : root_url(container, appAttempt.getAmContainerId()), +appAttempt.getAmContainerId() == null ? N/A : + String.valueOf(appAttempt.getAmContainerId())) ._(Node:, node) ._( Tracking URL:,
hadoop git commit: YARN-3600. AM container link is broken (Naganarasimha G R via tgraves
Repository: hadoop Updated Branches: refs/heads/trunk bcf289050 - 5d708a472 YARN-3600. AM container link is broken (Naganarasimha G R via tgraves Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5d708a47 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5d708a47 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5d708a47 Branch: refs/heads/trunk Commit: 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0 Parents: bcf2890 Author: Thomas Graves tgra...@apache.org Authored: Fri May 8 16:35:40 2015 + Committer: Thomas Graves tgra...@apache.org Committed: Fri May 8 16:35:40 2015 + -- hadoop-yarn-project/CHANGES.txt | 2 ++ .../yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/5d708a47/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7f6a09f..b72c648 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -353,6 +353,8 @@ Release 2.8.0 - UNRELEASED YARN-3589. RM and AH web UI display DOCTYPE wrongly. (Rohith via ozawa) +YARN-3600. 
AM container link is broken (Naganarasimha G R via tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/5d708a47/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java index 30f55be..34ad08a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java @@ -229,8 +229,9 @@ public class RMAppAttemptBlock extends AppAttemptBlock{ AM Container:, appAttempt.getAmContainerId() == null || containers == null || !hasAMContainer(appAttemptReport.getAMContainerId(), containers) -? N/A : root_url(container, appAttempt.getAmContainerId()), -String.valueOf(appAttempt.getAmContainerId())) +? null : root_url(container, appAttempt.getAmContainerId()), +appAttempt.getAmContainerId() == null ? N/A : + String.valueOf(appAttempt.getAmContainerId())) ._(Node:, node) ._( Tracking URL:,
hadoop git commit: YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski via tgraves) (cherry picked from commit f0f5e3c0751bcadcacd6d91e2c5504803ec3d
Repository: hadoop Updated Branches: refs/heads/branch-2 68d85e92b - 638feaaa3 YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski via tgraves) (cherry picked from commit f0f5e3c0751bcadcacd6d91e2c5504803ec3d0a5) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/638feaaa Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/638feaaa Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/638feaaa Branch: refs/heads/branch-2 Commit: 638feaaa354243bb53f480c161ffefee8a8fbc50 Parents: 68d85e9 Author: Thomas Graves tgra...@apache.org Authored: Fri May 8 17:20:09 2015 + Committer: Thomas Graves tgra...@apache.org Committed: Fri May 8 17:22:33 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 +++ .../src/main/resources/yarn-default.xml | 12 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/638feaaa/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 25625b7..c2f8fb8 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -151,6 +151,9 @@ Release 2.8.0 - UNRELEASED YARN-2784. Make POM project names consistent. (Rohith via devaraj) +YARN-20. More information for yarn.resourcemanager.webapp.address in +yarn-default.xml (Bartosz Ługowski via tgraves) + OPTIMIZATIONS YARN-3339. 
TestDockerContainerExecutor should pull a single image and not http://git-wip-us.apache.org/repos/asf/hadoop/blob/638feaaa/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 5d0f07d..e1e0ebd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -111,13 +111,21 @@ /property property -descriptionThe http address of the RM web application./description +description + The http address of the RM web application. + If only a host is provided as the value, + the webapp will be served on a random port. +/description nameyarn.resourcemanager.webapp.address/name value${yarn.resourcemanager.hostname}:8088/value /property property -descriptionThe https adddress of the RM web application./description +description + The https address of the RM web application. + If only a host is provided as the value, + the webapp will be served on a random port. +/description nameyarn.resourcemanager.webapp.https.address/name value${yarn.resourcemanager.hostname}:8090/value /property
hadoop git commit: YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski via tgraves)
Repository: hadoop Updated Branches: refs/heads/trunk a2d40bced - f0f5e3c07 YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski via tgraves) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f0f5e3c0 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f0f5e3c0 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f0f5e3c0 Branch: refs/heads/trunk Commit: f0f5e3c0751bcadcacd6d91e2c5504803ec3d0a5 Parents: a2d40bc Author: Thomas Graves tgra...@apache.org Authored: Fri May 8 17:20:09 2015 + Committer: Thomas Graves tgra...@apache.org Committed: Fri May 8 17:21:32 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 +++ .../src/main/resources/yarn-default.xml | 12 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/f0f5e3c0/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b72c648..eb27152 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -196,6 +196,9 @@ Release 2.8.0 - UNRELEASED YARN-2784. Make POM project names consistent. (Rohith via devaraj) +YARN-20. More information for yarn.resourcemanager.webapp.address in +yarn-default.xml (Bartosz Ługowski via tgraves) + OPTIMIZATIONS YARN-3339. 
TestDockerContainerExecutor should pull a single image and not http://git-wip-us.apache.org/repos/asf/hadoop/blob/f0f5e3c0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 5d0f07d..e1e0ebd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -111,13 +111,21 @@ /property property -descriptionThe http address of the RM web application./description +description + The http address of the RM web application. + If only a host is provided as the value, + the webapp will be served on a random port. +/description nameyarn.resourcemanager.webapp.address/name value${yarn.resourcemanager.hostname}:8088/value /property property -descriptionThe https adddress of the RM web application./description +description + The https address of the RM web application. + If only a host is provided as the value, + the webapp will be served on a random port. +/description nameyarn.resourcemanager.webapp.https.address/name value${yarn.resourcemanager.hostname}:8090/value /property
spark git commit: [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS
Repository: spark Updated Branches: refs/heads/master 4dc8d7449 - b1f4ca82d [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS Take 2. Does the same thing as #4688, but fixes Hadoop-1 build. Author: Hari Shreedharan hshreedha...@apache.org Closes #5823 from harishreedharan/kerberos-longrunning and squashes the following commits: 3c86bba [Hari Shreedharan] Import fixes. Import postfixOps explicitly. 4d04301 [Hari Shreedharan] Minor formatting fixes. b5e7a72 [Hari Shreedharan] Remove reflection, use a method in SparkHadoopUtil to update the token renewer. 7bff6e9 [Hari Shreedharan] Make sure all required classes are present in the jar. Fix import order. e851f70 [Hari Shreedharan] Move the ExecutorDelegationTokenRenewer to yarn module. Use reflection to use it. 36eb8a9 [Hari Shreedharan] Change the renewal interval config param. Fix a bunch of comments. 611923a [Hari Shreedharan] Make sure the namenodes are listed correctly for creating tokens. 09fe224 [Hari Shreedharan] Use token.renew to get token's renewal interval rather than using hdfs-site.xml 6963bbc [Hari Shreedharan] Schedule renewal in AM before starting user class. Else, a restarted AM cannot access HDFS if the user class tries to. 072659e [Hari Shreedharan] Fix build failure caused by thread factory getting moved to ThreadUtils. f041dd3 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 42eead4 [Hari Shreedharan] Remove RPC part. Refactor and move methods around, use renewal interval rather than max lifetime to create new tokens. ebb36f5 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning bc083e3 [Hari Shreedharan] Overload RegisteredExecutor to send tokens. Minor doc updates. 7b19643 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 8a4f268 [Hari Shreedharan] Added docs in the security guide. Changed some code to ensure that the renewer objects are created only if required. 
e800c8b [Hari Shreedharan] Restore original RegisteredExecutor message, and send new tokens via NewTokens message. 0e9507e [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 7f1bc58 [Hari Shreedharan] Minor fixes, cleanup. bcd11f9 [Hari Shreedharan] Refactor AM and Executor token update code into separate classes, also send tokens via akka on executor startup. f74303c [Hari Shreedharan] Move the new logic into specialized classes. Add cleanup for old credentials files. 2f9975c [Hari Shreedharan] Ensure new tokens are written out immediately on AM restart. Also, pikc up the latest suffix from HDFS if the AM is restarted. 61b2b27 [Hari Shreedharan] Account for AM restarts by making sure lastSuffix is read from the files on HDFS. 62c45ce [Hari Shreedharan] Relogin from keytab periodically. fa233bd [Hari Shreedharan] Adding logging, fixing minor formatting and ordering issues. 42813b4 [Hari Shreedharan] Remove utils.sh, which was re-added due to merge with master. 0de27ee [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 55522e3 [Hari Shreedharan] Fix failure caused by Preconditions ambiguity. 9ef5f1b [Hari Shreedharan] Added explanation of how the credentials refresh works, some other minor fixes. f4fd711 [Hari Shreedharan] Fix SparkConf usage. 2debcea [Hari Shreedharan] Change the file structure for credentials files. I will push a followup patch which adds a cleanup mechanism for old credentials files. The credentials files are small and few enough for it to cause issues on HDFS. af6d5f0 [Hari Shreedharan] Cleaning up files where changes weren't required. f0f54cb [Hari Shreedharan] Be more defensive when updating the credentials file. f6954da [Hari Shreedharan] Got rid of Akka communication to renew, instead the executors check a known file's modification time to read the credentials. 5c11c3e [Hari Shreedharan] Move tests to YarnSparkHadoopUtil to fix compile issues. 
b4cb917 [Hari Shreedharan] Send keytab to AM via DistributedCache rather than directly via HDFS 0985b4e [Hari Shreedharan] Write tokens to HDFS and read them back when required, rather than sending them over the wire. d79b2b9 [Hari Shreedharan] Make sure correct credentials are passed to FileSystem#addDelegationTokens() 8c6928a [Hari Shreedharan] Fix issue caused by direct creation of Actor object. fb27f46 [Hari Shreedharan] Make sure principal and keytab are set before CoarseGrainedSchedulerBackend is started. Also schedule re-logins in CoarseGrainedSchedulerBackend#start() 41efde0 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning d282d7a [Hari Shreedharan] Fix ClientSuite to set YARN mode, so that the correct class is used in tests. bcfc374 [Hari Shreedharan] Fix Hadoop-1 build by adding no-op methods in SparkHadoopUtil, with impl in YarnSparkHadoopUtil. f8fe694 [Hari Shreedharan] Handle None if keytab-login is not scheduled. 2b0d745 [Hari Shreedharan] [SPARK-5342][YARN] Allow long running Spark apps to run on secure YARN/HDFS. ccba5bc [Hari
spark git commit: [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS
Repository: spark Updated Branches: refs/heads/master 7dacc08ab - 6c65da6bb [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS Current Spark apps running on Secure YARN/HDFS would not be able to write data to HDFS after 7 days, since delegation tokens cannot be renewed beyond that. This means Spark Streaming apps will not be able to run on Secure YARN. This commit adds basic functionality to fix this issue. In this patch: - new parameters are added - principal and keytab, which can be used to login to a KDC - the client logs in, and then get tokens to start the AM - the keytab is copied to the staging directory - the AM waits for 60% of the time till expiry of the tokens and then logs in using the keytab - each time after 60% of the time, new tokens are created and sent to the executors Currently, to avoid complicating the architecture, we set the keytab and principal in the SparkHadoopUtil singleton, and schedule a login. Once the login is completed, a callback is scheduled. This is being posted for feedback, so I can gather feedback on the general implementation. There are currently a bunch of things to do: - [x] logging - [x] testing - I plan to manually test this soon. If you have ideas of how to add unit tests, comment. - [x] add code to ensure that if these params are set in non-YARN cluster mode, we complain - [x] documentation - [x] Have the executors request for credentials from the AM, so that retries are possible. Author: Hari Shreedharan hshreedha...@apache.org Closes #4688 from harishreedharan/kerberos-longrunning and squashes the following commits: 36eb8a9 [Hari Shreedharan] Change the renewal interval config param. Fix a bunch of comments. 611923a [Hari Shreedharan] Make sure the namenodes are listed correctly for creating tokens. 09fe224 [Hari Shreedharan] Use token.renew to get token's renewal interval rather than using hdfs-site.xml 6963bbc [Hari Shreedharan] Schedule renewal in AM before starting user class. 
Else, a restarted AM cannot access HDFS if the user class tries to. 072659e [Hari Shreedharan] Fix build failure caused by thread factory getting moved to ThreadUtils. f041dd3 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 42eead4 [Hari Shreedharan] Remove RPC part. Refactor and move methods around, use renewal interval rather than max lifetime to create new tokens. ebb36f5 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning bc083e3 [Hari Shreedharan] Overload RegisteredExecutor to send tokens. Minor doc updates. 7b19643 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 8a4f268 [Hari Shreedharan] Added docs in the security guide. Changed some code to ensure that the renewer objects are created only if required. e800c8b [Hari Shreedharan] Restore original RegisteredExecutor message, and send new tokens via NewTokens message. 0e9507e [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 7f1bc58 [Hari Shreedharan] Minor fixes, cleanup. bcd11f9 [Hari Shreedharan] Refactor AM and Executor token update code into separate classes, also send tokens via akka on executor startup. f74303c [Hari Shreedharan] Move the new logic into specialized classes. Add cleanup for old credentials files. 2f9975c [Hari Shreedharan] Ensure new tokens are written out immediately on AM restart. Also, pikc up the latest suffix from HDFS if the AM is restarted. 61b2b27 [Hari Shreedharan] Account for AM restarts by making sure lastSuffix is read from the files on HDFS. 62c45ce [Hari Shreedharan] Relogin from keytab periodically. fa233bd [Hari Shreedharan] Adding logging, fixing minor formatting and ordering issues. 42813b4 [Hari Shreedharan] Remove utils.sh, which was re-added due to merge with master. 0de27ee [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning 55522e3 [Hari Shreedharan] Fix failure caused by Preconditions ambiguity. 
9ef5f1b [Hari Shreedharan] Added explanation of how the credentials refresh works, some other minor fixes. f4fd711 [Hari Shreedharan] Fix SparkConf usage. 2debcea [Hari Shreedharan] Change the file structure for credentials files. I will push a followup patch which adds a cleanup mechanism for old credentials files. The credentials files are small and few enough for it to cause issues on HDFS. af6d5f0 [Hari Shreedharan] Cleaning up files where changes weren't required. f0f54cb [Hari Shreedharan] Be more defensive when updating the credentials file. f6954da [Hari Shreedharan] Got rid of Akka communication to renew, instead the executors check a known file's modification time to read the credentials. 5c11c3e [Hari Shreedharan] Move tests to YarnSparkHadoopUtil to fix compile issues. b4cb917 [Hari Shreedharan] Send keytab to AM via DistributedCache rather than directly via HDFS 0985b4e [Hari Shreedharan] Write tokens to HDFS and read them back when required, rather than sending them over the wire. d79b2b9 [Hari Shreedharan] Make sure
hadoop git commit: YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves)
Repository: hadoop Updated Branches: refs/heads/trunk 3dd6395bb - 2e215484b YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2e215484 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2e215484 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2e215484 Branch: refs/heads/trunk Commit: 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2 Parents: 3dd6395 Author: tgraves tgra...@apache.org Authored: Wed Apr 29 21:25:42 2015 + Committer: tgraves tgra...@apache.org Committed: Wed Apr 29 21:25:42 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 + .../server/security/ApplicationACLsManager.java | 11 +++ .../webapp/CapacitySchedulerPage.java | 51 + .../resourcemanager/webapp/RMWebServices.java | 13 +++- .../webapp/TestRMWebServices.java | 77 5 files changed, 139 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b5581d6..6b8bde9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -268,6 +268,9 @@ Release 2.8.0 - UNRELEASED YARN-2740. Fix NodeLabelsManager to properly handle node label modifications when distributed node label configuration enabled. (Naganarasimha G R via wangda) +YARN-3517. 
RM web ui for dumping scheduler logs should be for admins only +(Varun Vasudev via tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java index 4daaa68..97b4163 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java @@ -138,4 +138,15 @@ public class ApplicationACLsManager { } return false; } + + /** + * Check if the given user in an admin. 
+ * + * @param calledUGI + * UserGroupInformation for the user + * @return true if the user is an admin, false otherwise + */ + public final boolean isAdmin(final UserGroupInformation calledUGI) { +return this.adminAclsManager.isAdmin(calledUGI); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java index 2eeda66..fa22a0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; @@ -33,6 +34,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerLeafQueueInfo; import
hadoop git commit: YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves) (cherry picked from commit 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2)
Repository: hadoop Updated Branches: refs/heads/branch-2 460127e6f - 2e13183f6 YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves) (cherry picked from commit 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2e13183f Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2e13183f Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2e13183f Branch: refs/heads/branch-2 Commit: 2e13183f6010182aef7b1dfec2f9c1f1e9968011 Parents: 460127e Author: tgraves tgra...@apache.org Authored: Wed Apr 29 21:25:42 2015 + Committer: tgraves tgra...@apache.org Committed: Wed Apr 29 21:27:16 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 + .../server/security/ApplicationACLsManager.java | 11 +++ .../webapp/CapacitySchedulerPage.java | 51 + .../resourcemanager/webapp/RMWebServices.java | 13 +++- .../webapp/TestRMWebServices.java | 77 5 files changed, 139 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 98e42c1..8f4907f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -220,6 +220,9 @@ Release 2.8.0 - UNRELEASED YARN-2740. Fix NodeLabelsManager to properly handle node label modifications when distributed node label configuration enabled. (Naganarasimha G R via wangda) +YARN-3517. 
RM web ui for dumping scheduler logs should be for admins only +(Varun Vasudev via tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java index 4daaa68..97b4163 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java @@ -138,4 +138,15 @@ public class ApplicationACLsManager { } return false; } + + /** + * Check if the given user in an admin. 
+ * + * @param calledUGI + * UserGroupInformation for the user + * @return true if the user is an admin, false otherwise + */ + public final boolean isAdmin(final UserGroupInformation calledUGI) { +return this.adminAclsManager.isAdmin(calledUGI); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java index 2eeda66..fa22a0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; @@ -33,6 +34,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo; import
spark git commit: [SPARK-6918] [YARN] Secure HBase support.
Repository: spark Updated Branches: refs/heads/master f49284b5b - baed3f2c7 [SPARK-6918] [YARN] Secure HBase support. Obtain HBase security token with Kerberos credentials locally to be sent to executors. Tested on eBay's secure HBase cluster. Similar to obtainTokenForNamenodes and fails gracefully if HBase classes are not included in path. Requires hbase-site.xml to be in the classpath(typically via conf dir) for the zookeeper configuration. Should that go in the docs somewhere? Did not see an HBase section. Author: Dean Chen deanch...@gmail.com Closes #5586 from deanchen/master and squashes the following commits: 0c190ef [Dean Chen] [SPARK-6918][YARN] Secure HBase support. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/baed3f2c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/baed3f2c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/baed3f2c Branch: refs/heads/master Commit: baed3f2c73afd9c7d9de34f0485c507ac6a498b0 Parents: f49284b Author: Dean Chen deanch...@gmail.com Authored: Wed Apr 29 08:58:33 2015 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Apr 29 08:58:33 2015 -0500 -- .../org/apache/spark/deploy/yarn/Client.scala | 38 +++- 1 file changed, 37 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/baed3f2c/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 741239c..4abcf73 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -39,7 +39,7 @@ import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.Master import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.security.{Credentials, UserGroupInformation} -import org.apache.hadoop.security.token.Token +import 
org.apache.hadoop.security.token.{TokenIdentifier, Token} import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -226,6 +226,7 @@ private[spark] class Client( val distributedUris = new HashSet[String] obtainTokensForNamenodes(nns, hadoopConf, credentials) obtainTokenForHiveMetastore(hadoopConf, credentials) +obtainTokenForHBase(hadoopConf, credentials) val replication = sparkConf.getInt(spark.yarn.submit.file.replication, fs.getDefaultReplication(dst)).toShort @@ -1085,6 +1086,41 @@ object Client extends Logging { } /** + * Obtain security token for HBase. + */ + def obtainTokenForHBase(conf: Configuration, credentials: Credentials): Unit = { +if (UserGroupInformation.isSecurityEnabled) { + val mirror = universe.runtimeMirror(getClass.getClassLoader) + + try { +val confCreate = mirror.classLoader. + loadClass(org.apache.hadoop.hbase.HBaseConfiguration). + getMethod(create, classOf[Configuration]) +val obtainToken = mirror.classLoader. + loadClass(org.apache.hadoop.hbase.security.token.TokenUtil). + getMethod(obtainToken, classOf[Configuration]) + +logDebug(Attempting to fetch HBase security token.) + +val hbaseConf = confCreate.invoke(null, conf) +val token = obtainToken.invoke(null, hbaseConf).asInstanceOf[Token[TokenIdentifier]] +credentials.addToken(token.getService, token) + +logInfo(Added HBase security token to credentials.) + } catch { +case e:java.lang.NoSuchMethodException = + logInfo(HBase Method not found: + e) +case e:java.lang.ClassNotFoundException = + logDebug(HBase Class not found: + e) +case e:java.lang.NoClassDefFoundError = + logDebug(HBase Class not found: + e) +case e:Exception = + logError(Exception when obtaining HBase security token: + e) + } +} + } + + /** * Return whether the two file systems are the same. 
*/ private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
hadoop git commit: YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation (cherry picked from commit 189a63a719c63b67a1783a280bfc2f72dcb55277)
Repository: hadoop Updated Branches: refs/heads/branch-2 889b92fa4 - 1cd2fcf25 YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation (cherry picked from commit 189a63a719c63b67a1783a280bfc2f72dcb55277) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1cd2fcf2 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1cd2fcf2 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1cd2fcf2 Branch: refs/heads/branch-2 Commit: 1cd2fcf25dc614c0567e6da776fef737640e4293 Parents: 889b92f Author: tgraves tgra...@apache.org Authored: Thu Apr 23 14:39:25 2015 + Committer: tgraves tgra...@apache.org Committed: Thu Apr 23 14:49:24 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/ResourceLimits.java | 28 +++- .../scheduler/capacity/AbstractCSQueue.java | 94 +-- .../scheduler/capacity/LeafQueue.java | 162 --- .../scheduler/capacity/TestReservations.java| 65 +--- 5 files changed, 186 insertions(+), 166 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 8b09926..261e052 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -204,6 +204,9 @@ Release 2.8.0 - UNRELEASED YARN-3495. Confusing log generated by FairScheduler. (Brahma Reddy Battula via ozawa) +YARN-3434. 
Interaction between reservations and userlimit can result in +significant ULF violation (tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java index 12333e8..8074794 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java @@ -19,22 +19,44 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.resource.Resources; /** * Resource limits for queues/applications, this means max overall (please note * that, it's not extra) resource you can get. */ public class ResourceLimits { + volatile Resource limit; + + // This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES + // config. This limit indicates how much we need to unreserve to allocate + // another container. 
+ private volatile Resource amountNeededUnreserve; + public ResourceLimits(Resource limit) { +this.amountNeededUnreserve = Resources.none(); this.limit = limit; } - - volatile Resource limit; + + public ResourceLimits(Resource limit, Resource amountNeededUnreserve) { +this.amountNeededUnreserve = amountNeededUnreserve; +this.limit = limit; + } + public Resource getLimit() { return limit; } - + + public Resource getAmountNeededUnreserve() { +return amountNeededUnreserve; + } + public void setLimit(Resource limit) { this.limit = limit; } + + public void setAmountNeededUnreserve(Resource amountNeededUnreserve) { +this.amountNeededUnreserve = amountNeededUnreserve; + } + } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn
hadoop git commit: YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation
Repository: hadoop Updated Branches: refs/heads/trunk baf8bc6c4 - 189a63a71 YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/189a63a7 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/189a63a7 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/189a63a7 Branch: refs/heads/trunk Commit: 189a63a719c63b67a1783a280bfc2f72dcb55277 Parents: baf8bc6 Author: tgraves tgra...@apache.org Authored: Thu Apr 23 14:39:25 2015 + Committer: tgraves tgra...@apache.org Committed: Thu Apr 23 14:39:25 2015 + -- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/ResourceLimits.java | 28 +++- .../scheduler/capacity/AbstractCSQueue.java | 94 +-- .../scheduler/capacity/LeafQueue.java | 162 --- .../scheduler/capacity/TestReservations.java| 65 +--- 5 files changed, 186 insertions(+), 166 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/CHANGES.txt -- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f4413a8..d335389 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -252,6 +252,9 @@ Release 2.8.0 - UNRELEASED YARN-3495. Confusing log generated by FairScheduler. (Brahma Reddy Battula via ozawa) +YARN-3434. 
Interaction between reservations and userlimit can result in +significant ULF violation (tgraves) + Release 2.7.1 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java index 12333e8..8074794 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java @@ -19,22 +19,44 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.util.resource.Resources; /** * Resource limits for queues/applications, this means max overall (please note * that, it's not extra) resource you can get. */ public class ResourceLimits { + volatile Resource limit; + + // This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES + // config. This limit indicates how much we need to unreserve to allocate + // another container. 
+ private volatile Resource amountNeededUnreserve; + public ResourceLimits(Resource limit) { +this.amountNeededUnreserve = Resources.none(); this.limit = limit; } - - volatile Resource limit; + + public ResourceLimits(Resource limit, Resource amountNeededUnreserve) { +this.amountNeededUnreserve = amountNeededUnreserve; +this.limit = limit; + } + public Resource getLimit() { return limit; } - + + public Resource getAmountNeededUnreserve() { +return amountNeededUnreserve; + } + public void setLimit(Resource limit) { this.limit = limit; } + + public void setAmountNeededUnreserve(Resource amountNeededUnreserve) { +this.amountNeededUnreserve = amountNeededUnreserve; + } + } http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 9233e01
spark git commit: [SPARK-2669] [yarn] Distribute client configuration to AM.
Repository: spark Updated Branches: refs/heads/master c84d91692 - 50ab8a654 [SPARK-2669] [yarn] Distribute client configuration to AM. Currently, when Spark launches the Yarn AM, the process will use the local Hadoop configuration on the node where the AM launches, if one is present. A more correct approach is to use the same configuration used to launch the Spark job, since the user may have made modifications (such as adding app-specific configs). The approach taken here is to use the distributed cache to make all files in the Hadoop configuration directory available to the AM. This is a little overkill since only the AM needs them (the executors use the broadcast Hadoop configuration from the driver), but is the easier approach. Even though only a few files in that directory may end up being used, all of them are uploaded. This allows supporting use cases such as when auxiliary configuration files are used for SSL configuration, or when uploading a Hive configuration directory. Not all of these may be reflected in a o.a.h.conf.Configuration object, but may be needed when a driver in cluster mode instantiates, for example, a HiveConf object instead. Author: Marcelo Vanzin van...@cloudera.com Closes #4142 from vanzin/SPARK-2669 and squashes the following commits: f5434b9 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669 013f0fb [Marcelo Vanzin] Review feedback. f693152 [Marcelo Vanzin] Le sigh. ed45b7d [Marcelo Vanzin] Zip all config files and upload them as an archive. 5927b6b [Marcelo Vanzin] Merge branch 'master' into SPARK-2669 cbb9fb3 [Marcelo Vanzin] Remove stale test. e3e58d0 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669 e3d0613 [Marcelo Vanzin] Review feedback. 34bdbd8 [Marcelo Vanzin] Fix test. 022a688 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669 a77ddd5 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669 79221c7 [Marcelo Vanzin] [SPARK-2669] [yarn] Distribute client configuration to AM. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50ab8a65 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50ab8a65 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50ab8a65 Branch: refs/heads/master Commit: 50ab8a6543ad5c31e89c16df374d0cb13222fd1e Parents: c84d916 Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Apr 17 14:21:51 2015 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri Apr 17 14:21:51 2015 -0500 -- docs/running-on-yarn.md | 6 +- .../org/apache/spark/deploy/yarn/Client.scala | 125 --- .../spark/deploy/yarn/ExecutorRunnable.scala| 2 +- .../apache/spark/deploy/yarn/ClientSuite.scala | 29 +++-- .../spark/deploy/yarn/YarnClusterSuite.scala| 6 +- 5 files changed, 132 insertions(+), 36 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/50ab8a65/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 853c9f2..0968fc5 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -211,7 +211,11 @@ Most of the configs are the same for Spark on YARN as for other deployment modes # Launching Spark on YARN Ensure that `HADOOP_CONF_DIR` or `YARN_CONF_DIR` points to the directory which contains the (client side) configuration files for the Hadoop cluster. -These configs are used to write to the dfs and connect to the YARN ResourceManager. +These configs are used to write to the dfs and connect to the YARN ResourceManager. The +configuration contained in this directory will be distributed to the YARN cluster so that all +containers used by the application use the same configuration. If the configuration references +Java system properties or environment variables not managed by YARN, they should also be set in the +Spark application's configuration (driver, executors, and the AM when running in client mode). There are two deploy modes that can be used to launch Spark applications on YARN. 
In yarn-cluster mode, the Spark driver runs inside an application master process which is managed by YARN on the cluster, and the client can go away after initiating the application. In yarn-client mode, the driver runs in the client process, and the application master is only used for requesting resources from YARN. http://git-wip-us.apache.org/repos/asf/spark/blob/50ab8a65/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 52e4dee..019afbd 100644 ---
spark git commit: [SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf.
Repository: spark Updated Branches: refs/heads/master b29663eee - 77620be76 [SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf. Adds hive2-metastore delegation token to conf when running in secure mode. Without this change, running on YARN in cluster mode fails with a GSS exception. This is a rough patch that adds a dependency to spark/yarn on hive-exec. I'm looking for suggestions on how to make this patch better. This contribution is my original work and that I licenses the work to the Apache Spark project under the project's open source licenses. Author: Doug Balog doug.balogtarget.com Author: Doug Balog doug.ba...@target.com Closes #5031 from dougb/SPARK-6207 and squashes the following commits: 3e9ac16 [Doug Balog] [SPARK-6207] Fixes minor code spacing issues. e260765 [Doug Balog] [SPARK-6207] Second pass at adding Hive delegation token to conf. - Use reflection instead of adding dependency on hive. - Tested on Hive 0.13 and Hadoop 2.4.1 1ab1729 [Doug Balog] Merge branch 'master' of git://github.com/apache/spark into SPARK-6207 bf356d2 [Doug Balog] [SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf. Adds hive2-metastore delagations token to conf when running in securemode. Without this change, runing on YARN in cluster mode fails with a GSS exception. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77620be7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77620be7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77620be7 Branch: refs/heads/master Commit: 77620be76e82b6cdaae406cd752d3272656f5fe0 Parents: b29663e Author: Doug Balog doug.ba...@target.com Authored: Mon Apr 13 09:49:58 2015 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Mon Apr 13 09:49:58 2015 -0500 -- .../org/apache/spark/deploy/yarn/Client.scala | 63 1 file changed, 63 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/77620be7/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index c1effd3..1091ff5 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -22,17 +22,21 @@ import java.nio.ByteBuffer import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap, ListBuffer, Map} +import scala.reflect.runtime.universe import scala.util.{Try, Success, Failure} import com.google.common.base.Objects import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.Master import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.token.Token import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -220,6 +224,7 @@ private[spark] class 
Client( val dst = new Path(fs.getHomeDirectory(), appStagingDir) val nns = getNameNodesToAccess(sparkConf) + dst obtainTokensForNamenodes(nns, hadoopConf, credentials) +obtainTokenForHiveMetastore(hadoopConf, credentials) val replication = sparkConf.getInt(spark.yarn.submit.file.replication, fs.getDefaultReplication(dst)).toShort @@ -937,6 +942,64 @@ object Client extends Logging { } /** + * Obtains token for the Hive metastore and adds them to the credentials. + */ + private def obtainTokenForHiveMetastore(conf: Configuration, credentials: Credentials) { +if (UserGroupInformation.isSecurityEnabled) { + val mirror = universe.runtimeMirror(getClass.getClassLoader) + + try { +val hiveClass = mirror.classLoader.loadClass(org.apache.hadoop.hive.ql.metadata.Hive) +val hive = hiveClass.getMethod(get).invoke(null) + +val hiveConf = hiveClass.getMethod(getConf).invoke(hive) +val hiveConfClass = mirror.classLoader.loadClass(org.apache.hadoop.hive.conf.HiveConf) + +val hiveConfGet = (param:String) = Option(hiveConfClass + .getMethod(get, classOf[java.lang.String]) + .invoke(hiveConf, param)) + +val metastore_uri = hiveConfGet(hive.metastore.uris) + +// Check for local metastore +if (metastore_uri != None metastore_uri.get.toString.size 0) { + val
spark git commit: [SPARK-3591][YARN]fire and forget for YARN cluster mode
Repository: spark Updated Branches: refs/heads/master ae980eb41 - b65bad65c [SPARK-3591][YARN]fire and forget for YARN cluster mode https://issues.apache.org/jira/browse/SPARK-3591 The output after this patch: doggie153:/opt/oss/spark-1.3.0-bin-hadoop2.4/bin # ./spark-submit --class org.apache.spark.examples.SparkPi --master yarn-cluster ../lib/spark-examples*.jar 15/03/31 21:15:25 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 15/03/31 21:15:25 INFO RMProxy: Connecting to ResourceManager at doggie153/10.177.112.153:8032 15/03/31 21:15:25 INFO Client: Requesting a new application from cluster with 4 NodeManagers 15/03/31 21:15:25 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (8192 MB per container) 15/03/31 21:15:25 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead 15/03/31 21:15:25 INFO Client: Setting up container launch context for our AM 15/03/31 21:15:25 INFO Client: Preparing resources for our AM container 15/03/31 21:15:26 INFO Client: Uploading resource file:/opt/oss/spark-1.3.0-bin-hadoop2.4/lib/spark-assembly-1.4.0-SNAPSHOT-hadoop2.4.1.jar - hdfs://doggie153:9000/user/root/.sparkStaging/application_1427257505534_0016/spark-assembly-1.4.0-SNAPSHOT-hadoop2.4.1.jar 15/03/31 21:15:27 INFO Client: Uploading resource file:/opt/oss/spark-1.3.0-bin-hadoop2.4/lib/spark-examples-1.3.0-hadoop2.4.0.jar - hdfs://doggie153:9000/user/root/.sparkStaging/application_1427257505534_0016/spark-examples-1.3.0-hadoop2.4.0.jar 15/03/31 21:15:28 INFO Client: Setting up the launch environment for our AM container 15/03/31 21:15:28 INFO SecurityManager: Changing view acls to: root 15/03/31 21:15:28 INFO SecurityManager: Changing modify acls to: root 15/03/31 21:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(root); users with modify 
permissions: Set(root) 15/03/31 21:15:28 INFO Client: Submitting application 16 to ResourceManager 15/03/31 21:15:28 INFO YarnClientImpl: Submitted application application_1427257505534_0016 15/03/31 21:15:28 INFO Client: ... waiting before polling ResourceManager for application state 15/03/31 21:15:33 INFO Client: ... polling ResourceManager for application state 15/03/31 21:15:33 INFO Client: Application report for application_1427257505534_0016 (state: RUNNING) 15/03/31 21:15:33 INFO Client: client token: N/A diagnostics: N/A ApplicationMaster host: doggie157 ApplicationMaster RPC port: 0 queue: default start time: 1427807728307 final status: UNDEFINED tracking URL: http://doggie153:8088/proxy/application_1427257505534_0016/ user: root /cc andrewor14 Author: WangTaoTheTonic wangtao...@huawei.com Closes #5297 from WangTaoTheTonic/SPARK-3591 and squashes the following commits: c76d232 [WangTaoTheTonic] wrap lines 16c90a8 [WangTaoTheTonic] move up lines to avoid duplicate fea390d [WangTaoTheTonic] log failed/killed report, style and comment be1cc2e [WangTaoTheTonic] reword f0bc54f [WangTaoTheTonic] minor: expose appid in excepiton messages ba9b22b [WangTaoTheTonic] wrong config name e1a4013 [WangTaoTheTonic] revert to the old version and do some robust 19706c0 [WangTaoTheTonic] add a config to control whether to forget 0cbdce8 [WangTaoTheTonic] fire and forget for YARN cluster mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b65bad65 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b65bad65 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b65bad65 Branch: refs/heads/master Commit: b65bad65c3500475b974ca0219f218eef296db2c Parents: ae980eb Author: WangTaoTheTonic wangtao...@huawei.com Authored: Tue Apr 7 08:36:25 2015 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Apr 7 08:36:25 2015 -0500 -- .../scala/org/apache/spark/deploy/Client.scala | 2 +- 
.../deploy/rest/StandaloneRestClient.scala | 2 +- docs/running-on-yarn.md | 9 +++ .../org/apache/spark/deploy/yarn/Client.scala | 83 4 files changed, 61 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b65bad65/core/src/main/scala/org/apache/spark/deploy/Client.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index 65238af..8d13b2a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -89,7 +89,7 @@ private class
spark git commit: [SPARK-6050] [yarn] Relax matching of vcore count in received containers.
Repository: spark Updated Branches: refs/heads/branch-1.3 a83b9bbb2 - 650d1e7fb [SPARK-6050] [yarn] Relax matching of vcore count in received containers. Some YARN configurations return a vcore count for allocated containers that does not match the requested resource. That means Spark would always ignore those containers. So relax the the matching of the vcore count to allow the Spark jobs to run. Author: Marcelo Vanzin van...@cloudera.com Closes #4818 from vanzin/SPARK-6050 and squashes the following commits: 991c803 [Marcelo Vanzin] Remove config option, standardize on legacy behavior (no vcore matching). 8c9c346 [Marcelo Vanzin] Restrict lax matching to vcores only. 3359692 [Marcelo Vanzin] [SPARK-6050] [yarn] Add config option to do lax resource matching. (cherry picked from commit 6b348d90f475440c285a4b636134ffa9351580b9) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/650d1e7f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/650d1e7f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/650d1e7f Branch: refs/heads/branch-1.3 Commit: 650d1e7fb13545d0d102de9bb6e11ab4f9ef6359 Parents: a83b9bb Author: Marcelo Vanzin van...@cloudera.com Authored: Mon Mar 2 16:41:43 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Mon Mar 2 16:42:02 2015 -0600 -- .../org/apache/spark/deploy/yarn/YarnAllocator.scala | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/650d1e7f/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 12c62a6..55bfbcd 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -290,8 
+290,14 @@ private[yarn] class YarnAllocator( location: String, containersToUse: ArrayBuffer[Container], remaining: ArrayBuffer[Container]): Unit = { +// SPARK-6050: certain Yarn configurations return a virtual core count that doesn't match the +// request; for example, capacity scheduler + DefaultResourceCalculator. So match on requested +// memory, but use the asked vcore count for matching, effectively disabling matching on vcore +// count. +val matchingResource = Resource.newInstance(allocatedContainer.getResource.getMemory, + resource.getVirtualCores) val matchingRequests = amClient.getMatchingRequests(allocatedContainer.getPriority, location, - allocatedContainer.getResource) + matchingResource) // Match the allocation to a request if (!matchingRequests.isEmpty) { @@ -318,7 +324,7 @@ private[yarn] class YarnAllocator( assert(container.getResource.getMemory = resource.getMemory) logInfo(Launching container %s for on host %s.format(containerId, executorHostname)) - executorIdToContainer(executorId) = container + executorIdToContainer(executorId) = container val containerSet = allocatedHostToContainersMap.getOrElseUpdate(executorHostname, new HashSet[ContainerId]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6050] [yarn] Relax matching of vcore count in received containers.
Repository: spark Updated Branches: refs/heads/master 582e5a24c - 6b348d90f [SPARK-6050] [yarn] Relax matching of vcore count in received containers. Some YARN configurations return a vcore count for allocated containers that does not match the requested resource. That means Spark would always ignore those containers. So relax the matching of the vcore count to allow the Spark jobs to run. Author: Marcelo Vanzin van...@cloudera.com Closes #4818 from vanzin/SPARK-6050 and squashes the following commits: 991c803 [Marcelo Vanzin] Remove config option, standardize on legacy behavior (no vcore matching). 8c9c346 [Marcelo Vanzin] Restrict lax matching to vcores only. 3359692 [Marcelo Vanzin] [SPARK-6050] [yarn] Add config option to do lax resource matching. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b348d90 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b348d90 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b348d90 Branch: refs/heads/master Commit: 6b348d90f475440c285a4b636134ffa9351580b9 Parents: 582e5a2 Author: Marcelo Vanzin van...@cloudera.com Authored: Mon Mar 2 16:41:43 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Mon Mar 2 16:41:43 2015 -0600 -- .../org/apache/spark/deploy/yarn/YarnAllocator.scala | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6b348d90/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 12c62a6..55bfbcd 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -290,8 +290,14 @@ private[yarn] class YarnAllocator( location: String, containersToUse: ArrayBuffer[Container], remaining: 
ArrayBuffer[Container]): Unit = { +// SPARK-6050: certain Yarn configurations return a virtual core count that doesn't match the +// request; for example, capacity scheduler + DefaultResourceCalculator. So match on requested +// memory, but use the asked vcore count for matching, effectively disabling matching on vcore +// count. +val matchingResource = Resource.newInstance(allocatedContainer.getResource.getMemory, + resource.getVirtualCores) val matchingRequests = amClient.getMatchingRequests(allocatedContainer.getPriority, location, - allocatedContainer.getResource) + matchingResource) // Match the allocation to a request if (!matchingRequests.isEmpty) { @@ -318,7 +324,7 @@ private[yarn] class YarnAllocator( assert(container.getResource.getMemory = resource.getMemory) logInfo(Launching container %s for on host %s.format(containerId, executorHostname)) - executorIdToContainer(executorId) = container + executorIdToContainer(executorId) = container val containerSet = allocatedHostToContainersMap.getOrElseUpdate(executorHostname, new HashSet[ContainerId]) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714
Repository: spark Updated Branches: refs/heads/master 6f21dce5f - 254eaa4d3 SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714 Previously I had tried to solve this with by adding a line in Spark's log4j-defaults.properties. The issue with the message in log4j-defaults.properties was that the log4j.properties packaged inside Hadoop was getting picked up instead. While it would be ideal to fix that as well, we still want to quiet this in situations where a user supplies their own custom log4j properties. Author: Sandy Ryza sa...@cloudera.com Closes #4192 from sryza/sandy-spark-5393 and squashes the following commits: 4d5dedc [Sandy Ryza] Only set log level if unset 46e07c5 [Sandy Ryza] SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/254eaa4d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/254eaa4d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/254eaa4d Branch: refs/heads/master Commit: 254eaa4d350dafe19f1715e80eb816856a126c21 Parents: 6f21dce Author: Sandy Ryza sa...@cloudera.com Authored: Fri Jan 30 11:31:54 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Fri Jan 30 11:31:54 2015 -0600 -- .../org/apache/spark/log4j-defaults.properties | 1 - .../scala/org/apache/spark/SparkContext.scala | 2 +- .../SparkContextSchedulerCreationSuite.scala| 2 +- .../spark/deploy/yarn/YarnAllocator.scala | 7 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 4 -- .../cluster/YarnClientClusterScheduler.scala| 36 -- .../cluster/YarnClusterScheduler.scala | 18 + .../spark/scheduler/cluster/YarnScheduler.scala | 40 8 files changed, 50 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/main/resources/org/apache/spark/log4j-defaults.properties -- diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties 
b/core/src/main/resources/org/apache/spark/log4j-defaults.properties index c99a61f..89eec7d 100644 --- a/core/src/main/resources/org/apache/spark/log4j-defaults.properties +++ b/core/src/main/resources/org/apache/spark/log4j-defaults.properties @@ -10,4 +10,3 @@ log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO -log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 4c4ee04..3c61c10 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1986,7 +1986,7 @@ object SparkContext extends Logging { case yarn-client = val scheduler = try { val clazz = - Class.forName(org.apache.spark.scheduler.cluster.YarnClientClusterScheduler) +Class.forName(org.apache.spark.scheduler.cluster.YarnScheduler) val cons = clazz.getConstructor(classOf[SparkContext]) cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl] http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala index 8ae4f24..bbed8dd 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala @@ -149,7 +149,7 @@ class SparkContextSchedulerCreationSuite } test(yarn-client) { -testYarn(yarn-client, 
org.apache.spark.scheduler.cluster.YarnClientClusterScheduler) +testYarn(yarn-client, org.apache.spark.scheduler.cluster.YarnScheduler) } def testMesos(master: String, expectedClass: Class[_], coarse: Boolean) { http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
spark git commit: SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAlloca...
Repository: spark Updated Branches: refs/heads/master 246111d17 - 820ce0359 SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAlloca... ...tor Author: Sandy Ryza sa...@cloudera.com Closes #4164 from sryza/sandy-spark-5370 and squashes the following commits: 0c8d736 [Sandy Ryza] SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAllocator Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/820ce035 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/820ce035 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/820ce035 Branch: refs/heads/master Commit: 820ce03597350257abe0c5c96435c555038e3e6c Parents: 246111d Author: Sandy Ryza sa...@cloudera.com Authored: Thu Jan 22 13:49:35 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jan 22 13:49:35 2015 -0600 -- .../spark/deploy/yarn/YarnAllocator.scala | 23 +--- 1 file changed, 10 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/820ce035/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 4c35b60..d00f296 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -60,7 +60,6 @@ private[yarn] class YarnAllocator( import YarnAllocator._ - // These two complementary data structures are locked on allocatedHostToContainersMap. // Visible for testing. 
val allocatedHostToContainersMap = new HashMap[String, collection.mutable.Set[ContainerId]] @@ -355,20 +354,18 @@ private[yarn] class YarnAllocator( } } - allocatedHostToContainersMap.synchronized { -if (allocatedContainerToHostMap.containsKey(containerId)) { - val host = allocatedContainerToHostMap.get(containerId).get - val containerSet = allocatedHostToContainersMap.get(host).get + if (allocatedContainerToHostMap.containsKey(containerId)) { +val host = allocatedContainerToHostMap.get(containerId).get +val containerSet = allocatedHostToContainersMap.get(host).get - containerSet.remove(containerId) - if (containerSet.isEmpty) { -allocatedHostToContainersMap.remove(host) - } else { -allocatedHostToContainersMap.update(host, containerSet) - } - - allocatedContainerToHostMap.remove(containerId) +containerSet.remove(containerId) +if (containerSet.isEmpty) { + allocatedHostToContainersMap.remove(host) +} else { + allocatedHostToContainersMap.update(host, containerSet) } + +allocatedContainerToHostMap.remove(containerId) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnA...
Repository: spark Updated Branches: refs/heads/master 8c06a5faa - 2eeada373 SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnA... ...llocator The goal of this PR is to simplify YarnAllocator as much as possible and get it up to the level of code quality we see in the rest of Spark. In service of this, it does a few things: * Uses AMRMClient APIs for matching containers to requests. * Adds calls to AMRMClient.removeContainerRequest so that, when we use a container, we don't end up requesting it again. * Removes YarnAllocator's host-rack cache. YARN's RackResolver already does this caching, so this is redundant. * Adds tests for basic YarnAllocator functionality. * Breaks up the allocateResources method, which was previously nearly 300 lines. * A little bit of stylistic cleanup. * Fixes a bug that causes three times the requests to be filed when preferred host locations are given. The patch is lossy. In particular, it loses the logic for trying to avoid containers bunching up on nodes. As I understand it, the logic that's gone is: * If, in a single response from the RM, we receive a set of containers on a node, and prefer some number of containers on that node greater than 0 but less than the number we received, give back the delta between what we preferred and what we received. This seems like a weird way to avoid bunching. E.g. it does nothing to avoid bunching when we don't request containers on particular nodes. 
Author: Sandy Ryza sa...@cloudera.com Closes #3765 from sryza/sandy-spark-1714 and squashes the following commits: 32a5942 [Sandy Ryza] Muffle RackResolver logs 74f56dd [Sandy Ryza] Fix a couple comments and simplify requestTotalExecutors 60ea4bd [Sandy Ryza] Fix scalastyle ca35b53 [Sandy Ryza] Simplify further e9cf8a6 [Sandy Ryza] Fix YarnClusterSuite 257acf3 [Sandy Ryza] Remove locality stuff and more cleanup 59a3c5e [Sandy Ryza] Take out rack stuff 5f72fd5 [Sandy Ryza] Further documentation and cleanup 89edd68 [Sandy Ryza] SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnAllocator Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eeada37 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eeada37 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eeada37 Branch: refs/heads/master Commit: 2eeada373e59d63b774ba92eb5d75fcd3a1cf8f4 Parents: 8c06a5f Author: Sandy Ryza sa...@cloudera.com Authored: Wed Jan 21 10:31:54 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jan 21 10:31:54 2015 -0600 -- .../org/apache/spark/log4j-defaults.properties | 1 + .../spark/deploy/yarn/YarnAllocator.scala | 733 ++- .../apache/spark/deploy/yarn/YarnRMClient.scala | 3 +- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 41 +- .../cluster/YarnClientClusterScheduler.scala| 5 +- .../cluster/YarnClusterScheduler.scala | 6 +- .../spark/deploy/yarn/YarnAllocatorSuite.scala | 150 +++- 7 files changed, 389 insertions(+), 550 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2eeada37/core/src/main/resources/org/apache/spark/log4j-defaults.properties -- diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties b/core/src/main/resources/org/apache/spark/log4j-defaults.properties index 89eec7d..c99a61f 100644 --- a/core/src/main/resources/org/apache/spark/log4j-defaults.properties +++ 
b/core/src/main/resources/org/apache/spark/log4j-defaults.properties @@ -10,3 +10,4 @@ log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO +log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN http://git-wip-us.apache.org/repos/asf/spark/blob/2eeada37/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index de65ef2..4c35b60 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -17,8 +17,8 @@ package org.apache.spark.deploy.yarn +import java.util.Collections import java.util.concurrent._ -import java.util.concurrent.atomic.AtomicInteger import java.util.regex.Pattern import scala.collection.JavaConversions._ @@ -28,33 +28,26 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.hadoop.conf.Configuration
spark git commit: [SPARK-5336][YARN]spark.executor.cores must not be less than spark.task.cpus
Repository: spark Updated Branches: refs/heads/master 424d8c6ff - 8c06a5faa [SPARK-5336][YARN]spark.executor.cores must not be less than spark.task.cpus https://issues.apache.org/jira/browse/SPARK-5336 Author: WangTao barneystin...@aliyun.com Author: WangTaoTheTonic barneystin...@aliyun.com Closes #4123 from WangTaoTheTonic/SPARK-5336 and squashes the following commits: 6c9676a [WangTao] Update ClientArguments.scala 9632d3a [WangTaoTheTonic] minor comment fix d03d6fa [WangTaoTheTonic] import ordering should be alphabetical' 3112af9 [WangTao] spark.executor.cores must not be less than spark.task.cpus Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c06a5fa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c06a5fa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c06a5fa Branch: refs/heads/master Commit: 8c06a5faacfc71050461273133b9cf9a9dd8986f Parents: 424d8c6 Author: WangTao barneystin...@aliyun.com Authored: Wed Jan 21 09:42:30 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jan 21 09:42:30 2015 -0600 -- .../org/apache/spark/ExecutorAllocationManager.scala | 2 +- .../org/apache/spark/scheduler/TaskSchedulerImpl.scala| 2 +- .../org/apache/spark/deploy/yarn/ClientArguments.scala| 10 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index a0ee2a7..b28da19 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -158,7 +158,7 @@ private[spark] class ExecutorAllocationManager( shuffle service. You may enable this through spark.shuffle.service.enabled.) 
} if (tasksPerExecutor == 0) { - throw new SparkException(spark.executor.cores must not be less than spark.task.cpus.cores) + throw new SparkException(spark.executor.cores must not be less than spark.task.cpus.) } } http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index a1dfb01..33a7aae 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -168,7 +168,7 @@ private[spark] class TaskSchedulerImpl( if (!hasLaunchedTask) { logWarning(Initial job has not accepted any resources; + check your cluster UI to ensure that workers are registered + -and have sufficient memory) +and have sufficient resources) } else { this.cancel() } http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index 79bead7..f96b245 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -19,9 +19,9 @@ package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ -import org.apache.spark.util.{Utils, IntParam, MemoryParam} +import org.apache.spark.util.{IntParam, MemoryParam, Utils} // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware ! 
private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) { @@ -95,6 +95,10 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) throw new IllegalArgumentException( You must specify at least 1 executor!\n + getUsageMessage()) } +if (executorCores sparkConf.getInt(spark.task.cpus, 1)) { + throw new SparkException(Executor cores must not be less than + +spark.task.cpus.) +} if (isClusterMode) { for (key - Seq(amMemKey, amMemOverheadKey,
spark git commit: [SPARK-5169][YARN]fetch the correct max attempts
Repository: spark Updated Branches: refs/heads/master 167a5ab0b - f3da4bd72 [SPARK-5169][YARN]fetch the correct max attempts Sorry for fetching the wrong max attempts in this commit https://github.com/apache/spark/commit/8fdd48959c93b9cf809f03549e2ae6c4687d1fcd. We need to fix it now. tgravescs If we set a spark.yarn.maxAppAttempts which is larger than `yarn.resourcemanager.am.max-attempts` in yarn side, it will be overridden as described here: The maximum number of application attempts. It's a global setting for all application masters. Each application master can specify its individual maximum number of application attempts via the API, but the individual number cannot be more than the global upper bound. If it is, the resourcemanager will override it. The default number is set to 2, to allow at least one retry for AM. http://hadoop.apache.org/docs/r2.6.0/hadoop-yarn/hadoop-yarn-common/yarn-default.xml Author: WangTaoTheTonic barneystin...@aliyun.com Closes #3942 from WangTaoTheTonic/HOTFIX and squashes the following commits: 9ac16ce [WangTaoTheTonic] fetch the correct max attempts Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f3da4bd7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f3da4bd7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f3da4bd7 Branch: refs/heads/master Commit: f3da4bd7289d493014ad3c5176ada60794dfcfe0 Parents: 167a5ab Author: WangTaoTheTonic barneystin...@aliyun.com Authored: Fri Jan 9 08:10:09 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Fri Jan 9 08:10:09 2015 -0600 -- .../org/apache/spark/deploy/yarn/YarnRMClient.scala | 12 +--- 1 file changed, 9 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f3da4bd7/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index e183efc..b45e599 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -121,9 +121,15 @@ private[spark] class YarnRMClient(args: ApplicationMasterArguments) extends Logg /** Returns the maximum number of attempts to register the AM. */ def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): Int = { -sparkConf.getOption(spark.yarn.maxAppAttempts).map(_.toInt).getOrElse( - yarnConf.getInt( -YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)) +val sparkMaxAttempts = sparkConf.getOption(spark.yarn.maxAppAttempts).map(_.toInt) +val yarnMaxAttempts = yarnConf.getInt( + YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS) +val retval: Int = sparkMaxAttempts match { + case Some(x) = if (x = yarnMaxAttempts) x else yarnMaxAttempts + case None = yarnMaxAttempts +} + +retval } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-2165][YARN]add support for setting maxAppAttempts in the ApplicationSubmissionContext
Repository: spark Updated Branches: refs/heads/master 5fde66163 - 8fdd48959 [SPARK-2165][YARN]add support for setting maxAppAttempts in the ApplicationSubmissionContext ...xt https://issues.apache.org/jira/browse/SPARK-2165 I still have 2 questions: * If this config is not set, we should use yarn's corresponding value or a default value(like 2) on spark side? * Is the config name best? Or spark.yarn.am.maxAttempts? Author: WangTaoTheTonic barneystin...@aliyun.com Closes #3878 from WangTaoTheTonic/SPARK-2165 and squashes the following commits: 1416c83 [WangTaoTheTonic] use the name spark.yarn.maxAppAttempts 202ac85 [WangTaoTheTonic] rephrase some afdfc99 [WangTaoTheTonic] more detailed description 91562c6 [WangTaoTheTonic] add support for setting maxAppAttempts in the ApplicationSubmissionContext Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8fdd4895 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8fdd4895 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8fdd4895 Branch: refs/heads/master Commit: 8fdd48959c93b9cf809f03549e2ae6c4687d1fcd Parents: 5fde661 Author: WangTaoTheTonic barneystin...@aliyun.com Authored: Wed Jan 7 08:14:39 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jan 7 08:14:39 2015 -0600 -- docs/running-on-yarn.md | 8 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 5 + .../scala/org/apache/spark/deploy/yarn/YarnRMClient.scala| 7 +-- 4 files changed, 19 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index da1c8e8..183698f 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -149,6 +149,14 @@ Most of the configs are the same for Spark on YARN as for other deployment modes In cluster mode, use 
spark.driver.extraJavaOptions instead. /td /tr +tr + tdcodespark.yarn.maxAppAttempts/code/td + tdyarn.resourcemanager.am.max-attempts in YARN/td + td + The maximum number of attempts that will be made to submit the application. + It should be no larger than the global number of max attempts in the YARN configuration. + /td +/tr /table # Launching Spark on YARN http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 618db7f..902bdda 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -102,7 +102,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, logInfo(Invoking sc stop from shutdown hook) sc.stop() } - val maxAppAttempts = client.getMaxRegAttempts(yarnConf) + val maxAppAttempts = client.getMaxRegAttempts(sparkConf, yarnConf) val isLastAttempt = client.getAttemptId().getAttemptId() = maxAppAttempts if (!finished) { http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index addaddb..a2c3f91 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -98,6 +98,11 @@ private[spark] class Client( appContext.setQueue(args.amQueue) appContext.setAMContainerSpec(containerContext) appContext.setApplicationType(SPARK) +sparkConf.getOption(spark.yarn.maxAppAttempts).map(_.toInt) match { + case Some(v) = appContext.setMaxAppAttempts(v) + case None = logDebug(spark.yarn.maxAppAttempts is not set. 
+ + Cluster's default value will be used.) +} val capability = Records.newRecord(classOf[Resource]) capability.setMemory(args.amMemory + amMemoryOverhead) appContext.setResource(capability) http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala -- diff --git
spark git commit: [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA
Repository: spark Updated Branches: refs/heads/master e21acc197 - 5fde66163 [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA Nowadays, yarn-client will exit directly when the HA change happens no matter how many times the am should retry. The reason may be that the default final status only considered the sys.exit, and the yarn-client HA can't benefit from this. So we should distinguish the default final status between client and cluster, because the SUCCEEDED status may cause the HA failed in client mode and UNDEFINED may cause the error reporter in cluster when using sys.exit. Author: huangzhaowei carlmartin...@gmail.com Closes #3771 from SaintBacchus/YarnHA and squashes the following commits: c02bfcc [huangzhaowei] Improve the comment of the function 'getDefaultFinalStatus' 0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fde6616 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fde6616 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fde6616 Branch: refs/heads/master Commit: 5fde66163fe460d6f64b145047f76cc4ee33601a Parents: e21acc1 Author: huangzhaowei carlmartin...@gmail.com Authored: Wed Jan 7 08:10:42 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jan 7 08:10:42 2015 -0600 -- .../spark/deploy/yarn/ApplicationMaster.scala | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5fde6616/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 9c77dff..618db7f 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -60,7 
+60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, @volatile private var exitCode = 0 @volatile private var unregistered = false @volatile private var finished = false - @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED + @volatile private var finalStatus = getDefaultFinalStatus @volatile private var finalMsg: String = @volatile private var userClassThread: Thread = _ @@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } /** + * Set the default final application status for client mode to UNDEFINED to handle + * if YARN HA restarts the application so that it properly retries. Set the final + * status to SUCCEEDED in cluster mode to handle if the user calls System.exit + * from the application code. + */ + final def getDefaultFinalStatus() = { +if (isDriver) { + FinalApplicationStatus.SUCCEEDED +} else { + FinalApplicationStatus.UNDEFINED +} + } + + /** * unregister is used to completely unregister the application from the ResourceManager. * This means the ResourceManager will not retry the application attempt on your behalf if * a failure occurred. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA
Repository: spark Updated Branches: refs/heads/branch-1.2 db83acb1f - 7a4be0b45 [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA Nowadays, yarn-client will exit directly when the HA change happens no matter how many times the am should retry. The reason may be that the default final status only considerred the sys.exit, and the yarn-client HA cann't benefit from this. So we should distinct the default final status between client and cluster, because the SUCCEEDED status may cause the HA failed in client mode and UNDEFINED may cause the error reporter in cluster when using sys.exit. Author: huangzhaowei carlmartin...@gmail.com Closes #3771 from SaintBacchus/YarnHA and squashes the following commits: c02bfcc [huangzhaowei] Improve the comment of the funciton 'getDefaultFinalStatus' 0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA (cherry picked from commit 5fde66163fe460d6f64b145047f76cc4ee33601a) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a4be0b4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a4be0b4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a4be0b4 Branch: refs/heads/branch-1.2 Commit: 7a4be0b45f003ce92031d36bf74a736a87889026 Parents: db83acb Author: huangzhaowei carlmartin...@gmail.com Authored: Wed Jan 7 08:10:42 2015 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jan 7 08:11:14 2015 -0600 -- .../spark/deploy/yarn/ApplicationMaster.scala | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a4be0b4/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 987b337..166e84e 100644 --- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, @volatile private var exitCode = 0 @volatile private var unregistered = false @volatile private var finished = false - @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED + @volatile private var finalStatus = getDefaultFinalStatus @volatile private var finalMsg: String = @volatile private var userClassThread: Thread = _ @@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } /** + * Set the default final application status for client mode to UNDEFINED to handle + * if YARN HA restarts the application so that it properly retries. Set the final + * status to SUCCEEDED in cluster mode to handle if the user calls System.exit + * from the application code. + */ + final def getDefaultFinalStatus() = { +if (isDriver) { + FinalApplicationStatus.SUCCEEDED +} else { + FinalApplicationStatus.UNDEFINED +} + } + + /** * unregister is used to completely unregister the application from the ResourceManager. * This means the ResourceManager will not retry the application attempt on your behalf if * a failure occurred. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4966][YARN] The MemoryOverhead value is not set correctly
Repository: spark Updated Branches: refs/heads/branch-1.2 23d64cf08 - 2cd446a90 [SPARK-4966][YARN]The MemoryOverhead value is setted not correctly Author: meiyoula 1039320...@qq.com Closes #3797 from XuTingjun/MemoryOverhead and squashes the following commits: 5a780fc [meiyoula] Update ClientArguments.scala (cherry picked from commit 14fa87bdf4b89cd392270864ee063ce01bd31887) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cd446a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cd446a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cd446a9 Branch: refs/heads/branch-1.2 Commit: 2cd446a90216ac8eb19584c760685fbb470c4301 Parents: 23d64cf Author: meiyoula 1039320...@qq.com Authored: Mon Dec 29 08:20:30 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Mon Dec 29 08:21:19 2014 -0600 -- .../main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2cd446a9/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index 4d85945..7687a9b 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -39,6 +39,8 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf) var appName: String = Spark var priority = 0 + parseArgs(args.toList) + // Additional memory to allocate to containers // For now, use driver's memory overhead as our AM container's memory overhead val amMemoryOverhead = sparkConf.getInt(spark.yarn.driver.memoryOverhead, @@ -50,7 +52,6 @@ private[spark] class 
ClientArguments(args: Array[String], sparkConf: SparkConf) private val isDynamicAllocationEnabled = sparkConf.getBoolean(spark.dynamicAllocation.enabled, false) - parseArgs(args.toList) loadEnvironmentArgs() validateArgs() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first
Repository: spark Updated Branches: refs/heads/master fef27b294 - d24076019 [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first The documentation for the two parameters is the same with a pointer from the standalone parameter to the yarn parameter Author: arahuja aahuj...@gmail.com Closes #3209 from arahuja/yarn-classpath-first-param and squashes the following commits: 51cb9b2 [arahuja] [SPARK-4344][DOCS] adding documentation for YARN on userClassPathFirst Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2407601 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2407601 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2407601 Branch: refs/heads/master Commit: d240760191f692ee7b88dfc82f06a31a340a88a2 Parents: fef27b2 Author: arahuja aahuj...@gmail.com Authored: Tue Nov 25 08:23:41 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Tue Nov 25 08:23:41 2014 -0600 -- docs/configuration.md | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d2407601/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 8839162..0b77f5a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -224,6 +224,7 @@ Apart from these, the following properties are also available, and may be useful (Experimental) Whether to give user-added jars precedence over Spark's own jars when loading classes in Executors. This feature can be used to mitigate conflicts between Spark's dependencies and user dependencies. It is currently an experimental feature. +(Currently, this setting does not work for YARN, see a href=https://issues.apache.org/jira/browse/SPARK-2996;SPARK-2996/a for more details). /td /tr tr - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first
Repository: spark Updated Branches: refs/heads/branch-1.2 b026546e3 - a689ab98d [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first The documentation for the two parameters is the same with a pointer from the standalone parameter to the yarn parameter Author: arahuja aahuj...@gmail.com Closes #3209 from arahuja/yarn-classpath-first-param and squashes the following commits: 51cb9b2 [arahuja] [SPARK-4344][DOCS] adding documentation for YARN on userClassPathFirst (cherry picked from commit d240760191f692ee7b88dfc82f06a31a340a88a2) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a689ab98 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a689ab98 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a689ab98 Branch: refs/heads/branch-1.2 Commit: a689ab98d944dbe4b239449897841543c0450450 Parents: b026546 Author: arahuja aahuj...@gmail.com Authored: Tue Nov 25 08:23:41 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Tue Nov 25 08:23:52 2014 -0600 -- docs/configuration.md | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a689ab98/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index f0b396e..be418aa 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -224,6 +224,7 @@ Apart from these, the following properties are also available, and may be useful (Experimental) Whether to give user-added jars precedence over Spark's own jars when loading classes in Executors. This feature can be used to mitigate conflicts between Spark's dependencies and user dependencies. It is currently an experimental feature. +(Currently, this setting does not work for YARN, see a href=https://issues.apache.org/jira/browse/SPARK-2996;SPARK-2996/a for more details). 
/td /tr tr - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-4457. Document how to build for Hadoop versions greater than 2.4
Repository: spark Updated Branches: refs/heads/master 9b2a3c612 - 29372b631 SPARK-4457. Document how to build for Hadoop versions greater than 2.4 Author: Sandy Ryza sa...@cloudera.com Closes #3322 from sryza/sandy-spark-4457 and squashes the following commits: 5e72b77 [Sandy Ryza] Feedback 0cf05c1 [Sandy Ryza] Caveat be8084b [Sandy Ryza] SPARK-4457. Document how to build for Hadoop versions greater than 2.4 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/29372b63 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/29372b63 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/29372b63 Branch: refs/heads/master Commit: 29372b63185a4a170178b6ec2362d7112f389852 Parents: 9b2a3c6 Author: Sandy Ryza sa...@cloudera.com Authored: Mon Nov 24 13:28:48 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Mon Nov 24 13:28:48 2014 -0600 -- docs/building-spark.md | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/29372b63/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index bb18414..fee6a84 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -92,8 +92,11 @@ mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package # Apache Hadoop 2.3.X mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package -# Apache Hadoop 2.4.X -mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package +# Apache Hadoop 2.4.X or 2.5.X +mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=VERSION -DskipTests clean package + +Versions of Hadoop after 2.5.X may or may not work with the -Phadoop-2.4 profile (they were +released after this version of Spark). # Different versions of HDFS and YARN. 
mvn -Pyarn-alpha -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=0.23.7 -DskipTests clean package - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-3722][Docs] minor improvement and fix in docs
Repository: spark Updated Branches: refs/heads/master 825709a0b - e421072da [SPARK-3722][Docs]minor improvement and fix in docs https://issues.apache.org/jira/browse/SPARK-3722 Author: WangTao barneystin...@aliyun.com Closes #2579 from WangTaoTheTonic/docsWork and squashes the following commits: 6f91cec [WangTao] use more wording express 29d22fa [WangTao] delete the specified version link 34cb4ea [WangTao] Update running-on-yarn.md 4ee1a26 [WangTao] minor improvement and fix in docs Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e421072d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e421072d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e421072d Branch: refs/heads/master Commit: e421072da0ea87e7056cc3f2130ddaafc731530f Parents: 825709a Author: WangTao barneystin...@aliyun.com Authored: Fri Nov 14 08:09:42 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Fri Nov 14 08:09:42 2014 -0600 -- docs/configuration.md | 2 +- docs/running-on-yarn.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e421072d/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index f0b396e..8839162 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -52,7 +52,7 @@ Then, you can supply configuration values at runtime: --conf spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps myApp.jar {% endhighlight %} -The Spark shell and [`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit) +The Spark shell and [`spark-submit`](submitting-applications.html) tool support two ways to load configurations dynamically. The first are command line options, such as `--master`, as shown above. `spark-submit` can accept any Spark property using the `--conf` flag, but uses special flags for properties that play a part in launching the Spark application. 
http://git-wip-us.apache.org/repos/asf/spark/blob/e421072d/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 2f7e498..dfe2db4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -39,7 +39,7 @@ Most of the configs are the same for Spark on YARN as for other deployment modes tdcodespark.yarn.preserve.staging.files/code/td tdfalse/td td -Set to true to preserve the staged files (Spark jar, app jar, distributed cache files) at the end of the job rather then delete them. +Set to true to preserve the staged files (Spark jar, app jar, distributed cache files) at the end of the job rather than delete them. /td /tr tr @@ -159,7 +159,7 @@ For example: lib/spark-examples*.jar \ 10 -The above starts a YARN client program which starts the default Application Master. Then SparkPi will be run as a child thread of Application Master. The client will periodically poll the Application Master for status updates and display them in the console. The client will exit once your application has finished running. Refer to the Viewing Logs section below for how to see driver and executor logs. +The above starts a YARN client program which starts the default Application Master. Then SparkPi will be run as a child thread of Application Master. The client will periodically poll the Application Master for status updates and display them in the console. The client will exit once your application has finished running. Refer to the Debugging your Application section below for how to see driver and executor logs. To launch a Spark application in yarn-client mode, do the same, but replace yarn-cluster with yarn-client. To run spark-shell: @@ -181,7 +181,7 @@ In YARN terminology, executors and application masters run inside containers. yarn logs -applicationId app ID -will print out the contents of all log files from all containers from the given application. 
+will print out the contents of all log files from all containers from the given application. You can also view the container log files directly in HDFS using the HDFS shell or API. The directory where they are located can be found by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and `yarn.nodemanager.remote-app-log-dir-suffix`). When log aggregation isn't turned on, logs are retained locally on each machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and installation. Viewing logs for a container requires going to the host that contains them and looking in this
spark git commit: SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module
Repository: spark Updated Branches: refs/heads/master deefd9d73 - f820b563d SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module SPARK-3797 introduced the `network/yarn` module, but its YARN code depends on YARN APIs not present in older versions covered by the `yarn-alpha` profile. As a result builds like `mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 -DskipTests clean package` fail. The solution is just to not build `network/yarn` with profile `yarn-alpha`. Author: Sean Owen so...@cloudera.com Closes #3167 from srowen/SPARK-4305 and squashes the following commits: 88938cb [Sean Owen] Don't build network/yarn in yarn-alpha profile as it won't compile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f820b563 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f820b563 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f820b563 Branch: refs/heads/master Commit: f820b563d88f6a972c219d9340fe95110493fb87 Parents: deefd9d Author: Sean Owen so...@cloudera.com Authored: Tue Nov 11 12:30:35 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Tue Nov 11 12:30:35 2014 -0600 -- pom.xml | 1 - 1 file changed, 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f820b563/pom.xml -- diff --git a/pom.xml b/pom.xml index 88ef67c..4e0cd6c 100644 --- a/pom.xml +++ b/pom.xml @@ -1229,7 +1229,6 @@ idyarn-alpha/id modules moduleyarn/module -modulenetwork/yarn/module /modules /profile - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module
Repository: spark Updated Branches: refs/heads/branch-1.2 cc1f3a0d6 - 8f7e80f30 SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module SPARK-3797 introduced the `network/yarn` module, but its YARN code depends on YARN APIs not present in older versions covered by the `yarn-alpha` profile. As a result builds like `mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 -DskipTests clean package` fail. The solution is just to not build `network/yarn` with profile `yarn-alpha`. Author: Sean Owen so...@cloudera.com Closes #3167 from srowen/SPARK-4305 and squashes the following commits: 88938cb [Sean Owen] Don't build network/yarn in yarn-alpha profile as it won't compile (cherry picked from commit f820b563d88f6a972c219d9340fe95110493fb87) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f7e80f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f7e80f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f7e80f3 Branch: refs/heads/branch-1.2 Commit: 8f7e80f30bd34897963334d0245c0ea6fccd6182 Parents: cc1f3a0 Author: Sean Owen so...@cloudera.com Authored: Tue Nov 11 12:30:35 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Tue Nov 11 12:30:56 2014 -0600 -- pom.xml | 1 - 1 file changed, 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8f7e80f3/pom.xml -- diff --git a/pom.xml b/pom.xml index 88ef67c..4e0cd6c 100644 --- a/pom.xml +++ b/pom.xml @@ -1229,7 +1229,6 @@ idyarn-alpha/id modules moduleyarn/module -modulenetwork/yarn/module /modules /profile - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4282][YARN] Stopping flag in YarnClientSchedulerBackend should be volatile
Repository: spark Updated Branches: refs/heads/master f820b563d - 7f3718842 [SPARK-4282][YARN] Stopping flag in YarnClientSchedulerBackend should be volatile In YarnClientSchedulerBackend, a variable stopping is used as a flag and it's accessed by some threads so it should be volatile. Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #3143 from sarutak/stopping-flag-volatile and squashes the following commits: 58fdcc9 [Kousuke Saruta] Marked stoppig flag as volatile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f371884 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f371884 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f371884 Branch: refs/heads/master Commit: 7f3718842cc4025bb2ee2f5a3ec12efd100f6589 Parents: f820b56 Author: Kousuke Saruta saru...@oss.nttdata.co.jp Authored: Tue Nov 11 12:33:53 2014 -0600 Committer: Thomas Graves tgra...@apache.org Committed: Tue Nov 11 12:33:53 2014 -0600 -- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7f371884/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index f6f6dc5..2923e67 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -33,7 +33,7 @@ private[spark] class YarnClientSchedulerBackend( private var client: Client = null private var appId: ApplicationId = null - private var stopping: Boolean = false + @volatile private var stopping: Boolean = false /** * Create a Yarn client to submit an 
application to the ResourceManager. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: SPARK-3837. Warn when YARN kills containers for exceeding memory limits
Repository: spark Updated Branches: refs/heads/master 58a6077e5 - acd4ac7c9 SPARK-3837. Warn when YARN kills containers for exceeding memory limits I triggered the issue and verified the message gets printed on a pseudo-distributed cluster. Author: Sandy Ryza sa...@cloudera.com Closes #2744 from sryza/sandy-spark-3837 and squashes the following commits: 858a268 [Sandy Ryza] Review feedback c937f00 [Sandy Ryza] SPARK-3837. Warn when YARN kills containers for exceeding memory limits Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acd4ac7c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acd4ac7c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acd4ac7c Branch: refs/heads/master Commit: acd4ac7c9a503445e27739708cf36e19119b8ddc Parents: 58a6077 Author: Sandy Ryza sa...@cloudera.com Authored: Fri Oct 31 08:43:06 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri Oct 31 08:43:06 2014 -0500 -- .../spark/deploy/yarn/YarnAllocator.scala | 30 +++-- .../spark/deploy/yarn/YarnAllocatorSuite.scala | 34 2 files changed, 61 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/acd4ac7c/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 7ae8ef2..e619619 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -20,6 +20,7 @@ package org.apache.spark.deploy.yarn import java.util.{List = JList} import java.util.concurrent._ import java.util.concurrent.atomic.AtomicInteger +import java.util.regex.Pattern import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} @@ -375,12 +376,22 @@ 
private[yarn] abstract class YarnAllocator( logInfo(Completed container %s (state: %s, exit status: %s).format( containerId, completedContainer.getState, -completedContainer.getExitStatus())) +completedContainer.getExitStatus)) // Hadoop 2.2.X added a ContainerExitStatus we should switch to use // there are some exit status' we shouldn't necessarily count against us, but for // now I think its ok as none of the containers are expected to exit - if (completedContainer.getExitStatus() != 0) { -logInfo(Container marked as failed: + containerId) + if (completedContainer.getExitStatus == -103) { // vmem limit exceeded +logWarning(memLimitExceededLogMessage( + completedContainer.getDiagnostics, + VMEM_EXCEEDED_PATTERN)) + } else if (completedContainer.getExitStatus == -104) { // pmem limit exceeded +logWarning(memLimitExceededLogMessage( + completedContainer.getDiagnostics, + PMEM_EXCEEDED_PATTERN)) + } else if (completedContainer.getExitStatus != 0) { +logInfo(Container marked as failed: + containerId + + . Exit status: + completedContainer.getExitStatus + + . Diagnostics: + completedContainer.getDiagnostics) numExecutorsFailed.incrementAndGet() } } @@ -428,6 +439,19 @@ private[yarn] abstract class YarnAllocator( } } + private val MEM_REGEX = [0-9.]+ [KMG]B + private val PMEM_EXCEEDED_PATTERN = +Pattern.compile(s$MEM_REGEX of $MEM_REGEX physical memory used) + private val VMEM_EXCEEDED_PATTERN = +Pattern.compile(s$MEM_REGEX of $MEM_REGEX virtual memory used) + + def memLimitExceededLogMessage(diagnostics: String, pattern: Pattern): String = { +val matcher = pattern.matcher(diagnostics) +val diag = if (matcher.find()) + matcher.group() + . else +(Container killed by YARN for exceeding memory limits. + diag + + Consider boosting spark.yarn.executor.memoryOverhead.) 
+ } + protected def allocatedContainersOnHost(host: String): Int = { var retval = 0 allocatedHostToContainersMap.synchronized { http://git-wip-us.apache.org/repos/asf/spark/blob/acd4ac7c/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala -- diff --git a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala new file mode 100644 index 000..9fff63f --- /dev/null +++
git commit: [SPARK-4116][YARN]Delete the abandoned log4j-spark-container.properties
Repository: spark Updated Branches: refs/heads/master fae095bc7 - 47346cd02 [SPARK-4116][YARN]Delete the abandoned log4j-spark-container.properties Since its name reduced at https://github.com/apache/spark/pull/560, the log4j-spark-container.properties was never used again. And I have searched its name globally in code and found no cite. Author: WangTaoTheTonic barneystin...@aliyun.com Closes #2977 from WangTaoTheTonic/delLog4j and squashes the following commits: fb2729f [WangTaoTheTonic] delete the log4j file obsoleted Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47346cd0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47346cd0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47346cd0 Branch: refs/heads/master Commit: 47346cd029abc50c70582a721810a7cceb682d8a Parents: fae095b Author: WangTaoTheTonic barneystin...@aliyun.com Authored: Tue Oct 28 08:46:31 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Oct 28 08:46:31 2014 -0500 -- .../resources/log4j-spark-container.properties | 24 1 file changed, 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/47346cd0/yarn/common/src/main/resources/log4j-spark-container.properties -- diff --git a/yarn/common/src/main/resources/log4j-spark-container.properties b/yarn/common/src/main/resources/log4j-spark-container.properties deleted file mode 100644 index a1e37a0..000 --- a/yarn/common/src/main/resources/log4j-spark-container.properties +++ /dev/null @@ -1,24 +0,0 @@ -# -# Licensed under the Apache License, Version 2.0 (the License); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. See accompanying LICENSE file. - -# Set everything to be logged to the console -log4j.rootCategory=INFO, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.eclipse.jetty=WARN -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-4098][YARN]use appUIAddress instead of appUIHostPort in yarn-client mode
Repository: spark Updated Branches: refs/heads/master e8813be65 -> 0ac52e305 [SPARK-4098][YARN]use appUIAddress instead of appUIHostPort in yarn-client mode https://issues.apache.org/jira/browse/SPARK-4098 Author: WangTaoTheTonic barneystin...@aliyun.com Closes #2958 from WangTaoTheTonic/useAddress and squashes the following commits: 29236e6 [WangTaoTheTonic] use appUIAddress instead of appUIHostPort in yarn-cluster mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ac52e30 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ac52e30 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ac52e30 Branch: refs/heads/master Commit: 0ac52e30552530b247e37a470b8503346f19605c Parents: e8813be Author: WangTaoTheTonic barneystin...@aliyun.com Authored: Tue Oct 28 09:51:44 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Oct 28 09:51:44 2014 -0500 -- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ac52e30/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index d948a2a..59b2b47 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -48,7 +48,7 @@ private[spark] class YarnClientSchedulerBackend( val driverHost = conf.get("spark.driver.host") val driverPort = conf.get("spark.driver.port") val hostport = driverHost + ":" + driverPort -sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress", ui.appUIHostPort) } +sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress",
ui.appUIAddress) } val argsArrayBuf = new ArrayBuffer[String]() argsArrayBuf += ("--arg", hostport) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces.
Repository: spark Updated Branches: refs/heads/master 35afdfd62 - 7fca8f41c [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces. HA and viewfs use namespaces instead of host names, so you can't resolve them since that will fail. So be smarter to avoid doing unnecessary work. Author: Marcelo Vanzin van...@cloudera.com Closes #2649 from vanzin/SPARK-3788 and squashes the following commits: fedbc73 [Marcelo Vanzin] Update comment. c938845 [Marcelo Vanzin] Use Objects.equal() to avoid issues with ==. 9f7b571 [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right thing for HA, federation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fca8f41 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fca8f41 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fca8f41 Branch: refs/heads/master Commit: 7fca8f41c8889a41d9ab05ad0ab39c7639f657ed Parents: 35afdfd Author: Marcelo Vanzin van...@cloudera.com Authored: Wed Oct 8 08:48:55 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Oct 8 08:48:55 2014 -0500 -- .../apache/spark/deploy/yarn/ClientBase.scala | 31 1 file changed, 12 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7fca8f41/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 6ecac6e..14a0386 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -23,6 +23,7 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} import scala.util.{Try, Success, Failure} +import com.google.common.base.Objects import org.apache.hadoop.conf.Configuration import 
org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -64,12 +65,12 @@ private[spark] trait ClientBase extends Logging { smemory capability of the cluster ($maxMem MB per container)) val executorMem = args.executorMemory + executorMemoryOverhead if (executorMem maxMem) { - throw new IllegalArgumentException(sRequired executor memory (${args.executorMemory} + + throw new IllegalArgumentException(sRequired executor memory (${args.executorMemory} + s+$executorMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!) } val amMem = args.amMemory + amMemoryOverhead if (amMem maxMem) { - throw new IllegalArgumentException(sRequired AM memory (${args.amMemory} + + throw new IllegalArgumentException(sRequired AM memory (${args.amMemory} + s+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!) } logInfo(Will allocate AM container, with %d MB memory including %d MB overhead.format( @@ -771,15 +772,17 @@ private[spark] object ClientBase extends Logging { private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { val srcUri = srcFs.getUri() val dstUri = destFs.getUri() -if (srcUri.getScheme() == null) { - return false -} -if (!srcUri.getScheme().equals(dstUri.getScheme())) { +if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { return false } + var srcHost = srcUri.getHost() var dstHost = dstUri.getHost() -if ((srcHost != null) (dstHost != null)) { + +// In HA or when using viewfs, the host part of the URI may not actually be a host, but the +// name of the HDFS namespace. Those names won't resolve, so avoid even trying if they +// match. 
+if (srcHost != null && dstHost != null && srcHost != dstHost) { try { srcHost = InetAddress.getByName(srcHost).getCanonicalHostName() dstHost = InetAddress.getByName(dstHost).getCanonicalHostName() @@ -787,19 +790,9 @@ private[spark] object ClientBase extends Logging { case e: UnknownHostException => return false } - if (!srcHost.equals(dstHost)) { -return false - } -} else if (srcHost == null && dstHost != null) { - return false -} else if (srcHost != null && dstHost == null) { - return false -} -if (srcUri.getPort() != dstUri.getPort()) { - false -} else { - true } + +Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort() } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands,
git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces (1.1 version).
Repository: spark Updated Branches: refs/heads/branch-1.1 a1f833f75 - a44af7302 [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces (1.1 version). HA and viewfs use namespaces instead of host names, so you can't resolve them since that will fail. So be smarter to avoid doing unnecessary work. Author: Marcelo Vanzin van...@cloudera.com Closes #2650 from vanzin/SPARK-3788-1.1 and squashes the following commits: 174bf71 [Marcelo Vanzin] Update comment. 0e36be7 [Marcelo Vanzin] Use Objects.equal() instead of ==. 772aead [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right thing for HA, federation (1.1 version). Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a44af730 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a44af730 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a44af730 Branch: refs/heads/branch-1.1 Commit: a44af7302f814204fdbcc7ad620bc6984b376468 Parents: a1f833f Author: Marcelo Vanzin van...@cloudera.com Authored: Wed Oct 8 08:51:17 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Oct 8 08:51:17 2014 -0500 -- .../apache/spark/deploy/yarn/ClientBase.scala | 29 1 file changed, 11 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a44af730/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 6da3b16..27ee04a 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} import scala.util.{Try, Success, Failure} +import com.google.common.base.Objects import 
org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -122,15 +123,17 @@ trait ClientBase extends Logging { private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { val srcUri = srcFs.getUri() val dstUri = destFs.getUri() -if (srcUri.getScheme() == null) { - return false -} -if (!srcUri.getScheme().equals(dstUri.getScheme())) { +if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) { return false } + var srcHost = srcUri.getHost() var dstHost = dstUri.getHost() -if ((srcHost != null) (dstHost != null)) { + +// In HA or when using viewfs, the host part of the URI may not actually be a host, but the +// name of the HDFS namespace. Those names won't resolve, so avoid even trying if they +// match. +if (srcHost != null dstHost != null srcHost != dstHost) { try { srcHost = InetAddress.getByName(srcHost).getCanonicalHostName() dstHost = InetAddress.getByName(dstHost).getCanonicalHostName() @@ -138,19 +141,9 @@ trait ClientBase extends Logging { case e: UnknownHostException = return false } - if (!srcHost.equals(dstHost)) { -return false - } -} else if (srcHost == null dstHost != null) { - return false -} else if (srcHost != null dstHost == null) { - return false -} -if (srcUri.getPort() != dstUri.getPort()) { - false -} else { - true } + +Objects.equal(srcHost, dstHost) srcUri.getPort() == dstUri.getPort() } /** Copy the file into HDFS if needed. */ @@ -621,7 +614,7 @@ object ClientBase extends Logging { YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, path, File.pathSeparator) - /** + /** * Get the list of namenodes the user may access. */ private[yarn] def getNameNodesToAccess(sparkConf: SparkConf): Set[Path] = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3848] yarn alpha doesn't build on master
Repository: spark Updated Branches: refs/heads/master 7fca8f41c - f18dd5962 [SPARK-3848] yarn alpha doesn't build on master yarn alpha build was broken by #2432 as it added an argument to YarnAllocator but not to yarn/alpha YarnAllocationHandler commit https://github.com/apache/spark/commit/79e45c9323455a51f25ed9acd0edd8682b4bbb88 Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #2715 from sarutak/SPARK-3848 and squashes the following commits: bafb8d1 [Kousuke Saruta] Fixed parameters for the default constructor of alpha/YarnAllocatorHandler. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f18dd596 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f18dd596 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f18dd596 Branch: refs/heads/master Commit: f18dd5962e4a18c3507de8147bde3a8f56380439 Parents: 7fca8f4 Author: Kousuke Saruta saru...@oss.nttdata.co.jp Authored: Wed Oct 8 11:53:43 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Oct 8 11:53:43 2014 -0500 -- .../scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f18dd596/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index 6c93d85..abd3783 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -43,7 +43,7 @@ private[yarn] class YarnAllocationHandler( args: ApplicationMasterArguments, preferredNodes: collection.Map[String, collection.Set[SplitInfo]], securityMgr: SecurityManager) - extends YarnAllocator(conf, sparkConf, args, preferredNodes, 
securityMgr) { + extends YarnAllocator(conf, sparkConf, appAttemptId, args, preferredNodes, securityMgr) { private val lastResponseId = new AtomicInteger() private val releaseList: CopyOnWriteArrayList[ContainerId] = new CopyOnWriteArrayList() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: Modify default YARN memory_overhead-- from an additive constant to a multiplier
Repository: spark Updated Branches: refs/heads/master 82a6a083a - b4fb7b80a Modify default YARN memory_overhead-- from an additive constant to a multiplier Redone against the recent master branch (https://github.com/apache/spark/pull/1391) Author: Nishkam Ravi nr...@cloudera.com Author: nravi nr...@c1704.halxg.cloudera.com Author: nishkamravi2 nishkamr...@gmail.com Closes #2485 from nishkamravi2/master_nravi and squashes the following commits: 636a9ff [nishkamravi2] Update YarnAllocator.scala 8f76c8b [Nishkam Ravi] Doc change for yarn memory overhead 35daa64 [Nishkam Ravi] Slight change in the doc for yarn memory overhead 5ac2ec1 [Nishkam Ravi] Remove out dac1047 [Nishkam Ravi] Additional documentation for yarn memory overhead issue 42c2c3d [Nishkam Ravi] Additional changes for yarn memory overhead issue 362da5e [Nishkam Ravi] Additional changes for yarn memory overhead c726bd9 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi f00fa31 [Nishkam Ravi] Improving logging for AM memoryOverhead 1cf2d1e [nishkamravi2] Update YarnAllocator.scala ebcde10 [Nishkam Ravi] Modify default YARN memory_overhead-- from an additive constant to a multiplier (redone to resolve merge conflicts) 2e69f11 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark into master_nravi efd688a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark 2b630f9 [nravi] Accept memory input as 30g, 512M instead of an int value, to be consistent with rest of Spark 3bf8fad [nravi] Merge branch 'master' of https://github.com/apache/spark 5423a03 [nravi] Merge branch 'master' of https://github.com/apache/spark eb663ca [nravi] Merge branch 'master' of https://github.com/apache/spark df2aeb1 [nravi] Improved fix for ConcurrentModificationIssue (Spark-1097, Hadoop-10456) 6b840f0 [nravi] Undo the fix for SPARK-1758 (the problem is fixed) 5108700 [nravi] Fix in Spark for the Concurrent thread modification issue (SPARK-1097, HADOOP-10456) 
681b36f [nravi] Fix for SPARK-1758: failing test org.apache.spark.JavaAPISuite.wholeTextFiles Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4fb7b80 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4fb7b80 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4fb7b80 Branch: refs/heads/master Commit: b4fb7b80a0d863500943d788ad3e34d502a6dafa Parents: 82a6a08 Author: Nishkam Ravi nr...@cloudera.com Authored: Thu Oct 2 13:48:35 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Oct 2 13:48:35 2014 -0500 -- docs/running-on-yarn.md | 8 .../apache/spark/deploy/yarn/ClientArguments.scala | 16 +--- .../org/apache/spark/deploy/yarn/ClientBase.scala | 12 .../apache/spark/deploy/yarn/YarnAllocator.scala| 16 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 8 ++-- 5 files changed, 35 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4fb7b80/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 4b3a49e..695813a 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -79,16 +79,16 @@ Most of the configs are the same for Spark on YARN as for other deployment modes /tr tr tdcodespark.yarn.executor.memoryOverhead/code/td - td384/td + tdexecutorMemory * 0.07, with minimum of 384 /td td -The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. +The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size (typically 6-10%). 
/td /tr tr tdcodespark.yarn.driver.memoryOverhead/code/td - td384/td + tddriverMemory * 0.07, with minimum of 384 /td td -The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. +The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size (typically 6-10%). /td /tr tr http://git-wip-us.apache.org/repos/asf/spark/blob/b4fb7b80/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala -- diff --git
git commit: [YARN] SPARK-2668: Add variable of yarn log directory for reference from the log4j configuration
Repository: spark Updated Branches: refs/heads/master f9d6220c7 - 14f8c3404 [YARN] SPARK-2668: Add variable of yarn log directory for reference from the log4j configuration Assign value of yarn container log directory to java opts spark.yarn.app.container.log.dir, So user defined log4j.properties can reference this value and write log to YARN container's log directory. Otherwise, user defined file appender will only write to container's CWD, and log files in CWD will not be displayed on YARN UIï¼and either cannot be aggregated to HDFS log directory after job finished. User defined log4j.properties reference example: log4j.appender.rolling_file.File = ${spark.yarn.app.container.log.dir}/spark.log Author: peng.zhang peng.zh...@xiaomi.com Closes #1573 from renozhang/yarn-log-dir and squashes the following commits: 16c5cb8 [peng.zhang] Update doc f2b5e2a [peng.zhang] Change variable's name, and update running-on-yarn.md 503ea2d [peng.zhang] Support log4j log to yarn container dir Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14f8c340 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14f8c340 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14f8c340 Branch: refs/heads/master Commit: 14f8c340402366cb998c563b3f7d9ff7d9940271 Parents: f9d6220 Author: peng.zhang peng.zh...@xiaomi.com Authored: Tue Sep 23 08:45:56 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Sep 23 08:45:56 2014 -0500 -- docs/running-on-yarn.md | 2 ++ .../src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala | 3 +++ .../scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala | 3 +++ 3 files changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 74bcc2e..4b3a49e 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -205,6 +205,8 @@ Note that for 
the first option, both executors and the application master will s log4j configuration, which may cause issues when they run on the same node (e.g. trying to write to the same log file). +If you need a reference to the proper location to put log files in the YARN so that YARN can properly display and aggregate them, use ${spark.yarn.app.container.log.dir} in your log4j.properties. For example, log4j.appender.file_appender.File=${spark.yarn.app.container.log.dir}/spark.log. For streaming application, configuring RollingFileAppender and setting file location to YARN's log directory will avoid disk overflow caused by large log file, and logs can be accessed using YARN's log utility. + # Important notes - Before Hadoop 2.2, YARN does not support cores in container resource requests. Thus, when running against an earlier version, the numbers of cores given via command line arguments cannot be passed to YARN. Whether core requests are honored in scheduling decisions depends on which scheduler is in use and how it is configured. 
http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index c96f731..6ae4d49 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -388,6 +388,9 @@ trait ClientBase extends Logging { .foreach(p = javaOpts += s-Djava.library.path=$p) } +// For log4j configuration to reference +javaOpts += -D=spark.yarn.app.container.log.dir= + ApplicationConstants.LOG_DIR_EXPANSION_VAR + val userClass = if (args.userClass != null) { Seq(--class, YarnSparkHadoopUtil.escapeForShell(args.userClass)) http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala index 312d82a..f56f72c 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala @@ -98,6 +98,9 @@ trait ExecutorRunnableUtil extends Logging { } */ +// For log4j
[1/2] [SPARK-3477] Clean up code in Yarn Client / ClientBase
Repository: spark Updated Branches: refs/heads/master 14f8c3404 - c4022dd52 http://git-wip-us.apache.org/repos/asf/spark/blob/c4022dd5/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 82e45e3..0b43e6e 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -21,11 +21,9 @@ import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.DataOutputBuffer -import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.api.records._ -import org.apache.hadoop.yarn.client.api.YarnClient +import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication} import org.apache.hadoop.yarn.conf.YarnConfiguration -import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.Records import org.apache.spark.{Logging, SparkConf} @@ -34,128 +32,98 @@ import org.apache.spark.deploy.SparkHadoopUtil /** * Version of [[org.apache.spark.deploy.yarn.ClientBase]] tailored to YARN's stable API. 
*/ -class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: SparkConf) +private[spark] class Client( +val args: ClientArguments, +val hadoopConf: Configuration, +val sparkConf: SparkConf) extends ClientBase with Logging { - val yarnClient = YarnClient.createYarnClient - def this(clientArgs: ClientArguments, spConf: SparkConf) = this(clientArgs, SparkHadoopUtil.get.newConfiguration(spConf), spConf) def this(clientArgs: ClientArguments) = this(clientArgs, new SparkConf()) - val args = clientArgs - val conf = hadoopConf - val sparkConf = spConf - var rpc: YarnRPC = YarnRPC.create(conf) - val yarnConf: YarnConfiguration = new YarnConfiguration(conf) - - def runApp(): ApplicationId = { -validateArgs() -// Initialize and start the client service. + val yarnClient = YarnClient.createYarnClient + val yarnConf = new YarnConfiguration(hadoopConf) + + def stop(): Unit = yarnClient.stop() + + /* - * + | The following methods have much in common in the stable and alpha versions of Client, | + | but cannot be implemented in the parent trait due to subtle API differences across| + | hadoop versions. | + * - */ + + /** + * Submit an application running our ApplicationMaster to the ResourceManager. + * + * The stable Yarn API provides a convenience method (YarnClient#createApplication) for + * creating applications and setting up the application submission context. This was not + * available in the alpha API. + */ + override def submitApplication(): ApplicationId = { yarnClient.init(yarnConf) yarnClient.start() -// Log details about this YARN cluster (e.g, the number of slave machines/NodeManagers). -logClusterResourceDetails() - -// Prepare to submit a request to the ResourcManager (specifically its ApplicationsManager (ASM) -// interface). +logInfo(Requesting a new application from cluster with %d NodeManagers + .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) -// Get a new client application. 
+// Get a new application from our RM val newApp = yarnClient.createApplication() val newAppResponse = newApp.getNewApplicationResponse() val appId = newAppResponse.getApplicationId() +// Verify whether the cluster has enough resources for our AM verifyClusterResources(newAppResponse) -// Set up resource and environment variables. -val appStagingDir = getAppStagingDir(appId) -val localResources = prepareLocalResources(appStagingDir) -val launchEnv = setupLaunchEnv(localResources, appStagingDir) -val amContainer = createContainerLaunchContext(newAppResponse, localResources, launchEnv) +// Set up the appropriate contexts to launch our AM +val containerContext = createContainerLaunchContext(newAppResponse) +val appContext = createApplicationSubmissionContext(newApp, containerContext) -// Set up an application submission context. -val appContext = newApp.getApplicationSubmissionContext() -appContext.setApplicationName(args.appName) -appContext.setQueue(args.amQueue) -appContext.setAMContainerSpec(amContainer) -appContext.setApplicationType(SPARK) - -// Memory for the ApplicationMaster. -val memoryResource = Records.newRecord(classOf[Resource]).asInstanceOf[Resource] -memoryResource.setMemory(args.amMemory + memoryOverhead) -
[2/2] git commit: [SPARK-3477] Clean up code in Yarn Client / ClientBase
[SPARK-3477] Clean up code in Yarn Client / ClientBase This is part of a broader effort to clean up the Yarn integration code after #2020. The high-level changes in this PR include: - Removing duplicate code, especially across the alpha and stable APIs - Simplify unnecessarily complex method signatures and hierarchies - Rename unclear variable and method names - Organize logging output produced when the user runs Spark on Yarn - Extensively add documentation - Privatize classes where possible I have tested the stable API on a Hadoop 2.4 cluster. I tested submitting a jar that references classes in other jars in both client and cluster mode. I also made changes in the alpha API, though I do not have access to an alpha cluster. I have verified that it compiles, but it would be ideal if others can help test it. For those interested in some examples in detail, please read on. ***Appendix*** - The loop to `getApplicationReport` from the RM is duplicated in 4 places: in the stable `Client`, alpha `Client`, and twice in `YarnClientSchedulerBackend`. We should not have different loops for client and cluster deploy modes. - There are many fragmented small helper methods that are only used once and should just be inlined. For instance, `ClientBase#getLocalPath` returns `null` on certain conditions, and its only caller `ClientBase#addFileToClasspath` checks whether the value returned is `null`. We could just have the caller check on that same condition to avoid passing `null`s around. - In `YarnSparkHadoopUtil#addToEnvironment`, we take in an argument `classpathSeparator` that always has the same value upstream (i.e. `File.pathSeparator`). This argument is now removed from the signature and all callers of this method upstream. - `ClientBase#copyRemoteFile` is now renamed to `copyFileToRemote`. It was unclear whether we are copying a remote file to our local file system, or copying a locally visible file to a remote file system. 
Also, even the content of the method has inaccurately named variables. We use `val remoteFs` to signify the file system of the locally visible file and `val fs` to signify the remote, destination file system. These are now renamed `srcFs` and `destFs` respectively. - We currently log the AM container's environment and resource mappings directly as Scala collections. This is incredibly hard to read and probably too verbose for the average Spark user. In other modes (e.g. standalone), we also don't log the launch commands by default, so the logging level of these information is now set to `DEBUG`. - None of these classes (`Client`, `ClientBase`, `YarnSparkHadoopUtil` etc.) is intended to be used by a Spark application (the user should go through Spark submit instead). At the very least they should be `private[spark]`. Author: Andrew Or andrewo...@gmail.com Closes #2350 from andrewor14/yarn-cleanup and squashes the following commits: 39e8c7b [Andrew Or] Address review comments 6619f9b [Andrew Or] Merge branch 'master' of github.com:apache/spark into yarn-cleanup 2ca6d64 [Andrew Or] Improve logging in application monitor a3b9693 [Andrew Or] Minor changes 7dd6298 [Andrew Or] Simplify ClientBase#monitorApplication 547487c [Andrew Or] Provide default values for null application report entries a0ad1e9 [Andrew Or] Fix class not found error 1590141 [Andrew Or] Address review comments 45ccdea [Andrew Or] Remove usages of getAMMemory d8e33b6 [Andrew Or] Merge branch 'master' of github.com:apache/spark into yarn-cleanup ed0b42d [Andrew Or] Fix alpha compilation error c0587b4 [Andrew Or] Merge branch 'master' of github.com:apache/spark into yarn-cleanup 6d74888 [Andrew Or] Minor comment changes 6573c1d [Andrew Or] Clean up, simplify and document code for setting classpaths e4779b6 [Andrew Or] Clean up log messages + variable naming in ClientBase 8766d37 [Andrew Or] Heavily add documentation to Client* classes + various clean-ups 6c94d79 [Andrew Or] Various cleanups in ClientBase 
and ClientArguments ef7069a [Andrew Or] Clean up YarnClientSchedulerBackend more 6de9072 [Andrew Or] Guard against potential NPE in debug logging mode fabe4c4 [Andrew Or] Reuse more code in YarnClientSchedulerBackend 3f941dc [Andrew Or] First cut at simplifying the Client (stable and alpha) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4022dd5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4022dd5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4022dd5 Branch: refs/heads/master Commit: c4022dd52b4827323ff956632dc7623f546da937 Parents: 14f8c34 Author: Andrew Or andrewo...@gmail.com Authored: Tue Sep 23 11:20:52 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Sep 23 11:20:52 2014 -0500 --
git commit: [SPARK-3304] [YARN] ApplicationMaster's Finish status is wrong when uncaught exception is thrown from ReporterThread
Repository: spark Updated Branches: refs/heads/master c4022dd52 - 11c10df82 [SPARK-3304] [YARN] ApplicationMaster's Finish status is wrong when uncaught exception is thrown from ReporterThread Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #2198 from sarutak/SPARK-3304 and squashes the following commits: 2696237 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 5b80363 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 4eb0a3e [Kousuke Saruta] Remoed the description about spark.yarn.scheduler.reporterThread.maxFailure 9741597 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 f7538d4 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 358ef8d [Kousuke Saruta] Merge branch 'SPARK-3304' of github.com:sarutak/spark into SPARK-3304 0d138c6 [Kousuke Saruta] Revert tmp f8da10a [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 b6e9879 [Kousuke Saruta] tmp 8d256ed [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 13b2652 [Kousuke Saruta] Merge branch 'SPARK-3304' of github.com:sarutak/spark into SPARK-3304 2711e15 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 c081f8e [Kousuke Saruta] Modified ApplicationMaster to handle exception in ReporterThread itself 0bbd3a6 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3304 a6982ad [Kousuke Saruta] Added ability handling uncaught exception thrown from Reporter thread Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/11c10df8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/11c10df8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/11c10df8 Branch: refs/heads/master Commit: 11c10df825419372df61a8d23c51e8c3cc78047f Parents: c4022dd Author: Kousuke Saruta 
saru...@oss.nttdata.co.jp Authored: Tue Sep 23 11:40:14 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Sep 23 11:40:14 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 66 1 file changed, 54 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/11c10df8/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index cde5fff..9050808 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -17,7 +17,10 @@ package org.apache.spark.deploy.yarn +import scala.util.control.NonFatal + import java.io.IOException +import java.lang.reflect.InvocationTargetException import java.net.Socket import java.util.concurrent.atomic.AtomicReference @@ -55,6 +58,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, @volatile private var finished = false @volatile private var finalStatus = FinalApplicationStatus.UNDEFINED + @volatile private var userClassThread: Thread = _ private var reporterThread: Thread = _ private var allocator: YarnAllocator = _ @@ -221,18 +225,48 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, // must be = expiryInterval / 2. 
val interval = math.max(0, math.min(expiryInterval / 2, schedulerInterval)) +// The number of failures in a row until Reporter thread give up +val reporterMaxFailures = sparkConf.getInt(spark.yarn.scheduler.reporterThread.maxFailures, 5) + val t = new Thread { override def run() { +var failureCount = 0 + while (!finished) { - checkNumExecutorsFailed() - if (!finished) { -logDebug(Sending progress) -allocator.allocateResources() -try { - Thread.sleep(interval) -} catch { - case e: InterruptedException = + try { +checkNumExecutorsFailed() +if (!finished) { + logDebug(Sending progress) + allocator.allocateResources() } +failureCount = 0 + } catch { +case e: Throwable = { + failureCount += 1 + if (!NonFatal(e) || failureCount = reporterMaxFailures) { +logError(Exception was thrown from Reporter thread., e) +finish(FinalApplicationStatus.FAILED, Exception was thrown + + s${failureCount} time(s) from Reporter
git commit: SPARK-3177 (on Master Branch)
Repository: spark Updated Branches: refs/heads/master 983609a4d - 7d1a37239 SPARK-3177 (on Master Branch) The JIRA and PR was original created for branch-1.1, and move to master branch now. Chester The Issue is due to that yarn-alpha and yarn have different APIs for certain class fields. In this particular case, the ClientBase using reflection to to address this issue, and we need to different way to test the ClientBase's method. Original ClientBaseSuite using getFieldValue() method to do this. But it doesn't work for yarn-alpha as the API returns an array of String instead of just String (which is the case for Yarn-stable API). To fix the test, I add a new method def getFieldValue2[A: ClassTag, A1: ClassTag, B](clazz: Class[_], field: String, defaults: = B) (mapTo: A = B)(mapTo1: A1 = B) : B = Try(clazz.getField(field)).map(_.get(null)).map { case v: A = mapTo(v) case v1: A1 = mapTo1(v1) case _ = defaults }.toOption.getOrElse(defaults) to handle the cases where the field type can be either type A or A1. In this new method the type A or A1 is pattern matched and corresponding mapTo function (mapTo or mapTo1) is used. Author: chesterxgchen ches...@alpinenow.com Closes #2204 from chesterxgchen/SPARK-3177-master and squashes the following commits: e72a6ea [chesterxgchen] The Issue is due to that yarn-alpha and yarn have different APIs for certain class fields. In this particular case, the ClientBase using reflection to to address this issue, and we need to different way to test the ClientBase's method. Original ClientBaseSuite using getFieldValue() method to do this. But it doesn't work for yarn-alpha as the API returns an array of String instead of just String (which is the case for Yarn-stable API). 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d1a3723 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d1a3723 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d1a3723 Branch: refs/heads/master Commit: 7d1a37239c50394025d9f16acf5dcd05cfbe7250 Parents: 983609a Author: chesterxgchen ches...@alpinenow.com Authored: Wed Sep 17 10:25:52 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Sep 17 10:25:52 2014 -0500 -- .../spark/deploy/yarn/ClientBaseSuite.scala | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d1a3723/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala -- diff --git a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala index 5480eca..c3b7a2c 100644 --- a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala +++ b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala @@ -38,6 +38,7 @@ import org.scalatest.Matchers import scala.collection.JavaConversions._ import scala.collection.mutable.{ HashMap = MutableHashMap } +import scala.reflect.ClassTag import scala.util.Try import org.apache.spark.{SparkException, SparkConf} @@ -200,9 +201,10 @@ class ClientBaseSuite extends FunSuite with Matchers { val knownDefMRAppCP: Seq[String] = - getFieldValue[String, Seq[String]](classOf[MRJobConfig], - DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH, - Seq[String]())(a = a.split(,)) + getFieldValue2[String, Array[String], Seq[String]]( +classOf[MRJobConfig], +DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH, +Seq[String]())(a = a.split(,))(a = a.toSeq) val knownYARNAppCP = Some(Seq(/known/yarn/path)) @@ -232,6 +234,17 @@ class ClientBaseSuite extends FunSuite with Matchers { def getFieldValue[A, B](clazz: Class[_], field: String, defaults: = 
B)(mapTo: A = B): B = Try(clazz.getField(field)).map(_.get(null).asInstanceOf[A]).toOption.map(mapTo).getOrElse(defaults) + def getFieldValue2[A: ClassTag, A1: ClassTag, B]( +clazz: Class[_], +field: String, +defaults: = B)(mapTo: A = B)(mapTo1: A1 = B) : B = { +Try(clazz.getField(field)).map(_.get(null)).map { + case v: A = mapTo(v) + case v1: A1 = mapTo1(v1) + case _ = defaults +}.toOption.getOrElse(defaults) + } + private class DummyClient( val args: ClientArguments, val conf: Configuration, - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail:
git commit: [SPARK-3410] The priority of shutdownhook for ApplicationMaster should not be integer literal
Repository: spark Updated Branches: refs/heads/master f493f7982 - cc1464446 [SPARK-3410] The priority of shutdownhook for ApplicationMaster should not be integer literal I think, it need to keep the priority of shutdown hook for ApplicationMaster than the priority of shutdown hook for o.a.h.FileSystem depending on changing the priority for FileSystem. Author: Kousuke Saruta saru...@oss.nttdata.co.jp Closes #2283 from sarutak/SPARK-3410 and squashes the following commits: 1d44fef [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3410 bd6cc53 [Kousuke Saruta] Modified style ee6f1aa [Kousuke Saruta] Added constant SHUTDOWN_HOOK_PRIORITY to ApplicationMaster 54eb68f [Kousuke Saruta] Changed Shutdown hook priority to 20 2f0aee3 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into SPARK-3410 4c5cb93 [Kousuke Saruta] Modified the priority for AM's shutdown hook 217d1a4 [Kousuke Saruta] Removed unused import statements 717aba2 [Kousuke Saruta] Modified ApplicationMaster to make to keep the priority of shutdown hook for ApplicationMaster higher than the priority of shutdown hook for HDFS Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc146444 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc146444 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc146444 Branch: refs/heads/master Commit: cc14644460872efb344e8d895859d70213a40840 Parents: f493f79 Author: Kousuke Saruta saru...@oss.nttdata.co.jp Authored: Mon Sep 15 08:53:58 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Mon Sep 15 08:53:58 2014 -0500 -- .../apache/spark/deploy/yarn/ApplicationMaster.scala | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cc146444/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 735d772..cde5fff 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -21,12 +21,8 @@ import java.io.IOException import java.net.Socket import java.util.concurrent.atomic.AtomicReference -import scala.collection.JavaConversions._ -import scala.util.Try - import akka.actor._ import akka.remote._ -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.util.ShutdownHookManager import org.apache.hadoop.yarn.api._ @@ -107,8 +103,11 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } } } -// Use priority 30 as it's higher than HDFS. It's the same priority MapReduce is using. -ShutdownHookManager.get().addShutdownHook(cleanupHook, 30) + +// Use higher priority than FileSystem. +assert(ApplicationMaster.SHUTDOWN_HOOK_PRIORITY FileSystem.SHUTDOWN_HOOK_PRIORITY) +ShutdownHookManager + .get().addShutdownHook(cleanupHook, ApplicationMaster.SHUTDOWN_HOOK_PRIORITY) // Call this to force generation of secret so it gets populated into the // Hadoop UGI. This has to happen before the startUserClass which does a @@ -407,6 +406,8 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, object ApplicationMaster extends Logging { + val SHUTDOWN_HOOK_PRIORITY: Int = 30 + private var master: ApplicationMaster = _ def main(args: Array[String]) = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: SPARK-3014. Log more informative messages in a couple failure scenario...
Repository: spark Updated Branches: refs/heads/master 15a564598 - 1d767967e SPARK-3014. Log a more informative messages in a couple failure scenario... ...s Author: Sandy Ryza sa...@cloudera.com Closes #1934 from sryza/sandy-spark-3014 and squashes the following commits: ae19cc1 [Sandy Ryza] SPARK-3014. Log a more informative messages in a couple failure scenarios Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d767967 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d767967 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d767967 Branch: refs/heads/master Commit: 1d767967e925f1d727957c2d43383ef6ad2c5d5e Parents: 15a5645 Author: Sandy Ryza sa...@cloudera.com Authored: Fri Sep 12 16:48:28 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri Sep 12 16:48:28 2014 -0500 -- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 6 -- .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 6 ++ 2 files changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1d767967/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0fdb5ae..5ed3575 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy import java.io.{File, PrintStream} -import java.lang.reflect.InvocationTargetException +import java.lang.reflect.{Modifier, InvocationTargetException} import java.net.URL import scala.collection.mutable.{ArrayBuffer, HashMap, Map} @@ -323,7 +323,9 @@ object SparkSubmit { } val mainMethod = mainClass.getMethod(main, new Array[String](0).getClass) - +if (!Modifier.isStatic(mainMethod.getModifiers)) { + throw new IllegalStateException(The main 
method in the given main class must be static) +} try { mainMethod.invoke(null, childArgs.toArray) } catch { http://git-wip-us.apache.org/repos/asf/spark/blob/1d767967/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 878b6db..735d772 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -283,11 +283,9 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, } val sparkContext = sparkContextRef.get() -assert(sparkContext != null || count = numTries) if (sparkContext == null) { - logError( -Unable to retrieve sparkContext inspite of waiting for %d, numTries = %d.format( - count * waitTime, numTries)) + logError((SparkContext did not initialize after waiting for %d ms. Please check earlier ++ log output for errors. Failing the application.).format(numTries * waitTime)) } sparkContext } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-2140] Updating heap memory calculation for YARN stable and alpha.
Repository: spark Updated Branches: refs/heads/branch-1.1 e51ce9a55 - 06fb2d057 [SPARK-2140] Updating heap memory calculation for YARN stable and alpha. Updated pull request, reflecting YARN stable and alpha states. I am getting intermittent test failures on my own test infrastructure. Is that tracked anywhere yet? Author: Chris Cope cc...@resilientscience.com Closes #2253 from copester/master and squashes the following commits: 5ad89da [Chris Cope] [SPARK-2140] Removing calculateAMMemory functions since they are no longer needed. 52b4e45 [Chris Cope] [SPARK-2140] Updating heap memory calculation for YARN stable and alpha. (cherry picked from commit ed1980ffa9ccb87d76694ba910ef22df034bca49) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06fb2d05 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06fb2d05 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06fb2d05 Branch: refs/heads/branch-1.1 Commit: 06fb2d057beb50e9b690bf8b6d5bb7bdb16d8546 Parents: e51ce9a Author: Chris Cope cc...@resilientscience.com Authored: Thu Sep 11 08:13:07 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Sep 11 08:13:27 2014 -0500 -- .../main/scala/org/apache/spark/deploy/yarn/Client.scala| 8 .../scala/org/apache/spark/deploy/yarn/ClientBase.scala | 4 +--- .../org/apache/spark/deploy/yarn/ClientBaseSuite.scala | 3 --- .../main/scala/org/apache/spark/deploy/yarn/Client.scala| 9 - 4 files changed, 1 insertion(+), 23 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 9be7854..3607eed 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -102,14 +102,6 @@ class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: Spa appContext } - def calculateAMMemory(newApp: GetNewApplicationResponse): Int = { -val minResMemory = newApp.getMinimumResourceCapability().getMemory() -val amMemory = ((args.amMemory / minResMemory) * minResMemory) + - ((if ((args.amMemory % minResMemory) == 0) 0 else minResMemory) - - memoryOverhead) -amMemory - } - def setupSecurityToken(amContainer: ContainerLaunchContext) = { // Setup security tokens. val dob = new DataOutputBuffer() http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 3897b3a..6da3b16 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -309,8 +309,6 @@ trait ClientBase extends Logging { retval.toString } - def calculateAMMemory(newApp: GetNewApplicationResponse): Int - def setupSecurityToken(amContainer: ContainerLaunchContext) def createContainerLaunchContext( @@ -353,7 +351,7 @@ trait ClientBase extends Logging { } amContainer.setEnvironment(env) -val amMemory = calculateAMMemory(newApp) +val amMemory = args.amMemory val javaOpts = ListBuffer[String]() http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala -- diff --git a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala index 68cc289..5480eca 100644 --- a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala +++ 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala @@ -238,9 +238,6 @@ class ClientBaseSuite extends FunSuite with Matchers { val sparkConf: SparkConf, val yarnConf: YarnConfiguration) extends ClientBase { -override def calculateAMMemory(newApp: GetNewApplicationResponse): Int = - throw new UnsupportedOperationException() - override def setupSecurityToken(amContainer: ContainerLaunchContext): Unit = throw new UnsupportedOperationException()
git commit: [SPARK-3286] - Cannot view ApplicationMaster UI when Yarn’s url scheme i...
Repository: spark Updated Branches: refs/heads/master b734ed0c2 - 6f7a76838 [SPARK-3286] - Cannot view ApplicationMaster UI when Yarn's url scheme i... ...s https Author: Benoy Antony be...@apache.org Closes #2276 from benoyantony/SPARK-3286 and squashes the following commits: c3d51ee [Benoy Antony] Use address with scheme, but Alpha version removes the scheme e82f94e [Benoy Antony] Use address with scheme, but Alpha version removes the scheme 92127c9 [Benoy Antony] rebasing from master 450c536 [Benoy Antony] [SPARK-3286] - Cannot view ApplicationMaster UI when Yarn's url scheme is https f060c02 [Benoy Antony] [SPARK-3286] - Cannot view ApplicationMaster UI when Yarn's url scheme is https Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f7a7683 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f7a7683 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f7a7683 Branch: refs/heads/master Commit: 6f7a76838f15687583e3b0ab43309a3c079368c4 Parents: b734ed0 Author: Benoy Antony be...@apache.org Authored: Wed Sep 10 11:59:39 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Sep 10 11:59:39 2014 -0500 -- .../scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala| 4 +++- .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6f7a7683/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala index ad27a9a..fc30953 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.yarn import
scala.collection.{Map, Set} +import java.net.URI; import org.apache.hadoop.net.NetUtils import org.apache.hadoop.yarn.api._ @@ -97,7 +98,8 @@ private class YarnRMClientImpl(args: ApplicationMasterArguments) extends YarnRMC // Users can then monitor stderr/stdout on that node if required. appMasterRequest.setHost(Utils.localHostName()) appMasterRequest.setRpcPort(0) -appMasterRequest.setTrackingUrl(uiAddress) +//remove the scheme from the url if it exists since Hadoop does not expect scheme +appMasterRequest.setTrackingUrl(new URI(uiAddress).getAuthority()) resourceManager.registerApplicationMaster(appMasterRequest) } http://git-wip-us.apache.org/repos/asf/spark/blob/6f7a7683/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index a879c83..5756263 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -189,7 +189,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments, if (sc == null) { finish(FinalApplicationStatus.FAILED, Timed out waiting for SparkContext.) } else { - registerAM(sc.ui.appUIHostPort, securityMgr) + registerAM(sc.ui.appUIAddress, securityMgr) try { userThread.join() } finally { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: SPARK-1713. Use a thread pool for launching executors.
Repository: spark Updated Branches: refs/heads/master 26503fdf2 - 1f4a648d4 SPARK-1713. Use a thread pool for launching executors. This patch copies the approach used in the MapReduce application master for launching containers. Author: Sandy Ryza sa...@cloudera.com Closes #663 from sryza/sandy-spark-1713 and squashes the following commits: 036550d [Sandy Ryza] SPARK-1713. [YARN] Use a threadpool for launching executor containers Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f4a648d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f4a648d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f4a648d Branch: refs/heads/master Commit: 1f4a648d4e30e837d6cf3ea8de1808e2254ad70b Parents: 26503fd Author: Sandy Ryza sa...@cloudera.com Authored: Wed Sep 10 14:34:24 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Sep 10 14:34:24 2014 -0500 -- docs/running-on-yarn.md | 7 +++ .../org/apache/spark/deploy/yarn/YarnAllocator.scala | 14 -- 2 files changed, 19 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1f4a648d/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 943f06b..d8b22f3 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -125,6 +125,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes the environment of the executor launcher. /td /tr +tr + tdcodespark.yarn.containerLauncherMaxThreads/code/td + td25/td + td +The maximum number of threads to use in the application master for launching executor containers. 
+ /td +/tr /table # Launching Spark on YARN http://git-wip-us.apache.org/repos/asf/spark/blob/1f4a648d/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 02b9a81..0b8744f 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.yarn import java.util.{List = JList} -import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent._ import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConversions._ @@ -32,6 +32,8 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkEnv} import org.apache.spark.scheduler.{SplitInfo, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import com.google.common.util.concurrent.ThreadFactoryBuilder + object AllocationType extends Enumeration { type AllocationType = Value val HOST, RACK, ANY = Value @@ -95,6 +97,14 @@ private[yarn] abstract class YarnAllocator( protected val (preferredHostToCount, preferredRackToCount) = generateNodeToWeight(conf, preferredNodes) + private val launcherPool = new ThreadPoolExecutor( +// max pool size of Integer.MAX_VALUE is ignored because we use an unbounded queue +sparkConf.getInt(spark.yarn.containerLauncherMaxThreads, 25), Integer.MAX_VALUE, +1, TimeUnit.MINUTES, +new LinkedBlockingQueue[Runnable](), +new ThreadFactoryBuilder().setNameFormat(ContainerLauncher #%d).setDaemon(true).build()) + launcherPool.allowCoreThreadTimeOut(true) + def getNumExecutorsRunning: Int = numExecutorsRunning.intValue def getNumExecutorsFailed: Int = numExecutorsFailed.intValue @@ -283,7 +293,7 @@ private[yarn] abstract class YarnAllocator( executorMemory, executorCores, 
securityMgr) - new Thread(executorRunnable).start() + launcherPool.execute(executorRunnable) } } logDebug( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3260] yarn - pass acls along with executor launch
Repository: spark Updated Branches: refs/heads/master 6a37ed838 - 51b53a758 [SPARK-3260] yarn - pass acls along with executor launch Pass along the acl settings when we launch a container so that they can be applied to viewing the logs on a running NodeManager. Author: Thomas Graves tgra...@apache.org Closes #2185 from tgravescs/SPARK-3260 and squashes the following commits: 6f94b5a [Thomas Graves] make unit test more robust 28b9dd3 [Thomas Graves] yarn - pass acls along with executor launch Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51b53a75 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51b53a75 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51b53a75 Branch: refs/heads/master Commit: 51b53a758c85f2e20ad9bd73ed815fcfa9c7180b Parents: 6a37ed8 Author: Thomas Graves tgra...@apache.org Authored: Fri Sep 5 09:54:40 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri Sep 5 09:54:40 2014 -0500 -- .../spark/deploy/yarn/ExecutorRunnable.scala| 7 +- .../deploy/yarn/YarnAllocationHandler.scala | 7 +- .../spark/deploy/yarn/YarnRMClientImpl.scala| 7 +- .../spark/deploy/yarn/ApplicationMaster.scala | 13 ++-- .../apache/spark/deploy/yarn/ClientBase.scala | 6 +- .../spark/deploy/yarn/YarnAllocator.scala | 10 +-- .../apache/spark/deploy/yarn/YarnRMClient.scala | 5 +- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 11 ++- .../deploy/yarn/YarnSparkHadoopUtilSuite.scala | 76 +++- .../spark/deploy/yarn/ExecutorRunnable.scala| 7 +- .../deploy/yarn/YarnAllocationHandler.scala | 7 +- .../spark/deploy/yarn/YarnRMClientImpl.scala| 7 +- 12 files changed, 129 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/51b53a75/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 7dae248..10cbeb8 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.{Apps, ConverterUtils, Records, ProtoUtils} -import org.apache.spark.{SparkConf, Logging} +import org.apache.spark.{SecurityManager, SparkConf, Logging} class ExecutorRunnable( @@ -46,7 +46,8 @@ class ExecutorRunnable( slaveId: String, hostname: String, executorMemory: Int, -executorCores: Int) +executorCores: Int, +securityMgr: SecurityManager) extends Runnable with ExecutorRunnableUtil with Logging { var rpc: YarnRPC = YarnRPC.create(conf) @@ -86,6 +87,8 @@ class ExecutorRunnable( logInfo(Setting up executor with commands: + commands) ctx.setCommands(commands) + ctx.setApplicationACLs(YarnSparkHadoopUtil.getApplicationAclsForYarn(securityMgr)) + // Send the start request to the ContainerManager val startReq = Records.newRecord(classOf[StartContainerRequest]) .asInstanceOf[StartContainerRequest] http://git-wip-us.apache.org/repos/asf/spark/blob/51b53a75/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index 9f9e16c..85d6274 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConversions._ import scala.collection.mutable.{ArrayBuffer, HashMap} -import org.apache.spark.SparkConf +import 
org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.scheduler.SplitInfo import org.apache.hadoop.conf.Configuration @@ -41,8 +41,9 @@ private[yarn] class YarnAllocationHandler( resourceManager: AMRMProtocol, appAttemptId: ApplicationAttemptId, args: ApplicationMasterArguments, -preferredNodes: collection.Map[String, collection.Set[SplitInfo]]) - extends YarnAllocator(conf, sparkConf, args, preferredNodes) { +preferredNodes: collection.Map[String, collection.Set[SplitInfo]], +securityMgr: SecurityManager) + extends YarnAllocator(conf, sparkConf, args, preferredNodes, securityMgr)
git commit: [SPARK-3375] spark on yarn container allocation issues
Repository: spark Updated Branches: refs/heads/master 51b53a758 - 62c557609 [SPARK-3375] spark on yarn container allocation issues If yarn doesn't get the containers immediately it stops asking for them and the yarn application hangs with never getting any executors. The issue here is that we are sending the number of containers as 0 after we send the original one of X. on the yarn side this clears out the original request. For a ping we should just send empty asks. Author: Thomas Graves tgra...@apache.org Closes #2275 from tgravescs/SPARK-3375 and squashes the following commits: 74b6820 [Thomas Graves] send empty resource requests when we aren't asking for containers Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62c55760 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62c55760 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62c55760 Branch: refs/heads/master Commit: 62c557609929982eeec170fe12f810bedfcf97f2 Parents: 51b53a7 Author: Thomas Graves tgra...@apache.org Authored: Fri Sep 5 09:56:22 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Fri Sep 5 09:56:22 2014 -0500 -- .../spark/deploy/yarn/YarnAllocationHandler.scala | 13 +++-- .../spark/deploy/yarn/YarnAllocationHandler.scala | 8 +--- 2 files changed, 12 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/62c55760/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index 85d6274..5a1b42c 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -51,12 +51,13 @@ private[yarn] class YarnAllocationHandler( override protected def 
allocateContainers(count: Int): YarnAllocateResponse = { var resourceRequests: List[ResourceRequest] = null -// default. -if (count = 0 || preferredHostToCount.isEmpty) { - logDebug(numExecutors: + count + , host preferences: + -preferredHostToCount.isEmpty) - resourceRequests = List(createResourceRequest( -AllocationType.ANY, null, count, YarnSparkHadoopUtil.RM_REQUEST_PRIORITY)) +logDebug(numExecutors: + count) +if (count = 0) { + resourceRequests = List() +} else if (preferredHostToCount.isEmpty) { +logDebug(host preferences is empty) +resourceRequests = List(createResourceRequest( + AllocationType.ANY, null, count, YarnSparkHadoopUtil.RM_REQUEST_PRIORITY)) } else { // request for all hosts in preferred nodes and for numExecutors - // candidates.size, request by default allocation policy. http://git-wip-us.apache.org/repos/asf/spark/blob/62c55760/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala -- diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index c887cb5..5438f15 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -88,9 +88,11 @@ private[yarn] class YarnAllocationHandler( private def addResourceRequests(numExecutors: Int) { val containerRequests: List[ContainerRequest] = - if (numExecutors = 0 || preferredHostToCount.isEmpty) { -logDebug(numExecutors: + numExecutors + , host preferences: + - preferredHostToCount.isEmpty) + if (numExecutors = 0) { +logDebug(numExecutors: + numExecutors) +List() + } else if (preferredHostToCount.isEmpty) { +logDebug(host preferences is empty) createResourceRequests( AllocationType.ANY, resource = null, - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3347] [yarn] Fix yarn-alpha compilation.
Repository: spark Updated Branches: refs/heads/master 8f1f9aaf4 - 066f31a6b [SPARK-3347] [yarn] Fix yarn-alpha compilation. Missing import. Oops. Author: Marcelo Vanzin van...@cloudera.com Closes #2236 from vanzin/SPARK-3347 and squashes the following commits: 594fc39 [Marcelo Vanzin] [SPARK-3347] [yarn] Fix yarn-alpha compilation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/066f31a6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/066f31a6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/066f31a6 Branch: refs/heads/master Commit: 066f31a6b213121441fc9618abd5bae4a706a215 Parents: 8f1f9aa Author: Marcelo Vanzin van...@cloudera.com Authored: Tue Sep 2 13:33:23 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Sep 2 13:33:23 2014 -0500 -- yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/066f31a6/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 12f1cd3..10fc39b 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.{Apps, Records} import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil /** * Version of [[org.apache.spark.deploy.yarn.ClientBase]] tailored to YARN's alpha API. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/2] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.
Repository: spark Updated Branches: refs/heads/master 6f671d04f - b92d823ad http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala index 3474112..d162b4c 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala @@ -19,22 +19,21 @@ package org.apache.spark.scheduler.cluster import org.apache.spark._ import org.apache.hadoop.conf.Configuration -import org.apache.spark.deploy.yarn.YarnAllocationHandler +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.util.Utils /** - * - * This scheduler launches executors through Yarn - by calling into Client to launch ExecutorLauncher as AM. + * This scheduler launches executors through Yarn - by calling into Client to launch the Spark AM. 
*/ -private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) { +private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) + extends TaskSchedulerImpl(sc) { def this(sc: SparkContext) = this(sc, new Configuration()) // By default, rack is unknown override def getRackForHost(hostPort: String): Option[String] = { val host = Utils.parseHostPort(hostPort)._1 -val retval = YarnAllocationHandler.lookupRack(conf, host) -if (retval != null) Some(retval) else None +Option(YarnSparkHadoopUtil.lookupRack(conf, host)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 833e249..a5f537d 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler.cluster import org.apache.hadoop.yarn.api.records.{ApplicationId, YarnApplicationState} import org.apache.spark.{SparkException, Logging, SparkContext} -import org.apache.spark.deploy.yarn.{Client, ClientArguments, ExecutorLauncher, YarnSparkHadoopUtil} +import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} import org.apache.spark.scheduler.TaskSchedulerImpl import scala.collection.mutable.ArrayBuffer @@ -60,10 +60,7 @@ private[spark] class YarnClientSchedulerBackend( val argsArrayBuf = new ArrayBuffer[String]() argsArrayBuf += ( - --class, notused, - --jar, null, // The primary jar will be added dynamically in SparkContext. 
- --args, hostport, - --am-class, classOf[ExecutorLauncher].getName + --args, hostport ) // process any optional arguments, given either as environment variables http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala index 9aeca4a..69f4022 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala @@ -18,16 +18,17 @@ package org.apache.spark.scheduler.cluster import org.apache.spark._ -import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler} +import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.util.Utils import org.apache.hadoop.conf.Configuration /** - * - * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of ApplicationMaster, etc is done + * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of + * ApplicationMaster, etc is done */ -private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) { +private[spark]
[2/2] git commit: [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.
[SPARK-2933] [yarn] Refactor and cleanup Yarn AM code. This change modifies the Yarn module so that all the logic related to running the ApplicationMaster is localized. Instead of, previously, 4 different classes with mostly identical code, now we have: - A single, shared ApplicationMaster class, which can operate both in client and cluster mode, and substitutes the old ApplicationMaster (for cluster mode) and ExecutorLauncher (for client mode). The benefit here is that all different execution modes for all supported yarn versions use the same shared code for monitoring executor allocation, setting up configuration, and monitoring the process's lifecycle. - A new YarnRMClient interface, which defines basic RM functionality needed by the ApplicationMaster. This interface has concrete implementations for each supported Yarn version. - A new YarnAllocator interface, which just abstracts the existing interface of the YarnAllocationHandler class. This is to avoid having to touch the allocator code too much in this change, although it might benefit from a similar effort in the future. The end result is much easier to understand code, with much less duplication, making it much easier to fix bugs, add features, and test everything knowing that all supported versions will behave the same. Author: Marcelo Vanzin van...@cloudera.com Closes #2020 from vanzin/SPARK-2933 and squashes the following commits: 3bbf3e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 ff389ed [Marcelo Vanzin] Do not interrupt reporter thread from within itself. 3a8ed37 [Marcelo Vanzin] Remote stale comment. 0f5142c [Marcelo Vanzin] Review feedback. 41f8c8a [Marcelo Vanzin] Fix app status reporting. c0794be [Marcelo Vanzin] Correctly clean up staging directory. 92770cc [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 ecaf332 [Marcelo Vanzin] Small fix to shutdown code. f02d3f8 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 f581122 [Marcelo Vanzin] Review feedback. 
557fdeb [Marcelo Vanzin] Cleanup a couple more constants. be6068d [Marcelo Vanzin] Restore shutdown hook to clean up staging dir. 5150993 [Marcelo Vanzin] Some more cleanup. b6289ab [Marcelo Vanzin] Move cluster/client code to separate methods. ecb23cd [Marcelo Vanzin] More trivial cleanup. 34f1e63 [Marcelo Vanzin] Fix some questionable error handling. 5657c7d [Marcelo Vanzin] Finish app if SparkContext initialization times out. 0e4be3d [Marcelo Vanzin] Keep ExecutorLauncher as the main class for client-mode AM. 91beabb [Marcelo Vanzin] Fix UI filter registration. 8c72239 [Marcelo Vanzin] Trivial cleanups. 99a52d5 [Marcelo Vanzin] Changes to the yarn-alpha project to use common AM code. 848ca6d [Marcelo Vanzin] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b92d823a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b92d823a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b92d823a Branch: refs/heads/master Commit: b92d823ad13f6fcc325eeb99563bea543871c6aa Parents: 6f671d0 Author: Marcelo Vanzin van...@cloudera.com Authored: Wed Aug 27 11:02:04 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Aug 27 11:02:04 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 453 --- .../spark/deploy/yarn/ExecutorLauncher.scala| 315 - .../deploy/yarn/YarnAllocationHandler.scala | 192 ++-- .../spark/deploy/yarn/YarnRMClientImpl.scala| 103 + .../spark/deploy/yarn/ApplicationMaster.scala | 430 ++ .../yarn/ApplicationMasterArguments.scala | 26 +- .../spark/deploy/yarn/ClientArguments.scala | 9 +- .../apache/spark/deploy/yarn/ClientBase.scala | 54 ++- .../spark/deploy/yarn/YarnAllocator.scala | 34 ++ .../apache/spark/deploy/yarn/YarnRMClient.scala | 67 +++ .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 51 +++ .../cluster/YarnClientClusterScheduler.scala| 11 +- .../cluster/YarnClientSchedulerBackend.scala| 7 +- 
.../cluster/YarnClusterScheduler.scala | 17 +- .../spark/deploy/yarn/ApplicationMaster.scala | 413 - .../spark/deploy/yarn/ExecutorLauncher.scala| 276 --- .../deploy/yarn/YarnAllocationHandler.scala | 196 ++-- .../spark/deploy/yarn/YarnRMClientImpl.scala| 76 18 files changed, 892 insertions(+), 1838 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
git commit: [SPARK-3072] YARN - Exit when reach max number failed executors
Repository: spark Updated Branches: refs/heads/master cd0720ca7 - 7eb9cbc27 [SPARK-3072] YARN - Exit when reach max number failed executors In some cases on hadoop 2.x the spark application master doesn't properly exit and hangs around for 10 minutes after its really done. We should make sure it exits properly and stops the driver. Author: Thomas Graves tgra...@apache.org Closes #2022 from tgravescs/SPARK-3072 and squashes the following commits: 665701d [Thomas Graves] Exit when reach max number failed executors Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7eb9cbc2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7eb9cbc2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7eb9cbc2 Branch: refs/heads/master Commit: 7eb9cbc273d758522e787fcb2ef68ef65911475f Parents: cd0720c Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 19 09:40:31 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 19 09:40:31 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 33 +--- .../spark/deploy/yarn/ExecutorLauncher.scala| 5 +-- .../spark/deploy/yarn/ApplicationMaster.scala | 16 +++--- .../spark/deploy/yarn/ExecutorLauncher.scala| 5 +-- 4 files changed, 40 insertions(+), 19 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7eb9cbc2/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 62b5c3b..46a01f5 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -267,12 +267,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, // TODO: This is a bit ugly. Can we make it nicer? 
// TODO: Handle container failure - // Exists the loop if the user thread exits. - while (yarnAllocator.getNumExecutorsRunning args.numExecutors userThread.isAlive) { -if (yarnAllocator.getNumExecutorsFailed = maxNumExecutorFailures) { - finishApplicationMaster(FinalApplicationStatus.FAILED, -max number of executor failures reached) -} + // Exits the loop if the user thread exits. + while (yarnAllocator.getNumExecutorsRunning args.numExecutors userThread.isAlive + !isFinished) { +checkNumExecutorsFailed() yarnAllocator.allocateContainers( math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0)) Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL) @@ -303,11 +301,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, val t = new Thread { override def run() { -while (userThread.isAlive) { - if (yarnAllocator.getNumExecutorsFailed = maxNumExecutorFailures) { -finishApplicationMaster(FinalApplicationStatus.FAILED, - max number of executor failures reached) - } +while (userThread.isAlive !isFinished) { + checkNumExecutorsFailed() val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning if (missingExecutorCount 0) { logInfo(Allocating %d containers to make up for (potentially) lost containers. 
@@ -327,6 +322,22 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, t } + private def checkNumExecutorsFailed() { +if (yarnAllocator.getNumExecutorsFailed = maxNumExecutorFailures) { + logInfo(max number of executor failures reached) + finishApplicationMaster(FinalApplicationStatus.FAILED, +max number of executor failures reached) + // make sure to stop the user thread + val sparkContext = ApplicationMaster.sparkContextRef.get() + if (sparkContext != null) { +logInfo(Invoking sc stop from checkNumExecutorsFailed) +sparkContext.stop() + } else { +logError(sparkContext is null when should shutdown) + } +} + } + private def sendProgress() { logDebug(Sending progress) // Simulated with an allocate request with no nodes requested ... http://git-wip-us.apache.org/repos/asf/spark/blob/7eb9cbc2/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
git commit: SPARK-1528 - spark on yarn, add support for accessing remote HDFS
Repository: spark Updated Branches: refs/heads/master e87075df9 - 2c0f705e2 SPARK-1528 - spark on yarn, add support for accessing remote HDFS Add a config (spark.yarn.access.namenodes) to allow applications running on yarn to access other secure HDFS cluster. User just specifies the namenodes of the other clusters and we get Tokens for those and ship them with the spark application. Author: Thomas Graves tgra...@apache.org Closes #1159 from tgravescs/spark-1528 and squashes the following commits: ddbcd16 [Thomas Graves] review comments 0ac8501 [Thomas Graves] SPARK-1528 - add support for accessing remote HDFS Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c0f705e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c0f705e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c0f705e Branch: refs/heads/master Commit: 2c0f705e26ca3dfc43a1e9a0722c0e57f67c970a Parents: e87075d Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 12:48:26 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 12:48:26 2014 -0500 -- docs/running-on-yarn.md | 7 +++ .../apache/spark/deploy/yarn/ClientBase.scala | 56 ++-- .../spark/deploy/yarn/ClientBaseSuite.scala | 55 ++- 3 files changed, 101 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c0f705e/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 0362f5a..573930d 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -106,6 +106,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes set this configuration to hdfs:///some/path. /td /tr +tr + tdcodespark.yarn.access.namenodes/code/td + td(none)/td + td +A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. 
The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters. + /td +/tr /table # Launching Spark on YARN http://git-wip-us.apache.org/repos/asf/spark/blob/2c0f705e/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index b7e8636..ed8f56a 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission import org.apache.hadoop.mapred.Master import org.apache.hadoop.mapreduce.MRJobConfig -import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -191,23 +191,11 @@ trait ClientBase extends Logging { // Upload Spark and the application JAR to the remote file system if necessary. Add them as // local resources to the application master. 
val fs = FileSystem.get(conf) - -val delegTokenRenewer = Master.getMasterPrincipal(conf) -if (UserGroupInformation.isSecurityEnabled()) { - if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { -val errorMessage = Can't get Master Kerberos principal for use as renewer -logError(errorMessage) -throw new SparkException(errorMessage) - } -} val dst = new Path(fs.getHomeDirectory(), appStagingDir) -val replication = sparkConf.getInt(spark.yarn.submit.file.replication, 3).toShort - -if (UserGroupInformation.isSecurityEnabled()) { - val dstFs = dst.getFileSystem(conf) - dstFs.addDelegationTokens(delegTokenRenewer, credentials) -} +val nns = ClientBase.getNameNodesToAccess(sparkConf) + dst +ClientBase.obtainTokensForNamenodes(nns, conf, credentials) +val replication = sparkConf.getInt(spark.yarn.submit.file.replication, 3).toShort val localResources = HashMap[String, LocalResource]() FileSystem.mkdirs(fs, dst, new FsPermission(STAGING_DIR_PERMISSION)) @@ -614,4 +602,40 @@ object ClientBase extends Logging {
git commit: SPARK-1528 - spark on yarn, add support for accessing remote HDFS
Repository: spark Updated Branches: refs/heads/branch-1.1 b92a45058 - 6c0c65fc8 SPARK-1528 - spark on yarn, add support for accessing remote HDFS Add a config (spark.yarn.access.namenodes) to allow applications running on yarn to access other secure HDFS cluster. User just specifies the namenodes of the other clusters and we get Tokens for those and ship them with the spark application. Author: Thomas Graves tgra...@apache.org Closes #1159 from tgravescs/spark-1528 and squashes the following commits: ddbcd16 [Thomas Graves] review comments 0ac8501 [Thomas Graves] SPARK-1528 - add support for accessing remote HDFS (cherry picked from commit 2c0f705e26ca3dfc43a1e9a0722c0e57f67c970a) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c0c65fc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c0c65fc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c0c65fc Branch: refs/heads/branch-1.1 Commit: 6c0c65fc85677ab2cae819a546ea50ed660994c3 Parents: b92a450 Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 12:48:26 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 12:48:51 2014 -0500 -- docs/running-on-yarn.md | 7 +++ .../apache/spark/deploy/yarn/ClientBase.scala | 56 ++-- .../spark/deploy/yarn/ClientBaseSuite.scala | 55 ++- 3 files changed, 101 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6c0c65fc/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 0362f5a..573930d 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -106,6 +106,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes set this configuration to hdfs:///some/path. /td /tr +tr + tdcodespark.yarn.access.namenodes/code/td + td(none)/td + td +A list of secure HDFS namenodes your Spark application is going to access. 
For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters. + /td +/tr /table # Launching Spark on YARN http://git-wip-us.apache.org/repos/asf/spark/blob/6c0c65fc/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index b7e8636..ed8f56a 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -29,7 +29,7 @@ import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission import org.apache.hadoop.mapred.Master import org.apache.hadoop.mapreduce.MRJobConfig -import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -191,23 +191,11 @@ trait ClientBase extends Logging { // Upload Spark and the application JAR to the remote file system if necessary. Add them as // local resources to the application master. 
val fs = FileSystem.get(conf) - -val delegTokenRenewer = Master.getMasterPrincipal(conf) -if (UserGroupInformation.isSecurityEnabled()) { - if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { -val errorMessage = Can't get Master Kerberos principal for use as renewer -logError(errorMessage) -throw new SparkException(errorMessage) - } -} val dst = new Path(fs.getHomeDirectory(), appStagingDir) -val replication = sparkConf.getInt(spark.yarn.submit.file.replication, 3).toShort - -if (UserGroupInformation.isSecurityEnabled()) { - val dstFs = dst.getFileSystem(conf) - dstFs.addDelegationTokens(delegTokenRenewer, credentials) -} +val nns = ClientBase.getNameNodesToAccess(sparkConf) + dst +ClientBase.obtainTokensForNamenodes(nns, conf, credentials) +val replication = sparkConf.getInt(spark.yarn.submit.file.replication, 3).toShort val localResources = HashMap[String, LocalResource]()
git commit: SPARK-1890 and SPARK-1891- add admin and modify acls
Repository: spark Updated Branches: refs/heads/master 2c0f705e2 - 1ca23 SPARK-1890 and SPARK-1891- add admin and modify acls It was easier to combine these 2 jira since they touch many of the same places. This pr adds the following: - adds modify acls - adds admin acls (list of admins/users that get added to both view and modify acls) - modify Kill button on UI to take modify acls into account - changes config name of spark.ui.acls.enable to spark.acls.enable since I choose poorly in original name. We keep backwards compatibility so people can still use spark.ui.acls.enable. The acls should apply to any web ui as well as any CLI interfaces. - send view and modify acls information on to YARN so that YARN interfaces can use (yarn cli for killing applications for example). Author: Thomas Graves tgra...@apache.org Closes #1196 from tgravescs/SPARK-1890 and squashes the following commits: 8292eb1 [Thomas Graves] review comments b92ec89 [Thomas Graves] remove unneeded variable from applistener 4c765f4 [Thomas Graves] Add in admin acls 72eb0ac [Thomas Graves] Add modify acls Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1ca2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1ca2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1ca2 Branch: refs/heads/master Commit: 1ca23d3aa40423d658cfbf2c956ad415a6b1 Parents: 2c0f705 Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 12:52:52 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 12:52:52 2014 -0500 -- .../org/apache/spark/SecurityManager.scala | 107 --- .../deploy/history/FsHistoryProvider.scala | 4 +- .../scheduler/ApplicationEventListener.scala| 4 +- .../apache/spark/ui/jobs/JobProgressTab.scala | 2 +- .../org/apache/spark/SecurityManagerSuite.scala | 83 -- docs/configuration.md | 27 - docs/security.md| 7 +- .../apache/spark/deploy/yarn/ClientBase.scala | 9 +- 8 files changed, 206 insertions(+), 37 
deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1ca2/core/src/main/scala/org/apache/spark/SecurityManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index 74aa441..25c2c9f 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -41,10 +41,19 @@ import org.apache.spark.deploy.SparkHadoopUtil * secure the UI if it has data that other users should not be allowed to see. The javax * servlet filter specified by the user can authenticate the user and then once the user * is logged in, Spark can compare that user versus the view acls to make sure they are - * authorized to view the UI. The configs 'spark.ui.acls.enable' and 'spark.ui.view.acls' + * authorized to view the UI. The configs 'spark.acls.enable' and 'spark.ui.view.acls' * control the behavior of the acls. Note that the person who started the application * always has view access to the UI. * + * Spark has a set of modify acls (`spark.modify.acls`) that controls which users have permission + * to modify a single application. This would include things like killing the application. By + * default the person who started the application has modify access. For modify access through + * the UI, you must have a filter that does authentication in place for the modify acls to work + * properly. + * + * Spark also has a set of admin acls (`spark.admin.acls`) which is a set of users/administrators + * who always have permission to view or modify the Spark application. + * * Spark does not currently support encryption after authentication. 
* * At this point spark has multiple communication protocols that need to be secured and @@ -137,18 +146,32 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging { private val sparkSecretLookupKey = sparkCookie private val authOn = sparkConf.getBoolean(spark.authenticate, false) - private var uiAclsOn = sparkConf.getBoolean(spark.ui.acls.enable, false) + // keep spark.ui.acls.enable for backwards compatibility with 1.0 + private var aclsOn = sparkConf.getOption(spark.acls.enable).getOrElse( +sparkConf.get(spark.ui.acls.enable, false)).toBoolean + + // admin acls should be set before view or modify acls + private var adminAcls: Set[String] = +stringToSet(sparkConf.get(spark.admin.acls, )) private var viewAcls: Set[String] = _ + + // list of users who have permission to modify the application. This should + // apply to both
git commit: SPARK-1890 and SPARK-1891- add admin and modify acls
Repository: spark Updated Branches: refs/heads/branch-1.1 6c0c65fc8 - e3fe6571d SPARK-1890 and SPARK-1891- add admin and modify acls It was easier to combine these 2 jira since they touch many of the same places. This pr adds the following: - adds modify acls - adds admin acls (list of admins/users that get added to both view and modify acls) - modify Kill button on UI to take modify acls into account - changes config name of spark.ui.acls.enable to spark.acls.enable since I choose poorly in original name. We keep backwards compatibility so people can still use spark.ui.acls.enable. The acls should apply to any web ui as well as any CLI interfaces. - send view and modify acls information on to YARN so that YARN interfaces can use (yarn cli for killing applications for example). Author: Thomas Graves tgra...@apache.org Closes #1196 from tgravescs/SPARK-1890 and squashes the following commits: 8292eb1 [Thomas Graves] review comments b92ec89 [Thomas Graves] remove unneeded variable from applistener 4c765f4 [Thomas Graves] Add in admin acls 72eb0ac [Thomas Graves] Add modify acls (cherry picked from commit 1ca23d3aa40423d658cfbf2c956ad415a6b1) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3fe6571 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3fe6571 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3fe6571 Branch: refs/heads/branch-1.1 Commit: e3fe6571decfdc406ec6d505fd92f9f2b85a618c Parents: 6c0c65f Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 12:52:52 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 12:53:05 2014 -0500 -- .../org/apache/spark/SecurityManager.scala | 107 --- .../deploy/history/FsHistoryProvider.scala | 4 +- .../scheduler/ApplicationEventListener.scala| 4 +- .../apache/spark/ui/jobs/JobProgressTab.scala | 2 +- .../org/apache/spark/SecurityManagerSuite.scala | 83 -- 
docs/configuration.md | 27 - docs/security.md| 7 +- .../apache/spark/deploy/yarn/ClientBase.scala | 9 +- 8 files changed, 206 insertions(+), 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e3fe6571/core/src/main/scala/org/apache/spark/SecurityManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala b/core/src/main/scala/org/apache/spark/SecurityManager.scala index 74aa441..25c2c9f 100644 --- a/core/src/main/scala/org/apache/spark/SecurityManager.scala +++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala @@ -41,10 +41,19 @@ import org.apache.spark.deploy.SparkHadoopUtil * secure the UI if it has data that other users should not be allowed to see. The javax * servlet filter specified by the user can authenticate the user and then once the user * is logged in, Spark can compare that user versus the view acls to make sure they are - * authorized to view the UI. The configs 'spark.ui.acls.enable' and 'spark.ui.view.acls' + * authorized to view the UI. The configs 'spark.acls.enable' and 'spark.ui.view.acls' * control the behavior of the acls. Note that the person who started the application * always has view access to the UI. * + * Spark has a set of modify acls (`spark.modify.acls`) that controls which users have permission + * to modify a single application. This would include things like killing the application. By + * default the person who started the application has modify access. For modify access through + * the UI, you must have a filter that does authentication in place for the modify acls to work + * properly. + * + * Spark also has a set of admin acls (`spark.admin.acls`) which is a set of users/administrators + * who always have permission to view or modify the Spark application. + * * Spark does not currently support encryption after authentication. 
* * At this point spark has multiple communication protocols that need to be secured and @@ -137,18 +146,32 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging { private val sparkSecretLookupKey = sparkCookie private val authOn = sparkConf.getBoolean(spark.authenticate, false) - private var uiAclsOn = sparkConf.getBoolean(spark.ui.acls.enable, false) + // keep spark.ui.acls.enable for backwards compatibility with 1.0 + private var aclsOn = sparkConf.getOption(spark.acls.enable).getOrElse( +sparkConf.get(spark.ui.acls.enable, false)).toBoolean + + // admin acls should be set before view or modify acls + private var adminAcls: Set[String] = +stringToSet(sparkConf.get(spark.admin.acls, )) private var
git commit: SPARK-1680: use configs for specifying environment variables on YARN
Repository: spark Updated Branches: refs/heads/master 74f82c71b - 41e0a21b2 SPARK-1680: use configs for specifying environment variables on YARN Note that this also documents spark.executorEnv.* which to me means its public. If we don't want that please speak up. Author: Thomas Graves tgra...@apache.org Closes #1512 from tgravescs/SPARK-1680 and squashes the following commits: 11525df [Thomas Graves] more doc changes 553bad0 [Thomas Graves] fix documentation 152bf7c [Thomas Graves] fix docs 5382326 [Thomas Graves] try fix docs 32f86a4 [Thomas Graves] use configs for specifying environment variables on YARN Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41e0a21b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41e0a21b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41e0a21b Branch: refs/heads/master Commit: 41e0a21b22ccd2788dc079790788e505b0d4e37d Parents: 74f82c7 Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 15:57:32 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 15:57:32 2014 -0500 -- docs/configuration.md | 8 +++ docs/running-on-yarn.md | 22 +++- .../apache/spark/deploy/yarn/ClientBase.scala | 13 .../deploy/yarn/ExecutorRunnableUtil.scala | 6 +- 4 files changed, 43 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/41e0a21b/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 25adea2..5e7556c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -206,6 +206,14 @@ Apart from these, the following properties are also available, and may be useful used during aggregation goes above this amount, it will spill the data into disks. /td /tr +tr + tdcodespark.executorEnv.[EnvironmentVariableName]/code/td + td(none)/td + td +Add the environment variable specified by codeEnvironmentVariableName/code to the Executor +process. 
The user can specify multiple of these and to set multiple environment variables. + /td +/tr /table Shuffle Behavior http://git-wip-us.apache.org/repos/asf/spark/blob/41e0a21b/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 573930d..9bc20db 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -17,10 +17,6 @@ To build Spark yourself, refer to the [building with Maven guide](building-with- Most of the configs are the same for Spark on YARN as for other deployment modes. See the [configuration page](configuration.html) for more information on those. These are configs that are specific to Spark on YARN. - Environment Variables - -* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV=JAVA_HOME=/jdk64,FOO=bar`. - Spark Properties table class=table @@ -110,7 +106,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes tdcodespark.yarn.access.namenodes/code/td td(none)/td td -A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have acess to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters. +A list of secure HDFS namenodes your Spark application is going to access. For +example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. +The Spark application must have acess to the namenodes listed and Kerberos must +be properly configured to be able to access them (either in the same realm or in +a trusted realm). 
Spark acquires security tokens for each of the namenodes so that +the Spark application can access those remote HDFS clusters. + /td +/tr +tr + tdcodespark.yarn.appMasterEnv.[EnvironmentVariableName]/code/td + td(none)/td + td + Add the environment variable specified by codeEnvironmentVariableName/code to the + Application Master process launched on YARN. The user can specify multiple of + these and to set multiple environment variables. In yarn-cluster mode this controls + the environment of the SPARK driver and in yarn-client mode it only controls + the environment of the executor
git commit: SPARK-1680: use configs for specifying environment variables on YARN
Repository: spark Updated Branches: refs/heads/branch-1.1 46b698307 - 7b798e10e SPARK-1680: use configs for specifying environment variables on YARN Note that this also documents spark.executorEnv.* which to me means its public. If we don't want that please speak up. Author: Thomas Graves tgra...@apache.org Closes #1512 from tgravescs/SPARK-1680 and squashes the following commits: 11525df [Thomas Graves] more doc changes 553bad0 [Thomas Graves] fix documentation 152bf7c [Thomas Graves] fix docs 5382326 [Thomas Graves] try fix docs 32f86a4 [Thomas Graves] use configs for specifying environment variables on YARN (cherry picked from commit 41e0a21b22ccd2788dc079790788e505b0d4e37d) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b798e10 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b798e10 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b798e10 Branch: refs/heads/branch-1.1 Commit: 7b798e10e214cd407d3399e2cab9e3789f9a929e Parents: 46b6983 Author: Thomas Graves tgra...@apache.org Authored: Tue Aug 5 15:57:32 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Aug 5 15:57:42 2014 -0500 -- docs/configuration.md | 8 +++ docs/running-on-yarn.md | 22 +++- .../apache/spark/deploy/yarn/ClientBase.scala | 13 .../deploy/yarn/ExecutorRunnableUtil.scala | 6 +- 4 files changed, 43 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7b798e10/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 1333465..6ae453d 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -206,6 +206,14 @@ Apart from these, the following properties are also available, and may be useful used during aggregation goes above this amount, it will spill the data into disks. 
/td /tr +tr + tdcodespark.executorEnv.[EnvironmentVariableName]/code/td + td(none)/td + td +Add the environment variable specified by codeEnvironmentVariableName/code to the Executor +process. The user can specify multiple of these and to set multiple environment variables. + /td +/tr /table Shuffle Behavior http://git-wip-us.apache.org/repos/asf/spark/blob/7b798e10/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 573930d..9bc20db 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -17,10 +17,6 @@ To build Spark yourself, refer to the [building with Maven guide](building-with- Most of the configs are the same for Spark on YARN as for other deployment modes. See the [configuration page](configuration.html) for more information on those. These are configs that are specific to Spark on YARN. - Environment Variables - -* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV=JAVA_HOME=/jdk64,FOO=bar`. - Spark Properties table class=table @@ -110,7 +106,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes tdcodespark.yarn.access.namenodes/code/td td(none)/td td -A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have acess to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters. +A list of secure HDFS namenodes your Spark application is going to access. For +example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. 
+The Spark application must have acess to the namenodes listed and Kerberos must +be properly configured to be able to access them (either in the same realm or in +a trusted realm). Spark acquires security tokens for each of the namenodes so that +the Spark application can access those remote HDFS clusters. + /td +/tr +tr + tdcodespark.yarn.appMasterEnv.[EnvironmentVariableName]/code/td + td(none)/td + td + Add the environment variable specified by codeEnvironmentVariableName/code to the + Application Master process launched on YARN. The user can specify multiple of + these and to set multiple environment variables. In yarn-cluster mode this controls
git commit: SPARK-2150: Provide direct link to finished application UI in yarn resou...
Repository: spark Updated Branches: refs/heads/master 42dfab7d3 - 46e224aaa SPARK-2150: Provide direct link to finished application UI in yarn resou... ...rce manager UI Use the event logger directory to provide a direct link to finished application UI in yarn resourcemanager UI. Author: Rahul Singhal rahul.sing...@guavus.com Closes #1094 from rahulsinghaliitd/SPARK-2150 and squashes the following commits: 95f230c [Rahul Singhal] SPARK-2150: Provide direct link to finished application UI in yarn resource manager UI Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46e224aa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46e224aa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46e224aa Branch: refs/heads/master Commit: 46e224aaa26df4b232c5176e98472a902862b76c Parents: 42dfab7 Author: Rahul Singhal rahul.sing...@guavus.com Authored: Thu Jul 24 09:31:04 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jul 24 09:31:04 2014 -0500 -- .../spark/deploy/history/FsHistoryProvider.scala| 3 ++- .../apache/spark/deploy/history/HistoryPage.scala | 2 +- .../apache/spark/deploy/history/HistoryServer.scala | 4 +++- .../org/apache/spark/deploy/master/Master.scala | 11 +++ .../spark/scheduler/EventLoggingListener.scala | 7 +++ .../spark/deploy/yarn/ApplicationMaster.scala | 4 +++- .../apache/spark/deploy/yarn/ExecutorLauncher.scala | 2 +- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 16 .../cluster/YarnClientSchedulerBackend.scala| 3 ++- .../spark/deploy/yarn/ApplicationMaster.scala | 5 +++-- .../apache/spark/deploy/yarn/ExecutorLauncher.scala | 2 +- 11 files changed, 46 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index a8c9ac0..01e7065 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -169,7 +169,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis val ui: SparkUI = if (renderUI) { val conf = this.conf.clone() val appSecManager = new SecurityManager(conf) -new SparkUI(conf, appSecManager, replayBus, appId, /history/ + appId) +new SparkUI(conf, appSecManager, replayBus, appId, + HistoryServer.UI_PATH_PREFIX + s/$appId) // Do not call ui.bind() to avoid creating a new server for each application } else { null http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index a958c83..d7a3e3f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -75,7 +75,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage() { Last Updated) private def appRow(info: ApplicationHistoryInfo): Seq[Node] = { -val uiAddress = /history/ + info.id +val uiAddress = HistoryServer.UI_PATH_PREFIX + s/${info.id} val startTime = UIUtils.formatDate(info.startTime) val endTime = UIUtils.formatDate(info.endTime) val duration = UIUtils.formatDuration(info.endTime - info.startTime) http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 56b38dd..cacb9da 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -114,7 +114,7 @@ class HistoryServer( attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, /static)) val contextHandler = new ServletContextHandler -contextHandler.setContextPath(/history) +contextHandler.setContextPath(HistoryServer.UI_PATH_PREFIX) contextHandler.addServlet(new ServletHolder(loaderServlet), /*)
git commit: [SPARK-2037]: yarn client mode doesn't support spark.yarn.max.executor.failures
Repository: spark Updated Branches: refs/heads/master c960b5051 - 323a83c52 [SPARK-2037]: yarn client mode doesn't support spark.yarn.max.executor.failures Author: GuoQiang Li wi...@qq.com Closes #1180 from witgo/SPARK-2037 and squashes the following commits: 3d52411 [GuoQiang Li] review commit 7058f4d [GuoQiang Li] Correctly stop SparkContext 6d0561f [GuoQiang Li] Fix: yarn client mode doesn't support spark.yarn.max.executor.failures Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/323a83c5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/323a83c5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/323a83c5 Branch: refs/heads/master Commit: 323a83c5235f9289cd9526491d62365df96a429b Parents: c960b50 Author: GuoQiang Li wi...@qq.com Authored: Thu Jul 24 14:46:10 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jul 24 14:46:10 2014 -0500 -- .../spark/deploy/yarn/ExecutorLauncher.scala| 80 +--- .../cluster/YarnClientSchedulerBackend.scala| 28 +++ .../spark/deploy/yarn/ExecutorLauncher.scala| 45 --- 3 files changed, 115 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/323a83c5/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala index d232c18..184e2ad 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala @@ -28,7 +28,6 @@ import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import akka.actor._ import akka.remote._ -import akka.actor.Terminated import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.util.{Utils, AkkaUtils} import 
org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend @@ -57,10 +56,17 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp private val yarnConf: YarnConfiguration = new YarnConfiguration(conf) private var yarnAllocator: YarnAllocationHandler = _ - private var driverClosed:Boolean = false + + private var driverClosed: Boolean = false + private var isFinished: Boolean = false + private var registered: Boolean = false + + // Default to numExecutors * 2, with minimum of 3 + private val maxNumExecutorFailures = sparkConf.getInt(spark.yarn.max.executor.failures, +sparkConf.getInt(spark.yarn.max.worker.failures, math.max(args.numExecutors * 2, 3))) val securityManager = new SecurityManager(sparkConf) - val actorSystem : ActorSystem = AkkaUtils.createActorSystem(sparkYarnAM, Utils.localHostName, 0, + val actorSystem: ActorSystem = AkkaUtils.createActorSystem(sparkYarnAM, Utils.localHostName, 0, conf = sparkConf, securityManager = securityManager)._1 var actor: ActorRef = _ @@ -97,23 +103,26 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp appAttemptId = getApplicationAttemptId() resourceManager = registerWithResourceManager() -val appMasterResponse: RegisterApplicationMasterResponse = registerApplicationMaster() - -// Compute number of threads for akka -val minimumMemory = appMasterResponse.getMinimumResourceCapability().getMemory() - -if (minimumMemory 0) { - val mem = args.executorMemory + sparkConf.getInt(spark.yarn.executor.memoryOverhead, -YarnAllocationHandler.MEMORY_OVERHEAD) - val numCore = (mem / minimumMemory) + (if (0 != (mem % minimumMemory)) 1 else 0) - - if (numCore 0) { -// do not override - hits https://issues.apache.org/jira/browse/HADOOP-8406 -// TODO: Uncomment when hadoop is on a version which has this fixed. 
-// args.workerCores = numCore +synchronized { + if (!isFinished) { +val appMasterResponse: RegisterApplicationMasterResponse = registerApplicationMaster() +// Compute number of threads for akka +val minimumMemory = appMasterResponse.getMinimumResourceCapability().getMemory() + +if (minimumMemory 0) { + val mem = args.executorMemory + sparkConf.getInt(spark.yarn.executor.memoryOverhead, +YarnAllocationHandler.MEMORY_OVERHEAD) + val numCore = (mem / minimumMemory) + (if (0 != (mem % minimumMemory)) 1 else 0) + + if (numCore 0) { +// do not override - hits https://issues.apache.org/jira/browse/HADOOP-8406 +// TODO: Uncomment when hadoop is on a version which has this
git commit: [YARN][SPARK-2606]: In some cases, the Spark UI pages display incorrectly
Repository: spark Updated Branches: refs/heads/master 5f7b99168 - ddadf1b00 [YARN][SPARK-2606]:In some cases,the spark UI pages display incorrect The issue is caused by #1112 . Author: GuoQiang Li wi...@qq.com Closes #1501 from witgo/webui_style and squashes the following commits: 4b34998 [GuoQiang Li] In some cases, pages display incorrect in WebUI Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ddadf1b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ddadf1b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ddadf1b0 Branch: refs/heads/master Commit: ddadf1b00470b9d7bf7386dacf198d41407a0a2b Parents: 5f7b991 Author: GuoQiang Li wi...@qq.com Authored: Tue Jul 22 20:34:40 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Jul 22 20:34:40 2014 -0500 -- core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ddadf1b0/core/src/main/scala/org/apache/spark/ui/UIUtils.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index e07aa2e..715cc2f 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -149,7 +149,7 @@ private[spark] object UIUtils extends Logging { def prependBaseUri(basePath: String = , resource: String = ) = uiRoot + basePath + resource - val commonHeaderNodes = { + def commonHeaderNodes = { meta http-equiv=Content-type content=text/html; charset=utf-8 / link rel=stylesheet href={prependBaseUri(/static/bootstrap.min.css)} type=text/css /
git commit: SPARK-1707. Remove unnecessary 3 second sleep in YarnClusterScheduler
Repository: spark Updated Branches: refs/heads/master cd273a238 - f89cf65d7 SPARK-1707. Remove unnecessary 3 second sleep in YarnClusterScheduler Author: Sandy Ryza sa...@cloudera.com Closes #634 from sryza/sandy-spark-1707 and squashes the following commits: 2f6e358 [Sandy Ryza] Default min registered executors ratio to .8 for YARN 354c630 [Sandy Ryza] Remove outdated comments c744ef3 [Sandy Ryza] Take out waitForInitialAllocations 2a4329b [Sandy Ryza] SPARK-1707. Remove unnecessary 3 second sleep in YarnClusterScheduler Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f89cf65d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f89cf65d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f89cf65d Branch: refs/heads/master Commit: f89cf65d7aced0bb387c05586f9f51cb29865022 Parents: cd273a2 Author: Sandy Ryza sa...@cloudera.com Authored: Mon Jul 21 13:15:46 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Mon Jul 21 13:15:46 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 39 -- .../cluster/YarnClientClusterScheduler.scala| 10 - .../cluster/YarnClientSchedulerBackend.scala| 5 +++ .../cluster/YarnClusterScheduler.scala | 8 +--- .../cluster/YarnClusterSchedulerBackend.scala | 5 +++ .../spark/deploy/yarn/ApplicationMaster.scala | 43 6 files changed, 11 insertions(+), 99 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f89cf65d/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 062f946..3ec3648 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -255,10 +255,6 @@ class ApplicationMaster(args: 
ApplicationMasterArguments, conf: Configuration, sparkContext.getConf) } } -} finally { - // in case of exceptions, etc - ensure that count is atleast ALLOCATOR_LOOP_WAIT_COUNT : - // so that the loop (in ApplicationMaster.sparkContextInitialized) breaks - ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT) } } @@ -277,13 +273,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } yarnAllocator.allocateContainers( math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0)) -ApplicationMaster.incrementAllocatorLoop(1) Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL) } -} finally { - // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT, - // so that the loop in ApplicationMaster#sparkContextInitialized() breaks. - ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT) } logInfo(All executors have launched.) @@ -411,24 +402,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } object ApplicationMaster extends Logging { - // Number of times to wait for the allocator loop to complete. - // Each loop iteration waits for 100ms, so maximum of 3 seconds. - // This is to ensure that we have reasonable number of containers before we start // TODO: Currently, task to container is computed once (TaskSetManager) - which need not be // optimal as more containers are available. Might need to handle this better. - private val ALLOCATOR_LOOP_WAIT_COUNT = 30 private val ALLOCATE_HEARTBEAT_INTERVAL = 100 - def incrementAllocatorLoop(by: Int) { -val count = yarnAllocatorLoop.getAndAdd(by) -if (count = ALLOCATOR_LOOP_WAIT_COUNT) { - yarnAllocatorLoop.synchronized { -// to wake threads off wait ... 
-yarnAllocatorLoop.notifyAll() - } -} - } - private val applicationMasters = new CopyOnWriteArrayList[ApplicationMaster]() def register(master: ApplicationMaster) { @@ -437,7 +414,6 @@ object ApplicationMaster extends Logging { val sparkContextRef: AtomicReference[SparkContext] = new AtomicReference[SparkContext](null /* initialValue */) - val yarnAllocatorLoop: AtomicInteger = new AtomicInteger(0) def sparkContextInitialized(sc: SparkContext): Boolean = { var modified = false @@ -472,21 +448,6 @@ object ApplicationMaster extends Logging { modified } - - /** - * Returns when we've either - * 1) received all the requested executors, - * 2) waited
git commit: SPARK-1291: Link the spark UI to RM ui in yarn-client mode
Repository: spark Updated Branches: refs/heads/master 9dd635eb5 - 72ea56da8 SPARK-1291: Link the spark UI to RM ui in yarn-client mode Author: witgo wi...@qq.com Closes #1112 from witgo/SPARK-1291 and squashes the following commits: 6022bcd [witgo] review commit 1fbb925 [witgo] add addAmIpFilter to yarn alpha 210299c [witgo] review commit 1b92a07 [witgo] review commit 6896586 [witgo] Add comments to addWebUIFilter 3e9630b [witgo] review commit 142ee29 [witgo] review commit 1fe7710 [witgo] Link the spark UI to RM ui in yarn-client mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72ea56da Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72ea56da Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72ea56da Branch: refs/heads/master Commit: 72ea56da8e383c61c6f18eeefef03b9af00f5158 Parents: 9dd635e Author: witgo wi...@qq.com Authored: Tue Jul 15 13:52:56 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Jul 15 13:52:56 2014 -0500 -- .../cluster/CoarseGrainedClusterMessage.scala | 3 +++ .../cluster/CoarseGrainedSchedulerBackend.scala | 18 +++ .../scala/org/apache/spark/ui/UIUtils.scala | 11 +- .../spark/deploy/yarn/ExecutorLauncher.scala| 22 --- .../cluster/YarnClientSchedulerBackend.scala| 1 + .../spark/deploy/yarn/ExecutorLauncher.scala| 23 +--- 6 files changed, 71 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/72ea56da/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 318e165..6abf6d9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -66,4 +66,7 
@@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: String) extends CoarseGrainedClusterMessage + case class AddWebUIFilter(filterName:String, filterParams: String, proxyBase :String) +extends CoarseGrainedClusterMessage + } http://git-wip-us.apache.org/repos/asf/spark/blob/72ea56da/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 0f5545e..9f085ee 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -31,6 +31,7 @@ import org.apache.spark.{SparkEnv, Logging, SparkException, TaskState} import org.apache.spark.scheduler.{SchedulerBackend, SlaveLost, TaskDescription, TaskSchedulerImpl, WorkerOffer} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.util.{SerializableBuffer, AkkaUtils, Utils} +import org.apache.spark.ui.JettyUtils /** * A scheduler backend that waits for coarse grained executors to connect to it through Akka. @@ -136,6 +137,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A removeExecutor(executorId, reason) sender ! true + case AddWebUIFilter(filterName, filterParams, proxyBase) = +addWebUIFilter(filterName, filterParams, proxyBase) +sender ! 
true case DisassociatedEvent(_, address, _) = addressToExecutorId.get(address).foreach(removeExecutor(_, remote Akka client disassociated)) @@ -276,6 +280,20 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, actorSystem: A } false } + + // Add filters to the SparkUI + def addWebUIFilter(filterName: String, filterParams: String, proxyBase: String) { +if (proxyBase != null proxyBase.nonEmpty) { + System.setProperty(spark.ui.proxyBase, proxyBase) +} + +if (Seq(filterName, filterParams).forall(t = t != null t.nonEmpty)) { + logInfo(sAdd WebUI Filter. $filterName, $filterParams, $proxyBase) + conf.set(spark.ui.filters, filterName) + conf.set(sspark.$filterName.params, filterParams) + JettyUtils.addFilters(scheduler.sc.ui.getHandlers, conf) +} + } } private[spark] object CoarseGrainedSchedulerBackend {
git commit: [SPARK-1946] Submit tasks after (configured ratio) executors have been registered
Repository: spark Updated Branches: refs/heads/master d60b09bb6 - 3dd8af7a6 [SPARK-1946] Submit tasks after (configured ratio) executors have been registered Because submitting tasks and registering executors are asynchronous, in most situation, early stages' tasks run without preferred locality. A simple solution is sleeping few seconds in application, so that executors have enough time to register. The PR add 2 configuration properties to make TaskScheduler submit tasks after a few of executors have been registered. \# Submit tasks only after (registered executors / total executors) arrived the ratio, default value is 0 spark.scheduler.minRegisteredExecutorsRatio = 0.8 \# Whatever minRegisteredExecutorsRatio is arrived, submit tasks after the maxRegisteredWaitingTime(millisecond), default value is 3 spark.scheduler.maxRegisteredExecutorsWaitingTime = 5000 Author: li-zhihui zhihui...@intel.com Closes #900 from li-zhihui/master and squashes the following commits: b9f8326 [li-zhihui] Add logs edit docs 1ac08b1 [li-zhihui] Add new configs to user docs 22ead12 [li-zhihui] Move waitBackendReady to postStartHook c6f0522 [li-zhihui] Bug fix: numExecutors wasn't set use constant DEFAULT_NUMBER_EXECUTORS 4d6d847 [li-zhihui] Move waitBackendReady to TaskSchedulerImpl.start some code refactor 0ecee9a [li-zhihui] Move waitBackendReady from DAGScheduler.submitStage to TaskSchedulerImpl.submitTasks 4261454 [li-zhihui] Add docs for new configs code style ce0868a [li-zhihui] Code style, rename configuration property name of minRegisteredRatio maxRegisteredWaitingTime 6cfb9ec [li-zhihui] Code style, revert default minRegisteredRatio of yarn to 0, driver get --num-executors in yarn/alpha 812c33c [li-zhihui] Fix driver lost --num-executors option in yarn-cluster mode e7b6272 [li-zhihui] support yarn-cluster 37f7dc2 [li-zhihui] support yarn mode(percentage style) 3f8c941 [li-zhihui] submit stage after (configured ratio of) executors have been registered Project: 
http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3dd8af7a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3dd8af7a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3dd8af7a Branch: refs/heads/master Commit: 3dd8af7a6623201c28231f4b71f59ea4e9ae29bf Parents: d60b09b Author: li-zhihui zhihui...@intel.com Authored: Mon Jul 14 15:32:49 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Mon Jul 14 15:32:49 2014 -0500 -- .../scala/org/apache/spark/SparkContext.scala | 11 +- .../spark/scheduler/SchedulerBackend.scala | 1 + .../spark/scheduler/TaskSchedulerImpl.scala | 15 .../cluster/CoarseGrainedSchedulerBackend.scala | 29 ++ .../cluster/SparkDeploySchedulerBackend.scala | 1 + docs/configuration.md | 19 ++ .../spark/deploy/yarn/ApplicationMaster.scala | 1 + .../yarn/ApplicationMasterArguments.scala | 6 ++- .../cluster/YarnClientClusterScheduler.scala| 2 + .../cluster/YarnClientSchedulerBackend.scala| 1 + .../cluster/YarnClusterScheduler.scala | 2 + .../cluster/YarnClusterSchedulerBackend.scala | 40 .../spark/deploy/yarn/ApplicationMaster.scala | 1 + 13 files changed, 127 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3dd8af7a/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 8819e73..8052499 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1531,7 +1531,16 @@ object SparkContext extends Logging { throw new SparkException(YARN mode not available ?, e) } } -val backend = new CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) +val backend = try { + val clazz = + Class.forName(org.apache.spark.scheduler.cluster.YarnClusterSchedulerBackend) + val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], 
classOf[SparkContext]) + cons.newInstance(scheduler, sc).asInstanceOf[CoarseGrainedSchedulerBackend] +} catch { + case e: Exception = { +throw new SparkException(YARN mode not available ?, e) + } +} scheduler.initialize(backend) scheduler http://git-wip-us.apache.org/repos/asf/spark/blob/3dd8af7a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala -- diff --git
git commit: SPARK-2400 : fix spark.yarn.max.executor.failures explanation
Repository: spark Updated Branches: refs/heads/master c8a2313cd - b520b6453 SPARK-2400 : fix spark.yarn.max.executor.failures explaination According to ```scala private val maxNumExecutorFailures = sparkConf.getInt(spark.yarn.max.executor.failures, sparkConf.getInt(spark.yarn.max.worker.failures, math.max(args.numExecutors * 2, 3))) ``` default value should be numExecutors * 2, with minimum of 3, and it's same to the config `spark.yarn.max.worker.failures` Author: CrazyJvm crazy...@gmail.com Closes #1282 from CrazyJvm/yarn-doc and squashes the following commits: 1a5f25b [CrazyJvm] remove deprecated config c438aec [CrazyJvm] fix style 86effa6 [CrazyJvm] change expression 211f130 [CrazyJvm] fix html tag 2900d23 [CrazyJvm] fix style a4b2e27 [CrazyJvm] fix configuration spark.yarn.max.executor.failures Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b520b645 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b520b645 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b520b645 Branch: refs/heads/master Commit: b520b6453ed76926108e0bdd56114d16e1d86850 Parents: c8a2313 Author: CrazyJvm crazy...@gmail.com Authored: Tue Jul 8 13:55:42 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Jul 8 13:55:42 2014 -0500 -- docs/running-on-yarn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b520b645/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 5d8d603..0362f5a 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -55,7 +55,7 @@ Most of the configs are the same for Spark on YARN as for other deployment modes /tr tr tdcodespark.yarn.max.executor.failures/code/td - td2*numExecutors/td + tdnumExecutors * 2, with minimum of 3/td td The maximum number of executor failures before failing the application. /td
svn commit: r1606091 - in /hadoop/common/tags: release-0.23.11-rc0/ release-0.23.11/
Author: tgraves Date: Fri Jun 27 13:25:29 2014 New Revision: 1606091 URL: http://svn.apache.org/r1606091 Log: Hadoop 0.23.11 release. Added: hadoop/common/tags/release-0.23.11/ (props changed) - copied from r1606090, hadoop/common/tags/release-0.23.11-rc0/ Removed: hadoop/common/tags/release-0.23.11-rc0/ Propchange: hadoop/common/tags/release-0.23.11/ -- --- svn:ignore (added) +++ svn:ignore Fri Jun 27 13:25:29 2014 @@ -0,0 +1,5 @@ +.classpath +.git +.project +.settings +target Propchange: hadoop/common/tags/release-0.23.11/ -- --- svn:mergeinfo (added) +++ svn:mergeinfo Fri Jun 27 13:25:29 2014 @@ -0,0 +1 @@ +/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409
git commit: Remove use of spark.worker.instances
Repository: spark Updated Branches: refs/heads/branch-1.0 47f8829e0 - 2d3080855 Remove use of spark.worker.instances spark.worker.instances was added as part of this commit: https://github.com/apache/spark/commit/1617816090e7b20124a512a43860a21232ebf511 My understanding is that SPARK_WORKER_INSTANCES is supported for backwards compatibility, but spark.worker.instances is never used (SparkSubmit.scala sets spark.executor.instances) so should not have been added. @sryza @pwendell @tgravescs LMK if I'm understanding this correctly Author: Kay Ousterhout kayousterh...@gmail.com Closes #1214 from kayousterhout/yarn_config and squashes the following commits: 3d7c491 [Kay Ousterhout] Remove use of spark.worker.instances (cherry picked from commit 48a82a827c99526b165c78d7e88faec43568a37a) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d308085 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d308085 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d308085 Branch: refs/heads/branch-1.0 Commit: 2d308085558e9a62147bc3e1761cf3a38f5b6fb4 Parents: 47f8829 Author: Kay Ousterhout kayousterh...@gmail.com Authored: Thu Jun 26 08:20:27 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jun 26 08:20:59 2014 -0500 -- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2d308085/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index e01ed5a..709871c 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -63,7 +63,7 @@ private[spark] class YarnClientSchedulerBackend( // variables. List((--driver-memory, SPARK_MASTER_MEMORY, spark.master.memory), (--driver-memory, SPARK_DRIVER_MEMORY, spark.driver.memory), - (--num-executors, SPARK_WORKER_INSTANCES, spark.worker.instances), + (--num-executors, SPARK_WORKER_INSTANCES, spark.executor.instances), (--num-executors, SPARK_EXECUTOR_INSTANCES, spark.executor.instances), (--executor-memory, SPARK_WORKER_MEMORY, spark.executor.memory), (--executor-memory, SPARK_EXECUTOR_MEMORY, spark.executor.memory),
git commit: [SPARK-2051] In yarn.ClientBase spark.yarn.dist.* do not work
Repository: spark Updated Branches: refs/heads/master 67fca189c - bce0897bc [SPARK-2051]In yarn.ClientBase spark.yarn.dist.* do not work Author: witgo wi...@qq.com Closes #969 from witgo/yarn_ClientBase and squashes the following commits: 8117765 [witgo] review commit 3bdbc52 [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase 5261b6c [witgo] fix sys.props.get(SPARK_YARN_DIST_FILES) e3c1107 [witgo] update docs b6a9aa1 [witgo] merge master c8b4554 [witgo] review commit 2f48789 [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase 8d7b82f [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase 1048549 [witgo] remove Utils.resolveURIs 871f1db [witgo] add spark.yarn.dist.* documentation 41bce59 [witgo] review commit 35d6fa0 [witgo] move to ClientArguments 55d72fc [witgo] Merge branch 'master' of https://github.com/apache/spark into yarn_ClientBase 9cdff16 [witgo] review commit 8bc2f4b [witgo] review commit 20e667c [witgo] Merge branch 'master' into yarn_ClientBase 0961151 [witgo] merge master ce609fc [witgo] Merge branch 'master' into yarn_ClientBase 8362489 [witgo] yarn.ClientBase spark.yarn.dist.* do not work Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bce0897b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bce0897b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bce0897b Branch: refs/heads/master Commit: bce0897bc6b0fc9bca5444dbe3a9e75523ad7481 Parents: 67fca18 Author: witgo wi...@qq.com Authored: Thu Jun 19 12:11:26 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jun 19 12:11:26 2014 -0500 -- docs/running-on-yarn.md | 20 +--- .../spark/deploy/yarn/ClientArguments.scala | 15 +-- .../apache/spark/deploy/yarn/ClientBase.scala | 3 ++- .../cluster/YarnClientSchedulerBackend.scala| 4 +--- 4 files changed, 33 insertions(+), 9 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/spark/blob/bce0897b/docs/running-on-yarn.md -- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 4243ef4..fecd8f2 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -68,15 +68,29 @@ Most of the configs are the same for Spark on YARN as for other deployment modes /td /tr tr - tdcodespark.yarn.executor.memoryOverhead/code/td - td384/code/td + tdcodespark.yarn.dist.archives/code/td + td(none)/td + td +Comma separated list of archives to be extracted into the working directory of each executor. + /td +/tr +tr + tdcodespark.yarn.dist.files/code/td + td(none)/td + td +Comma-separated list of files to be placed in the working directory of each executor. + td +/tr +tr + tdcodespark.yarn.executor.memoryOverhead/code/td + td384/td td The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. /td /tr tr tdcodespark.yarn.driver.memoryOverhead/code/td - td384/code/td + td384/td td The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. 
/td http://git-wip-us.apache.org/repos/asf/spark/blob/bce0897b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index fd3ef9e..62f9b3cf 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -21,8 +21,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.spark.SparkConf import org.apache.spark.scheduler.InputFormatInfo -import org.apache.spark.util.IntParam -import org.apache.spark.util.MemoryParam +import org.apache.spark.util.{Utils, IntParam, MemoryParam} // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware ! @@ -45,6 +44,18 @@ class ClientArguments(val args: Array[String], val sparkConf: SparkConf) { parseArgs(args.toList) + // env variable SPARK_YARN_DIST_ARCHIVES/SPARK_YARN_DIST_FILES set in yarn-client then + // it should default to hdfs:// + files = Option(files).getOrElse(sys.env.get(SPARK_YARN_DIST_FILES).orNull) + archives = Option(archives).getOrElse(sys.env.get(SPARK_YARN_DIST_ARCHIVES).orNull) + + //
svn commit: r1603642 - in /hadoop/common/branches/branch-0.23/hadoop-hdfs-project: hadoop-hdfs-httpfs/pom.xml hadoop-hdfs/CHANGES.txt hadoop-hdfs/pom.xml hadoop-hdfs/src/test/aop/build/aop.xml pom.xml
Author: tgraves Date: Wed Jun 18 21:47:05 2014 New Revision: 1603642 URL: http://svn.apache.org/r1603642 Log: Preparing for 0.23.12 development Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml Wed Jun 18 21:47:05 2014 @@ -22,12 +22,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-hdfs-httpfs/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version packagingwar/packaging nameApache Hadoop HttpFS/name Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Jun 18 21:47:05 2014 @@ -1,6 +1,18 @@ Hadoop HDFS Change Log -Release 0.23.11 - UNRELEASED +Release 0.23.12 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW 
FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 0.23.11 - 2014-06-26 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml Wed Jun 18 21:47:05 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project-dist/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../../hadoop-project-dist/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-hdfs/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version descriptionApache Hadoop HDFS/description nameApache Hadoop HDFS/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml Wed Jun 18 21:47:05 2014 @@ -21,7 +21,7 @@ property name=aspectversion value=1.6.5/ !-- TODO this has to be changed synchronously with build.xml version prop.-- !-- this workarounds of test-patch setting its own 'version' -- - property name=project.version value=0.23.11-SNAPSHOT/ + property name=project.version value=0.23.12-SNAPSHOT/ !-- Properties common for all fault injections -- property name=build-fi.dir value=${basedir}/build-fi/ Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml Wed Jun 18 21:47:05 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11
svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project: hadoop-hdfs-httpfs/pom.xml hadoop-hdfs/CHANGES.txt hadoop-hdfs/pom.xml hadoop-hdfs/src/test/aop/build/aop.xml pom.
Author: tgraves Date: Thu Jun 19 01:17:43 2014 New Revision: 1603694 URL: http://svn.apache.org/r1603694 Log: Preparing for release 0.23.11 Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml Thu Jun 19 01:17:43 2014 @@ -22,12 +22,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-hdfs-httpfs/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version packagingwar/packaging nameApache Hadoop HttpFS/name Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Thu Jun 19 01:17:43 2014 @@ -1,6 +1,6 @@ Hadoop HDFS Change Log -Release 0.23.11 - UNRELEASED +Release 0.23.11 - 2014-06-26 INCOMPATIBLE 
CHANGES Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project-dist/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../../hadoop-project-dist/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-hdfs/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version descriptionApache Hadoop HDFS/description nameApache Hadoop HDFS/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml Thu Jun 19 01:17:43 2014 @@ -21,7 +21,7 @@ property name=aspectversion value=1.6.5/ !-- TODO this has to be changed synchronously with build.xml version prop.-- !-- this workarounds of test-patch setting its own 'version' -- - property name=project.version value=0.23.11-SNAPSHOT/ + property name=project.version value=0.23.11/ !-- Properties common for all fault injections -- property name=build-fi.dir value=${basedir}/build-fi/ Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../hadoop-project/relativePath /parent
svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/ hadoop-mapreduce-client/hadoop-mapreduce-client-app/ hadoop-mapreduce-client/hado
Author: tgraves Date: Thu Jun 19 01:17:43 2014 New Revision: 1603694 URL: http://svn.apache.org/r1603694 Log: Preparing for release 0.23.11 Modified: hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/pom.xml Modified: hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt Thu Jun 19 01:17:43 2014 @@ -1,6 +1,6 @@ Hadoop MapReduce Change Log -Release 0.23.11 - UNRELEASED +Release 0.23.11 - 2014-06-26 INCOMPATIBLE CHANGES Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml Thu Jun 19 01:17:43 2014 @@ -19,12 +19,12 @@ parent artifactIdhadoop-mapreduce-client/artifactId groupIdorg.apache.hadoop/groupId -version0.23.11-SNAPSHOT/version +version0.23.11/version /parent modelVersion4.0.0/modelVersion groupIdorg.apache.hadoop/groupId artifactIdhadoop-mapreduce-client-app/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version namehadoop-mapreduce-client-app/name properties Modified: hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml Thu Jun 19 01:17:43 2014 @@ -19,12 +19,12 @@ parent artifactIdhadoop-mapreduce-client/artifactId groupIdorg.apache.hadoop/groupId -version0.23.11-SNAPSHOT/version +version0.23.11/version /parent modelVersion4.0.0/modelVersion groupIdorg.apache.hadoop/groupId artifactIdhadoop-mapreduce-client-common/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version 
namehadoop-mapreduce-client-common/name properties Modified: hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project
svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-yarn-project: ./ hadoop-yarn/ hadoop-yarn/hadoop-yarn-api/ hadoop-yarn/hadoop-yarn-applications/ hadoop-yarn/hadoop-yarn-applica
Author: tgraves Date: Thu Jun 19 01:17:43 2014 New Revision: 1603694 URL: http://svn.apache.org/r1603694 Log: Preparing for release 0.23.11 Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/pom.xml Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff == --- 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt Thu Jun 19 01:17:43 2014 @@ -1,6 +1,6 @@ Hadoop YARN Change Log -Release 0.23.11 - UNRELEASED +Release 0.23.11 - 2014-06-26 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml Thu Jun 19 01:17:43 2014 @@ -19,12 +19,12 @@ parent artifactIdhadoop-yarn/artifactId groupIdorg.apache.hadoop/groupId -version0.23.11-SNAPSHOT/version +version0.23.11/version /parent modelVersion4.0.0/modelVersion groupIdorg.apache.hadoop/groupId artifactIdhadoop-yarn-api/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version namehadoop-yarn-api/name properties Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml Thu Jun 19 01:17:43 2014 @@ -19,12 +19,12 @@ parent artifactIdhadoop-yarn-applications/artifactId groupIdorg.apache.hadoop/groupId 
-version0.23.11-SNAPSHOT/version +version0.23.11/version /parent modelVersion4.0.0/modelVersion groupIdorg.apache.hadoop/groupId artifactIdhadoop-yarn-applications-distributedshell/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version namehadoop-yarn-applications-distributedshell/name properties Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
svn commit: r1603641 - /hadoop/common/branches/branch-0.23.11/
Author: tgraves Date: Wed Jun 18 21:43:10 2014 New Revision: 1603641 URL: http://svn.apache.org/r1603641 Log: Branching for 0.23.11 releases Added: hadoop/common/branches/branch-0.23.11/ (props changed) - copied from r1603640, hadoop/common/branches/branch-0.23/ Propchange: hadoop/common/branches/branch-0.23.11/ -- --- svn:ignore (added) +++ svn:ignore Wed Jun 18 21:43:10 2014 @@ -0,0 +1,5 @@ +.classpath +.git +.project +.settings +target Propchange: hadoop/common/branches/branch-0.23.11/ -- --- svn:mergeinfo (added) +++ svn:mergeinfo Wed Jun 18 21:43:10 2014 @@ -0,0 +1 @@ +/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409
svn commit: r1603642 - in /hadoop/common/branches/branch-0.23: ./ hadoop-assemblies/ hadoop-client/ hadoop-dist/ hadoop-minicluster/ hadoop-project-dist/ hadoop-project/ hadoop-tools/ hadoop-tools/had
Author: tgraves Date: Wed Jun 18 21:47:05 2014 New Revision: 1603642 URL: http://svn.apache.org/r1603642 Log: Preparing for 0.23.12 development Modified: hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml hadoop/common/branches/branch-0.23/hadoop-client/pom.xml hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml hadoop/common/branches/branch-0.23/hadoop-project-dist/pom.xml hadoop/common/branches/branch-0.23/hadoop-project/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-archives/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-datajoin/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-distcp/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-extras/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-gridmix/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-pipes/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-rumen/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-streaming/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-tools-dist/pom.xml hadoop/common/branches/branch-0.23/hadoop-tools/pom.xml hadoop/common/branches/branch-0.23/pom.xml Modified: hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml Wed Jun 18 21:47:05 2014 @@ -23,12 +23,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-assemblies/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version nameApache Hadoop 
Assemblies/name descriptionApache Hadoop Assemblies/description Modified: hadoop/common/branches/branch-0.23/hadoop-client/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-client/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-client/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-client/pom.xml Wed Jun 18 21:47:05 2014 @@ -18,12 +18,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-client/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version packagingjar/packaging descriptionApache Hadoop Client/description Modified: hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml Wed Jun 18 21:47:05 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-dist/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version descriptionApache Hadoop Distribution/description nameApache Hadoop Distribution/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml (original) +++ 
hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml Wed Jun 18 21:47:05 2014 @@ -18,12 +18,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-minicluster/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version packagingjar/packaging descriptionApache Hadoop Mini
svn commit: r1603642 - in /hadoop/common/branches/branch-0.23/hadoop-common-project: hadoop-annotations/pom.xml hadoop-auth-examples/pom.xml hadoop-auth/pom.xml hadoop-common/CHANGES.txt hadoop-common
Author: tgraves Date: Wed Jun 18 21:47:05 2014 New Revision: 1603642 URL: http://svn.apache.org/r1603642 Log: Preparing for 0.23.12 development Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml hadoop/common/branches/branch-0.23/hadoop-common-project/pom.xml Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml Wed Jun 18 21:47:05 2014 @@ -21,12 +21,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-annotations/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version descriptionApache Hadoop Annotations/description nameApache Hadoop Annotations/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml (original) +++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml Wed Jun 18 21:47:05 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-auth-examples/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version packagingwar/packaging nameApache Hadoop Auth Examples/name Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml Wed Jun 18 21:47:05 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.12-SNAPSHOT/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-auth/artifactId - version0.23.11-SNAPSHOT/version + version0.23.12-SNAPSHOT/version packagingjar/packaging nameApache Hadoop Auth/name Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt Wed Jun 18 21:47:05 2014 @@ -1,6 +1,18 @@ Hadoop Change Log -Release 0.23.11 - UNRELEASED +Release 0.23.12 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + 
OPTIMIZATIONS + + BUG FIXES + +Release 0.23.11 - 2014-06-26 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml?rev=1603642r1=1603641r2=1603642view=diff == --- hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml (original) +++ hadoop/common/branches/branch-0.23/hadoop-common-project
svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11: ./ hadoop-assemblies/ hadoop-client/ hadoop-dist/ hadoop-minicluster/ hadoop-project-dist/ hadoop-project/ hadoop-tools/ hadoop-tools/
Author: tgraves Date: Thu Jun 19 01:17:43 2014 New Revision: 1603694 URL: http://svn.apache.org/r1603694 Log: Preparing for release 0.23.11 Modified: hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-project-dist/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-project/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-archives/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-datajoin/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-distcp/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-extras/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-gridmix/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-pipes/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-rumen/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-streaming/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-tools-dist/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-tools/pom.xml hadoop/common/branches/branch-0.23.11/pom.xml Modified: hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml Thu Jun 19 01:17:43 2014 @@ -23,12 +23,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-assemblies/artifactId - version0.23.11-SNAPSHOT/version + 
version0.23.11/version nameApache Hadoop Assemblies/name descriptionApache Hadoop Assemblies/description Modified: hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml Thu Jun 19 01:17:43 2014 @@ -18,12 +18,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-client/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version packagingjar/packaging descriptionApache Hadoop Client/description Modified: hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-dist/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version descriptionApache Hadoop Distribution/description nameApache Hadoop Distribution/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml (original) +++ 
hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml Thu Jun 19 01:17:43 2014 @@ -18,12 +18,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-minicluster/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version packagingjar/packaging
svn commit: r1603694 [3/3] - in /hadoop/common/branches/branch-0.23.11/hadoop-common-project: ./ hadoop-annotations/ hadoop-auth-examples/ hadoop-auth/ hadoop-common/ hadoop-common/src/main/docs/
Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ <parent> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-project</artifactId> -<version>0.23.11-SNAPSHOT</version> +<version>0.23.11</version> <relativePath>../hadoop-project</relativePath> </parent> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common-project</artifactId> - <version>0.23.11-SNAPSHOT</version> + <version>0.23.11</version> <description>Apache Hadoop Common Project</description> <name>Apache Hadoop Common Project</name> <packaging>pom</packaging>
svn commit: r1603694 [1/3] - in /hadoop/common/branches/branch-0.23.11/hadoop-common-project: ./ hadoop-annotations/ hadoop-auth-examples/ hadoop-auth/ hadoop-common/ hadoop-common/src/main/docs/
Author: tgraves Date: Thu Jun 19 01:17:43 2014 New Revision: 1603694 URL: http://svn.apache.org/r1603694 Log: Preparing for release 0.23.11 Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml Thu Jun 19 01:17:43 2014 @@ -21,12 +21,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-annotations/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version descriptionApache Hadoop Annotations/description nameApache Hadoop Annotations/name packagingjar/packaging Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-auth-examples/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version packagingwar/packaging nameApache Hadoop Auth Examples/name Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml Thu Jun 19 01:17:43 2014 @@ -20,12 +20,12 @@ parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-project/artifactId -version0.23.11-SNAPSHOT/version +version0.23.11/version relativePath../../hadoop-project/relativePath /parent groupIdorg.apache.hadoop/groupId artifactIdhadoop-auth/artifactId - version0.23.11-SNAPSHOT/version + version0.23.11/version packagingjar/packaging nameApache Hadoop Auth/name Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt Thu Jun 19 01:17:43 2014 @@ -1,6 +1,6 @@ Hadoop Change Log -Release 0.23.11 - UNRELEASED 
+Release 0.23.11 - 2014-06-26 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml?rev=1603694r1=1603693r2=1603694view=diff == --- hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml (original) +++ hadoop/common/branches/branch-0.23.11/hadoop-common
svn commit: r1603696 - /hadoop/common/tags/release-0.23.11-rc0/
Author: tgraves Date: Thu Jun 19 01:32:25 2014 New Revision: 1603696 URL: http://svn.apache.org/r1603696 Log: Hadoop 0.23.11-rc0 release. Added: hadoop/common/tags/release-0.23.11-rc0/ (props changed) - copied from r1603695, hadoop/common/branches/branch-0.23.11/ Propchange: hadoop/common/tags/release-0.23.11-rc0/ -- --- svn:ignore (added) +++ svn:ignore Thu Jun 19 01:32:25 2014 @@ -0,0 +1,5 @@ +.classpath +.git +.project +.settings +target Propchange: hadoop/common/tags/release-0.23.11-rc0/ -- --- svn:mergeinfo (added) +++ svn:mergeinfo Thu Jun 19 01:32:25 2014 @@ -0,0 +1 @@ +/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409
git commit: [SPARK-2080] Yarn: report HS URL in client mode, correct user in cluster mode.
Repository: spark Updated Branches: refs/heads/master 83c226d45 - ecde5b837 [SPARK-2080] Yarn: report HS URL in client mode, correct user in cluster mode. Yarn client mode was not setting the app's tracking URL to the History Server's URL when configured by the user. Now client mode behaves the same as cluster mode. In SparkContext.scala, the user.name system property had precedence over the SPARK_USER environment variable. This means that SPARK_USER was never used, since user.name is always set by the JVM. In Yarn cluster mode, this means the application always reported itself as being run by user yarn (or whatever user was running the Yarn NM). One could argue that the correct fix would be to use UGI.getCurrentUser() here, but at least for Yarn that will match what SPARK_USER is set to. Author: Marcelo Vanzin van...@cloudera.com This patch had conflicts when merged, resolved by Committer: Thomas Graves tgra...@apache.org Closes #1002 from vanzin/yarn-client-url and squashes the following commits: 4046e04 [Marcelo Vanzin] Set HS link in yarn-alpha also. 4c692d9 [Marcelo Vanzin] Yarn: report HS URL in client mode, correct user in cluster mode. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ecde5b83 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ecde5b83 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ecde5b83 Branch: refs/heads/master Commit: ecde5b837534b11d365fcab78089820990b815cf Parents: 83c226d Author: Marcelo Vanzin van...@cloudera.com Authored: Thu Jun 12 16:19:36 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Jun 12 16:19:36 2014 -0500 -- core/src/main/scala/org/apache/spark/SparkContext.scala| 2 +- .../scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala | 1 + .../scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala | 6 +++--- 3 files changed, 5 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 8bdaf0b..df15186 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -297,7 +297,7 @@ class SparkContext(config: SparkConf) extends Logging { // Set SPARK_USER for user who is running SparkContext. 
val sparkUser = Option { - Option(System.getProperty("user.name")).getOrElse(System.getenv("SPARK_USER")) + Option(System.getenv("SPARK_USER")).getOrElse(System.getProperty("user.name")) }.getOrElse { SparkContext.SPARK_UNKNOWN_USER } http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala index a3bd915..b6ecae1 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala @@ -271,6 +271,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp .asInstanceOf[FinishApplicationMasterRequest] finishReq.setAppAttemptId(appAttemptId) finishReq.setFinishApplicationStatus(status) +finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", "")) resourceManager.finishApplicationMaster(finishReq) } http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala -- diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala index 4f8854a..f71ad03 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala @@ -115,7 +115,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp val interval = math.min(timeoutInterval / 2, schedulerInterval) reporterThread = launchReporterThread(interval) - + // Wait for the reporter thread to Finish.
reporterThread.join() @@ -134,12 +134,12 @@ class ExecutorLauncher(args: ApplicationMasterArguments, conf: Configuration, sp // LOCAL_DIRS = 2.X, YARN_LOCAL_DIRS = 0.23.X
git commit: SPARK-1639. Tidy up some Spark on YARN code
Repository: spark Updated Branches: refs/heads/master 6e1193031 - 2a4225dd9 SPARK-1639. Tidy up some Spark on YARN code This contains a bunch of small tidyings of the Spark on YARN code. I focused on the yarn stable code. @tgravescs, let me know if you'd like me to make these for the alpha code as well. Author: Sandy Ryza sa...@cloudera.com Closes #561 from sryza/sandy-spark-1639 and squashes the following commits: 72b6a02 [Sandy Ryza] Fix comment and set name on driver thread c2190b2 [Sandy Ryza] SPARK-1639. Tidy up some Spark on YARN code Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a4225dd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a4225dd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a4225dd Branch: refs/heads/master Commit: 2a4225dd91d3f735625bb6bae6fca8fd06ca Parents: 6e11930 Author: Sandy Ryza sa...@cloudera.com Authored: Wed Jun 11 07:57:28 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Wed Jun 11 07:57:28 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 16 +- .../apache/spark/deploy/yarn/ClientBase.scala | 38 ++-- .../deploy/yarn/ExecutorRunnableUtil.scala | 28 +-- .../cluster/YarnClusterScheduler.scala | 10 +- .../spark/deploy/yarn/ApplicationMaster.scala | 197 +-- .../org/apache/spark/deploy/yarn/Client.scala | 10 +- .../spark/deploy/yarn/ExecutorLauncher.scala| 40 ++-- 7 files changed, 161 insertions(+), 178 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2a4225dd/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 8f0ecb8..1cc9c33 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -277,7 +277,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, yarnAllocator.allocateContainers( math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0)) ApplicationMaster.incrementAllocatorLoop(1) -Thread.sleep(100) +Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL) } } finally { // In case of exceptions, etc - ensure that count is at least ALLOCATOR_LOOP_WAIT_COUNT, @@ -416,6 +416,7 @@ object ApplicationMaster { // TODO: Currently, task to container is computed once (TaskSetManager) - which need not be // optimal as more containers are available. Might need to handle this better. private val ALLOCATOR_LOOP_WAIT_COUNT = 30 + private val ALLOCATE_HEARTBEAT_INTERVAL = 100 def incrementAllocatorLoop(by: Int) { val count = yarnAllocatorLoop.getAndAdd(by) @@ -467,13 +468,22 @@ object ApplicationMaster { }) } -// Wait for initialization to complete and atleast 'some' nodes can get allocated. +modified + } + + + /** + * Returns when we've either + * 1) received all the requested executors, + * 2) waited ALLOCATOR_LOOP_WAIT_COUNT * ALLOCATE_HEARTBEAT_INTERVAL ms, + * 3) hit an error that causes us to terminate trying to get containers. 
+ */ + def waitForInitialAllocations() { yarnAllocatorLoop.synchronized { while (yarnAllocatorLoop.get() = ALLOCATOR_LOOP_WAIT_COUNT) { yarnAllocatorLoop.wait(1000L) } } -modified } def main(argStrings: Array[String]) { http://git-wip-us.apache.org/repos/asf/spark/blob/2a4225dd/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 801e8b3..29a3568 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -19,7 +19,6 @@ package org.apache.spark.deploy.yarn import java.io.File import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} -import java.nio.ByteBuffer import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} @@ -37,7 +36,7 @@ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration -import
git commit: [SPARK-1978] In some cases, spark-yarn does not automatically restart the failed container
Repository: spark Updated Branches: refs/heads/master a9a461c59 - 884ca718b [SPARK-1978] In some cases, spark-yarn does not automatically restart the failed container Author: witgo wi...@qq.com Closes #921 from witgo/allocateExecutors and squashes the following commits: bc3aa66 [witgo] review commit 8800eba [witgo] Merge branch 'master' of https://github.com/apache/spark into allocateExecutors 32ac7af [witgo] review commit 056b8c7 [witgo] Merge branch 'master' of https://github.com/apache/spark into allocateExecutors 04c6f7e [witgo] Merge branch 'master' into allocateExecutors aff827c [witgo] review commit 5c376e0 [witgo] Merge branch 'master' of https://github.com/apache/spark into allocateExecutors 1faf4f4 [witgo] Merge branch 'master' into allocateExecutors 3c464bd [witgo] add time limit to allocateExecutors e00b656 [witgo] In some cases, yarn does not automatically restart the container Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/884ca718 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/884ca718 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/884ca718 Branch: refs/heads/master Commit: 884ca718b24f0bbe93358f2a366463b4e4d31f49 Parents: a9a461c Author: witgo wi...@qq.com Authored: Tue Jun 10 10:34:57 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Jun 10 10:34:57 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 39 +++- .../spark/deploy/yarn/ExecutorLauncher.scala| 22 ++- 2 files changed, 34 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/884ca718/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index c1dfe3f..33a60d9 100644 --- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -252,15 +252,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, try { logInfo("Allocating " + args.numExecutors + " executors.") // Wait until all containers have finished - // TODO: This is a bit ugly. Can we make it nicer? - // TODO: Handle container failure yarnAllocator.addResourceRequests(args.numExecutors) + yarnAllocator.allocateResources() // Exits the loop if the user thread exits. while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) { -if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) { - finishApplicationMaster(FinalApplicationStatus.FAILED, -"max number of executor failures reached") -} +checkNumExecutorsFailed() +allocateMissingExecutor() yarnAllocator.allocateResources() ApplicationMaster.incrementAllocatorLoop(1) Thread.sleep(100) @@ -289,23 +286,31 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } } + private def allocateMissingExecutor() { +val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning - + yarnAllocator.getNumPendingAllocate +if (missingExecutorCount > 0) { + logInfo("Allocating %d containers to make up for (potentially) lost containers. ".
+format(missingExecutorCount)) + yarnAllocator.addResourceRequests(missingExecutorCount) +} + } + + private def checkNumExecutorsFailed() { +if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) { + finishApplicationMaster(FinalApplicationStatus.FAILED, +"max number of executor failures reached") +} + } + private def launchReporterThread(_sleepTime: Long): Thread = { val sleepTime = if (_sleepTime <= 0) 0 else _sleepTime val t = new Thread { override def run() { while (userThread.isAlive) { - if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) { -finishApplicationMaster(FinalApplicationStatus.FAILED, - "max number of executor failures reached") - } - val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning - -yarnAllocator.getNumPendingAllocate - if (missingExecutorCount > 0) { -logInfo("Allocating %d containers to make up for (potentially) lost containers. ". - format(missingExecutorCount)) -yarnAllocator.addResourceRequests(missingExecutorCount) - } + checkNumExecutorsFailed() +
git commit: [SPARK-1522] : YARN ClientBase throws a NPE if there is no YARN Application CP
Repository: spark Updated Branches: refs/heads/master 6cf335d79 - e27344768 [SPARK-1522] : YARN ClientBase throws a NPE if there is no YARN Application CP The current implementation of ClientBase.getDefaultYarnApplicationClasspath inspects the MRJobConfig class for the field DEFAULT_YARN_APPLICATION_CLASSPATH when it should be really looking into YarnConfiguration. If the Application Configuration has no yarn.application.classpath defined a NPE exception will be thrown. Additional Changes include: * Test Suite for ClientBase added [ticket: SPARK-1522] : https://issues.apache.org/jira/browse/SPARK-1522 Author : bernardo.gomezpala...@gmail.com Testing : SPARK_HADOOP_VERSION=2.3.0 SPARK_YARN=true ./sbt/sbt test Author: Bernardo Gomez Palacio bernardo.gomezpala...@gmail.com Closes #433 from berngp/feature/SPARK-1522 and squashes the following commits: 2c2e118 [Bernardo Gomez Palacio] [SPARK-1522]: YARN ClientBase throws a NPE if there is no YARN Application specific CP Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2734476 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2734476 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2734476 Branch: refs/heads/master Commit: e273447684779a18bd61d733bfe7958b78657ffd Parents: 6cf335d Author: Bernardo Gomez Palacio bernardo.gomezpala...@gmail.com Authored: Mon Jun 9 16:14:54 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Mon Jun 9 16:14:54 2014 -0500 -- .../apache/spark/deploy/yarn/ClientBase.scala | 89 +-- .../spark/deploy/yarn/ClientBaseSuite.scala | 112 +++ 2 files changed, 167 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e2734476/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 
aeb3f00..4b5e0ef 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -23,6 +23,7 @@ import java.nio.ByteBuffer import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, ListBuffer, Map} +import scala.util.{Try, Success, Failure} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ @@ -378,7 +379,7 @@ trait ClientBase extends Logging { } } -object ClientBase { +object ClientBase extends Logging { val SPARK_JAR: String = __spark__.jar val APP_JAR: String = __app__.jar val LOG4J_PROP: String = log4j.properties @@ -388,37 +389,47 @@ object ClientBase { def getSparkJar = sys.env.get(SPARK_JAR).getOrElse(SparkContext.jarOfClass(this.getClass).head) - // Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps - def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) { -val classpathEntries = Option(conf.getStrings( - YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse( -getDefaultYarnApplicationClasspath()) -if (classpathEntries != null) { - for (c - classpathEntries) { -YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim, - File.pathSeparator) - } + def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) = { +val classPathElementsToAdd = getYarnAppClasspath(conf) ++ getMRAppClasspath(conf) +for (c - classPathElementsToAdd.flatten) { + YarnSparkHadoopUtil.addToEnvironment( +env, +Environment.CLASSPATH.name, +c.trim, +File.pathSeparator) } +classPathElementsToAdd + } -val mrClasspathEntries = Option(conf.getStrings( - mapreduce.application.classpath)).getOrElse( -getDefaultMRApplicationClasspath()) -if (mrClasspathEntries != null) { - for (c - mrClasspathEntries) { -YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim, - File.pathSeparator) - } -} + private def getYarnAppClasspath(conf: Configuration): 
Option[Seq[String]] = +Option(conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) match { + case Some(s) = Some(s.toSeq) + case None = getDefaultYarnApplicationClasspath } - def getDefaultYarnApplicationClasspath(): Array[String] = { -try { - val field = classOf[MRJobConfig].getField(DEFAULT_YARN_APPLICATION_CLASSPATH) - field.get(null).asInstanceOf[Array[String]] -} catch { - case err: NoSuchFieldError = null - case err: NoSuchFieldException = null + private def
git commit: SPARK-1557 Set permissions on event log files/directories
Repository: spark Updated Branches: refs/heads/master 9a1184a8a - 8db0f7e28 SPARK-1557 Set permissions on event log files/directories This adds minimal setting of event log directory/files permissions. To have a secure environment the user must manually create the top level event log directory and set permissions up. We can add logic to do that automatically later if we want. Author: Thomas Graves tgra...@apache.org Closes #538 from tgravescs/SPARK-1557 and squashes the following commits: e471d8e [Thomas Graves] rework d8b6620 [Thomas Graves] update use of octal 3ca9b79 [Thomas Graves] Updated based on comments 5a09709 [Thomas Graves] add in missing import 3150ed6 [Thomas Graves] SPARK-1557 Set permissions on event log files/directories Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8db0f7e2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8db0f7e2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8db0f7e2 Branch: refs/heads/master Commit: 8db0f7e28f5f0330a3344705ff48d8e7b97c383f Parents: 9a1184a Author: Thomas Graves tgra...@apache.org Authored: Tue Apr 29 09:19:48 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Tue Apr 29 09:19:48 2014 -0500 -- .../spark/scheduler/EventLoggingListener.scala | 6 +- .../org/apache/spark/util/FileLogger.scala | 22 +++- docs/security.md| 2 ++ 3 files changed, 24 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8db0f7e2/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 2fe65cd..d822a8e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -21,6 +21,7 @@ import scala.collection.mutable import 
org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.permission.FsPermission import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkConf, SparkContext} @@ -54,7 +55,7 @@ private[spark] class EventLoggingListener( private val logger = new FileLogger(logDir, conf, hadoopConfiguration, outputBufferSize, shouldCompress, - shouldOverwrite) + shouldOverwrite, Some(LOG_FILE_PERMISSIONS)) /** * Begin logging events. @@ -124,6 +125,9 @@ private[spark] object EventLoggingListener extends Logging { val SPARK_VERSION_PREFIX = "SPARK_VERSION_" val COMPRESSION_CODEC_PREFIX = "COMPRESSION_CODEC_" val APPLICATION_COMPLETE = "APPLICATION_COMPLETE" + val LOG_FILE_PERMISSIONS: FsPermission = +FsPermission.createImmutable(Integer.parseInt("770", 8).toShort) + // A cache for compression codecs to avoid creating the same codec many times private val codecMap = new mutable.HashMap[String, CompressionCodec] http://git-wip-us.apache.org/repos/asf/spark/blob/8db0f7e2/core/src/main/scala/org/apache/spark/util/FileLogger.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala index 1ed3b70..0965e0f 100644 --- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala @@ -24,6 +24,7 @@ import java.util.Date import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} +import org.apache.hadoop.fs.permission.FsPermission import org.apache.spark.{Logging, SparkConf} import org.apache.spark.io.CompressionCodec @@ -42,7 +43,8 @@ private[spark] class FileLogger( hadoopConfiguration: Configuration, outputBufferSize: Int = 8 * 1024, // 8 KB compress: Boolean = false, -overwrite: Boolean = true) +overwrite: Boolean = true, +dirPermissions: Option[FsPermission] = None) extends Logging { private val dateFormat = new 
ThreadLocal[SimpleDateFormat]() { @@ -79,16 +81,25 @@ private[spark] class FileLogger( if (!fileSystem.mkdirs(path)) { throw new IOException(Error in creating log directory: %s.format(logDir)) } +if (dirPermissions.isDefined) { + val fsStatus = fileSystem.getFileStatus(path) + if (fsStatus.getPermission().toShort() != dirPermissions.get.toShort) { +fileSystem.setPermission(path, dirPermissions.get) + } +} } /** * Create a new writer for the file identified by the given path. + * If
svn commit: r1588283 - in /hadoop/common/branches/branch-2/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
Author: tgraves Date: Thu Apr 17 15:25:27 2014 New Revision: 1588283 URL: http://svn.apache.org/r1588283 Log: Merge 1588281 to branch-2. YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility with previous releases (Sandy Ryza via tgraves) Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1588283r1=1588282r2=1588283view=diff == --- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Thu Apr 17 15:25:27 2014 @@ -114,6 +114,9 @@ Release 2.4.1 - UNRELEASED YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling Disconnected event from ZK. (Karthik Kambatla via jianhe) +YARN-1931. 
Private API change in YARN-1824 in 2.4 broke compatibility +with previous releases (Sandy Ryza via tgraves) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588283r1=1588282r2=1588283view=diff == --- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java (original) +++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Thu Apr 17 15:25:27 2014 @@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; /** - * Yarn application related utilities + * Yarn internal application-related utilities */ @Private public class Apps { @@ -97,6 +97,17 @@ public class Apps { } } } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. + */ + @Deprecated + public static void setEnvFromInputString(MapString, String env, + String envString) { +setEnvFromInputString(env, envString, File.pathSeparator); + } @Public @Unstable @@ -112,6 +123,18 @@ public class Apps { environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(val)); } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. 
+ */ + @Deprecated + public static void addToEnvironment( + MapString, String environment, + String variable, String value) { +addToEnvironment(environment, variable, value, File.pathSeparator); + } public static String crossPlatformify(String var) { return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var
svn commit: r1588281 - in /hadoop/common/trunk/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
Author: tgraves Date: Thu Apr 17 15:23:22 2014 New Revision: 1588281 URL: http://svn.apache.org/r1588281 Log: YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility with previous releases (Sandy Ryza via tgraves) Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1588281r1=1588280r2=1588281view=diff == --- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Thu Apr 17 15:23:22 2014 @@ -129,6 +129,9 @@ Release 2.4.1 - UNRELEASED YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling Disconnected event from ZK. (Karthik Kambatla via jianhe) +YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility +with previous releases (Sandy Ryza via tgraves) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES Modified: hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588281r1=1588280r2=1588281view=diff == --- hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java (original) +++ hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Thu Apr 17 15:23:22 2014 @@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; /** - * Yarn application related utilities + * Yarn internal application-related utilities */ @Private public class Apps { @@ -97,6 +97,17 @@ public class 
Apps { } } } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. + */ + @Deprecated + public static void setEnvFromInputString(MapString, String env, + String envString) { +setEnvFromInputString(env, envString, File.pathSeparator); + } @Public @Unstable @@ -112,6 +123,18 @@ public class Apps { environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(val)); } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. + */ + @Deprecated + public static void addToEnvironment( + MapString, String environment, + String variable, String value) { +addToEnvironment(environment, variable, value, File.pathSeparator); + } public static String crossPlatformify(String var) { return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var
svn commit: r1588287 - in /hadoop/common/branches/branch-2.4/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
Author: tgraves Date: Thu Apr 17 15:31:58 2014 New Revision: 1588287 URL: http://svn.apache.org/r1588287 Log: Merge 1588281 to branch-2.4. YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility with previous releases (Sandy Ryza via tgraves) Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt?rev=1588287r1=1588286r2=1588287view=diff == --- hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt Thu Apr 17 15:31:58 2014 @@ -61,6 +61,9 @@ Release 2.4.1 - UNRELEASED YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling Disconnected event from ZK. (Karthik Kambatla via jianhe) +YARN-1931. 
Private API change in YARN-1824 in 2.4 broke compatibility +with previous releases (Sandy Ryza via tgraves) + Release 2.4.0 - 2014-04-07 INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588287r1=1588286r2=1588287view=diff == --- hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java (original) +++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java Thu Apr 17 15:31:58 2014 @@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; /** - * Yarn application related utilities + * Yarn internal application-related utilities */ @Private public class Apps { @@ -97,6 +97,17 @@ public class Apps { } } } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. + */ + @Deprecated + public static void setEnvFromInputString(MapString, String env, + String envString) { +setEnvFromInputString(env, envString, File.pathSeparator); + } @Public @Unstable @@ -112,6 +123,18 @@ public class Apps { environment.put(StringInterner.weakIntern(variable), StringInterner.weakIntern(val)); } + + /** + * This older version of this method is kept around for compatibility + * because downstream frameworks like Spark and Tez have been using it. + * Downstream frameworks are expected to move off of it. 
+ */ + @Deprecated + public static void addToEnvironment( + MapString, String environment, + String variable, String value) { +addToEnvironment(environment, variable, value, File.pathSeparator); + } public static String crossPlatformify(String var) { return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var
git commit: [SPARK-1395] Allow local: URIs to work on Yarn.
Repository: spark Updated Branches: refs/heads/branch-1.0 b3ad707c4 - a83a794f1 [SPARK-1395] Allow local: URIs to work on Yarn. This only works for the three paths defined in the environment (SPARK_JAR, SPARK_YARN_APP_JAR and SPARK_LOG4J_CONF). Tested by running SparkPi with local: and file: URIs against Yarn cluster (no upload shows up in logs in the local case). Author: Marcelo Vanzin van...@cloudera.com Closes #303 from vanzin/yarn-local and squashes the following commits: 82219c1 [Marcelo Vanzin] [SPARK-1395] Allow local: URIs to work on Yarn. (cherry picked from commit 69047506bf97e6e37e4079c87cb0327d3760ac41) Signed-off-by: Thomas Graves tgra...@apache.org Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a83a794f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a83a794f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a83a794f Branch: refs/heads/branch-1.0 Commit: a83a794f1accc616cfccde78af44c5cbf066c647 Parents: b3ad707 Author: Marcelo Vanzin van...@cloudera.com Authored: Thu Apr 17 10:29:38 2014 -0500 Committer: Thomas Graves tgra...@apache.org Committed: Thu Apr 17 10:33:15 2014 -0500 -- .../org/apache/spark/deploy/SparkSubmit.scala | 4 +- .../spark/deploy/yarn/ExecutorRunnable.scala| 2 +- .../apache/spark/deploy/yarn/ClientBase.scala | 190 +-- .../deploy/yarn/ExecutorRunnableUtil.scala | 17 +- .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 6 +- .../spark/deploy/yarn/ExecutorRunnable.scala| 2 +- 6 files changed, 142 insertions(+), 79 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index e05fbfe..e5d593c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
@@ -18,7 +18,7 @@ package org.apache.spark.deploy import java.io.{PrintStream, File} -import java.net.URL +import java.net.{URI, URL} import org.apache.spark.executor.ExecutorURLClassLoader @@ -216,7 +216,7 @@ object SparkSubmit { } private def addJarToClasspath(localJar: String, loader: ExecutorURLClassLoader) { -val localJarFile = new File(localJar) +val localJarFile = new File(new URI(localJar).getPath()) if (!localJarFile.exists()) { printWarning(sJar $localJar does not exist, skipping.) } http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 3469b7d..7dae248 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -82,7 +82,7 @@ class ExecutorRunnable( ctx.setContainerTokens(ByteBuffer.wrap(dob.getData())) val commands = prepareCommand(masterAddress, slaveId, hostname, executorMemory, executorCores, - localResources.contains(ClientBase.LOG4J_PROP)) + localResources) logInfo(Setting up executor with commands: + commands) ctx.setCommands(commands) http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala index 628dd98..566de71 100644 --- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala +++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.yarn import java.io.File -import java.net.{InetAddress, UnknownHostException, URI} +import java.net.{InetAddress, 
UnknownHostException, URI, URISyntaxException} import java.nio.ByteBuffer import scala.collection.JavaConversions._ @@ -209,53 +209,35 @@ trait ClientBase extends Logging { Map( ClientBase.SPARK_JAR - System.getenv(SPARK_JAR), ClientBase.APP_JAR - args.userJar, - ClientBase.LOG4J_PROP - System.getenv(SPARK_LOG4J_CONF) + ClientBase.LOG4J_PROP - System.getenv(ClientBase.LOG4J_CONF_ENV_KEY) ).foreach { case(destName, _localPath) = val localPath: String = if