spark git commit: [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar'

2015-10-13 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 2217f4f8b -> 47bc6c0fa


[SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 
'spark-submit --jars hdfs://user/foo.jar'

When spark.yarn.user.classpath.first=true and using 'spark-submit --jars
hdfs://user/foo.jar', foo.jar does not get put on the system classpath, so we need
to add YARN's link names for the jars to the system classpath. vanzin tgravescs
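
For context, a hedged sketch (not the patched Client.scala itself) of how the classpath link name for a distributed jar can be derived, assuming the usual java.net.URI and Hadoop Path semantics: a URI fragment such as "#renamed.jar" overrides the link name, otherwise the last path component is used.

import java.net.URI
import org.apache.hadoop.fs.Path

// Hedged sketch: derive the link name a --jars entry gets in the container's
// working directory, mirroring the idea of this patch.
def linkNameFor(uriString: String): String = {
  val uri = new URI(uriString)
  // Fragment wins if present; otherwise fall back to the file name of the path.
  Option(uri.getFragment).getOrElse(new Path(uri).getName)
}

println(linkNameFor("hdfs://nn:8020/user/foo.jar"))              // foo.jar
println(linkNameFor("hdfs://nn:8020/user/foo.jar#renamed.jar"))  // renamed.jar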

Author: Lianhui Wang 

Closes #9045 from lianhuiwang/spark-11026.

(cherry picked from commit 626aab79c9b4d4ac9d65bf5fa45b81dd9cbc609c)
Signed-off-by: Tom Graves 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47bc6c0f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47bc6c0f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47bc6c0f

Branch: refs/heads/branch-1.5
Commit: 47bc6c0fa3cfbd92bb4470240b0c97040217f370
Parents: 2217f4f
Author: Lianhui Wang 
Authored: Tue Oct 13 08:29:47 2015 -0500
Committer: Tom Graves 
Committed: Tue Oct 13 08:31:00 2015 -0500

--
 .../org/apache/spark/deploy/yarn/Client.scala   | 23 +---
 1 file changed, 15 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/47bc6c0f/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index f2e1c2b..f21f5ef 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1144,7 +1144,7 @@ object Client extends Logging {
 } else {
   getMainJarUri(sparkConf.getOption(CONF_SPARK_USER_JAR))
 }
-  mainJar.foreach(addFileToClasspath(sparkConf, _, APP_JAR, env))
+  mainJar.foreach(addFileToClasspath(sparkConf, conf, _, APP_JAR, env))
 
   val secondaryJars =
 if (args != null) {
@@ -1153,10 +1153,10 @@ object Client extends Logging {
   
getSecondaryJarUris(sparkConf.getOption(CONF_SPARK_YARN_SECONDARY_JARS))
 }
   secondaryJars.foreach { x =>
-addFileToClasspath(sparkConf, x, null, env)
+addFileToClasspath(sparkConf, conf, x, null, env)
   }
 }
-    addFileToClasspath(sparkConf, new URI(sparkJar(sparkConf)), SPARK_JAR, env)
+    addFileToClasspath(sparkConf, conf, new URI(sparkJar(sparkConf)), SPARK_JAR, env)
 populateHadoopClasspath(conf, env)
 sys.env.get(ENV_DIST_CLASSPATH).foreach { cp =>
   addClasspathEntry(getClusterPath(sparkConf, cp), env)
@@ -1191,15 +1191,17 @@ object Client extends Logging {
   * If an alternate name for the file is given, and it's not a "local:" file, the alternate
   * name will be added to the classpath (relative to the job's work directory).
   *
-   * If not a "local:" file and no alternate name, the environment is not modified.
+   * If not a "local:" file and no alternate name, the linkName will be added to the classpath.
   *
-   * @param conf      Spark configuration.
-   * @param uri       URI to add to classpath (optional).
-   * @param fileName  Alternate name for the file (optional).
-   * @param env       Map holding the environment variables.
+   * @param conf        Spark configuration.
+   * @param hadoopConf  Hadoop configuration.
+   * @param uri         URI to add to classpath (optional).
+   * @param fileName    Alternate name for the file (optional).
+   * @param env         Map holding the environment variables.
   */
  private def addFileToClasspath(
      conf: SparkConf,
+      hadoopConf: Configuration,
      uri: URI,
      fileName: String,
      env: HashMap[String, String]): Unit = {
@@ -1208,6 +1210,11 @@ object Client extends Logging {
 } else if (fileName != null) {
   addClasspathEntry(buildPath(
 YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), fileName), env)
+} else if (uri != null) {
+  val localPath = getQualifiedLocalPath(uri, hadoopConf)
+  val linkName = Option(uri.getFragment()).getOrElse(localPath.getName())
+  addClasspathEntry(buildPath(
+YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), linkName), env)
 }
   }
 





spark git commit: [SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 'spark-submit --jars hdfs://user/foo.jar'

2015-10-13 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c4da5345a -> 626aab79c


[SPARK-11026] [YARN] spark.yarn.user.classpath.first does work for 
'spark-submit --jars hdfs://user/foo.jar'

When spark.yarn.user.classpath.first=true and using 'spark-submit --jars
hdfs://user/foo.jar', foo.jar does not get put on the system classpath, so we need
to add YARN's link names for the jars to the system classpath. vanzin tgravescs

Author: Lianhui Wang 

Closes #9045 from lianhuiwang/spark-11026.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/626aab79
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/626aab79
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/626aab79

Branch: refs/heads/master
Commit: 626aab79c9b4d4ac9d65bf5fa45b81dd9cbc609c
Parents: c4da534
Author: Lianhui Wang 
Authored: Tue Oct 13 08:29:47 2015 -0500
Committer: Tom Graves 
Committed: Tue Oct 13 08:29:47 2015 -0500

--
 .../org/apache/spark/deploy/yarn/Client.scala   | 23 +---
 1 file changed, 15 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/626aab79/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index d25d830..9fcfe36 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -1212,7 +1212,7 @@ object Client extends Logging {
 } else {
   getMainJarUri(sparkConf.getOption(CONF_SPARK_USER_JAR))
 }
-  mainJar.foreach(addFileToClasspath(sparkConf, _, APP_JAR, env))
+  mainJar.foreach(addFileToClasspath(sparkConf, conf, _, APP_JAR, env))
 
   val secondaryJars =
 if (args != null) {
@@ -1221,10 +1221,10 @@ object Client extends Logging {
   
getSecondaryJarUris(sparkConf.getOption(CONF_SPARK_YARN_SECONDARY_JARS))
 }
   secondaryJars.foreach { x =>
-addFileToClasspath(sparkConf, x, null, env)
+addFileToClasspath(sparkConf, conf, x, null, env)
   }
 }
-    addFileToClasspath(sparkConf, new URI(sparkJar(sparkConf)), SPARK_JAR, env)
+    addFileToClasspath(sparkConf, conf, new URI(sparkJar(sparkConf)), SPARK_JAR, env)
 populateHadoopClasspath(conf, env)
 sys.env.get(ENV_DIST_CLASSPATH).foreach { cp =>
   addClasspathEntry(getClusterPath(sparkConf, cp), env)
@@ -1259,15 +1259,17 @@ object Client extends Logging {
   * If an alternate name for the file is given, and it's not a "local:" file, the alternate
   * name will be added to the classpath (relative to the job's work directory).
   *
-   * If not a "local:" file and no alternate name, the environment is not modified.
+   * If not a "local:" file and no alternate name, the linkName will be added to the classpath.
   *
-   * @param conf      Spark configuration.
-   * @param uri       URI to add to classpath (optional).
-   * @param fileName  Alternate name for the file (optional).
-   * @param env       Map holding the environment variables.
+   * @param conf        Spark configuration.
+   * @param hadoopConf  Hadoop configuration.
+   * @param uri         URI to add to classpath (optional).
+   * @param fileName    Alternate name for the file (optional).
+   * @param env         Map holding the environment variables.
   */
  private def addFileToClasspath(
      conf: SparkConf,
+      hadoopConf: Configuration,
      uri: URI,
      fileName: String,
      env: HashMap[String, String]): Unit = {
@@ -1276,6 +1278,11 @@ object Client extends Logging {
 } else if (fileName != null) {
   addClasspathEntry(buildPath(
 YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), fileName), env)
+} else if (uri != null) {
+  val localPath = getQualifiedLocalPath(uri, hadoopConf)
+  val linkName = Option(uri.getFragment()).getOrElse(localPath.getName())
+  addClasspathEntry(buildPath(
+YarnSparkHadoopUtil.expandEnvironment(Environment.PWD), linkName), env)
 }
   }
 





spark git commit: [SPARK-9439] [YARN] External shuffle service robust to NM restarts using leveldb

2015-08-21 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master bb220f657 -> 708036c1d


[SPARK-9439] [YARN] External shuffle service robust to NM restarts using leveldb

https://issues.apache.org/jira/browse/SPARK-9439

In general, Yarn apps should be robust to NodeManager restarts.  However, if 
you run spark with the external shuffle service on, after a NM restart all 
shuffles fail, b/c the shuffle service has lost some state with info on each 
executor.  (Note the shuffle data is perfectly fine on disk across a NM 
restart, the problem is we've lost the small bit of state that lets us *find* 
those files.)

The solution proposed here is that the external shuffle service can write out 
its state to leveldb (backed by a local file) every time an executor is added.  
When running with yarn, that file is in the NM's local dir.  Whenever the 
service is started, it looks for that file, and if it exists, it reads the file 
and re-registers all executors there.

Nothing is changed in non-yarn modes with this patch.  The service is not given 
a place to save the state to, so it operates the same as before.  This should 
make it easy to update other cluster managers as well, by just supplying the 
right file & the equivalent of yarn's `initializeApplication` -- I'm not 
familiar enough with those modes to know how to do that.
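
For illustration, a minimal Scala sketch of that idea (not the actual shuffle-service code, which lives in the Java network/shuffle module): write one leveldb record per registered executor and replay the records when the service starts. It assumes the leveldbjni/iq80 leveldb API that the patch pulls in; the key layout and JSON payload here are simplified stand-ins.

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import org.fusesource.leveldbjni.JniDBFactory.factory
import org.iq80.leveldb.{DB, Options}

// Hedged sketch: persist executor registrations across a NodeManager restart.
val db: DB = factory.open(new File("/tmp/registeredExecutors.ldb"),
  new Options().createIfMissing(true))

// Called whenever an executor registers: one record per executor.
def saveExecutor(appId: String, execId: String, shuffleInfoJson: String): Unit =
  db.put(s"$appId;$execId".getBytes(UTF_8), shuffleInfoJson.getBytes(UTF_8))

// Called on service start: replay every saved record and re-register it.
def reloadExecutors(register: (String, String, String) => Unit): Unit = {
  val it = db.iterator()
  it.seekToFirst()
  while (it.hasNext) {
    val entry = it.next()
    val Array(appId, execId) = new String(entry.getKey, UTF_8).split(";", 2)
    register(appId, execId, new String(entry.getValue, UTF_8))
  }
  it.close()
}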

Author: Imran Rashid iras...@cloudera.com

Closes #7943 from squito/leveldb_external_shuffle_service_NM_restart and 
squashes the following commits:

0d285d3 [Imran Rashid] review feedback
70951d6 [Imran Rashid] Merge branch 'master' into 
leveldb_external_shuffle_service_NM_restart
5c71c8c [Imran Rashid] save executor to db before registering; style
2499c8c [Imran Rashid] explicit dependency on jackson-annotations
795d28f [Imran Rashid] review feedback
81f80e2 [Imran Rashid] Merge branch 'master' into 
leveldb_external_shuffle_service_NM_restart
594d520 [Imran Rashid] use json to serialize application executor info
1a7980b [Imran Rashid] version
8267d2a [Imran Rashid] style
e9f99e8 [Imran Rashid] cleanup the handling of bad dbs a little
9378ba3 [Imran Rashid] fail gracefully on corrupt leveldb files
acedb62 [Imran Rashid] switch to writing out one record per executor
79922b7 [Imran Rashid] rely on yarn to call stopApplication; assorted cleanup
12b6a35 [Imran Rashid] save registered executors when apps are removed; add 
tests
c878fbe [Imran Rashid] better explanation of shuffle service port handling
694934c [Imran Rashid] only open leveldb connection once per service
d596410 [Imran Rashid] store executor data in leveldb
59800b7 [Imran Rashid] Files.move in case renaming is unsupported
32fe5ae [Imran Rashid] Merge branch 'master' into 
external_shuffle_service_NM_restart
d7450f0 [Imran Rashid] style
f729e2b [Imran Rashid] debugging
4492835 [Imran Rashid] lol, dont use a PrintWriter b/c of scalastyle checks
0a39b98 [Imran Rashid] Merge branch 'master' into 
external_shuffle_service_NM_restart
55f49fc [Imran Rashid] make sure the service doesnt die if the registered 
executor file is corrupt; add tests
245db19 [Imran Rashid] style
62586a6 [Imran Rashid] just serialize the whole executors map
bdbbf0d [Imran Rashid] comments, remove some unnecessary changes
857331a [Imran Rashid] better tests & comments
bb9d1e6 [Imran Rashid] formatting
bdc4b32 [Imran Rashid] rename
86e0cb9 [Imran Rashid] for tests, shuffle service finds an open port
23994ff [Imran Rashid] style
7504de8 [Imran Rashid] style
a36729c [Imran Rashid] cleanup
efb6195 [Imran Rashid] proper unit test, and no longer leak if apps stop during 
NM restart
dd93dc0 [Imran Rashid] test for shuffle service w/ NM restarts
d596969 [Imran Rashid] cleanup imports
0e9d69b [Imran Rashid] better names
9eae119 [Imran Rashid] cleanup lots of duplication
1136f44 [Imran Rashid] test needs to have an actual shuffle
0b588bd [Imran Rashid] more fixes ...
ad122ef [Imran Rashid] more fixes
5e5a7c3 [Imran Rashid] fix build
c69f46b [Imran Rashid] maybe working version, needs tests & cleanup ...
bb3ba49 [Imran Rashid] minor cleanup
36127d3 [Imran Rashid] wip
b9d2ced [Imran Rashid] incomplete setup for external shuffle service tests


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/708036c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/708036c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/708036c1

Branch: refs/heads/master
Commit: 708036c1de52d674ceff30ac465e1dcedeb8dde8
Parents: bb220f6
Author: Imran Rashid iras...@cloudera.com
Authored: Fri Aug 21 08:41:36 2015 -0500
Committer: Tom Graves tgra...@yahoo-inc.com
Committed: Fri Aug 21 08:41:36 2015 -0500

--
 .../spark/deploy/ExternalShuffleService.scala   |   2 +-
 .../mesos/MesosExternalShuffleService.scala |   2 +-
 .../org/apache/spark/storage/BlockManager.scala |  14 +-
 .../spark/ExternalShuffleServiceSuite.scala |   2 +-
 network/shuffle/pom.xml 

spark git commit: [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled

2015-07-27 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 2b1973dd2 -> a671dad62


[SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is 
enabled

Some users may not be aware that the logs are available on the Web UI even if YARN 
log aggregation is enabled. Update the doc to make this clear and explain what 
needs to be configured.

Author: Carson Wang carson.w...@intel.com

Closes #7463 from carsonwang/YarnLogDoc and squashes the following commits:

274c054 [Carson Wang] Minor text fix
74df3a1 [Carson Wang] address comments
5a95046 [Carson Wang] Update the text in the doc
e5775c1 [Carson Wang] Update doc about how to view the logs on Web UI when yarn 
log aggregation is enabled

(cherry picked from commit 622838165756e9669cbf7af13eccbc719638f40b)
Signed-off-by: Tom Graves tgra...@yahoo-inc.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a671dad6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a671dad6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a671dad6

Branch: refs/heads/branch-1.4
Commit: a671dad62362b129ae23c4c8947eaa6efa134e9f
Parents: 2b1973d
Author: Carson Wang carson.w...@intel.com
Authored: Mon Jul 27 08:02:40 2015 -0500
Committer: Tom Graves tgra...@yahoo-inc.com
Committed: Mon Jul 27 08:03:15 2015 -0500

--
 docs/running-on-yarn.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a671dad6/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 07b30bf..5290b21 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -68,9 +68,9 @@ In YARN terminology, executors and application masters run 
inside containers.
 
    yarn logs -applicationId <app ID>
 
-will print out the contents of all log files from all containers from the 
given application. You can also view the container log files directly in HDFS 
using the HDFS shell or API. The directory where they are located can be found 
by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and 
`yarn.nodemanager.remote-app-log-dir-suffix`).
+will print out the contents of all log files from all containers from the 
given application. You can also view the container log files directly in HDFS 
using the HDFS shell or API. The directory where they are located can be found 
by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and 
`yarn.nodemanager.remote-app-log-dir-suffix`). The logs are also available on 
the Spark Web UI under the Executors Tab. You need to have both the Spark 
history server and the MapReduce history server running and configure 
`yarn.log.server.url` in `yarn-site.xml` properly. The log URL on the Spark 
history server UI will redirect you to the MapReduce history server to show the 
aggregated logs.
 
-When log aggregation isn't turned on, logs are retained locally on each 
machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` 
or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and 
installation. Viewing logs for a container requires going to the host that 
contains them and looking in this directory.  Subdirectories organize log files 
by application ID and container ID.
+When log aggregation isn't turned on, logs are retained locally on each 
machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` 
or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and 
installation. Viewing logs for a container requires going to the host that 
contains them and looking in this directory.  Subdirectories organize log files 
by application ID and container ID. The logs are also available on the Spark 
Web UI under the Executors Tab and doesn't require running the MapReduce 
history server.
 
 To review per-container launch environment, increase 
`yarn.nodemanager.delete.debug-delay-sec` to a
 large value (e.g. 36000), and then access the application cache through 
`yarn.nodemanager.local-dirs`





spark git commit: [SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is enabled

2015-07-27 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 72981bc8f -> 622838165


[SPARK-8405] [DOC] Add how to view logs on Web UI when yarn log aggregation is 
enabled

Some users may not be aware that the logs are available on the Web UI even if YARN 
log aggregation is enabled. Update the doc to make this clear and explain what 
needs to be configured.

Author: Carson Wang carson.w...@intel.com

Closes #7463 from carsonwang/YarnLogDoc and squashes the following commits:

274c054 [Carson Wang] Minor text fix
74df3a1 [Carson Wang] address comments
5a95046 [Carson Wang] Update the text in the doc
e5775c1 [Carson Wang] Update doc about how to view the logs on Web UI when yarn 
log aggregation is enabled


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62283816
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62283816
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62283816

Branch: refs/heads/master
Commit: 622838165756e9669cbf7af13eccbc719638f40b
Parents: 72981bc
Author: Carson Wang carson.w...@intel.com
Authored: Mon Jul 27 08:02:40 2015 -0500
Committer: Tom Graves tgra...@yahoo-inc.com
Committed: Mon Jul 27 08:02:40 2015 -0500

--
 docs/running-on-yarn.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/62283816/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index de22ab5..cac08a9 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -68,9 +68,9 @@ In YARN terminology, executors and application masters run 
inside containers.
 
    yarn logs -applicationId <app ID>
 
-will print out the contents of all log files from all containers from the 
given application. You can also view the container log files directly in HDFS 
using the HDFS shell or API. The directory where they are located can be found 
by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and 
`yarn.nodemanager.remote-app-log-dir-suffix`).
+will print out the contents of all log files from all containers from the 
given application. You can also view the container log files directly in HDFS 
using the HDFS shell or API. The directory where they are located can be found 
by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and 
`yarn.nodemanager.remote-app-log-dir-suffix`). The logs are also available on 
the Spark Web UI under the Executors Tab. You need to have both the Spark 
history server and the MapReduce history server running and configure 
`yarn.log.server.url` in `yarn-site.xml` properly. The log URL on the Spark 
history server UI will redirect you to the MapReduce history server to show the 
aggregated logs.
 
-When log aggregation isn't turned on, logs are retained locally on each 
machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` 
or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and 
installation. Viewing logs for a container requires going to the host that 
contains them and looking in this directory.  Subdirectories organize log files 
by application ID and container ID.
+When log aggregation isn't turned on, logs are retained locally on each 
machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` 
or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and 
installation. Viewing logs for a container requires going to the host that 
contains them and looking in this directory.  Subdirectories organize log files 
by application ID and container ID. The logs are also available on the Spark 
Web UI under the Executors Tab and doesn't require running the MapReduce 
history server.
 
 To review per-container launch environment, increase 
`yarn.nodemanager.delete.debug-delay-sec` to a
 large value (e.g. 36000), and then access the application cache through 
`yarn.nodemanager.local-dirs`





spark git commit: [SPARK-8851] [YARN] In Client mode, make sure the client logs in and updates tokens

2015-07-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master ec8973d12 -> c043a3e9d


[SPARK-8851] [YARN] In Client mode, make sure the client logs in and updates 
tokens

On the client side, the flow is SparkSubmit -> SparkContext -> yarn/Client. Since 
the yarn client only gets a cloned config and the staging dir is set here, it 
is not really possible to do re-logins in the SparkContext. So, do the initial 
logins in SparkSubmit and do re-logins as we do now in the AM, but the Client 
behaves like an executor in this specific context and reads the credentials 
file to update the tokens. This way, even if the streaming context is started 
up from a checkpoint, it is fine since we have logged in from SparkSubmit itself.
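
A hedged sketch of that client-side login step (not the actual SparkSubmit/Client code): if a principal and keytab are configured, log in through Hadoop's UserGroupInformation before the YARN client is created, so later HDFS access and token updates keep working, e.g. when a streaming app is restarted from a checkpoint. The config key names follow the spark.yarn.* convention used elsewhere in this thread.

import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.SparkConf

// Hedged sketch: perform the initial Kerberos login in the submitting process.
def loginFromKeytabIfConfigured(conf: SparkConf): Unit = {
  for {
    principal <- conf.getOption("spark.yarn.principal")
    keytab    <- conf.getOption("spark.yarn.keytab")
  } {
    // Replaces the current login user with one backed by the keytab, which
    // can then re-login automatically when its tickets expire.
    UserGroupInformation.loginUserFromKeytab(principal, keytab)
  }
}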

Author: Hari Shreedharan hshreedha...@apache.org

Closes #7394 from harishreedharan/yarn-client-login and squashes the following 
commits:

9a2166f [Hari Shreedharan] make it possible to use command line args and config 
parameters together.
de08f57 [Hari Shreedharan] Fix import order.
5c4fa63 [Hari Shreedharan] Add a comment explaining what is being done in 
YarnClientSchedulerBackend.
c872caa [Hari Shreedharan] Fix typo in log message.
2c80540 [Hari Shreedharan] Move token renewal to YarnClientSchedulerBackend.
0c48ac2 [Hari Shreedharan] Remove direct use of ExecutorDelegationTokenUpdater 
in Client.
26f8bfa [Hari Shreedharan] [SPARK-8851][YARN] In Client mode, make sure the 
client logs in and updates tokens.
58b1969 [Hari Shreedharan] Simple attempt 1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c043a3e9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c043a3e9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c043a3e9

Branch: refs/heads/master
Commit: c043a3e9df55721f21332f7c44ff351832d20324
Parents: ec8973d
Author: Hari Shreedharan hshreedha...@apache.org
Authored: Fri Jul 17 09:38:08 2015 -0500
Committer: Tom Graves tgra...@yahoo-inc.com
Committed: Fri Jul 17 09:38:08 2015 -0500

--
 .../apache/spark/deploy/SparkHadoopUtil.scala   | 29 +++---
 .../org/apache/spark/deploy/SparkSubmit.scala   | 10 --
 .../org/apache/spark/deploy/yarn/Client.scala   | 32 +---
 .../cluster/YarnClientSchedulerBackend.scala| 11 +--
 4 files changed, 56 insertions(+), 26 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c043a3e9/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 9f94118..6b14d40 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -25,6 +25,7 @@ import java.util.{Arrays, Comparator}
 import scala.collection.JavaConversions._
 import scala.concurrent.duration._
 import scala.language.postfixOps
+import scala.util.control.NonFatal
 
 import com.google.common.primitives.Longs
 import org.apache.hadoop.conf.Configuration
@@ -248,19 +249,25 @@ class SparkHadoopUtil extends Logging {
   dir: Path,
   prefix: String,
   exclusionSuffix: String): Array[FileStatus] = {
-val fileStatuses = remoteFs.listStatus(dir,
-  new PathFilter {
-override def accept(path: Path): Boolean = {
-  val name = path.getName
-  name.startsWith(prefix) && !name.endsWith(exclusionSuffix)
+try {
+  val fileStatuses = remoteFs.listStatus(dir,
+new PathFilter {
+  override def accept(path: Path): Boolean = {
+val name = path.getName
+name.startsWith(prefix) && !name.endsWith(exclusionSuffix)
+  }
+})
+  Arrays.sort(fileStatuses, new Comparator[FileStatus] {
+override def compare(o1: FileStatus, o2: FileStatus): Int = {
+  Longs.compare(o1.getModificationTime, o2.getModificationTime)
 }
   })
-Arrays.sort(fileStatuses, new Comparator[FileStatus] {
-  override def compare(o1: FileStatus, o2: FileStatus): Int = {
-Longs.compare(o1.getModificationTime, o2.getModificationTime)
-  }
-})
-fileStatuses
+  fileStatuses
+} catch {
+  case NonFatal(e) =>
+    logWarning("Error while attempting to list files from application staging dir", e)
+    Array.empty
+}
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/c043a3e9/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 036cb6e..0b39ee8 100644
--- 

spark git commit: [SPARK-8574] org/apache/spark/unsafe doesn't honor the java source/ta…

2015-06-25 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 74001db04 -> 13802163d


[SPARK-8574] org/apache/spark/unsafe doesn't honor the java source/ta…

…rget versions.

I basically copied the compatibility rules from the top level pom.xml into 
here.  Someone more familiar with all the options in the top level pom may want 
to make sure nothing else should be copied on down.

With this it allows me to build with jdk8 and run with lower versions. The source 
shows it compiled for jdk6 as it's supposed to.

Author: Tom Graves tgra...@yahoo-inc.com
Author: Thomas Graves tgra...@staydecay.corp.gq1.yahoo.com

Closes #6989 from tgravescs/SPARK-8574 and squashes the following commits:

e1ea2d4 [Thomas Graves] Change to use combine.children=append
150d645 [Tom Graves] [SPARK-8574] org/apache/spark/unsafe doesn't honor the 
java source/target versions

(cherry picked from commit e988adb58f02d06065837f3d79eee220f6558def)
Signed-off-by: Tom Graves tgra...@yahoo-inc.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/13802163
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/13802163
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/13802163

Branch: refs/heads/branch-1.4
Commit: 13802163deb39fbffa73d72aa15568b6f2223fa6
Parents: 74001db
Author: Tom Graves tgra...@yahoo-inc.com
Authored: Thu Jun 25 08:27:08 2015 -0500
Committer: Tom Graves tgra...@yahoo-inc.com
Committed: Thu Jun 25 08:27:56 2015 -0500

--
 unsafe/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/13802163/unsafe/pom.xml
--
diff --git a/unsafe/pom.xml b/unsafe/pom.xml
index 7298765..fa5085e 100644
--- a/unsafe/pom.xml
+++ b/unsafe/pom.xml
@@ -80,7 +80,7 @@
       <groupId>net.alchim31.maven</groupId>
       <artifactId>scala-maven-plugin</artifactId>
       <configuration>
-        <javacArgs>
+        <javacArgs combine.children="append">
           <!-- This option is needed to suppress warnings from sun.misc.Unsafe usage -->
           <javacArg>-XDignore.symbol.file</javacArg>
         </javacArgs>





spark git commit: [SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two configs with different way in different modes

2015-05-29 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 8db40f671 -> a51b133de


[SPARK-7524] [SPARK-7846] add configs for keytab and principal, pass these two 
configs with different way in different modes

* Spark now supports long-running services by updating tokens for the namenode, 
but it only accepts parameters passed in --k=v format, which is not very 
convenient. This patch adds spark.* configs in the properties file and as system 
properties.

* The --principal and --keytab options are passed to the client, but when we start 
the thrift server or spark-shell these two are also passed into the Main class 
(org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 and 
org.apache.spark.repl.Main).
In these two main classes, arguments passed in are processed by some 3rd-party 
libraries, which leads to errors such as "Invalid option: --principal" or 
"Unrecognised option: --principal".
We should pass these command args in different forms, say system properties.
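
A hedged sketch of the configuration fallback this describes (not the actual SparkSubmitArguments code): take --principal/--keytab from the command line when given, otherwise fall back to the spark.yarn.principal and spark.yarn.keytab configs loaded from the properties file or system properties.

import org.apache.spark.SparkConf

// Hedged sketch: CLI values win, configs/system properties are the fallback.
def resolveKerberosOptions(
    cliPrincipal: Option[String],
    cliKeytab: Option[String],
    conf: SparkConf): (Option[String], Option[String]) = {
  val principal = cliPrincipal.orElse(conf.getOption("spark.yarn.principal"))
  val keytab = cliKeytab.orElse(conf.getOption("spark.yarn.keytab"))
  (principal, keytab)
}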

Author: WangTaoTheTonic wangtao...@huawei.com

Closes #6051 from WangTaoTheTonic/SPARK-7524 and squashes the following commits:

e65699a [WangTaoTheTonic] change logic to loadEnvironments
ebd9ea0 [WangTaoTheTonic] merge master
ecfe43a [WangTaoTheTonic] pass keytab and principal seperately in different mode
33a7f40 [WangTaoTheTonic] expand the use of the current configs
08bb4e8 [WangTaoTheTonic] fix wrong cite
73afa64 [WangTaoTheTonic] add configs for keytab and principal, move originals 
to internal


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a51b133d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a51b133d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a51b133d

Branch: refs/heads/master
Commit: a51b133de3c65a991ab105b6f020082080121b4c
Parents: 8db40f6
Author: WangTaoTheTonic wangtao...@huawei.com
Authored: Fri May 29 11:06:11 2015 -0500
Committer: Thomas Graves tgra...@thatenemy-lm.champ.corp.yahoo.com
Committed: Fri May 29 11:06:11 2015 -0500

--
 .../scala/org/apache/spark/deploy/SparkSubmit.scala |  8 
 .../apache/spark/deploy/SparkSubmitArguments.scala  |  2 ++
 docs/running-on-yarn.md | 16 
 .../deploy/yarn/AMDelegationTokenRenewer.scala  | 14 --
 .../apache/spark/deploy/yarn/ClientArguments.scala  |  6 ++
 5 files changed, 36 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 92bb505..d1b32ea 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -428,6 +428,8 @@ object SparkSubmit {
      OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = "spark.executor.cores"),
      OptionAssigner(args.files, YARN, CLIENT, sysProp = "spark.yarn.dist.files"),
      OptionAssigner(args.archives, YARN, CLIENT, sysProp = "spark.yarn.dist.archives"),
+     OptionAssigner(args.principal, YARN, CLIENT, sysProp = "spark.yarn.principal"),
+     OptionAssigner(args.keytab, YARN, CLIENT, sysProp = "spark.yarn.keytab"),

      // Yarn cluster only
      OptionAssigner(args.name, YARN, CLUSTER, clOption = "--name"),
@@ -440,10 +442,8 @@ object SparkSubmit {
      OptionAssigner(args.files, YARN, CLUSTER, clOption = "--files"),
      OptionAssigner(args.archives, YARN, CLUSTER, clOption = "--archives"),
      OptionAssigner(args.jars, YARN, CLUSTER, clOption = "--addJars"),
-
-     // Yarn client or cluster
-     OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, clOption = "--principal"),
-     OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, clOption = "--keytab"),
+     OptionAssigner(args.principal, YARN, CLUSTER, clOption = "--principal"),
+     OptionAssigner(args.keytab, YARN, CLUSTER, clOption = "--keytab"),

      // Other options
      OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES,

http://git-wip-us.apache.org/repos/asf/spark/blob/a51b133d/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index c0e4c77..cc6a7bd 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -169,6 +169,8 @@ private[deploy] class SparkSubmitArguments(args: 
Seq[String], env: Map[String, S
 deployMode = 

spark git commit: [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH

2015-05-08 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c2f0821aa -> ebff7327a


[SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH

Based on https://github.com/apache/spark/pull/5478, which provides a 
PYSPARK_ARCHIVES_PATH env variable. Within this PR, we just need to export 
PYSPARK_ARCHIVES_PATH=/user/spark/pyspark.zip,/user/spark/python/lib/py4j-0.8.2.1-src.zip
 in conf/spark-env.sh when we don't install PySpark on each node of YARN. I ran a 
python application successfully on yarn-client and yarn-cluster with this PR.
andrewor14 sryza Sephiroth-Lin Can you take a look at this? Thanks.
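
A hedged sketch of how the archives can be resolved (not the SparkSubmit change itself): prefer an explicit PYSPARK_ARCHIVES_PATH, otherwise fall back to zips under SPARK_HOME; the fallback locations here are illustrative assumptions.

// Hedged sketch: choose the pyspark archives to distribute with a YARN python app.
val pyArchives: Option[String] =
  sys.env.get("PYSPARK_ARCHIVES_PATH").orElse {
    sys.env.get("SPARK_HOME").map { home =>
      Seq(s"$home/python/lib/pyspark.zip",
          s"$home/python/lib/py4j-0.8.2.1-src.zip").mkString(",")
    }
  }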

Author: Lianhui Wang lianhuiwan...@gmail.com

Closes #5580 from lianhuiwang/SPARK-6869 and squashes the following commits:

66ffa43 [Lianhui Wang] Update Client.scala
c2ad0f9 [Lianhui Wang] Update Client.scala
1c8f664 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' 
into SPARK-6869
008850a [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' 
into SPARK-6869
f0b4ed8 [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' 
into SPARK-6869
150907b [Lianhui Wang] Merge remote-tracking branch 'remotes/apache/master' 
into SPARK-6869
20402cd [Lianhui Wang] use ZipEntry
9d87c3f [Lianhui Wang] update scala style
e7bd971 [Lianhui Wang] address vanzin's comments
4b8a3ed [Lianhui Wang] use pyArchivesEnvOpt
e6b573b [Lianhui Wang] address vanzin's comments
f11f84a [Lianhui Wang] zip pyspark archives
5192cca [Lianhui Wang] update import path
3b1e4c8 [Lianhui Wang] address tgravescs's comments
9396346 [Lianhui Wang] put zip to make-distribution.sh
0d2baf7 [Lianhui Wang] update import paths
e0179be [Lianhui Wang] add zip pyspark archives in build or sparksubmit
31e8e06 [Lianhui Wang] update code style
9f31dac [Lianhui Wang] update code and add comments
f72987c [Lianhui Wang] add archives path to PYTHONPATH


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ebff7327
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ebff7327
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ebff7327

Branch: refs/heads/master
Commit: ebff7327af5efa9f57c605284de4fae6b050ae0f
Parents: c2f0821
Author: Lianhui Wang lianhuiwan...@gmail.com
Authored: Fri May 8 08:44:46 2015 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri May 8 08:44:46 2015 -0500

--
 assembly/pom.xml| 21 ++
 .../org/apache/spark/deploy/SparkSubmit.scala   | 41 
 project/SparkBuild.scala| 37 +-
 .../org/apache/spark/deploy/yarn/Client.scala   | 23 ---
 4 files changed, 114 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ebff7327/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 2b4d0a9..626c857 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -92,6 +92,27 @@
           <skip>true</skip>
         </configuration>
       </plugin>
+      <!-- zip pyspark archives to run python application on yarn mode -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <target>
+            <delete dir="${basedir}/../python/lib/pyspark.zip"/>
+            <zip destfile="${basedir}/../python/lib/pyspark.zip">
+              <fileset dir="${basedir}/../python/" includes="pyspark/**/*"/>
+            </zip>
+          </target>
+        </configuration>
+      </plugin>
       <!-- Use the shade plugin to create a big JAR with all the dependencies -->
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

http://git-wip-us.apache.org/repos/asf/spark/blob/ebff7327/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 8a03279..329fa06 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -332,6 +332,47 @@ object SparkSubmit {
   }
 }
 
+// In yarn mode for a python app, add pyspark archives to files
+// that can be distributed with the job
+if (args.isPython && clusterManager == YARN) {
+  var pyArchives: String = null
+  val pyArchivesEnvOpt = sys.env.get("PYSPARK_ARCHIVES_PATH")
+  if (pyArchivesEnvOpt.isDefined) {
+

hadoop git commit: YARN-3600. AM container link is broken (Naganarasimha G R via tgraves (cherry picked from commit 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0)

2015-05-08 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 28e0593b9 -> 547b06988


YARN-3600. AM container link is broken (Naganarasimha G R via tgraves
(cherry picked from commit 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/547b0698
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/547b0698
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/547b0698

Branch: refs/heads/branch-2
Commit: 547b0698873334bf0bb50a54e41bd45b6c326d06
Parents: 28e0593
Author: Thomas Graves tgra...@apache.org
Authored: Fri May 8 16:35:40 2015 +
Committer: Thomas Graves tgra...@apache.org
Committed: Fri May 8 16:37:20 2015 +

--
 hadoop-yarn-project/CHANGES.txt | 2 ++
 .../yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java   | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/547b0698/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 5ae87ef..25625b7 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -308,6 +308,8 @@ Release 2.8.0 - UNRELEASED
 
 YARN-3589. RM and AH web UI display DOCTYPE wrongly. (Rohith via ozawa)
 
+YARN-3600. AM container link is broken (Naganarasimha G R via tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/547b0698/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
index 30f55be..34ad08a 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
@@ -229,8 +229,9 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
         "AM Container:",
         appAttempt.getAmContainerId() == null || containers == null
             || !hasAMContainer(appAttemptReport.getAMContainerId(), containers)
-            ? "N/A" : root_url("container", appAttempt.getAmContainerId()),
-        String.valueOf(appAttempt.getAmContainerId()))
+            ? null : root_url("container", appAttempt.getAmContainerId()),
+        appAttempt.getAmContainerId() == null ? "N/A" :
+          String.valueOf(appAttempt.getAmContainerId()))
       ._("Node:", node)
       ._(
         "Tracking URL:",



hadoop git commit: YARN-3600. AM container link is broken (Naganarasimha G R via tgraves

2015-05-08 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/trunk bcf289050 -> 5d708a472


YARN-3600. AM container link is broken (Naganarasimha G R via tgraves


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5d708a47
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5d708a47
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5d708a47

Branch: refs/heads/trunk
Commit: 5d708a4725529cf09d2dd8b5b4aa3542cc8610b0
Parents: bcf2890
Author: Thomas Graves tgra...@apache.org
Authored: Fri May 8 16:35:40 2015 +
Committer: Thomas Graves tgra...@apache.org
Committed: Fri May 8 16:35:40 2015 +

--
 hadoop-yarn-project/CHANGES.txt | 2 ++
 .../yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java   | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5d708a47/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 7f6a09f..b72c648 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -353,6 +353,8 @@ Release 2.8.0 - UNRELEASED
 
 YARN-3589. RM and AH web UI display DOCTYPE wrongly. (Rohith via ozawa)
 
+YARN-3600. AM container link is broken (Naganarasimha G R via tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5d708a47/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
index 30f55be..34ad08a 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
@@ -229,8 +229,9 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
         "AM Container:",
         appAttempt.getAmContainerId() == null || containers == null
             || !hasAMContainer(appAttemptReport.getAMContainerId(), containers)
-            ? "N/A" : root_url("container", appAttempt.getAmContainerId()),
-        String.valueOf(appAttempt.getAmContainerId()))
+            ? null : root_url("container", appAttempt.getAmContainerId()),
+        appAttempt.getAmContainerId() == null ? "N/A" :
+          String.valueOf(appAttempt.getAmContainerId()))
       ._("Node:", node)
       ._(
         "Tracking URL:",



hadoop git commit: YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski vai tgraves) (cherry picked from commit f0f5e3c0751bcadcacd6d91e2c5504803ec3d

2015-05-08 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 68d85e92b -> 638feaaa3


YARN-20. More information for yarn.resourcemanager.webapp.address in 
yarn-default.xml (Bartosz Ługowski vai tgraves)
(cherry picked from commit f0f5e3c0751bcadcacd6d91e2c5504803ec3d0a5)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/638feaaa
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/638feaaa
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/638feaaa

Branch: refs/heads/branch-2
Commit: 638feaaa354243bb53f480c161ffefee8a8fbc50
Parents: 68d85e9
Author: Thomas Graves tgra...@apache.org
Authored: Fri May 8 17:20:09 2015 +
Committer: Thomas Graves tgra...@apache.org
Committed: Fri May 8 17:22:33 2015 +

--
 hadoop-yarn-project/CHANGES.txt |  3 +++
 .../src/main/resources/yarn-default.xml | 12 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/638feaaa/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 25625b7..c2f8fb8 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -151,6 +151,9 @@ Release 2.8.0 - UNRELEASED
 
 YARN-2784. Make POM project names consistent. (Rohith via devaraj)
 
+YARN-20. More information for yarn.resourcemanager.webapp.address in 
+yarn-default.xml (Bartosz Ługowski vai tgraves)
+
   OPTIMIZATIONS
 
 YARN-3339. TestDockerContainerExecutor should pull a single image and not

http://git-wip-us.apache.org/repos/asf/hadoop/blob/638feaaa/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 5d0f07d..e1e0ebd 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -111,13 +111,21 @@
   </property>
 
   <property>
-    <description>The http address of the RM web application.</description>
+    <description>
+      The http address of the RM web application.
+      If only a host is provided as the value,
+      the webapp will be served on a random port.
+    </description>
     <name>yarn.resourcemanager.webapp.address</name>
     <value>${yarn.resourcemanager.hostname}:8088</value>
   </property>
 
   <property>
-    <description>The https adddress of the RM web application.</description>
+    <description>
+      The https address of the RM web application.
+      If only a host is provided as the value,
+      the webapp will be served on a random port.
+    </description>
     <name>yarn.resourcemanager.webapp.https.address</name>
     <value>${yarn.resourcemanager.hostname}:8090</value>
   </property>



hadoop git commit: YARN-20. More information for yarn.resourcemanager.webapp.address in yarn-default.xml (Bartosz Ługowski vai tgraves)

2015-05-08 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/trunk a2d40bced -> f0f5e3c07


YARN-20. More information for yarn.resourcemanager.webapp.address in 
yarn-default.xml (Bartosz Ługowski vai tgraves)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f0f5e3c0
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f0f5e3c0
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f0f5e3c0

Branch: refs/heads/trunk
Commit: f0f5e3c0751bcadcacd6d91e2c5504803ec3d0a5
Parents: a2d40bc
Author: Thomas Graves tgra...@apache.org
Authored: Fri May 8 17:20:09 2015 +
Committer: Thomas Graves tgra...@apache.org
Committed: Fri May 8 17:21:32 2015 +

--
 hadoop-yarn-project/CHANGES.txt |  3 +++
 .../src/main/resources/yarn-default.xml | 12 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f0f5e3c0/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index b72c648..eb27152 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -196,6 +196,9 @@ Release 2.8.0 - UNRELEASED
 
 YARN-2784. Make POM project names consistent. (Rohith via devaraj)
 
+YARN-20. More information for yarn.resourcemanager.webapp.address in 
+yarn-default.xml (Bartosz Ługowski vai tgraves)
+
   OPTIMIZATIONS
 
 YARN-3339. TestDockerContainerExecutor should pull a single image and not

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f0f5e3c0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index 5d0f07d..e1e0ebd 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -111,13 +111,21 @@
   </property>
 
   <property>
-    <description>The http address of the RM web application.</description>
+    <description>
+      The http address of the RM web application.
+      If only a host is provided as the value,
+      the webapp will be served on a random port.
+    </description>
     <name>yarn.resourcemanager.webapp.address</name>
     <value>${yarn.resourcemanager.hostname}:8088</value>
   </property>
 
   <property>
-    <description>The https adddress of the RM web application.</description>
+    <description>
+      The https address of the RM web application.
+      If only a host is provided as the value,
+      the webapp will be served on a random port.
+    </description>
     <name>yarn.resourcemanager.webapp.https.address</name>
     <value>${yarn.resourcemanager.hostname}:8090</value>
   </property>



spark git commit: [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS

2015-05-01 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 4dc8d7449 -> b1f4ca82d


[SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS

Take 2. Does the same thing as #4688, but fixes Hadoop-1 build.

Author: Hari Shreedharan hshreedha...@apache.org

Closes #5823 from harishreedharan/kerberos-longrunning and squashes the 
following commits:

3c86bba [Hari Shreedharan] Import fixes. Import postfixOps explicitly.
4d04301 [Hari Shreedharan] Minor formatting fixes.
b5e7a72 [Hari Shreedharan] Remove reflection, use a method in SparkHadoopUtil 
to update the token renewer.
7bff6e9 [Hari Shreedharan] Make sure all required classes are present in the 
jar. Fix import order.
e851f70 [Hari Shreedharan] Move the ExecutorDelegationTokenRenewer to yarn 
module. Use reflection to use it.
36eb8a9 [Hari Shreedharan] Change the renewal interval config param. Fix a 
bunch of comments.
611923a [Hari Shreedharan] Make sure the namenodes are listed correctly for 
creating tokens.
09fe224 [Hari Shreedharan] Use token.renew to get token's renewal interval 
rather than using hdfs-site.xml
6963bbc [Hari Shreedharan] Schedule renewal in AM before starting user class. 
Else, a restarted AM cannot access HDFS if the user class tries to.
072659e [Hari Shreedharan] Fix build failure caused by thread factory getting 
moved to ThreadUtils.
f041dd3 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
42eead4 [Hari Shreedharan] Remove RPC part. Refactor and move methods around, 
use renewal interval rather than max lifetime to create new tokens.
ebb36f5 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
bc083e3 [Hari Shreedharan] Overload RegisteredExecutor to send tokens. Minor 
doc updates.
7b19643 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
8a4f268 [Hari Shreedharan] Added docs in the security guide. Changed some code 
to ensure that the renewer objects are created only if required.
e800c8b [Hari Shreedharan] Restore original RegisteredExecutor message, and 
send new tokens via NewTokens message.
0e9507e [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
7f1bc58 [Hari Shreedharan] Minor fixes, cleanup.
bcd11f9 [Hari Shreedharan] Refactor AM and Executor token update code into 
separate classes, also send tokens via akka on executor startup.
f74303c [Hari Shreedharan] Move the new logic into specialized classes. Add 
cleanup for old credentials files.
2f9975c [Hari Shreedharan] Ensure new tokens are written out immediately on AM 
restart. Also, pikc up the latest suffix from HDFS if the AM is restarted.
61b2b27 [Hari Shreedharan] Account for AM restarts by making sure lastSuffix is 
read from the files on HDFS.
62c45ce [Hari Shreedharan] Relogin from keytab periodically.
fa233bd [Hari Shreedharan] Adding logging, fixing minor formatting and ordering 
issues.
42813b4 [Hari Shreedharan] Remove utils.sh, which was re-added due to merge 
with master.
0de27ee [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
55522e3 [Hari Shreedharan] Fix failure caused by Preconditions ambiguity.
9ef5f1b [Hari Shreedharan] Added explanation of how the credentials refresh 
works, some other minor fixes.
f4fd711 [Hari Shreedharan] Fix SparkConf usage.
2debcea [Hari Shreedharan] Change the file structure for credentials files. I 
will push a followup patch which adds a cleanup mechanism for old credentials 
files. The credentials files are small and few enough for it to cause issues on 
HDFS.
af6d5f0 [Hari Shreedharan] Cleaning up files where changes weren't required.
f0f54cb [Hari Shreedharan] Be more defensive when updating the credentials file.
f6954da [Hari Shreedharan] Got rid of Akka communication to renew, instead the 
executors check a known file's modification time to read the credentials.
5c11c3e [Hari Shreedharan] Move tests to YarnSparkHadoopUtil to fix compile 
issues.
b4cb917 [Hari Shreedharan] Send keytab to AM via DistributedCache rather than 
directly via HDFS
0985b4e [Hari Shreedharan] Write tokens to HDFS and read them back when 
required, rather than sending them over the wire.
d79b2b9 [Hari Shreedharan] Make sure correct credentials are passed to 
FileSystem#addDelegationTokens()
8c6928a [Hari Shreedharan] Fix issue caused by direct creation of Actor object.
fb27f46 [Hari Shreedharan] Make sure principal and keytab are set before 
CoarseGrainedSchedulerBackend is started. Also schedule re-logins in 
CoarseGrainedSchedulerBackend#start()
41efde0 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
d282d7a [Hari Shreedharan] Fix ClientSuite to set YARN mode, so that the 
correct class is used in tests.
bcfc374 [Hari Shreedharan] Fix Hadoop-1 build by adding no-op methods in 
SparkHadoopUtil, with impl in YarnSparkHadoopUtil.
f8fe694 [Hari Shreedharan] Handle None if keytab-login is not scheduled.
2b0d745 [Hari Shreedharan] [SPARK-5342][YARN] Allow long running Spark apps to 
run on secure YARN/HDFS.
ccba5bc [Hari 

spark git commit: [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS

2015-04-30 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 7dacc08ab -> 6c65da6bb


[SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS

Current Spark apps running on Secure YARN/HDFS would not be able to write data
to HDFS after 7 days, since delegation tokens cannot be renewed beyond that. 
This
means Spark Streaming apps will not be able to run on Secure YARN.

This commit adds basic functionality to fix this issue. In this patch:
- new parameters are added - principal and keytab, which can be used to login 
to a KDC
- the client logs in, and then get tokens to start the AM
- the keytab is copied to the staging directory
- the AM waits for 60% of the time till expiry of the tokens and then logs in 
using the keytab
- each time after 60% of the time, new tokens are created and sent to the 
executors

Currently, to avoid complicating the architecture, we set the keytab and 
principal in the
SparkHadoopUtil singleton, and schedule a login. Once the login is completed, a 
callback is scheduled.
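
A hedged sketch of that scheduling idea (not the actual AM/renewer code): re-login from the keytab and schedule the next renewal at roughly 60% of the token renewal interval; the thread setup and callback shape here are illustrative.

import java.util.concurrent.{Executors, TimeUnit}
import org.apache.hadoop.security.UserGroupInformation

// Hedged sketch: periodic keytab re-login at ~60% of the renewal interval.
val renewalPool = Executors.newSingleThreadScheduledExecutor()

def scheduleLoginFromKeytab(principal: String, keytab: String, renewalIntervalMs: Long)(
    distributeNewTokens: UserGroupInformation => Unit): Unit = {
  val task = new Runnable {
    override def run(): Unit = {
      // Fresh UGI from the keytab; new tokens are then obtained and pushed to executors.
      val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab)
      distributeNewTokens(ugi)
      scheduleLoginFromKeytab(principal, keytab, renewalIntervalMs)(distributeNewTokens)
    }
  }
  renewalPool.schedule(task, (renewalIntervalMs * 0.6).toLong, TimeUnit.MILLISECONDS)
}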

This is being posted for feedback, so I can gather feedback on the general 
implementation.

There are currently a bunch of things to do:
- [x] logging
- [x] testing - I plan to manually test this soon. If you have ideas of how to 
add unit tests, comment.
- [x] add code to ensure that if these params are set in non-YARN cluster mode, 
we complain
- [x] documentation
- [x] Have the executors request for credentials from the AM, so that retries 
are possible.

Author: Hari Shreedharan hshreedha...@apache.org

Closes #4688 from harishreedharan/kerberos-longrunning and squashes the 
following commits:

36eb8a9 [Hari Shreedharan] Change the renewal interval config param. Fix a 
bunch of comments.
611923a [Hari Shreedharan] Make sure the namenodes are listed correctly for 
creating tokens.
09fe224 [Hari Shreedharan] Use token.renew to get token's renewal interval 
rather than using hdfs-site.xml
6963bbc [Hari Shreedharan] Schedule renewal in AM before starting user class. 
Else, a restarted AM cannot access HDFS if the user class tries to.
072659e [Hari Shreedharan] Fix build failure caused by thread factory getting 
moved to ThreadUtils.
f041dd3 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
42eead4 [Hari Shreedharan] Remove RPC part. Refactor and move methods around, 
use renewal interval rather than max lifetime to create new tokens.
ebb36f5 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
bc083e3 [Hari Shreedharan] Overload RegisteredExecutor to send tokens. Minor 
doc updates.
7b19643 [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
8a4f268 [Hari Shreedharan] Added docs in the security guide. Changed some code 
to ensure that the renewer objects are created only if required.
e800c8b [Hari Shreedharan] Restore original RegisteredExecutor message, and 
send new tokens via NewTokens message.
0e9507e [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
7f1bc58 [Hari Shreedharan] Minor fixes, cleanup.
bcd11f9 [Hari Shreedharan] Refactor AM and Executor token update code into 
separate classes, also send tokens via akka on executor startup.
f74303c [Hari Shreedharan] Move the new logic into specialized classes. Add 
cleanup for old credentials files.
2f9975c [Hari Shreedharan] Ensure new tokens are written out immediately on AM 
restart. Also, pick up the latest suffix from HDFS if the AM is restarted.
61b2b27 [Hari Shreedharan] Account for AM restarts by making sure lastSuffix is 
read from the files on HDFS.
62c45ce [Hari Shreedharan] Relogin from keytab periodically.
fa233bd [Hari Shreedharan] Adding logging, fixing minor formatting and ordering 
issues.
42813b4 [Hari Shreedharan] Remove utils.sh, which was re-added due to merge 
with master.
0de27ee [Hari Shreedharan] Merge branch 'master' into kerberos-longrunning
55522e3 [Hari Shreedharan] Fix failure caused by Preconditions ambiguity.
9ef5f1b [Hari Shreedharan] Added explanation of how the credentials refresh 
works, some other minor fixes.
f4fd711 [Hari Shreedharan] Fix SparkConf usage.
2debcea [Hari Shreedharan] Change the file structure for credentials files. I 
will push a followup patch which adds a cleanup mechanism for old credentials 
files. The credentials files are small and few enough that they should not 
cause issues on HDFS.
af6d5f0 [Hari Shreedharan] Cleaning up files where changes weren't required.
f0f54cb [Hari Shreedharan] Be more defensive when updating the credentials file.
f6954da [Hari Shreedharan] Got rid of Akka communication to renew, instead the 
executors check a known file's modification time to read the credentials.
5c11c3e [Hari Shreedharan] Move tests to YarnSparkHadoopUtil to fix compile 
issues.
b4cb917 [Hari Shreedharan] Send keytab to AM via DistributedCache rather than 
directly via HDFS
0985b4e [Hari Shreedharan] Write tokens to HDFS and read them back when 
required, rather than sending them over the wire.
d79b2b9 [Hari Shreedharan] Make sure 

hadoop git commit: YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves)

2015-04-29 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/trunk 3dd6395bb -> 2e215484b


YARN-3517. RM web ui for dumping scheduler logs should be for admins only 
(Varun Vasudev via tgraves)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2e215484
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2e215484
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2e215484

Branch: refs/heads/trunk
Commit: 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2
Parents: 3dd6395
Author: tgraves tgra...@apache.org
Authored: Wed Apr 29 21:25:42 2015 +
Committer: tgraves tgra...@apache.org
Committed: Wed Apr 29 21:25:42 2015 +

--
 hadoop-yarn-project/CHANGES.txt |  3 +
 .../server/security/ApplicationACLsManager.java | 11 +++
 .../webapp/CapacitySchedulerPage.java   | 51 +
 .../resourcemanager/webapp/RMWebServices.java   | 13 +++-
 .../webapp/TestRMWebServices.java   | 77 
 5 files changed, 139 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index b5581d6..6b8bde9 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -268,6 +268,9 @@ Release 2.8.0 - UNRELEASED
 YARN-2740. Fix NodeLabelsManager to properly handle node label 
modifications 
 when distributed node label configuration enabled. (Naganarasimha G R via 
wangda)
 
+YARN-3517. RM web ui for dumping scheduler logs should be for admins only
+(Varun Vasudev via tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
index 4daaa68..97b4163 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
@@ -138,4 +138,15 @@ public class ApplicationACLsManager {
 }
 return false;
   }
+
+  /**
+   * Check if the given user is an admin.
+   *
+   * @param calledUGI
+   *          UserGroupInformation for the user
+   * @return true if the user is an admin, false otherwise
+   */
+  public final boolean isAdmin(final UserGroupInformation calledUGI) {
+    return this.adminAclsManager.isAdmin(calledUGI);
+  }
 }
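
The isAdmin() helper added above is what lets the web layer restrict the scheduler
log dump to admins. Purely as an illustration, a minimal sketch (a hypothetical
handler written in Scala; the real change lives in RMWebServices and
CapacitySchedulerPage) of how the check gates the action:

  import org.apache.hadoop.security.UserGroupInformation
  import org.apache.hadoop.yarn.server.security.ApplicationACLsManager

  // Reject the request unless the caller passes the YARN admin ACL check.
  def dumpSchedulerLogs(aclsManager: ApplicationACLsManager,
      callerUGI: UserGroupInformation): Unit = {
    if (!aclsManager.isAdmin(callerUGI)) {
      throw new SecurityException(
        "Only admins may dump scheduler logs, got user: " + callerUGI.getShortUserName)
    }
    // ... trigger the capacity scheduler log dump here ...
  }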

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e215484/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
index 2eeda66..fa22a0d 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth;
@@ -33,6 +34,7 @@ import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo
 import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo;
 import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerLeafQueueInfo;
 import

hadoop git commit: YARN-3517. RM web ui for dumping scheduler logs should be for admins only (Varun Vasudev via tgraves) (cherry picked from commit 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2)

2015-04-29 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 460127e6f -> 2e13183f6


YARN-3517. RM web ui for dumping scheduler logs should be for admins only 
(Varun Vasudev via tgraves)
(cherry picked from commit 2e215484bd05cd5e3b7a81d3558c6879a05dd2d2)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2e13183f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2e13183f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2e13183f

Branch: refs/heads/branch-2
Commit: 2e13183f6010182aef7b1dfec2f9c1f1e9968011
Parents: 460127e
Author: tgraves tgra...@apache.org
Authored: Wed Apr 29 21:25:42 2015 +
Committer: tgraves tgra...@apache.org
Committed: Wed Apr 29 21:27:16 2015 +

--
 hadoop-yarn-project/CHANGES.txt |  3 +
 .../server/security/ApplicationACLsManager.java | 11 +++
 .../webapp/CapacitySchedulerPage.java   | 51 +
 .../resourcemanager/webapp/RMWebServices.java   | 13 +++-
 .../webapp/TestRMWebServices.java   | 77 
 5 files changed, 139 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 98e42c1..8f4907f 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -220,6 +220,9 @@ Release 2.8.0 - UNRELEASED
 YARN-2740. Fix NodeLabelsManager to properly handle node label 
modifications 
 when distributed node label configuration enabled. (Naganarasimha G R via 
wangda)
 
+YARN-3517. RM web ui for dumping scheduler logs should be for admins only
+(Varun Vasudev via tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
index 4daaa68..97b4163 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/security/ApplicationACLsManager.java
@@ -138,4 +138,15 @@ public class ApplicationACLsManager {
 }
 return false;
   }
+
+  /**
+   * Check if the given user is an admin.
+   *
+   * @param calledUGI
+   *          UserGroupInformation for the user
+   * @return true if the user is an admin, false otherwise
+   */
+  public final boolean isAdmin(final UserGroupInformation calledUGI) {
+    return this.adminAclsManager.isAdmin(calledUGI);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/2e13183f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
index 2eeda66..fa22a0d 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth;
@@ -33,6 +34,7 @@ import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo
 import 
org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo;
 import

spark git commit: [SPARK-6918] [YARN] Secure HBase support.

2015-04-29 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master f49284b5b -> baed3f2c7


[SPARK-6918] [YARN] Secure HBase support.

Obtain HBase security token with Kerberos credentials locally to be sent to 
executors. Tested on eBay's secure HBase cluster.

Similar to obtainTokensForNamenodes, and fails gracefully if the HBase classes are 
not included in the classpath.

Requires hbase-site.xml to be on the classpath (typically via the conf dir) for the 
ZooKeeper configuration. Should that go in the docs somewhere? Did not see an 
HBase section.

Author: Dean Chen deanch...@gmail.com

Closes #5586 from deanchen/master and squashes the following commits:

0c190ef [Dean Chen] [SPARK-6918][YARN] Secure HBase support.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/baed3f2c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/baed3f2c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/baed3f2c

Branch: refs/heads/master
Commit: baed3f2c73afd9c7d9de34f0485c507ac6a498b0
Parents: f49284b
Author: Dean Chen deanch...@gmail.com
Authored: Wed Apr 29 08:58:33 2015 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Apr 29 08:58:33 2015 -0500

--
 .../org/apache/spark/deploy/yarn/Client.scala   | 38 +++-
 1 file changed, 37 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/baed3f2c/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 741239c..4abcf73 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -39,7 +39,7 @@ import org.apache.hadoop.io.Text
 import org.apache.hadoop.mapred.Master
 import org.apache.hadoop.mapreduce.MRJobConfig
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
-import org.apache.hadoop.security.token.Token
+import org.apache.hadoop.security.token.{TokenIdentifier, Token}
 import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
@@ -226,6 +226,7 @@ private[spark] class Client(
 val distributedUris = new HashSet[String]
 obtainTokensForNamenodes(nns, hadoopConf, credentials)
 obtainTokenForHiveMetastore(hadoopConf, credentials)
+obtainTokenForHBase(hadoopConf, credentials)
 
 val replication = sparkConf.getInt("spark.yarn.submit.file.replication",
   fs.getDefaultReplication(dst)).toShort
@@ -1085,6 +1086,41 @@ object Client extends Logging {
   }
 
   /**
+   * Obtain security token for HBase.
+   */
+  def obtainTokenForHBase(conf: Configuration, credentials: Credentials): Unit = {
+    if (UserGroupInformation.isSecurityEnabled) {
+      val mirror = universe.runtimeMirror(getClass.getClassLoader)
+
+      try {
+        val confCreate = mirror.classLoader.
+          loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
+          getMethod("create", classOf[Configuration])
+        val obtainToken = mirror.classLoader.
+          loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
+          getMethod("obtainToken", classOf[Configuration])
+
+        logDebug("Attempting to fetch HBase security token.")
+
+        val hbaseConf = confCreate.invoke(null, conf)
+        val token = obtainToken.invoke(null, hbaseConf).asInstanceOf[Token[TokenIdentifier]]
+        credentials.addToken(token.getService, token)
+
+        logInfo("Added HBase security token to credentials.")
+      } catch {
+        case e:java.lang.NoSuchMethodException =>
+          logInfo("HBase Method not found: " + e)
+        case e:java.lang.ClassNotFoundException =>
+          logDebug("HBase Class not found: " + e)
+        case e:java.lang.NoClassDefFoundError =>
+          logDebug("HBase Class not found: " + e)
+        case e:Exception =>
+          logError("Exception when obtaining HBase security token: " + e)
+      }
+    }
+  }
+
+  /**
* Return whether the two file systems are the same.
*/
   private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



hadoop git commit: YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation (cherry picked from commit 189a63a719c63b67a1783a280bfc2f72dcb55277)

2015-04-23 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 889b92fa4 -> 1cd2fcf25


YARN-3434. Interaction between reservations and userlimit can result in 
significant ULF violation
(cherry picked from commit 189a63a719c63b67a1783a280bfc2f72dcb55277)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1cd2fcf2
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1cd2fcf2
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1cd2fcf2

Branch: refs/heads/branch-2
Commit: 1cd2fcf25dc614c0567e6da776fef737640e4293
Parents: 889b92f
Author: tgraves tgra...@apache.org
Authored: Thu Apr 23 14:39:25 2015 +
Committer: tgraves tgra...@apache.org
Committed: Thu Apr 23 14:49:24 2015 +

--
 hadoop-yarn-project/CHANGES.txt |   3 +
 .../scheduler/ResourceLimits.java   |  28 +++-
 .../scheduler/capacity/AbstractCSQueue.java |  94 +--
 .../scheduler/capacity/LeafQueue.java   | 162 ---
 .../scheduler/capacity/TestReservations.java|  65 +---
 5 files changed, 186 insertions(+), 166 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 8b09926..261e052 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -204,6 +204,9 @@ Release 2.8.0 - UNRELEASED
 YARN-3495. Confusing log generated by FairScheduler.
 (Brahma Reddy Battula via ozawa)
 
+YARN-3434. Interaction between reservations and userlimit can result in 
+significant ULF violation (tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
index 12333e8..8074794 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
@@ -19,22 +19,44 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
 
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.util.resource.Resources;
 
 /**
  * Resource limits for queues/applications, this means max overall (please note
  * that, it's not extra) resource you can get.
  */
 public class ResourceLimits {
+  volatile Resource limit;
+
+  // This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES
+  // config. This limit indicates how much we need to unreserve to allocate
+  // another container.
+  private volatile Resource amountNeededUnreserve;
+
   public ResourceLimits(Resource limit) {
+this.amountNeededUnreserve = Resources.none();
 this.limit = limit;
   }
-  
-  volatile Resource limit;
+
+  public ResourceLimits(Resource limit, Resource amountNeededUnreserve) {
+this.amountNeededUnreserve = amountNeededUnreserve;
+this.limit = limit;
+  }
+
   public Resource getLimit() {
 return limit;
   }
-  
+
+  public Resource getAmountNeededUnreserve() {
+return amountNeededUnreserve;
+  }
+
   public void setLimit(Resource limit) {
 this.limit = limit;
   }
+
+  public void setAmountNeededUnreserve(Resource amountNeededUnreserve) {
+this.amountNeededUnreserve = amountNeededUnreserve;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1cd2fcf2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn

hadoop git commit: YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation

2015-04-23 Thread tgraves
Repository: hadoop
Updated Branches:
  refs/heads/trunk baf8bc6c4 -> 189a63a71


YARN-3434. Interaction between reservations and userlimit can result in 
significant ULF violation


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/189a63a7
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/189a63a7
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/189a63a7

Branch: refs/heads/trunk
Commit: 189a63a719c63b67a1783a280bfc2f72dcb55277
Parents: baf8bc6
Author: tgraves tgra...@apache.org
Authored: Thu Apr 23 14:39:25 2015 +
Committer: tgraves tgra...@apache.org
Committed: Thu Apr 23 14:39:25 2015 +

--
 hadoop-yarn-project/CHANGES.txt |   3 +
 .../scheduler/ResourceLimits.java   |  28 +++-
 .../scheduler/capacity/AbstractCSQueue.java |  94 +--
 .../scheduler/capacity/LeafQueue.java   | 162 ---
 .../scheduler/capacity/TestReservations.java|  65 +---
 5 files changed, 186 insertions(+), 166 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/CHANGES.txt
--
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index f4413a8..d335389 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -252,6 +252,9 @@ Release 2.8.0 - UNRELEASED
 YARN-3495. Confusing log generated by FairScheduler.
 (Brahma Reddy Battula via ozawa)
 
+YARN-3434. Interaction between reservations and userlimit can result in 
+significant ULF violation (tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
index 12333e8..8074794 100644
--- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
+++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java
@@ -19,22 +19,44 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
 
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.util.resource.Resources;
 
 /**
  * Resource limits for queues/applications, this means max overall (please note
  * that, it's not extra) resource you can get.
  */
 public class ResourceLimits {
+  volatile Resource limit;
+
+  // This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES
+  // config. This limit indicates how much we need to unreserve to allocate
+  // another container.
+  private volatile Resource amountNeededUnreserve;
+
   public ResourceLimits(Resource limit) {
+this.amountNeededUnreserve = Resources.none();
 this.limit = limit;
   }
-  
-  volatile Resource limit;
+
+  public ResourceLimits(Resource limit, Resource amountNeededUnreserve) {
+this.amountNeededUnreserve = amountNeededUnreserve;
+this.limit = limit;
+  }
+
   public Resource getLimit() {
 return limit;
   }
-  
+
+  public Resource getAmountNeededUnreserve() {
+return amountNeededUnreserve;
+  }
+
   public void setLimit(Resource limit) {
 this.limit = limit;
   }
+
+  public void setAmountNeededUnreserve(Resource amountNeededUnreserve) {
+this.amountNeededUnreserve = amountNeededUnreserve;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/189a63a7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java
--
diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java
 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java
index 9233e01

spark git commit: [SPARK-2669] [yarn] Distribute client configuration to AM.

2015-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c84d91692 -> 50ab8a654


[SPARK-2669] [yarn] Distribute client configuration to AM.

Currently, when Spark launches the Yarn AM, the process will use
the local Hadoop configuration on the node where the AM launches,
if one is present. A more correct approach is to use the same
configuration used to launch the Spark job, since the user may
have made modifications (such as adding app-specific configs).

The approach taken here is to use the distributed cache to make
all files in the Hadoop configuration directory available to the
AM. This is a little overkill since only the AM needs them (the
executors use the broadcast Hadoop configuration from the driver),
but is the easier approach.

Even though only a few files in that directory may end up being
used, all of them are uploaded. This allows supporting use cases
such as when auxiliary configuration files are used for SSL
configuration, or when uploading a Hive configuration directory.
Not all of these may be reflected in a o.a.h.conf.Configuration object,
but may be needed when a driver in cluster mode instantiates, for
example, a HiveConf object instead.
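
As a rough sketch of the approach (illustrative only, not the actual Client.scala
change): the client archives the local Hadoop config directory so the archive can
be uploaded to the staging directory and exposed to the AM through the distributed
cache. All names here are assumptions for the example:

  import java.io.{File, FileOutputStream}
  import java.nio.file.Files
  import java.util.zip.{ZipEntry, ZipOutputStream}

  // Zip every regular file in the client-side Hadoop conf dir (HADOOP_CONF_DIR /
  // YARN_CONF_DIR) into a single archive destined for the distributed cache.
  def zipConfDir(confDir: File, out: File): Unit = {
    val zip = new ZipOutputStream(new FileOutputStream(out))
    try {
      confDir.listFiles().filter(_.isFile).foreach { f =>
        zip.putNextEntry(new ZipEntry(f.getName))
        Files.copy(f.toPath, zip)   // copy the file bytes into the zip entry
        zip.closeEntry()
      }
    } finally {
      zip.close()
    }
  }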

Author: Marcelo Vanzin van...@cloudera.com

Closes #4142 from vanzin/SPARK-2669 and squashes the following commits:

f5434b9 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669
013f0fb [Marcelo Vanzin] Review feedback.
f693152 [Marcelo Vanzin] Le sigh.
ed45b7d [Marcelo Vanzin] Zip all config files and upload them as an archive.
5927b6b [Marcelo Vanzin] Merge branch 'master' into SPARK-2669
cbb9fb3 [Marcelo Vanzin] Remove stale test.
e3e58d0 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669
e3d0613 [Marcelo Vanzin] Review feedback.
34bdbd8 [Marcelo Vanzin] Fix test.
022a688 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669
a77ddd5 [Marcelo Vanzin] Merge branch 'master' into SPARK-2669
79221c7 [Marcelo Vanzin] [SPARK-2669] [yarn] Distribute client configuration to 
AM.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50ab8a65
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50ab8a65
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50ab8a65

Branch: refs/heads/master
Commit: 50ab8a6543ad5c31e89c16df374d0cb13222fd1e
Parents: c84d916
Author: Marcelo Vanzin van...@cloudera.com
Authored: Fri Apr 17 14:21:51 2015 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Apr 17 14:21:51 2015 -0500

--
 docs/running-on-yarn.md |   6 +-
 .../org/apache/spark/deploy/yarn/Client.scala   | 125 ---
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 .../apache/spark/deploy/yarn/ClientSuite.scala  |  29 +++--
 .../spark/deploy/yarn/YarnClusterSuite.scala|   6 +-
 5 files changed, 132 insertions(+), 36 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/50ab8a65/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 853c9f2..0968fc5 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -211,7 +211,11 @@ Most of the configs are the same for Spark on YARN as for 
other deployment modes
 # Launching Spark on YARN
 
 Ensure that `HADOOP_CONF_DIR` or `YARN_CONF_DIR` points to the directory which 
contains the (client side) configuration files for the Hadoop cluster.
-These configs are used to write to the dfs and connect to the YARN 
ResourceManager.
+These configs are used to write to the dfs and connect to the YARN 
ResourceManager. The
+configuration contained in this directory will be distributed to the YARN 
cluster so that all
+containers used by the application use the same configuration. If the 
configuration references
+Java system properties or environment variables not managed by YARN, they 
should also be set in the
+Spark application's configuration (driver, executors, and the AM when running 
in client mode).
 
 There are two deploy modes that can be used to launch Spark applications on 
YARN. In yarn-cluster mode, the Spark driver runs inside an application master 
process which is managed by YARN on the cluster, and the client can go away 
after initiating the application. In yarn-client mode, the driver runs in the 
client process, and the application master is only used for requesting 
resources from YARN.
 

http://git-wip-us.apache.org/repos/asf/spark/blob/50ab8a65/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 52e4dee..019afbd 100644
--- 

spark git commit: [SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf.

2015-04-13 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master b29663eee -> 77620be76


[SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf.

Adds hive2-metastore delegation token to conf when running in secure mode.
Without this change, running on YARN in cluster mode fails with a
GSS exception.

This is a rough patch that adds a dependency to spark/yarn on hive-exec.
I'm looking for suggestions on how to make this patch better.

This contribution is my original work and I license the work to the
Apache Spark project under the project's open source licenses.

Author: Doug Balog doug.balogtarget.com

Author: Doug Balog doug.ba...@target.com

Closes #5031 from dougb/SPARK-6207 and squashes the following commits:

3e9ac16 [Doug Balog] [SPARK-6207] Fixes minor code spacing issues.
e260765 [Doug Balog] [SPARK-6207] Second pass at adding Hive delegation token 
to conf. - Use reflection instead of adding dependency on hive. - Tested on 
Hive 0.13 and Hadoop 2.4.1
1ab1729 [Doug Balog] Merge branch 'master' of git://github.com/apache/spark 
into SPARK-6207
bf356d2 [Doug Balog] [SPARK-6207] [YARN] [SQL] Adds delegation tokens for 
metastore to conf. Adds the hive2-metastore delegation token to conf when running 
in secure mode. Without this change, running on YARN in cluster mode fails with a 
GSS exception.
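
The reflection approach amounts to roughly the following sketch. The class and
method names match the Hive 0.13 API targeted by the patch; the final step of
asking the metastore for a delegation token is only outlined, since its exact
shape depends on the Hive version and is not part of this sketch:

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.security.Credentials

  // Load Hive classes via reflection so spark-yarn needs no compile-time
  // dependency on hive-exec; do nothing if no remote metastore is configured.
  def obtainHiveTokenSketch(conf: Configuration, creds: Credentials): Unit = {
    val loader = Thread.currentThread().getContextClassLoader
    val hiveClass = loader.loadClass("org.apache.hadoop.hive.ql.metadata.Hive")
    val hive = hiveClass.getMethod("get").invoke(null)
    val hiveConf = hiveClass.getMethod("getConf").invoke(hive)
    val uris = hiveConf.getClass.getMethod("get", classOf[String])
      .invoke(hiveConf, "hive.metastore.uris")
    if (uris != null && uris.toString.nonEmpty) {
      // Here the patch asks the metastore client for a delegation token for the
      // current user and adds it to `creds`; omitted in this sketch.
    }
  }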


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77620be7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77620be7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77620be7

Branch: refs/heads/master
Commit: 77620be76e82b6cdaae406cd752d3272656f5fe0
Parents: b29663e
Author: Doug Balog doug.ba...@target.com
Authored: Mon Apr 13 09:49:58 2015 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Apr 13 09:49:58 2015 -0500

--
 .../org/apache/spark/deploy/yarn/Client.scala   | 63 
 1 file changed, 63 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/77620be7/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index c1effd3..1091ff5 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -22,17 +22,21 @@ import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ArrayBuffer, HashMap, ListBuffer, Map}
+import scala.reflect.runtime.universe
 import scala.util.{Try, Success, Failure}
 
 import com.google.common.base.Objects
 
 import org.apache.hadoop.io.DataOutputBuffer
 import org.apache.hadoop.conf.Configuration
+import 
org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
 import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission
+import org.apache.hadoop.io.Text
 import org.apache.hadoop.mapred.Master
 import org.apache.hadoop.mapreduce.MRJobConfig
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
+import org.apache.hadoop.security.token.Token
 import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
@@ -220,6 +224,7 @@ private[spark] class Client(
 val dst = new Path(fs.getHomeDirectory(), appStagingDir)
 val nns = getNameNodesToAccess(sparkConf) + dst
 obtainTokensForNamenodes(nns, hadoopConf, credentials)
+obtainTokenForHiveMetastore(hadoopConf, credentials)
 
 val replication = sparkConf.getInt("spark.yarn.submit.file.replication",
   fs.getDefaultReplication(dst)).toShort
@@ -937,6 +942,64 @@ object Client extends Logging {
   }
 
   /**
+   * Obtains token for the Hive metastore and adds them to the credentials.
+   */
+  private def obtainTokenForHiveMetastore(conf: Configuration, credentials: Credentials) {
+    if (UserGroupInformation.isSecurityEnabled) {
+      val mirror = universe.runtimeMirror(getClass.getClassLoader)
+
+      try {
+        val hiveClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.ql.metadata.Hive")
+        val hive = hiveClass.getMethod("get").invoke(null)
+
+        val hiveConf = hiveClass.getMethod("getConf").invoke(hive)
+        val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")
+
+        val hiveConfGet = (param:String) => Option(hiveConfClass
+          .getMethod("get", classOf[java.lang.String])
+          .invoke(hiveConf, param))
+
+        val metastore_uri = hiveConfGet("hive.metastore.uris")
+
+        // Check for local metastore
+        if (metastore_uri != None && metastore_uri.get.toString.size > 0) {
+  val 

spark git commit: [SPARK-3591][YARN]fire and forget for YARN cluster mode

2015-04-07 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master ae980eb41 -> b65bad65c


[SPARK-3591][YARN]fire and forget for YARN cluster mode

https://issues.apache.org/jira/browse/SPARK-3591

The output after this patch:
doggie153:/opt/oss/spark-1.3.0-bin-hadoop2.4/bin # ./spark-submit  --class 
org.apache.spark.examples.SparkPi --master yarn-cluster 
../lib/spark-examples*.jar
15/03/31 21:15:25 WARN NativeCodeLoader: Unable to load native-hadoop library 
for your platform... using builtin-java classes where applicable
15/03/31 21:15:25 INFO RMProxy: Connecting to ResourceManager at 
doggie153/10.177.112.153:8032
15/03/31 21:15:25 INFO Client: Requesting a new application from cluster with 4 
NodeManagers
15/03/31 21:15:25 INFO Client: Verifying our application has not requested more 
than the maximum memory capability of the cluster (8192 MB per container)
15/03/31 21:15:25 INFO Client: Will allocate AM container, with 896 MB memory 
including 384 MB overhead
15/03/31 21:15:25 INFO Client: Setting up container launch context for our AM
15/03/31 21:15:25 INFO Client: Preparing resources for our AM container
15/03/31 21:15:26 INFO Client: Uploading resource 
file:/opt/oss/spark-1.3.0-bin-hadoop2.4/lib/spark-assembly-1.4.0-SNAPSHOT-hadoop2.4.1.jar
 - 
hdfs://doggie153:9000/user/root/.sparkStaging/application_1427257505534_0016/spark-assembly-1.4.0-SNAPSHOT-hadoop2.4.1.jar
15/03/31 21:15:27 INFO Client: Uploading resource 
file:/opt/oss/spark-1.3.0-bin-hadoop2.4/lib/spark-examples-1.3.0-hadoop2.4.0.jar
 - 
hdfs://doggie153:9000/user/root/.sparkStaging/application_1427257505534_0016/spark-examples-1.3.0-hadoop2.4.0.jar
15/03/31 21:15:28 INFO Client: Setting up the launch environment for our AM 
container
15/03/31 21:15:28 INFO SecurityManager: Changing view acls to: root
15/03/31 21:15:28 INFO SecurityManager: Changing modify acls to: root
15/03/31 21:15:28 INFO SecurityManager: SecurityManager: authentication 
disabled; ui acls disabled; users with view permissions: Set(root); users with 
modify permissions: Set(root)
15/03/31 21:15:28 INFO Client: Submitting application 16 to ResourceManager
15/03/31 21:15:28 INFO YarnClientImpl: Submitted application 
application_1427257505534_0016
15/03/31 21:15:28 INFO Client: ... waiting before polling ResourceManager for 
application state
15/03/31 21:15:33 INFO Client: ... polling ResourceManager for application state
15/03/31 21:15:33 INFO Client: Application report for 
application_1427257505534_0016 (state: RUNNING)
15/03/31 21:15:33 INFO Client:
 client token: N/A
 diagnostics: N/A
 ApplicationMaster host: doggie157
 ApplicationMaster RPC port: 0
 queue: default
 start time: 1427807728307
 final status: UNDEFINED
 tracking URL: 
http://doggie153:8088/proxy/application_1427257505534_0016/
 user: root

/cc andrewor14

Author: WangTaoTheTonic wangtao...@huawei.com

Closes #5297 from WangTaoTheTonic/SPARK-3591 and squashes the following commits:

c76d232 [WangTaoTheTonic] wrap lines
16c90a8 [WangTaoTheTonic] move up lines to avoid duplicate
fea390d [WangTaoTheTonic] log failed/killed report, style and comment
be1cc2e [WangTaoTheTonic] reword
f0bc54f [WangTaoTheTonic] minor: expose appid in excepiton messages
ba9b22b [WangTaoTheTonic] wrong config name
e1a4013 [WangTaoTheTonic] revert to the old version and do some robust
19706c0 [WangTaoTheTonic] add a config to control whether to forget
0cbdce8 [WangTaoTheTonic] fire and forget for YARN cluster mode
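
In practice the new behavior is driven by a single boolean switch. A minimal
illustration (the key below is my reading of the "config to control whether to
forget" commit above; treat the exact name as an assumption):

  import org.apache.spark.SparkConf

  // When set to false in yarn-cluster mode, spark-submit returns right after the
  // application is accepted instead of polling the ResourceManager until it ends.
  val conf = new SparkConf()
    .set("spark.yarn.submit.waitAppCompletion", "false")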


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b65bad65
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b65bad65
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b65bad65

Branch: refs/heads/master
Commit: b65bad65c3500475b974ca0219f218eef296db2c
Parents: ae980eb
Author: WangTaoTheTonic wangtao...@huawei.com
Authored: Tue Apr 7 08:36:25 2015 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Apr 7 08:36:25 2015 -0500

--
 .../scala/org/apache/spark/deploy/Client.scala  |  2 +-
 .../deploy/rest/StandaloneRestClient.scala  |  2 +-
 docs/running-on-yarn.md |  9 +++
 .../org/apache/spark/deploy/yarn/Client.scala   | 83 
 4 files changed, 61 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b65bad65/core/src/main/scala/org/apache/spark/deploy/Client.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala 
b/core/src/main/scala/org/apache/spark/deploy/Client.scala
index 65238af..8d13b2a 100644
--- a/core/src/main/scala/org/apache/spark/deploy/Client.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala
@@ -89,7 +89,7 @@ private class 

spark git commit: [SPARK-6050] [yarn] Relax matching of vcore count in received containers.

2015-03-02 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 a83b9bbb2 -> 650d1e7fb


[SPARK-6050] [yarn] Relax matching of vcore count in received containers.

Some YARN configurations return a vcore count for allocated
containers that does not match the requested resource. That means
Spark would always ignore those containers. So relax the matching
of the vcore count to allow the Spark jobs to run.

Author: Marcelo Vanzin van...@cloudera.com

Closes #4818 from vanzin/SPARK-6050 and squashes the following commits:

991c803 [Marcelo Vanzin] Remove config option, standardize on legacy behavior 
(no vcore matching).
8c9c346 [Marcelo Vanzin] Restrict lax matching to vcores only.
3359692 [Marcelo Vanzin] [SPARK-6050] [yarn] Add config option to do lax 
resource matching.

(cherry picked from commit 6b348d90f475440c285a4b636134ffa9351580b9)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/650d1e7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/650d1e7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/650d1e7f

Branch: refs/heads/branch-1.3
Commit: 650d1e7fb13545d0d102de9bb6e11ab4f9ef6359
Parents: a83b9bb
Author: Marcelo Vanzin van...@cloudera.com
Authored: Mon Mar 2 16:41:43 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Mar 2 16:42:02 2015 -0600

--
 .../org/apache/spark/deploy/yarn/YarnAllocator.scala  | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/650d1e7f/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index 12c62a6..55bfbcd 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -290,8 +290,14 @@ private[yarn] class YarnAllocator(
   location: String,
   containersToUse: ArrayBuffer[Container],
   remaining: ArrayBuffer[Container]): Unit = {
+// SPARK-6050: certain Yarn configurations return a virtual core count 
that doesn't match the
+// request; for example, capacity scheduler + DefaultResourceCalculator. 
So match on requested
+// memory, but use the asked vcore count for matching, effectively 
disabling matching on vcore
+// count.
+val matchingResource = 
Resource.newInstance(allocatedContainer.getResource.getMemory,
+  resource.getVirtualCores)
 val matchingRequests = 
amClient.getMatchingRequests(allocatedContainer.getPriority, location,
-  allocatedContainer.getResource)
+  matchingResource)
 
 // Match the allocation to a request
 if (!matchingRequests.isEmpty) {
@@ -318,7 +324,7 @@ private[yarn] class YarnAllocator(
   assert(container.getResource.getMemory >= resource.getMemory)
 
   logInfo("Launching container %s for on host %s".format(containerId, executorHostname))
-  executorIdToContainer(executorId) = container  
+  executorIdToContainer(executorId) = container
 
   val containerSet = 
allocatedHostToContainersMap.getOrElseUpdate(executorHostname,
 new HashSet[ContainerId])


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-6050] [yarn] Relax matching of vcore count in received containers.

2015-03-02 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 582e5a24c -> 6b348d90f


[SPARK-6050] [yarn] Relax matching of vcore count in received containers.

Some YARN configurations return a vcore count for allocated
containers that does not match the requested resource. That means
Spark would always ignore those containers. So relax the matching
of the vcore count to allow the Spark jobs to run.

Author: Marcelo Vanzin van...@cloudera.com

Closes #4818 from vanzin/SPARK-6050 and squashes the following commits:

991c803 [Marcelo Vanzin] Remove config option, standardize on legacy behavior 
(no vcore matching).
8c9c346 [Marcelo Vanzin] Restrict lax matching to vcores only.
3359692 [Marcelo Vanzin] [SPARK-6050] [yarn] Add config option to do lax 
resource matching.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6b348d90
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6b348d90
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6b348d90

Branch: refs/heads/master
Commit: 6b348d90f475440c285a4b636134ffa9351580b9
Parents: 582e5a2
Author: Marcelo Vanzin van...@cloudera.com
Authored: Mon Mar 2 16:41:43 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Mar 2 16:41:43 2015 -0600

--
 .../org/apache/spark/deploy/yarn/YarnAllocator.scala  | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6b348d90/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index 12c62a6..55bfbcd 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -290,8 +290,14 @@ private[yarn] class YarnAllocator(
   location: String,
   containersToUse: ArrayBuffer[Container],
   remaining: ArrayBuffer[Container]): Unit = {
+// SPARK-6050: certain Yarn configurations return a virtual core count 
that doesn't match the
+// request; for example, capacity scheduler + DefaultResourceCalculator. 
So match on requested
+// memory, but use the asked vcore count for matching, effectively 
disabling matching on vcore
+// count.
+val matchingResource = 
Resource.newInstance(allocatedContainer.getResource.getMemory,
+  resource.getVirtualCores)
 val matchingRequests = 
amClient.getMatchingRequests(allocatedContainer.getPriority, location,
-  allocatedContainer.getResource)
+  matchingResource)
 
 // Match the allocation to a request
 if (!matchingRequests.isEmpty) {
@@ -318,7 +324,7 @@ private[yarn] class YarnAllocator(
   assert(container.getResource.getMemory >= resource.getMemory)
 
   logInfo("Launching container %s for on host %s".format(containerId, executorHostname))
-  executorIdToContainer(executorId) = container  
+  executorIdToContainer(executorId) = container
 
   val containerSet = 
allocatedHostToContainersMap.getOrElseUpdate(executorHostname,
 new HashSet[ContainerId])


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714

2015-01-30 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 6f21dce5f -> 254eaa4d3


SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714

Previously I had tried to solve this by adding a line in Spark's 
log4j-defaults.properties.

The issue with the message in log4j-defaults.properties was that the 
log4j.properties packaged inside Hadoop was getting picked up instead. While it 
would be ideal to fix that as well, we still want to quiet this in situations 
where a user supplies their own custom log4j properties.
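
For users who ship their own log4j.properties and still see the flood, adding the
same setting there (the exact line that used to live in Spark's
log4j-defaults.properties, and that the patch now applies programmatically only
when the level is unset) quiets it:

  # Silence per-container rack resolution chatter from YARN
  log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN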

Author: Sandy Ryza sa...@cloudera.com

Closes #4192 from sryza/sandy-spark-5393 and squashes the following commits:

4d5dedc [Sandy Ryza] Only set log level if unset
46e07c5 [Sandy Ryza] SPARK-5393. Flood of util.RackResolver log messages after 
SPARK-1714


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/254eaa4d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/254eaa4d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/254eaa4d

Branch: refs/heads/master
Commit: 254eaa4d350dafe19f1715e80eb816856a126c21
Parents: 6f21dce
Author: Sandy Ryza sa...@cloudera.com
Authored: Fri Jan 30 11:31:54 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Jan 30 11:31:54 2015 -0600

--
 .../org/apache/spark/log4j-defaults.properties  |  1 -
 .../scala/org/apache/spark/SparkContext.scala   |  2 +-
 .../SparkContextSchedulerCreationSuite.scala|  2 +-
 .../spark/deploy/yarn/YarnAllocator.scala   |  7 
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  4 --
 .../cluster/YarnClientClusterScheduler.scala| 36 --
 .../cluster/YarnClusterScheduler.scala  | 18 +
 .../spark/scheduler/cluster/YarnScheduler.scala | 40 
 8 files changed, 50 insertions(+), 60 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/main/resources/org/apache/spark/log4j-defaults.properties
--
diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties 
b/core/src/main/resources/org/apache/spark/log4j-defaults.properties
index c99a61f..89eec7d 100644
--- a/core/src/main/resources/org/apache/spark/log4j-defaults.properties
+++ b/core/src/main/resources/org/apache/spark/log4j-defaults.properties
@@ -10,4 +10,3 @@ log4j.logger.org.eclipse.jetty=WARN
 log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
-log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN

http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 4c4ee04..3c61c10 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1986,7 +1986,7 @@ object SparkContext extends Logging {
       case "yarn-client" =>
         val scheduler = try {
           val clazz =
-            Class.forName("org.apache.spark.scheduler.cluster.YarnClientClusterScheduler")
+            Class.forName("org.apache.spark.scheduler.cluster.YarnScheduler")
           val cons = clazz.getConstructor(classOf[SparkContext])
           cons.newInstance(sc).asInstanceOf[TaskSchedulerImpl]
 

http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala 
b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala
index 8ae4f24..bbed8dd 100644
--- 
a/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala
+++ 
b/core/src/test/scala/org/apache/spark/SparkContextSchedulerCreationSuite.scala
@@ -149,7 +149,7 @@ class SparkContextSchedulerCreationSuite
   }
 
   test("yarn-client") {
-    testYarn("yarn-client", "org.apache.spark.scheduler.cluster.YarnClientClusterScheduler")
+    testYarn("yarn-client", "org.apache.spark.scheduler.cluster.YarnScheduler")
   }
 
   def testMesos(master: String, expectedClass: Class[_], coarse: Boolean) {

http://git-wip-us.apache.org/repos/asf/spark/blob/254eaa4d/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 

spark git commit: SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAlloca...

2015-01-22 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 246111d17 -> 820ce0359


SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAlloca...

...tor

Author: Sandy Ryza sa...@cloudera.com

Closes #4164 from sryza/sandy-spark-5370 and squashes the following commits:

0c8d736 [Sandy Ryza] SPARK-5370. [YARN] Remove some unnecessary synchronization 
in YarnAllocator


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/820ce035
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/820ce035
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/820ce035

Branch: refs/heads/master
Commit: 820ce03597350257abe0c5c96435c555038e3e6c
Parents: 246111d
Author: Sandy Ryza sa...@cloudera.com
Authored: Thu Jan 22 13:49:35 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jan 22 13:49:35 2015 -0600

--
 .../spark/deploy/yarn/YarnAllocator.scala   | 23 +---
 1 file changed, 10 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/820ce035/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index 4c35b60..d00f296 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -60,7 +60,6 @@ private[yarn] class YarnAllocator(
 
   import YarnAllocator._
 
-  // These two complementary data structures are locked on 
allocatedHostToContainersMap.
   // Visible for testing.
   val allocatedHostToContainersMap =
 new HashMap[String, collection.mutable.Set[ContainerId]]
@@ -355,20 +354,18 @@ private[yarn] class YarnAllocator(
 }
   }
 
-  allocatedHostToContainersMap.synchronized {
-if (allocatedContainerToHostMap.containsKey(containerId)) {
-  val host = allocatedContainerToHostMap.get(containerId).get
-  val containerSet = allocatedHostToContainersMap.get(host).get
+  if (allocatedContainerToHostMap.containsKey(containerId)) {
+val host = allocatedContainerToHostMap.get(containerId).get
+val containerSet = allocatedHostToContainersMap.get(host).get
 
-  containerSet.remove(containerId)
-  if (containerSet.isEmpty) {
-allocatedHostToContainersMap.remove(host)
-  } else {
-allocatedHostToContainersMap.update(host, containerSet)
-  }
-
-  allocatedContainerToHostMap.remove(containerId)
+containerSet.remove(containerId)
+if (containerSet.isEmpty) {
+  allocatedHostToContainersMap.remove(host)
+} else {
+  allocatedHostToContainersMap.update(host, containerSet)
 }
+
+allocatedContainerToHostMap.remove(containerId)
   }
 }
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnA...

2015-01-21 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 8c06a5faa -> 2eeada373


SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnA...

...llocator

The goal of this PR is to simplify YarnAllocator as much as possible and get it 
up to the level of code quality we see in the rest of Spark.

In service of this, it does a few things:
* Uses AMRMClient APIs for matching containers to requests.
* Adds calls to AMRMClient.removeContainerRequest so that, when we use a 
container, we don't end up requesting it again.
* Removes YarnAllocator's host-rack cache. YARN's RackResolver already does 
this caching, so this is redundant.
* Adds tests for basic YarnAllocator functionality.
* Breaks up the allocateResources method, which was previously nearly 300 lines.
* A little bit of stylistic cleanup.
* Fixes a bug that causes three times the requests to be filed when preferred 
host locations are given.

The patch is lossy. In particular, it loses the logic for trying to avoid 
containers bunching up on nodes. As I understand it, the logic that's gone is:

* If, in a single response from the RM, we receive a set of containers on a 
node, and prefer some number of containers on that node greater than 0 but less 
than the number we received, give back the delta between what we preferred and 
what we received.

This seems like a weird way to avoid bunching, e.g. it does nothing to avoid 
bunching when we don't request containers on particular nodes.
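
For readers unfamiliar with the AMRMClient calls referred to above, here is a minimal,
self-contained sketch of the request/match/remove cycle (illustrative only: the resource
size, priority and wildcard location are made up, registration and the allocate()
heartbeat loop are omitted, and this is not the patched Spark code):

import org.apache.hadoop.yarn.api.records.{Priority, Resource}
import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest
import org.apache.hadoop.yarn.conf.YarnConfiguration

object AmRmClientSketch {
  def main(args: Array[String]): Unit = {
    val amClient = AMRMClient.createAMRMClient[ContainerRequest]()
    amClient.init(new YarnConfiguration())
    amClient.start()

    val capability = Resource.newInstance(1024, 1)   // hypothetical 1 GB / 1 core containers
    val priority = Priority.newInstance(1)

    // File a request; node and rack arrays may be passed instead of null for locality.
    amClient.addContainerRequest(new ContainerRequest(capability, null, null, priority))

    // Once a container is allocated, let AMRMClient find the matching outstanding
    // request and remove it, so the same container is not requested again.
    val matching = amClient.getMatchingRequests(priority, "*", capability)
    if (!matching.isEmpty && !matching.get(0).isEmpty) {
      amClient.removeContainerRequest(matching.get(0).iterator().next())
    }

    amClient.stop()
  }
}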

Author: Sandy Ryza sa...@cloudera.com

Closes #3765 from sryza/sandy-spark-1714 and squashes the following commits:

32a5942 [Sandy Ryza] Muffle RackResolver logs
74f56dd [Sandy Ryza] Fix a couple comments and simplify requestTotalExecutors
60ea4bd [Sandy Ryza] Fix scalastyle
ca35b53 [Sandy Ryza] Simplify further
e9cf8a6 [Sandy Ryza] Fix YarnClusterSuite
257acf3 [Sandy Ryza] Remove locality stuff and more cleanup
59a3c5e [Sandy Ryza] Take out rack stuff
5f72fd5 [Sandy Ryza] Further documentation and cleanup
89edd68 [Sandy Ryza] SPARK-1714. Take advantage of AMRMClient APIs to simplify 
logic in YarnAllocator


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eeada37
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eeada37
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eeada37

Branch: refs/heads/master
Commit: 2eeada373e59d63b774ba92eb5d75fcd3a1cf8f4
Parents: 8c06a5f
Author: Sandy Ryza sa...@cloudera.com
Authored: Wed Jan 21 10:31:54 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jan 21 10:31:54 2015 -0600

--
 .../org/apache/spark/log4j-defaults.properties  |   1 +
 .../spark/deploy/yarn/YarnAllocator.scala   | 733 ++-
 .../apache/spark/deploy/yarn/YarnRMClient.scala |   3 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  41 +-
 .../cluster/YarnClientClusterScheduler.scala|   5 +-
 .../cluster/YarnClusterScheduler.scala  |   6 +-
 .../spark/deploy/yarn/YarnAllocatorSuite.scala  | 150 +++-
 7 files changed, 389 insertions(+), 550 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2eeada37/core/src/main/resources/org/apache/spark/log4j-defaults.properties
--
diff --git a/core/src/main/resources/org/apache/spark/log4j-defaults.properties 
b/core/src/main/resources/org/apache/spark/log4j-defaults.properties
index 89eec7d..c99a61f 100644
--- a/core/src/main/resources/org/apache/spark/log4j-defaults.properties
+++ b/core/src/main/resources/org/apache/spark/log4j-defaults.properties
@@ -10,3 +10,4 @@ log4j.logger.org.eclipse.jetty=WARN
 log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
 log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
 log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
+log4j.logger.org.apache.hadoop.yarn.util.RackResolver=WARN

http://git-wip-us.apache.org/repos/asf/spark/blob/2eeada37/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index de65ef2..4c35b60 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.deploy.yarn
 
+import java.util.Collections
 import java.util.concurrent._
-import java.util.concurrent.atomic.AtomicInteger
 import java.util.regex.Pattern
 
 import scala.collection.JavaConversions._
@@ -28,33 +28,26 @@ import 
com.google.common.util.concurrent.ThreadFactoryBuilder
 
 import org.apache.hadoop.conf.Configuration
 

spark git commit: [SPARK-5336][YARN]spark.executor.cores must not be less than spark.task.cpus

2015-01-21 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 424d8c6ff -> 8c06a5faa


[SPARK-5336][YARN]spark.executor.cores must not be less than spark.task.cpus

https://issues.apache.org/jira/browse/SPARK-5336
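
As a reference point, a minimal sketch of the constraint this commit enforces (not the
patched Spark code; the configuration values are whatever the user supplies): an executor
must have enough cores to run at least one task, otherwise tasksPerExecutor is zero.

import org.apache.spark.{SparkConf, SparkException}

object CoresVsTaskCpusSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    val executorCores = conf.getInt("spark.executor.cores", 1)
    val taskCpus = conf.getInt("spark.task.cpus", 1)
    val tasksPerExecutor = executorCores / taskCpus   // 0 when executorCores < taskCpus
    if (executorCores < taskCpus) {
      throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.")
    }
    println(s"$tasksPerExecutor task(s) can run concurrently per executor")
  }
}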

Author: WangTao barneystin...@aliyun.com
Author: WangTaoTheTonic barneystin...@aliyun.com

Closes #4123 from WangTaoTheTonic/SPARK-5336 and squashes the following commits:

6c9676a [WangTao] Update ClientArguments.scala
9632d3a [WangTaoTheTonic] minor comment fix
d03d6fa [WangTaoTheTonic] import ordering should be alphabetical'
3112af9 [WangTao] spark.executor.cores must not be less than spark.task.cpus


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c06a5fa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c06a5fa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c06a5fa

Branch: refs/heads/master
Commit: 8c06a5faacfc71050461273133b9cf9a9dd8986f
Parents: 424d8c6
Author: WangTao barneystin...@aliyun.com
Authored: Wed Jan 21 09:42:30 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jan 21 09:42:30 2015 -0600

--
 .../org/apache/spark/ExecutorAllocationManager.scala  |  2 +-
 .../org/apache/spark/scheduler/TaskSchedulerImpl.scala|  2 +-
 .../org/apache/spark/deploy/yarn/ClientArguments.scala| 10 +++---
 3 files changed, 9 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala 
b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
index a0ee2a7..b28da19 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala
@@ -158,7 +158,7 @@ private[spark] class ExecutorAllocationManager(
        "shuffle service. You may enable this through spark.shuffle.service.enabled.")
    }
    if (tasksPerExecutor == 0) {
-      throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.cores")
+      throw new SparkException("spark.executor.cores must not be less than spark.task.cpus.")
 }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
index a1dfb01..33a7aae 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala
@@ -168,7 +168,7 @@ private[spark] class TaskSchedulerImpl(
 if (!hasLaunchedTask) {
          logWarning("Initial job has not accepted any resources; " +
            "check your cluster UI to ensure that workers are registered " +
-            "and have sufficient memory")
+            "and have sufficient resources")
 } else {
   this.cancel()
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/8c06a5fa/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 79bead7..f96b245 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -19,9 +19,9 @@ package org.apache.spark.deploy.yarn
 
 import scala.collection.mutable.ArrayBuffer
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._
-import org.apache.spark.util.{Utils, IntParam, MemoryParam}
+import org.apache.spark.util.{IntParam, MemoryParam, Utils}
 
 // TODO: Add code and support for ensuring that yarn resource 'tasks' are 
location aware !
 private[spark] class ClientArguments(args: Array[String], sparkConf: 
SparkConf) {
@@ -95,6 +95,10 @@ private[spark] class ClientArguments(args: Array[String], 
sparkConf: SparkConf)
      throw new IllegalArgumentException(
        "You must specify at least 1 executor!\n" + getUsageMessage())
    }
+    if (executorCores < sparkConf.getInt("spark.task.cpus", 1)) {
+      throw new SparkException("Executor cores must not be less than " +
+        "spark.task.cpus.")
+    }
 if (isClusterMode) {
    for (key <- Seq(amMemKey, amMemOverheadKey, 

spark git commit: [SPARK-5169][YARN]fetch the correct max attempts

2015-01-09 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 167a5ab0b -> f3da4bd72


[SPARK-5169][YARN]fetch the correct max attempts

Sorry for fetching the wrong max attempts in this commit 
https://github.com/apache/spark/commit/8fdd48959c93b9cf809f03549e2ae6c4687d1fcd.
We need to fix it now.

tgravescs

If we set a spark.yarn.maxAppAttempts that is larger than 
`yarn.resourcemanager.am.max-attempts` on the YARN side, it will be overridden, as 
described here:
The maximum number of application attempts. It's a global setting for all 
application masters. Each application master can specify its individual 
maximum number of application attempts via the API, but the individual number 
cannot be more than the global upper bound. If it is, the resourcemanager will 
override it. The default number is set to 2, to allow at least one retry for 
AM.

http://hadoop.apache.org/docs/r2.6.0/hadoop-yarn/hadoop-yarn-common/yarn-default.xml
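
The resulting rule is effectively the minimum of the two settings. A minimal sketch,
assuming both values have already been read as plain integers (illustrative, not the
YarnRMClient code):

object MaxAttemptsSketch {
  // YARN's global bound always wins when the Spark setting exceeds it.
  def effectiveMaxAttempts(sparkMaxAttempts: Option[Int], yarnMaxAttempts: Int): Int =
    sparkMaxAttempts match {
      case Some(x) if x <= yarnMaxAttempts => x
      case _ => yarnMaxAttempts
    }

  def main(args: Array[String]): Unit = {
    println(effectiveMaxAttempts(Some(5), 2))  // 2: capped by yarn.resourcemanager.am.max-attempts
    println(effectiveMaxAttempts(Some(1), 2))  // 1
    println(effectiveMaxAttempts(None, 2))     // 2
  }
}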

Author: WangTaoTheTonic barneystin...@aliyun.com

Closes #3942 from WangTaoTheTonic/HOTFIX and squashes the following commits:

9ac16ce [WangTaoTheTonic] fetch the correct max attempts


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f3da4bd7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f3da4bd7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f3da4bd7

Branch: refs/heads/master
Commit: f3da4bd7289d493014ad3c5176ada60794dfcfe0
Parents: 167a5ab
Author: WangTaoTheTonic barneystin...@aliyun.com
Authored: Fri Jan 9 08:10:09 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Jan 9 08:10:09 2015 -0600

--
 .../org/apache/spark/deploy/yarn/YarnRMClient.scala | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f3da4bd7/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
index e183efc..b45e599 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
@@ -121,9 +121,15 @@ private[spark] class YarnRMClient(args: 
ApplicationMasterArguments) extends Logg
 
   /** Returns the maximum number of attempts to register the AM. */
   def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): 
Int = {
-    sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt).getOrElse(
-      yarnConf.getInt(
-        YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS))
+    val sparkMaxAttempts = sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt)
+    val yarnMaxAttempts = yarnConf.getInt(
+      YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)
+    val retval: Int = sparkMaxAttempts match {
+      case Some(x) => if (x <= yarnMaxAttempts) x else yarnMaxAttempts
+      case None => yarnMaxAttempts
+    }
+
+    retval
   }
 
 }





spark git commit: [SPARK-2165][YARN]add support for setting maxAppAttempts in the ApplicationSubmissionContext

2015-01-07 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 5fde66163 -> 8fdd48959


[SPARK-2165][YARN]add support for setting maxAppAttempts in the 
ApplicationSubmissionContext

...xt

https://issues.apache.org/jira/browse/SPARK-2165

I still have 2 questions:
* If this config is not set, should we use YARN's corresponding value or a 
default value (like 2) on the Spark side?
* Is this config name best? Or spark.yarn.am.maxAttempts?
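
For context, a minimal sketch of how a client can set this value on YARN's
ApplicationSubmissionContext (the setup below is illustrative and simplified, not the
Spark Client code):

import org.apache.hadoop.yarn.client.api.YarnClient
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.spark.SparkConf

object MaxAppAttemptsSketch {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
    val yarnClient = YarnClient.createYarnClient()
    yarnClient.init(new YarnConfiguration())
    yarnClient.start()

    val appContext = yarnClient.createApplication().getApplicationSubmissionContext
    // Only override the cluster default when the user explicitly asked for it.
    sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt) match {
      case Some(v) => appContext.setMaxAppAttempts(v)
      case None => // fall back to yarn.resourcemanager.am.max-attempts
    }
    yarnClient.stop()
  }
}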

Author: WangTaoTheTonic barneystin...@aliyun.com

Closes #3878 from WangTaoTheTonic/SPARK-2165 and squashes the following commits:

1416c83 [WangTaoTheTonic] use the name spark.yarn.maxAppAttempts
202ac85 [WangTaoTheTonic] rephrase some
afdfc99 [WangTaoTheTonic] more detailed description
91562c6 [WangTaoTheTonic] add support for setting maxAppAttempts in the 
ApplicationSubmissionContext


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8fdd4895
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8fdd4895
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8fdd4895

Branch: refs/heads/master
Commit: 8fdd48959c93b9cf809f03549e2ae6c4687d1fcd
Parents: 5fde661
Author: WangTaoTheTonic barneystin...@aliyun.com
Authored: Wed Jan 7 08:14:39 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jan 7 08:14:39 2015 -0600

--
 docs/running-on-yarn.md  | 8 
 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 2 +-
 .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 5 +
 .../scala/org/apache/spark/deploy/yarn/YarnRMClient.scala| 7 +--
 4 files changed, 19 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index da1c8e8..183698f 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -149,6 +149,14 @@ Most of the configs are the same for Spark on YARN as for 
other deployment modes
   In cluster mode, use spark.driver.extraJavaOptions instead.
  </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.maxAppAttempts</code></td>
+  <td><code>yarn.resourcemanager.am.max-attempts</code> in YARN</td>
+  <td>
+  The maximum number of attempts that will be made to submit the application.
+  It should be no larger than the global number of max attempts in the YARN configuration.
+  </td>
+</tr>
 </table>
 
 # Launching Spark on YARN

http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 618db7f..902bdda 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -102,7 +102,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 logInfo(Invoking sc stop from shutdown hook)
 sc.stop()
   }
-  val maxAppAttempts = client.getMaxRegAttempts(yarnConf)
+  val maxAppAttempts = client.getMaxRegAttempts(sparkConf, yarnConf)
      val isLastAttempt = client.getAttemptId().getAttemptId() >= maxAppAttempts
 
   if (!finished) {

http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index addaddb..a2c3f91 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -98,6 +98,11 @@ private[spark] class Client(
 appContext.setQueue(args.amQueue)
 appContext.setAMContainerSpec(containerContext)
     appContext.setApplicationType("SPARK")
+    sparkConf.getOption("spark.yarn.maxAppAttempts").map(_.toInt) match {
+      case Some(v) => appContext.setMaxAppAttempts(v)
+      case None => logDebug("spark.yarn.maxAppAttempts is not set. " +
+          "Cluster's default value will be used.")
+    }
 val capability = Records.newRecord(classOf[Resource])
 capability.setMemory(args.amMemory + amMemoryOverhead)
 appContext.setResource(capability)

http://git-wip-us.apache.org/repos/asf/spark/blob/8fdd4895/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala
--
diff --git 

spark git commit: [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

2015-01-07 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master e21acc197 -> 5fde66163


[YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

Currently, yarn-client mode exits directly when a ResourceManager HA failover happens, 
no matter how many times the AM should retry.
The reason is that the default final status only considered the sys.exit path, so 
yarn-client mode cannot benefit from HA retries.
We should therefore distinguish the default final status between client and cluster 
mode: a SUCCEEDED status can make HA retries fail in client mode, while UNDEFINED can 
cause errors to be reported in cluster mode when the user code calls sys.exit.

Author: huangzhaowei carlmartin...@gmail.com

Closes #3771 from SaintBacchus/YarnHA and squashes the following commits:

c02bfcc [huangzhaowei] Improve the comment of the funciton 
'getDefaultFinalStatus'
0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fde6616
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fde6616
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fde6616

Branch: refs/heads/master
Commit: 5fde66163fe460d6f64b145047f76cc4ee33601a
Parents: e21acc1
Author: huangzhaowei carlmartin...@gmail.com
Authored: Wed Jan 7 08:10:42 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jan 7 08:10:42 2015 -0600

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5fde6616/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 9c77dff..618db7f 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
   @volatile private var exitCode = 0
   @volatile private var unregistered = false
   @volatile private var finished = false
-  @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED
+  @volatile private var finalStatus = getDefaultFinalStatus
  @volatile private var finalMsg: String = ""
   @volatile private var userClassThread: Thread = _
 
@@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
   }
 
   /**
+   * Set the default final application status for client mode to UNDEFINED to 
handle
+   * if YARN HA restarts the application so that it properly retries. Set the 
final
+   * status to SUCCEEDED in cluster mode to handle if the user calls 
System.exit
+   * from the application code.
+   */
+  final def getDefaultFinalStatus() = {
+if (isDriver) {
+  FinalApplicationStatus.SUCCEEDED
+} else {
+  FinalApplicationStatus.UNDEFINED
+}
+  }
+
+  /**
* unregister is used to completely unregister the application from the 
ResourceManager.
* This means the ResourceManager will not retry the application attempt on 
your behalf if
* a failure occurred.





spark git commit: [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

2015-01-07 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 db83acb1f -> 7a4be0b45


[YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA

Currently, yarn-client mode exits directly when a ResourceManager HA failover happens, 
no matter how many times the AM should retry.
The reason is that the default final status only considered the sys.exit path, so 
yarn-client mode cannot benefit from HA retries.
We should therefore distinguish the default final status between client and cluster 
mode: a SUCCEEDED status can make HA retries fail in client mode, while UNDEFINED can 
cause errors to be reported in cluster mode when the user code calls sys.exit.

Author: huangzhaowei carlmartin...@gmail.com

Closes #3771 from SaintBacchus/YarnHA and squashes the following commits:

c02bfcc [huangzhaowei] Improve the comment of the funciton 
'getDefaultFinalStatus'
0e69924 [huangzhaowei] Bug fix: fix the yarn-client code to support HA

(cherry picked from commit 5fde66163fe460d6f64b145047f76cc4ee33601a)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a4be0b4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a4be0b4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a4be0b4

Branch: refs/heads/branch-1.2
Commit: 7a4be0b45f003ce92031d36bf74a736a87889026
Parents: db83acb
Author: huangzhaowei carlmartin...@gmail.com
Authored: Wed Jan 7 08:10:42 2015 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jan 7 08:11:14 2015 -0600

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7a4be0b4/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 987b337..166e84e 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -60,7 +60,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
   @volatile private var exitCode = 0
   @volatile private var unregistered = false
   @volatile private var finished = false
-  @volatile private var finalStatus = FinalApplicationStatus.SUCCEEDED
+  @volatile private var finalStatus = getDefaultFinalStatus
  @volatile private var finalMsg: String = ""
   @volatile private var userClassThread: Thread = _
 
@@ -153,6 +153,20 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
   }
 
   /**
+   * Set the default final application status for client mode to UNDEFINED to 
handle
+   * if YARN HA restarts the application so that it properly retries. Set the 
final
+   * status to SUCCEEDED in cluster mode to handle if the user calls 
System.exit
+   * from the application code.
+   */
+  final def getDefaultFinalStatus() = {
+if (isDriver) {
+  FinalApplicationStatus.SUCCEEDED
+} else {
+  FinalApplicationStatus.UNDEFINED
+}
+  }
+
+  /**
* unregister is used to completely unregister the application from the 
ResourceManager.
* This means the ResourceManager will not retry the application attempt on 
your behalf if
* a failure occurred.





spark git commit: [SPARK-4966][YARN]The MemoryOverhead value is not set correctly

2014-12-29 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 23d64cf08 -> 2cd446a90


[SPARK-4966][YARN]The MemoryOverhead value is not set correctly

Author: meiyoula 1039320...@qq.com

Closes #3797 from XuTingjun/MemoryOverhead and squashes the following commits:

5a780fc [meiyoula] Update ClientArguments.scala

(cherry picked from commit 14fa87bdf4b89cd392270864ee063ce01bd31887)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cd446a9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cd446a9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cd446a9

Branch: refs/heads/branch-1.2
Commit: 2cd446a90216ac8eb19584c760685fbb470c4301
Parents: 23d64cf
Author: meiyoula 1039320...@qq.com
Authored: Mon Dec 29 08:20:30 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Dec 29 08:21:19 2014 -0600

--
 .../main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2cd446a9/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index 4d85945..7687a9b 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -39,6 +39,8 @@ private[spark] class ClientArguments(args: Array[String], 
sparkConf: SparkConf)
   var appName: String = "Spark"
   var priority = 0
 
+  parseArgs(args.toList)
+
   // Additional memory to allocate to containers
   // For now, use driver's memory overhead as our AM container's memory overhead
   val amMemoryOverhead = sparkConf.getInt("spark.yarn.driver.memoryOverhead",
@@ -50,7 +52,6 @@ private[spark] class ClientArguments(args: Array[String], sparkConf: SparkConf)
   private val isDynamicAllocationEnabled =
     sparkConf.getBoolean("spark.dynamicAllocation.enabled", false)
 
-  parseArgs(args.toList)
   loadEnvironmentArgs()
   validateArgs()
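
For readers wondering why moving parseArgs fixes the overhead value: Scala evaluates the
statements in a class body in declaration order, so a val computed from parsed state must
come after the parse call. A hypothetical, self-contained illustration (not the Spark
ClientArguments code; names and numbers are made up):

// Broken: amMemoryOverhead is computed before parse() has updated driverMemory.
class ArgsBroken(args: Array[String]) {
  private var driverMemory = 512
  val amMemoryOverhead = math.max(384, (0.07 * driverMemory).toInt)  // always sees 512
  parse()
  private def parse(): Unit = { driverMemory = args.headOption.map(_.toInt).getOrElse(512) }
}

// Fixed: parse first, then derive values from what was parsed, as the hunk above does.
class ArgsFixed(args: Array[String]) {
  private var driverMemory = 512
  parse()
  val amMemoryOverhead = math.max(384, (0.07 * driverMemory).toInt)
  private def parse(): Unit = { driverMemory = args.headOption.map(_.toInt).getOrElse(512) }
}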
 





spark git commit: [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first

2014-11-25 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master fef27b294 -> d24076019


[SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first

The documentation for the two parameters is the same with a pointer from the 
standalone parameter to the yarn parameter
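
A minimal usage sketch, setting the YARN-side property programmatically instead of via
--conf (illustrative only; the app name is made up, and the standalone-side counterpart
described in the same docs table is spark.files.userClassPathFirst):

import org.apache.spark.SparkConf

object UserClasspathFirstSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("user-classpath-first-example")          // hypothetical app name
      .set("spark.yarn.user.classpath.first", "true")      // give user jars precedence on YARN
    println(conf.get("spark.yarn.user.classpath.first"))
  }
}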

Author: arahuja aahuj...@gmail.com

Closes #3209 from arahuja/yarn-classpath-first-param and squashes the following 
commits:

51cb9b2 [arahuja] [SPARK-4344][DOCS] adding documentation for YARN on 
userClassPathFirst


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d2407601
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d2407601
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d2407601

Branch: refs/heads/master
Commit: d240760191f692ee7b88dfc82f06a31a340a88a2
Parents: fef27b2
Author: arahuja aahuj...@gmail.com
Authored: Tue Nov 25 08:23:41 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Nov 25 08:23:41 2014 -0600

--
 docs/configuration.md | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d2407601/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 8839162..0b77f5a 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -224,6 +224,7 @@ Apart from these, the following properties are also 
available, and may be useful
 (Experimental) Whether to give user-added jars precedence over Spark's own 
jars when
 loading classes in Executors. This feature can be used to mitigate 
conflicts between
 Spark's dependencies and user dependencies. It is currently an 
experimental feature.
+(Currently, this setting does not work for YARN, see <a href="https://issues.apache.org/jira/browse/SPARK-2996">SPARK-2996</a> for more details).
   </td>
 </tr>
 <tr>





spark git commit: [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first

2014-11-25 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 b026546e3 -> a689ab98d


[SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first

The documentation for the two parameters is the same with a pointer from the 
standalone parameter to the yarn parameter

Author: arahuja aahuj...@gmail.com

Closes #3209 from arahuja/yarn-classpath-first-param and squashes the following 
commits:

51cb9b2 [arahuja] [SPARK-4344][DOCS] adding documentation for YARN on 
userClassPathFirst

(cherry picked from commit d240760191f692ee7b88dfc82f06a31a340a88a2)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a689ab98
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a689ab98
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a689ab98

Branch: refs/heads/branch-1.2
Commit: a689ab98d944dbe4b239449897841543c0450450
Parents: b026546
Author: arahuja aahuj...@gmail.com
Authored: Tue Nov 25 08:23:41 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Nov 25 08:23:52 2014 -0600

--
 docs/configuration.md | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a689ab98/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index f0b396e..be418aa 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -224,6 +224,7 @@ Apart from these, the following properties are also 
available, and may be useful
 (Experimental) Whether to give user-added jars precedence over Spark's own 
jars when
 loading classes in Executors. This feature can be used to mitigate 
conflicts between
 Spark's dependencies and user dependencies. It is currently an 
experimental feature.
+(Currently, this setting does not work for YARN, see <a href="https://issues.apache.org/jira/browse/SPARK-2996">SPARK-2996</a> for more details).
   </td>
 </tr>
 <tr>





spark git commit: SPARK-4457. Document how to build for Hadoop versions greater than 2.4

2014-11-24 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 9b2a3c612 -> 29372b631


SPARK-4457. Document how to build for Hadoop versions greater than 2.4

Author: Sandy Ryza sa...@cloudera.com

Closes #3322 from sryza/sandy-spark-4457 and squashes the following commits:

5e72b77 [Sandy Ryza] Feedback
0cf05c1 [Sandy Ryza] Caveat
be8084b [Sandy Ryza] SPARK-4457. Document how to build for Hadoop versions 
greater than 2.4


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/29372b63
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/29372b63
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/29372b63

Branch: refs/heads/master
Commit: 29372b63185a4a170178b6ec2362d7112f389852
Parents: 9b2a3c6
Author: Sandy Ryza sa...@cloudera.com
Authored: Mon Nov 24 13:28:48 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Nov 24 13:28:48 2014 -0600

--
 docs/building-spark.md | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/29372b63/docs/building-spark.md
--
diff --git a/docs/building-spark.md b/docs/building-spark.md
index bb18414..fee6a84 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -92,8 +92,11 @@ mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests 
clean package
 # Apache Hadoop 2.3.X
 mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
 
-# Apache Hadoop 2.4.X
-mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -DskipTests clean package
+# Apache Hadoop 2.4.X or 2.5.X
+mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=VERSION -DskipTests clean package
+
+Versions of Hadoop after 2.5.X may or may not work with the -Phadoop-2.4 
profile (they were
+released after this version of Spark).
 
 # Different versions of HDFS and YARN.
 mvn -Pyarn-alpha -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=0.23.7 
-DskipTests clean package





spark git commit: [SPARK-3722][Docs]minor improvement and fix in docs

2014-11-14 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 825709a0b -> e421072da


[SPARK-3722][Docs]minor improvement and fix in docs

https://issues.apache.org/jira/browse/SPARK-3722

Author: WangTao barneystin...@aliyun.com

Closes #2579 from WangTaoTheTonic/docsWork and squashes the following commits:

6f91cec [WangTao] use more wording express
29d22fa [WangTao] delete the specified version link
34cb4ea [WangTao] Update running-on-yarn.md
4ee1a26 [WangTao] minor improvement and fix in docs


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e421072d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e421072d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e421072d

Branch: refs/heads/master
Commit: e421072da0ea87e7056cc3f2130ddaafc731530f
Parents: 825709a
Author: WangTao barneystin...@aliyun.com
Authored: Fri Nov 14 08:09:42 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Nov 14 08:09:42 2014 -0600

--
 docs/configuration.md   | 2 +-
 docs/running-on-yarn.md | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e421072d/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index f0b396e..8839162 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -52,7 +52,7 @@ Then, you can supply configuration values at runtime:
  --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" myApp.jar 
 {% endhighlight %}
 
-The Spark shell and 
[`spark-submit`](cluster-overview.html#launching-applications-with-spark-submit)
+The Spark shell and [`spark-submit`](submitting-applications.html)
 tool support two ways to load configurations dynamically. The first are 
command line options,
 such as `--master`, as shown above. `spark-submit` can accept any Spark 
property using the `--conf`
 flag, but uses special flags for properties that play a part in launching the 
Spark application.

http://git-wip-us.apache.org/repos/asf/spark/blob/e421072d/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 2f7e498..dfe2db4 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -39,7 +39,7 @@ Most of the configs are the same for Spark on YARN as for 
other deployment modes
   <td><code>spark.yarn.preserve.staging.files</code></td>
   <td>false</td>
   <td>
-    Set to true to preserve the staged files (Spark jar, app jar, distributed cache files) at the end of the job rather then delete them.
+    Set to true to preserve the staged files (Spark jar, app jar, distributed cache files) at the end of the job rather than delete them.
   </td>
 </tr>
 <tr>
@@ -159,7 +159,7 @@ For example:
 lib/spark-examples*.jar \
 10
 
-The above starts a YARN client program which starts the default Application 
Master. Then SparkPi will be run as a child thread of Application Master. The 
client will periodically poll the Application Master for status updates and 
display them in the console. The client will exit once your application has 
finished running.  Refer to the Viewing Logs section below for how to see 
driver and executor logs.
+The above starts a YARN client program which starts the default Application 
Master. Then SparkPi will be run as a child thread of Application Master. The 
client will periodically poll the Application Master for status updates and 
display them in the console. The client will exit once your application has 
finished running.  Refer to the Debugging your Application section below for 
how to see driver and executor logs.
 
 To launch a Spark application in yarn-client mode, do the same, but replace 
yarn-cluster with yarn-client.  To run spark-shell:
 
@@ -181,7 +181,7 @@ In YARN terminology, executors and application masters run 
inside containers.
 
    yarn logs -applicationId <app ID>
 
-will print out the contents of all log files from all containers from the 
given application.
+will print out the contents of all log files from all containers from the 
given application. You can also view the container log files directly in HDFS 
using the HDFS shell or API. The directory where they are located can be found 
by looking at your YARN configs (`yarn.nodemanager.remote-app-log-dir` and 
`yarn.nodemanager.remote-app-log-dir-suffix`).
 
 When log aggregation isn't turned on, logs are retained locally on each 
machine under `YARN_APP_LOGS_DIR`, which is usually configured to `/tmp/logs` 
or `$HADOOP_HOME/logs/userlogs` depending on the Hadoop version and 
installation. Viewing logs for a container requires going to the host that 
contains them and looking in this 

spark git commit: SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module

2014-11-11 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master deefd9d73 -> f820b563d


SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module

SPARK-3797 introduced the `network/yarn` module, but its YARN code depends on 
YARN APIs not present in older versions covered by the `yarn-alpha` profile. As 
a result builds like `mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 
-DskipTests clean package` fail.

The solution is just to not build `network/yarn` with profile `yarn-alpha`.

Author: Sean Owen so...@cloudera.com

Closes #3167 from srowen/SPARK-4305 and squashes the following commits:

88938cb [Sean Owen] Don't build network/yarn in yarn-alpha profile as it won't 
compile


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f820b563
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f820b563
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f820b563

Branch: refs/heads/master
Commit: f820b563d88f6a972c219d9340fe95110493fb87
Parents: deefd9d
Author: Sean Owen so...@cloudera.com
Authored: Tue Nov 11 12:30:35 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Nov 11 12:30:35 2014 -0600

--
 pom.xml | 1 -
 1 file changed, 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f820b563/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 88ef67c..4e0cd6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1229,7 +1229,6 @@
      <id>yarn-alpha</id>
      <modules>
        <module>yarn</module>
-        <module>network/yarn</module>
      </modules>
    </profile>
 





spark git commit: SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module

2014-11-11 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 cc1f3a0d6 -> 8f7e80f30


SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module

SPARK-3797 introduced the `network/yarn` module, but its YARN code depends on 
YARN APIs not present in older versions covered by the `yarn-alpha` profile. As 
a result builds like `mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 
-DskipTests clean package` fail.

The solution is just to not build `network/yarn` with profile `yarn-alpha`.

Author: Sean Owen so...@cloudera.com

Closes #3167 from srowen/SPARK-4305 and squashes the following commits:

88938cb [Sean Owen] Don't build network/yarn in yarn-alpha profile as it won't 
compile

(cherry picked from commit f820b563d88f6a972c219d9340fe95110493fb87)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f7e80f3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f7e80f3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f7e80f3

Branch: refs/heads/branch-1.2
Commit: 8f7e80f30bd34897963334d0245c0ea6fccd6182
Parents: cc1f3a0
Author: Sean Owen so...@cloudera.com
Authored: Tue Nov 11 12:30:35 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Nov 11 12:30:56 2014 -0600

--
 pom.xml | 1 -
 1 file changed, 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8f7e80f3/pom.xml
--
diff --git a/pom.xml b/pom.xml
index 88ef67c..4e0cd6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1229,7 +1229,6 @@
      <id>yarn-alpha</id>
      <modules>
        <module>yarn</module>
-        <module>network/yarn</module>
      </modules>
    </profile>
 





spark git commit: [SPARK-4282][YARN] Stopping flag in YarnClientSchedulerBackend should be volatile

2014-11-11 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master f820b563d -> 7f3718842


[SPARK-4282][YARN] Stopping flag in YarnClientSchedulerBackend should be 
volatile

In YarnClientSchedulerBackend, the variable `stopping` is used as a flag and is accessed 
by multiple threads, so it should be volatile.
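
A minimal sketch of the visibility problem @volatile addresses: without it, a polling
thread may never observe the write made by the thread that calls stop() (hypothetical
class, not the Spark backend):

object StoppingFlagSketch {
  class Monitor {
    @volatile private var stopping = false   // without @volatile the loop below may spin forever
    def stop(): Unit = { stopping = true }
    def start(): Unit = {
      new Thread(new Runnable {
        override def run(): Unit = {
          while (!stopping) { Thread.sleep(100) }   // polled from a different thread
          println("monitor loop exited cleanly")
        }
      }).start()
    }
  }

  def main(args: Array[String]): Unit = {
    val m = new Monitor
    m.start()
    Thread.sleep(300)
    m.stop()
  }
}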

Author: Kousuke Saruta saru...@oss.nttdata.co.jp

Closes #3143 from sarutak/stopping-flag-volatile and squashes the following 
commits:

58fdcc9 [Kousuke Saruta] Marked stoppig flag as volatile


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f371884
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f371884
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f371884

Branch: refs/heads/master
Commit: 7f3718842cc4025bb2ee2f5a3ec12efd100f6589
Parents: f820b56
Author: Kousuke Saruta saru...@oss.nttdata.co.jp
Authored: Tue Nov 11 12:33:53 2014 -0600
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Nov 11 12:33:53 2014 -0600

--
 .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7f371884/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index f6f6dc5..2923e67 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -33,7 +33,7 @@ private[spark] class YarnClientSchedulerBackend(
 
   private var client: Client = null
   private var appId: ApplicationId = null
-  private var stopping: Boolean = false
+  @volatile private var stopping: Boolean = false
 
   /**
* Create a Yarn client to submit an application to the ResourceManager.





git commit: SPARK-3837. Warn when YARN kills containers for exceeding memory limits

2014-10-31 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 58a6077e5 -> acd4ac7c9


SPARK-3837. Warn when YARN kills containers for exceeding memory limits

I triggered the issue and verified the message gets printed on a 
pseudo-distributed cluster.
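
A minimal sketch of the diagnostics matching the patch introduces, run against a made-up
YARN diagnostics string (the message text below is illustrative, not real cluster output):

import java.util.regex.Pattern

object MemLimitSketch {
  private val MEM_REGEX = "[0-9.]+ [KMG]B"
  private val PMEM_EXCEEDED_PATTERN =
    Pattern.compile(s"$MEM_REGEX of $MEM_REGEX physical memory used")

  def main(args: Array[String]): Unit = {
    // Hypothetical diagnostics in the shape YARN reports for a pmem kill (exit status -104).
    val diagnostics = "Container [pid=1234] is running beyond physical memory limits. " +
      "Current usage: 2.1 GB of 2 GB physical memory used; killing container."
    val matcher = PMEM_EXCEEDED_PATTERN.matcher(diagnostics)
    if (matcher.find()) {
      println("Container killed by YARN for exceeding memory limits. " + matcher.group() +
        ". Consider boosting spark.yarn.executor.memoryOverhead.")
    }
  }
}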

Author: Sandy Ryza sa...@cloudera.com

Closes #2744 from sryza/sandy-spark-3837 and squashes the following commits:

858a268 [Sandy Ryza] Review feedback
c937f00 [Sandy Ryza] SPARK-3837. Warn when YARN kills containers for exceeding 
memory limits


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acd4ac7c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acd4ac7c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acd4ac7c

Branch: refs/heads/master
Commit: acd4ac7c9a503445e27739708cf36e19119b8ddc
Parents: 58a6077
Author: Sandy Ryza sa...@cloudera.com
Authored: Fri Oct 31 08:43:06 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Oct 31 08:43:06 2014 -0500

--
 .../spark/deploy/yarn/YarnAllocator.scala   | 30 +++--
 .../spark/deploy/yarn/YarnAllocatorSuite.scala  | 34 
 2 files changed, 61 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/acd4ac7c/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index 7ae8ef2..e619619 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -20,6 +20,7 @@ package org.apache.spark.deploy.yarn
 import java.util.{List => JList}
 import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicInteger
+import java.util.regex.Pattern
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
@@ -375,12 +376,22 @@ private[yarn] abstract class YarnAllocator(
          logInfo("Completed container %s (state: %s, exit status: %s)".format(
 containerId,
 completedContainer.getState,
-completedContainer.getExitStatus()))
+completedContainer.getExitStatus))
   // Hadoop 2.2.X added a ContainerExitStatus we should switch to use
   // there are some exit status' we shouldn't necessarily count 
against us, but for
   // now I think its ok as none of the containers are expected to exit
-      if (completedContainer.getExitStatus() != 0) {
-        logInfo("Container marked as failed: " + containerId)
+      if (completedContainer.getExitStatus == -103) { // vmem limit exceeded
+        logWarning(memLimitExceededLogMessage(
+          completedContainer.getDiagnostics,
+          VMEM_EXCEEDED_PATTERN))
+      } else if (completedContainer.getExitStatus == -104) { // pmem limit exceeded
+        logWarning(memLimitExceededLogMessage(
+          completedContainer.getDiagnostics,
+          PMEM_EXCEEDED_PATTERN))
+      } else if (completedContainer.getExitStatus != 0) {
+        logInfo("Container marked as failed: " + containerId +
+          ". Exit status: " + completedContainer.getExitStatus +
+          ". Diagnostics: " + completedContainer.getDiagnostics)
 numExecutorsFailed.incrementAndGet()
   }
 }
@@ -428,6 +439,19 @@ private[yarn] abstract class YarnAllocator(
 }
   }
 
+  private val MEM_REGEX = "[0-9.]+ [KMG]B"
+  private val PMEM_EXCEEDED_PATTERN =
+    Pattern.compile(s"$MEM_REGEX of $MEM_REGEX physical memory used")
+  private val VMEM_EXCEEDED_PATTERN =
+    Pattern.compile(s"$MEM_REGEX of $MEM_REGEX virtual memory used")
+
+  def memLimitExceededLogMessage(diagnostics: String, pattern: Pattern): String = {
+    val matcher = pattern.matcher(diagnostics)
+    val diag = if (matcher.find()) " " + matcher.group() + "." else ""
+    ("Container killed by YARN for exceeding memory limits." + diag
+      + " Consider boosting spark.yarn.executor.memoryOverhead.")
+  }
+
   protected def allocatedContainersOnHost(host: String): Int = {
 var retval = 0
 allocatedHostToContainersMap.synchronized {

http://git-wip-us.apache.org/repos/asf/spark/blob/acd4ac7c/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
--
diff --git 
a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
new file mode 100644
index 000..9fff63f
--- /dev/null
+++ 

git commit: [SPARK-4116][YARN]Delete the abandoned log4j-spark-container.properties

2014-10-28 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master fae095bc7 -> 47346cd02


[SPARK-4116][YARN]Delete the abandoned log4j-spark-container.properties

Since its name was changed in https://github.com/apache/spark/pull/560, 
log4j-spark-container.properties has never been used again.
I searched for its name globally in the code and found no references to it.

Author: WangTaoTheTonic barneystin...@aliyun.com

Closes #2977 from WangTaoTheTonic/delLog4j and squashes the following commits:

fb2729f [WangTaoTheTonic] delete the log4j file obsoleted


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/47346cd0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/47346cd0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/47346cd0

Branch: refs/heads/master
Commit: 47346cd029abc50c70582a721810a7cceb682d8a
Parents: fae095b
Author: WangTaoTheTonic barneystin...@aliyun.com
Authored: Tue Oct 28 08:46:31 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Oct 28 08:46:31 2014 -0500

--
 .../resources/log4j-spark-container.properties  | 24 
 1 file changed, 24 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/47346cd0/yarn/common/src/main/resources/log4j-spark-container.properties
--
diff --git a/yarn/common/src/main/resources/log4j-spark-container.properties 
b/yarn/common/src/main/resources/log4j-spark-container.properties
deleted file mode 100644
index a1e37a0..000
--- a/yarn/common/src/main/resources/log4j-spark-container.properties
+++ /dev/null
@@ -1,24 +0,0 @@
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
-
-# Set everything to be logged to the console
-log4j.rootCategory=INFO, console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p 
%c{1}: %m%n
-
-# Settings to quiet third party logs that are too verbose
-log4j.logger.org.eclipse.jetty=WARN
-log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
-log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO





git commit: [SPARK-4098][YARN]use appUIAddress instead of appUIHostPort in yarn-client mode

2014-10-28 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master e8813be65 -> 0ac52e305


[SPARK-4098][YARN]use appUIAddress instead of appUIHostPort in yarn-client mode

https://issues.apache.org/jira/browse/SPARK-4098

Author: WangTaoTheTonic barneystin...@aliyun.com

Closes #2958 from WangTaoTheTonic/useAddress and squashes the following commits:

29236e6 [WangTaoTheTonic] use appUIAddress instead of appUIHostPort in 
yarn-cluster mode


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ac52e30
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ac52e30
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ac52e30

Branch: refs/heads/master
Commit: 0ac52e30552530b247e37a470b8503346f19605c
Parents: e8813be
Author: WangTaoTheTonic barneystin...@aliyun.com
Authored: Tue Oct 28 09:51:44 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Oct 28 09:51:44 2014 -0500

--
 .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0ac52e30/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index d948a2a..59b2b47 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -48,7 +48,7 @@ private[spark] class YarnClientSchedulerBackend(
     val driverHost = conf.get("spark.driver.host")
     val driverPort = conf.get("spark.driver.port")
     val hostport = driverHost + ":" + driverPort
-    sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress", ui.appUIHostPort) }
+    sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress", ui.appUIAddress) }
 
 val argsArrayBuf = new ArrayBuffer[String]()
     argsArrayBuf += ("--arg", hostport)





git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces.

2014-10-08 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 35afdfd62 -> 7fca8f41c


[SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces.

HA and viewfs use namespaces instead of host names, so you can't
resolve them since that will fail. So be smarter to avoid doing
unnecessary work.
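
A minimal sketch of the comparison the fix performs, using plain URIs instead of live
FileSystems (the namespace and paths below are made up, and this is not the ClientBase
code itself):

import java.net.{InetAddress, URI, UnknownHostException}

object CompareFsSketch {
  // HA and viewfs "hosts" are namespace names that won't resolve in DNS, so only
  // attempt resolution when the raw host strings actually differ.
  def sameFileSystem(src: URI, dst: URI): Boolean = {
    if (src.getScheme == null || src.getScheme != dst.getScheme) return false
    var srcHost = src.getHost
    var dstHost = dst.getHost
    if (srcHost != null && dstHost != null && srcHost != dstHost) {
      try {
        srcHost = InetAddress.getByName(srcHost).getCanonicalHostName
        dstHost = InetAddress.getByName(dstHost).getCanonicalHostName
      } catch {
        case _: UnknownHostException => return false
      }
    }
    srcHost == dstHost && src.getPort == dst.getPort
  }

  def main(args: Array[String]): Unit = {
    // "nameservice1" is a hypothetical HA namespace, not a resolvable host.
    println(sameFileSystem(new URI("hdfs://nameservice1/a"), new URI("hdfs://nameservice1/b")))  // true
    println(sameFileSystem(new URI("hdfs://nameservice1/a"), new URI("file:///tmp/b")))          // false
  }
}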

Author: Marcelo Vanzin van...@cloudera.com

Closes #2649 from vanzin/SPARK-3788 and squashes the following commits:

fedbc73 [Marcelo Vanzin] Update comment.
c938845 [Marcelo Vanzin] Use Objects.equal() to avoid issues with ==.
9f7b571 [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right 
thing for HA, federation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fca8f41
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fca8f41
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fca8f41

Branch: refs/heads/master
Commit: 7fca8f41c8889a41d9ab05ad0ab39c7639f657ed
Parents: 35afdfd
Author: Marcelo Vanzin van...@cloudera.com
Authored: Wed Oct 8 08:48:55 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Oct 8 08:48:55 2014 -0500

--
 .../apache/spark/deploy/yarn/ClientBase.scala   | 31 
 1 file changed, 12 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7fca8f41/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 6ecac6e..14a0386 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -23,6 +23,7 @@ import scala.collection.JavaConversions._
 import scala.collection.mutable.{HashMap, ListBuffer, Map}
 import scala.util.{Try, Success, Failure}
 
+import com.google.common.base.Objects
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission
@@ -64,12 +65,12 @@ private[spark] trait ClientBase extends Logging {
       s"memory capability of the cluster ($maxMem MB per container)")
     val executorMem = args.executorMemory + executorMemoryOverhead
     if (executorMem > maxMem) {
-      throw new IllegalArgumentException(s"Required executor memory (${args.executorMemory}" + 
+      throw new IllegalArgumentException(s"Required executor memory (${args.executorMemory}" +
         s"+$executorMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!")
     }
     val amMem = args.amMemory + amMemoryOverhead
     if (amMem > maxMem) {
-      throw new IllegalArgumentException(s"Required AM memory (${args.amMemory}" + 
+      throw new IllegalArgumentException(s"Required AM memory (${args.amMemory}" +
         s"+$amMemoryOverhead MB) is above the max threshold ($maxMem MB) of this cluster!")
     }
     logInfo("Will allocate AM container, with %d MB memory including %d MB overhead".format(
@@ -771,15 +772,17 @@ private[spark] object ClientBase extends Logging {
   private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = {
 val srcUri = srcFs.getUri()
 val dstUri = destFs.getUri()
-if (srcUri.getScheme() == null) {
-  return false
-}
-if (!srcUri.getScheme().equals(dstUri.getScheme())) {
+if (srcUri.getScheme() == null || srcUri.getScheme() != 
dstUri.getScheme()) {
   return false
 }
+
 var srcHost = srcUri.getHost()
 var dstHost = dstUri.getHost()
-    if ((srcHost != null) && (dstHost != null)) {
+
+// In HA or when using viewfs, the host part of the URI may not actually 
be a host, but the
+// name of the HDFS namespace. Those names won't resolve, so avoid even 
trying if they
+// match.
+    if (srcHost != null && dstHost != null && srcHost != dstHost) {
   try {
 srcHost = InetAddress.getByName(srcHost).getCanonicalHostName()
 dstHost = InetAddress.getByName(dstHost).getCanonicalHostName()
@@ -787,19 +790,9 @@ private[spark] object ClientBase extends Logging {
        case e: UnknownHostException =>
   return false
   }
-  if (!srcHost.equals(dstHost)) {
-return false
-  }
-    } else if (srcHost == null && dstHost != null) {
-      return false
-    } else if (srcHost != null && dstHost == null) {
-  return false
-}
-if (srcUri.getPort() != dstUri.getPort()) {
-  false
-} else {
-  true
 }
+
+    Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort()
   }
 
 }



git commit: [SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces (1.1 version).

2014-10-08 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 a1f833f75 - a44af7302


[SPARK-3788] [yarn] Fix compareFs to do the right thing for HDFS namespaces 
(1.1 version).

HA and viewfs use namespaces instead of host names, so trying to resolve them
will fail. Be smarter and avoid that unnecessary work.
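
For context, here is a standalone sketch of the behaviour this change is after (not the
patched ClientBase method itself, though it follows the same rule): matching
namespace-style authorities are accepted without a DNS lookup, and only real, differing
hosts get resolved. "ns1" below is a hypothetical HDFS nameservice, not a resolvable host.

    import java.net.{InetAddress, URI, UnknownHostException}

    // Only resolve hosts when they differ; matching namespace names (HA/viewfs)
    // are accepted as-is, so no failing DNS lookup is attempted.
    def sameFs(src: URI, dst: URI): Boolean = {
      if (src.getScheme == null || src.getScheme != dst.getScheme) return false
      var srcHost = src.getHost
      var dstHost = dst.getHost
      if (srcHost != null && dstHost != null && srcHost != dstHost) {
        try {
          srcHost = InetAddress.getByName(srcHost).getCanonicalHostName
          dstHost = InetAddress.getByName(dstHost).getCanonicalHostName
        } catch {
          case _: UnknownHostException => return false
        }
      }
      srcHost == dstHost && src.getPort == dst.getPort
    }

    assert(sameFs(new URI("hdfs://ns1/user/foo"), new URI("hdfs://ns1/tmp")))
    assert(!sameFs(new URI("hdfs://ns1/user/foo"), new URI("file:///tmp")))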

Author: Marcelo Vanzin van...@cloudera.com

Closes #2650 from vanzin/SPARK-3788-1.1 and squashes the following commits:

174bf71 [Marcelo Vanzin] Update comment.
0e36be7 [Marcelo Vanzin] Use Objects.equal() instead of ==.
772aead [Marcelo Vanzin] [SPARK-3788] [yarn] Fix compareFs to do the right 
thing for HA, federation (1.1 version).


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a44af730
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a44af730
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a44af730

Branch: refs/heads/branch-1.1
Commit: a44af7302f814204fdbcc7ad620bc6984b376468
Parents: a1f833f
Author: Marcelo Vanzin van...@cloudera.com
Authored: Wed Oct 8 08:51:17 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Oct 8 08:51:17 2014 -0500

--
 .../apache/spark/deploy/yarn/ClientBase.scala   | 29 
 1 file changed, 11 insertions(+), 18 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a44af730/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 6da3b16..27ee04a 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -24,6 +24,7 @@ import scala.collection.JavaConversions._
 import scala.collection.mutable.{HashMap, ListBuffer, Map}
 import scala.util.{Try, Success, Failure}
 
+import com.google.common.base.Objects
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission
@@ -122,15 +123,17 @@ trait ClientBase extends Logging {
   private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = {
     val srcUri = srcFs.getUri()
     val dstUri = destFs.getUri()
-    if (srcUri.getScheme() == null) {
-      return false
-    }
-    if (!srcUri.getScheme().equals(dstUri.getScheme())) {
+    if (srcUri.getScheme() == null || srcUri.getScheme() != dstUri.getScheme()) {
       return false
     }
+
     var srcHost = srcUri.getHost()
     var dstHost = dstUri.getHost()
-    if ((srcHost != null) && (dstHost != null)) {
+
+    // In HA or when using viewfs, the host part of the URI may not actually be a host, but the
+    // name of the HDFS namespace. Those names won't resolve, so avoid even trying if they
+    // match.
+    if (srcHost != null && dstHost != null && srcHost != dstHost) {
       try {
         srcHost = InetAddress.getByName(srcHost).getCanonicalHostName()
         dstHost = InetAddress.getByName(dstHost).getCanonicalHostName()
@@ -138,19 +141,9 @@ trait ClientBase extends Logging {
       case e: UnknownHostException =>
         return false
       }
-      if (!srcHost.equals(dstHost)) {
-        return false
-      }
-    } else if (srcHost == null && dstHost != null) {
-      return false
-    } else if (srcHost != null && dstHost == null) {
-      return false
-    }
-    if (srcUri.getPort() != dstUri.getPort()) {
-      false
-    } else {
-      true
     }
+
+    Objects.equal(srcHost, dstHost) && srcUri.getPort() == dstUri.getPort()
   }
 
   /** Copy the file into HDFS if needed. */
@@ -621,7 +614,7 @@ object ClientBase extends Logging {
 YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, path,
 File.pathSeparator)
 
-  /** 
+  /**
* Get the list of namenodes the user may access.
*/
   private[yarn] def getNameNodesToAccess(sparkConf: SparkConf): Set[Path] = {





git commit: [SPARK-3848] yarn alpha doesn't build on master

2014-10-08 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 7fca8f41c - f18dd5962


[SPARK-3848] yarn alpha doesn't build on master

The yarn alpha build was broken by #2432, which added an argument to YarnAllocator
but not to yarn/alpha's YarnAllocationHandler (commit
https://github.com/apache/spark/commit/79e45c9323455a51f25ed9acd0edd8682b4bbb88).

Author: Kousuke Saruta saru...@oss.nttdata.co.jp

Closes #2715 from sarutak/SPARK-3848 and squashes the following commits:

bafb8d1 [Kousuke Saruta] Fixed parameters for the default constructor of 
alpha/YarnAllocatorHandler.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f18dd596
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f18dd596
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f18dd596

Branch: refs/heads/master
Commit: f18dd5962e4a18c3507de8147bde3a8f56380439
Parents: 7fca8f4
Author: Kousuke Saruta saru...@oss.nttdata.co.jp
Authored: Wed Oct 8 11:53:43 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Oct 8 11:53:43 2014 -0500

--
 .../scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f18dd596/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 6c93d85..abd3783 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -43,7 +43,7 @@ private[yarn] class YarnAllocationHandler(
 args: ApplicationMasterArguments,
 preferredNodes: collection.Map[String, collection.Set[SplitInfo]],
 securityMgr: SecurityManager)
-  extends YarnAllocator(conf, sparkConf, args, preferredNodes, securityMgr) {
+  extends YarnAllocator(conf, sparkConf, appAttemptId, args, preferredNodes, securityMgr) {
 
   private val lastResponseId = new AtomicInteger()
   private val releaseList: CopyOnWriteArrayList[ContainerId] = new CopyOnWriteArrayList()





git commit: Modify default YARN memory_overhead-- from an additive constant to a multiplier

2014-10-02 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 82a6a083a - b4fb7b80a


Modify default YARN memory_overhead-- from an additive constant to a multiplier

Redone against the recent master branch 
(https://github.com/apache/spark/pull/1391)

Author: Nishkam Ravi nr...@cloudera.com
Author: nravi nr...@c1704.halxg.cloudera.com
Author: nishkamravi2 nishkamr...@gmail.com

Closes #2485 from nishkamravi2/master_nravi and squashes the following commits:

636a9ff [nishkamravi2] Update YarnAllocator.scala
8f76c8b [Nishkam Ravi] Doc change for yarn memory overhead
35daa64 [Nishkam Ravi] Slight change in the doc for yarn memory overhead
5ac2ec1 [Nishkam Ravi] Remove out
dac1047 [Nishkam Ravi] Additional documentation for yarn memory overhead issue
42c2c3d [Nishkam Ravi] Additional changes for yarn memory overhead issue
362da5e [Nishkam Ravi] Additional changes for yarn memory overhead
c726bd9 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark 
into master_nravi
f00fa31 [Nishkam Ravi] Improving logging for AM memoryOverhead
1cf2d1e [nishkamravi2] Update YarnAllocator.scala
ebcde10 [Nishkam Ravi] Modify default YARN memory_overhead-- from an additive 
constant to a multiplier (redone to resolve merge conflicts)
2e69f11 [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark 
into master_nravi
efd688a [Nishkam Ravi] Merge branch 'master' of https://github.com/apache/spark
2b630f9 [nravi] Accept memory input as 30g, 512M instead of an int value, 
to be consistent with rest of Spark
3bf8fad [nravi] Merge branch 'master' of https://github.com/apache/spark
5423a03 [nravi] Merge branch 'master' of https://github.com/apache/spark
eb663ca [nravi] Merge branch 'master' of https://github.com/apache/spark
df2aeb1 [nravi] Improved fix for ConcurrentModificationIssue (Spark-1097, 
Hadoop-10456)
6b840f0 [nravi] Undo the fix for SPARK-1758 (the problem is fixed)
5108700 [nravi] Fix in Spark for the Concurrent thread modification issue 
(SPARK-1097, HADOOP-10456)
681b36f [nravi] Fix for SPARK-1758: failing test 
org.apache.spark.JavaAPISuite.wholeTextFiles


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4fb7b80
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4fb7b80
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4fb7b80

Branch: refs/heads/master
Commit: b4fb7b80a0d863500943d788ad3e34d502a6dafa
Parents: 82a6a08
Author: Nishkam Ravi nr...@cloudera.com
Authored: Thu Oct 2 13:48:35 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Oct 2 13:48:35 2014 -0500

--
 docs/running-on-yarn.md |  8 
 .../apache/spark/deploy/yarn/ClientArguments.scala  | 16 +---
 .../org/apache/spark/deploy/yarn/ClientBase.scala   | 12 
 .../apache/spark/deploy/yarn/YarnAllocator.scala| 16 
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  8 ++--
 5 files changed, 35 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b4fb7b80/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 4b3a49e..695813a 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -79,16 +79,16 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
 </tr>
 <tr>
  <td><code>spark.yarn.executor.memoryOverhead</code></td>
-  <td>384</td>
+  <td>executorMemory * 0.07, with minimum of 384 </td>
   <td>
-    The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
+    The amount of off heap memory (in megabytes) to be allocated per executor. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size (typically 6-10%).
   </td>
 </tr>
 <tr>
   <td><code>spark.yarn.driver.memoryOverhead</code></td>
-  <td>384</td>
+  <td>driverMemory * 0.07, with minimum of 384 </td>
   <td>
-    The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc.
+    The amount of off heap memory (in megabytes) to be allocated per driver. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size (typically 6-10%).
   </td>
 </tr>
 <tr>

http://git-wip-us.apache.org/repos/asf/spark/blob/b4fb7b80/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
--
diff --git 

git commit: [YARN] SPARK-2668: Add variable of yarn log directory for reference from the log4j configuration

2014-09-23 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master f9d6220c7 - 14f8c3404


[YARN] SPARK-2668: Add variable of yarn log directory for reference from the 
log4j configuration

Assign the value of the YARN container log directory to the java opt
spark.yarn.app.container.log.dir, so a user-defined log4j.properties can reference
this value and write logs to the YARN container's log directory.
Otherwise, a user-defined file appender will only write to the container's CWD;
log files in the CWD are not displayed on the YARN UI and cannot be aggregated to
the HDFS log directory after the job finishes.

User defined log4j.properties reference example:
log4j.appender.rolling_file.File = ${spark.yarn.app.container.log.dir}/spark.log
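
The mechanism is an ordinary JVM system property: the launch command for the container
JVM carries the container log directory under the name spark.yarn.app.container.log.dir,
which log4j's ${...} substitution can read. A tiny sketch of what a process launched that
way could do (the property lookup is standard; the printed messages are mine):

    // Inside a YARN container the AM/executor launch command sets
    // spark.yarn.app.container.log.dir, visible here as a system property.
    sys.props.get("spark.yarn.app.container.log.dir") match {
      case Some(dir) => println(s"file appenders should write under $dir")
      case None      => println("not running in a YARN container (property unset)")
    }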

Author: peng.zhang peng.zh...@xiaomi.com

Closes #1573 from renozhang/yarn-log-dir and squashes the following commits:

16c5cb8 [peng.zhang] Update doc
f2b5e2a [peng.zhang] Change variable's name, and update running-on-yarn.md
503ea2d [peng.zhang] Support log4j log to yarn container dir


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14f8c340
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14f8c340
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14f8c340

Branch: refs/heads/master
Commit: 14f8c340402366cb998c563b3f7d9ff7d9940271
Parents: f9d6220
Author: peng.zhang peng.zh...@xiaomi.com
Authored: Tue Sep 23 08:45:56 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Sep 23 08:45:56 2014 -0500

--
 docs/running-on-yarn.md   | 2 ++
 .../src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala  | 3 +++
 .../scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala | 3 +++
 3 files changed, 8 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 74bcc2e..4b3a49e 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -205,6 +205,8 @@ Note that for the first option, both executors and the 
application master will s
 log4j configuration, which may cause issues when they run on the same node 
(e.g. trying to write
 to the same log file).
 
+If you need a reference to the proper location to put log files in the YARN so 
that YARN can properly display and aggregate them, use 
${spark.yarn.app.container.log.dir} in your log4j.properties. For example, 
log4j.appender.file_appender.File=${spark.yarn.app.container.log.dir}/spark.log.
 For streaming application, configuring RollingFileAppender and setting file 
location to YARN's log directory will avoid disk overflow caused by large log 
file, and logs can be accessed using YARN's log utility.
+
 # Important notes
 
 - Before Hadoop 2.2, YARN does not support cores in container resource 
requests. Thus, when running against an earlier version, the numbers of cores 
given via command line arguments cannot be passed to YARN.  Whether core 
requests are honored in scheduling decisions depends on which scheduler is in 
use and how it is configured.

http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index c96f731..6ae4d49 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -388,6 +388,9 @@ trait ClientBase extends Logging {
         .foreach(p => javaOpts += s"-Djava.library.path=$p")
     }
 
+    // For log4j configuration to reference
+    javaOpts += "-D=spark.yarn.app.container.log.dir=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR
+
     val userClass =
       if (args.userClass != null) {
         Seq("--class", YarnSparkHadoopUtil.escapeForShell(args.userClass))

http://git-wip-us.apache.org/repos/asf/spark/blob/14f8c340/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
index 312d82a..f56f72c 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnableUtil.scala
@@ -98,6 +98,9 @@ trait ExecutorRunnableUtil extends Logging {
 }
 */
 
+// For log4j 

[1/2] [SPARK-3477] Clean up code in Yarn Client / ClientBase

2014-09-23 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 14f8c3404 - c4022dd52


http://git-wip-us.apache.org/repos/asf/spark/blob/c4022dd5/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 82e45e3..0b43e6e 100644
--- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -21,11 +21,9 @@ import java.nio.ByteBuffer
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.io.DataOutputBuffer
-import org.apache.hadoop.yarn.api.protocolrecords._
 import org.apache.hadoop.yarn.api.records._
-import org.apache.hadoop.yarn.client.api.YarnClient
+import org.apache.hadoop.yarn.client.api.{YarnClient, YarnClientApplication}
 import org.apache.hadoop.yarn.conf.YarnConfiguration
-import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.Records
 
 import org.apache.spark.{Logging, SparkConf}
@@ -34,128 +32,98 @@ import org.apache.spark.deploy.SparkHadoopUtil
 /**
  * Version of [[org.apache.spark.deploy.yarn.ClientBase]] tailored to YARN's 
stable API.
  */
-class Client(clientArgs: ClientArguments, hadoopConf: Configuration, spConf: SparkConf)
+private[spark] class Client(
+    val args: ClientArguments,
+    val hadoopConf: Configuration,
+    val sparkConf: SparkConf)
   extends ClientBase with Logging {
 
-  val yarnClient = YarnClient.createYarnClient
-
   def this(clientArgs: ClientArguments, spConf: SparkConf) =
 this(clientArgs, SparkHadoopUtil.get.newConfiguration(spConf), spConf)
 
   def this(clientArgs: ClientArguments) = this(clientArgs, new SparkConf())
 
-  val args = clientArgs
-  val conf = hadoopConf
-  val sparkConf = spConf
-  var rpc: YarnRPC = YarnRPC.create(conf)
-  val yarnConf: YarnConfiguration = new YarnConfiguration(conf)
-
-  def runApp(): ApplicationId = {
-validateArgs()
-// Initialize and start the client service.
+  val yarnClient = YarnClient.createYarnClient
+  val yarnConf = new YarnConfiguration(hadoopConf)
+
+  def stop(): Unit = yarnClient.stop()
+
+  /* ------------------------------------------------------------------------------------- *
+   | The following methods have much in common in the stable and alpha versions of Client, |
+   | but cannot be implemented in the parent trait due to subtle API differences across    |
+   | hadoop versions.                                                                       |
+   * ------------------------------------------------------------------------------------- */
+
+  /**
+   * Submit an application running our ApplicationMaster to the 
ResourceManager.
+   *
+   * The stable Yarn API provides a convenience method 
(YarnClient#createApplication) for
+   * creating applications and setting up the application submission context. 
This was not
+   * available in the alpha API.
+   */
+  override def submitApplication(): ApplicationId = {
 yarnClient.init(yarnConf)
 yarnClient.start()
 
-    // Log details about this YARN cluster (e.g, the number of slave machines/NodeManagers).
-    logClusterResourceDetails()
-
-    // Prepare to submit a request to the ResourcManager (specifically its ApplicationsManager (ASM)
-    // interface).
+    logInfo("Requesting a new application from cluster with %d NodeManagers"
+      .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers))
 
-// Get a new client application.
+// Get a new application from our RM
 val newApp = yarnClient.createApplication()
 val newAppResponse = newApp.getNewApplicationResponse()
 val appId = newAppResponse.getApplicationId()
 
+// Verify whether the cluster has enough resources for our AM
 verifyClusterResources(newAppResponse)
 
-    // Set up resource and environment variables.
-    val appStagingDir = getAppStagingDir(appId)
-    val localResources = prepareLocalResources(appStagingDir)
-    val launchEnv = setupLaunchEnv(localResources, appStagingDir)
-    val amContainer = createContainerLaunchContext(newAppResponse, localResources, launchEnv)
+    // Set up the appropriate contexts to launch our AM
+    val containerContext = createContainerLaunchContext(newAppResponse)
+    val appContext = createApplicationSubmissionContext(newApp, containerContext)
 
-    // Set up an application submission context.
-    val appContext = newApp.getApplicationSubmissionContext()
-    appContext.setApplicationName(args.appName)
-    appContext.setQueue(args.amQueue)
-    appContext.setAMContainerSpec(amContainer)
-    appContext.setApplicationType("SPARK")
-
-    // Memory for the ApplicationMaster.
-    val memoryResource = Records.newRecord(classOf[Resource]).asInstanceOf[Resource]
-    memoryResource.setMemory(args.amMemory + memoryOverhead)
- 

[2/2] git commit: [SPARK-3477] Clean up code in Yarn Client / ClientBase

2014-09-23 Thread tgraves
[SPARK-3477] Clean up code in Yarn Client / ClientBase

This is part of a broader effort to clean up the Yarn integration code after 
#2020.

The high-level changes in this PR include:
- Removing duplicate code, especially across the alpha and stable APIs
- Simplify unnecessarily complex method signatures and hierarchies
- Rename unclear variable and method names
- Organize logging output produced when the user runs Spark on Yarn
- Extensively add documentation
- Privatize classes where possible

I have tested the stable API on a Hadoop 2.4 cluster. I tested submitting a jar 
that references classes in other jars in both client and cluster mode. I also 
made changes in the alpha API, though I do not have access to an alpha cluster. 
I have verified that it compiles, but it would be ideal if others can help test 
it.

For those interested in some examples in detail, please read on.



***Appendix***

- The loop to `getApplicationReport` from the RM is duplicated in 4 places: in 
the stable `Client`, alpha `Client`, and twice in `YarnClientSchedulerBackend`. 
We should not have different loops for client and cluster deploy modes.
- There are many fragmented small helper methods that are only used once and 
should just be inlined. For instance, `ClientBase#getLocalPath` returns `null` 
on certain conditions, and its only caller `ClientBase#addFileToClasspath` 
checks whether the value returned is `null`. We could just have the caller 
check on that same condition to avoid passing `null`s around.
- In `YarnSparkHadoopUtil#addToEnvironment`, we take in an argument 
`classpathSeparator` that always has the same value upstream (i.e. 
`File.pathSeparator`). This argument is now removed from the signature and all 
callers of this method upstream.
- `ClientBase#copyRemoteFile` is now renamed to `copyFileToRemote`. It was 
unclear whether we are copying a remote file to our local file system, or 
copying a locally visible file to a remote file system. Also, even the content 
of the method has inaccurately named variables. We use `val remoteFs` to 
signify the file system of the locally visible file and `val fs` to signify the 
remote, destination file system. These are now renamed `srcFs` and `destFs` 
respectively.
- We currently log the AM container's environment and resource mappings 
directly as Scala collections. This is incredibly hard to read and probably too 
verbose for the average Spark user. In other modes (e.g. standalone), we also 
don't log the launch commands by default, so the logging level of these 
information is now set to `DEBUG`.
- None of these classes (`Client`, `ClientBase`, `YarnSparkHadoopUtil` etc.) is 
intended to be used by a Spark application (the user should go through Spark 
submit instead). At the very least they should be `private[spark]`.

Author: Andrew Or andrewo...@gmail.com

Closes #2350 from andrewor14/yarn-cleanup and squashes the following commits:

39e8c7b [Andrew Or] Address review comments
6619f9b [Andrew Or] Merge branch 'master' of github.com:apache/spark into 
yarn-cleanup
2ca6d64 [Andrew Or] Improve logging in application monitor
a3b9693 [Andrew Or] Minor changes
7dd6298 [Andrew Or] Simplify ClientBase#monitorApplication
547487c [Andrew Or] Provide default values for null application report entries
a0ad1e9 [Andrew Or] Fix class not found error
1590141 [Andrew Or] Address review comments
45ccdea [Andrew Or] Remove usages of getAMMemory
d8e33b6 [Andrew Or] Merge branch 'master' of github.com:apache/spark into 
yarn-cleanup
ed0b42d [Andrew Or] Fix alpha compilation error
c0587b4 [Andrew Or] Merge branch 'master' of github.com:apache/spark into 
yarn-cleanup
6d74888 [Andrew Or] Minor comment changes
6573c1d [Andrew Or] Clean up, simplify and document code for setting classpaths
e4779b6 [Andrew Or] Clean up log messages + variable naming in ClientBase
8766d37 [Andrew Or] Heavily add documentation to Client* classes + various 
clean-ups
6c94d79 [Andrew Or] Various cleanups in ClientBase and ClientArguments
ef7069a [Andrew Or] Clean up YarnClientSchedulerBackend more
6de9072 [Andrew Or] Guard against potential NPE in debug logging mode
fabe4c4 [Andrew Or] Reuse more code in YarnClientSchedulerBackend
3f941dc [Andrew Or] First cut at simplifying the Client (stable and alpha)


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4022dd5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4022dd5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4022dd5

Branch: refs/heads/master
Commit: c4022dd52b4827323ff956632dc7623f546da937
Parents: 14f8c34
Author: Andrew Or andrewo...@gmail.com
Authored: Tue Sep 23 11:20:52 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Sep 23 11:20:52 2014 -0500

--
 

git commit: [SPARK-3304] [YARN] ApplicationMaster's Finish status is wrong when uncaught exception is thrown from ReporterThread

2014-09-23 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c4022dd52 - 11c10df82


[SPARK-3304] [YARN] ApplicationMaster's Finish status is wrong when uncaught 
exception is thrown from ReporterThread
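
The diff below wraps each reporter iteration in a try/catch and only fails the application
after a fatal error or a run of consecutive failures. A condensed, standalone sketch of that
retry pattern follows; the function name and the always-failing heartbeat are mine:

    import scala.util.control.NonFatal

    // Reset the counter on success; give up on a fatal error or after
    // maxFailures consecutive non-fatal ones. Returns true on success.
    def runReporter(maxFailures: Int)(heartbeat: () => Unit): Boolean = {
      var failures = 0
      var failed = false
      var done = false
      while (!done) {
        try {
          heartbeat()
          failures = 0
          done = true // the real thread keeps looping until the AM finishes
        } catch {
          case e: Throwable =>
            failures += 1
            if (!NonFatal(e) || failures >= maxFailures) {
              failed = true
              done = true
            }
        }
      }
      !failed
    }

    // A heartbeat that always throws gives up after maxFailures attempts.
    assert(!runReporter(5)(() => throw new RuntimeException("allocate failed")))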

Author: Kousuke Saruta saru...@oss.nttdata.co.jp

Closes #2198 from sarutak/SPARK-3304 and squashes the following commits:

2696237 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
5b80363 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
4eb0a3e [Kousuke Saruta] Remoed the description about 
spark.yarn.scheduler.reporterThread.maxFailure
9741597 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
f7538d4 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
358ef8d [Kousuke Saruta] Merge branch 'SPARK-3304' of github.com:sarutak/spark 
into SPARK-3304
0d138c6 [Kousuke Saruta] Revert tmp
f8da10a [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
b6e9879 [Kousuke Saruta] tmp
8d256ed [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
13b2652 [Kousuke Saruta] Merge branch 'SPARK-3304' of github.com:sarutak/spark 
into SPARK-3304
2711e15 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
c081f8e [Kousuke Saruta] Modified ApplicationMaster to handle exception in 
ReporterThread itself
0bbd3a6 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3304
a6982ad [Kousuke Saruta] Added ability handling uncaught exception thrown from 
Reporter thread


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/11c10df8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/11c10df8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/11c10df8

Branch: refs/heads/master
Commit: 11c10df825419372df61a8d23c51e8c3cc78047f
Parents: c4022dd
Author: Kousuke Saruta saru...@oss.nttdata.co.jp
Authored: Tue Sep 23 11:40:14 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Sep 23 11:40:14 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 66 
 1 file changed, 54 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/11c10df8/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index cde5fff..9050808 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -17,7 +17,10 @@
 
 package org.apache.spark.deploy.yarn
 
+import scala.util.control.NonFatal
+
 import java.io.IOException
+import java.lang.reflect.InvocationTargetException
 import java.net.Socket
 import java.util.concurrent.atomic.AtomicReference
 
@@ -55,6 +58,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 
   @volatile private var finished = false
   @volatile private var finalStatus = FinalApplicationStatus.UNDEFINED
+  @volatile private var userClassThread: Thread = _
 
   private var reporterThread: Thread = _
   private var allocator: YarnAllocator = _
@@ -221,18 +225,48 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 // must be = expiryInterval / 2.
 val interval = math.max(0, math.min(expiryInterval / 2, schedulerInterval))
 
+    // The number of failures in a row until Reporter thread give up
+    val reporterMaxFailures = sparkConf.getInt("spark.yarn.scheduler.reporterThread.maxFailures", 5)
+
 val t = new Thread {
   override def run() {
+var failureCount = 0
+
         while (!finished) {
-          checkNumExecutorsFailed()
-          if (!finished) {
-            logDebug("Sending progress")
-            allocator.allocateResources()
-            try {
-              Thread.sleep(interval)
-            } catch {
-              case e: InterruptedException =>
+          try {
+            checkNumExecutorsFailed()
+            if (!finished) {
+              logDebug("Sending progress")
+              allocator.allocateResources()
             }
+            failureCount = 0
+          } catch {
+            case e: Throwable => {
+              failureCount += 1
+              if (!NonFatal(e) || failureCount >= reporterMaxFailures) {
+                logError("Exception was thrown from Reporter thread.", e)
+                finish(FinalApplicationStatus.FAILED, "Exception was thrown " +
+                  s"${failureCount} time(s) from Reporter 

git commit: SPARK-3177 (on Master Branch)

2014-09-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 983609a4d - 7d1a37239


SPARK-3177 (on Master Branch)

The JIRA and PR was original created for branch-1.1, and move to master branch 
now.
Chester

The issue is that yarn-alpha and yarn have different APIs for certain class fields.
In this particular case, ClientBase uses reflection to address this, so we need a
different way to test ClientBase's method. The original ClientBaseSuite used the
getFieldValue() method to do this, but it doesn't work for yarn-alpha, where the API
returns an array of String instead of just a String (as the Yarn-stable API does).

 To fix the test, I add a new method

  def getFieldValue2[A: ClassTag, A1: ClassTag, B](clazz: Class[_], field: String,
      defaults: => B)
      (mapTo: A => B)(mapTo1: A1 => B): B =
    Try(clazz.getField(field)).map(_.get(null)).map {
      case v: A => mapTo(v)
      case v1: A1 => mapTo1(v1)
      case _ => defaults
    }.toOption.getOrElse(defaults)

to handle the cases where the field type can be either type A or A1. In this 
new method the type A or A1 is pattern matched and corresponding mapTo function 
(mapTo or mapTo1) is used.
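
A small usage sketch of the helper on a plain JDK class (the call sites below are mine,
not part of the patch): java.io.File.pathSeparator is a public static String field, so the
String branch fires, while a missing field falls back to the default.

    import scala.reflect.ClassTag
    import scala.util.Try

    def getFieldValue2[A: ClassTag, A1: ClassTag, B](
        clazz: Class[_], field: String, defaults: => B)
        (mapTo: A => B)(mapTo1: A1 => B): B =
      Try(clazz.getField(field)).map(_.get(null)).map {
        case v: A   => mapTo(v)
        case v1: A1 => mapTo1(v1)
        case _      => defaults
      }.toOption.getOrElse(defaults)

    val sep = getFieldValue2[String, Array[String], Seq[String]](
      classOf[java.io.File], "pathSeparator", Seq.empty)(s => Seq(s))(_.toSeq)
    assert(sep.nonEmpty)

    val missing = getFieldValue2[String, Array[String], Seq[String]](
      classOf[java.io.File], "noSuchField", Seq("default"))(s => Seq(s))(_.toSeq)
    assert(missing == Seq("default"))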

Author: chesterxgchen ches...@alpinenow.com

Closes #2204 from chesterxgchen/SPARK-3177-master and squashes the following 
commits:

e72a6ea [chesterxgchen]  The Issue is due to that yarn-alpha and yarn have 
different APIs for certain class fields. In this particular case,  the 
ClientBase using reflection to to address this issue, and we need to different 
way to test the ClientBase's method.  Original ClientBaseSuite using 
getFieldValue() method to do this. But it doesn't work for yarn-alpha as the 
API returns an array of String instead of just String (which is the case for 
Yarn-stable API).


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d1a3723
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d1a3723
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d1a3723

Branch: refs/heads/master
Commit: 7d1a37239c50394025d9f16acf5dcd05cfbe7250
Parents: 983609a
Author: chesterxgchen ches...@alpinenow.com
Authored: Wed Sep 17 10:25:52 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Sep 17 10:25:52 2014 -0500

--
 .../spark/deploy/yarn/ClientBaseSuite.scala  | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7d1a3723/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
--
diff --git 
a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
index 5480eca..c3b7a2c 100644
--- 
a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
+++ 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
@@ -38,6 +38,7 @@ import org.scalatest.Matchers
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ HashMap = MutableHashMap }
+import scala.reflect.ClassTag
 import scala.util.Try
 
 import org.apache.spark.{SparkException, SparkConf}
@@ -200,9 +201,10 @@ class ClientBaseSuite extends FunSuite with Matchers {
 
 
 val knownDefMRAppCP: Seq[String] =
-      getFieldValue[String, Seq[String]](classOf[MRJobConfig],
-                                         "DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
-                                         Seq[String]())(a => a.split(","))
+      getFieldValue2[String, Array[String], Seq[String]](
+        classOf[MRJobConfig],
+        "DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH",
+        Seq[String]())(a => a.split(","))(a => a.toSeq)
 
     val knownYARNAppCP = Some(Seq("/known/yarn/path"))
 
@@ -232,6 +234,17 @@ class ClientBaseSuite extends FunSuite with Matchers {
   def getFieldValue[A, B](clazz: Class[_], field: String, defaults: => B)(mapTo: A => B): B =
     Try(clazz.getField(field)).map(_.get(null).asInstanceOf[A]).toOption.map(mapTo).getOrElse(defaults)
 
+  def getFieldValue2[A: ClassTag, A1: ClassTag, B](
+      clazz: Class[_],
+      field: String,
+      defaults: => B)(mapTo: A => B)(mapTo1: A1 => B): B = {
+    Try(clazz.getField(field)).map(_.get(null)).map {
+      case v: A => mapTo(v)
+      case v1: A1 => mapTo1(v1)
+      case _ => defaults
+    }.toOption.getOrElse(defaults)
+  }
+
   private class DummyClient(
   val args: ClientArguments,
   val conf: Configuration,



git commit: [SPARK-3410] The priority of shutdownhook for ApplicationMaster should not be integer literal

2014-09-15 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master f493f7982 - cc1464446


[SPARK-3410] The priority of shutdownhook for ApplicationMaster should not be 
integer literal

I think we need to keep the priority of the shutdown hook for ApplicationMaster
higher than the priority of the shutdown hook for o.a.h.FileSystem, without
depending on a hard-coded value that breaks if the priority for FileSystem changes.
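
Hadoop's ShutdownHookManager runs higher-priority hooks first, so the AM's cleanup hook
must carry a larger priority value than FileSystem.SHUTDOWN_HOOK_PRIORITY if it is to run
before HDFS clients are closed. A minimal sketch of that wiring, assuming hadoop-common on
the classpath; the wrapper object and method name are mine, the constant value mirrors the
diff below:

    import org.apache.hadoop.fs.FileSystem
    import org.apache.hadoop.util.ShutdownHookManager

    object ShutdownHookSketch {
      // Same value the patch uses; must stay above FileSystem's hook priority.
      val AmShutdownHookPriority = 30

      def install(cleanup: Runnable): Unit = {
        require(AmShutdownHookPriority > FileSystem.SHUTDOWN_HOOK_PRIORITY)
        ShutdownHookManager.get().addShutdownHook(cleanup, AmShutdownHookPriority)
      }
    }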

Author: Kousuke Saruta saru...@oss.nttdata.co.jp

Closes #2283 from sarutak/SPARK-3410 and squashes the following commits:

1d44fef [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3410
bd6cc53 [Kousuke Saruta] Modified style
ee6f1aa [Kousuke Saruta] Added constant SHUTDOWN_HOOK_PRIORITY to 
ApplicationMaster
54eb68f [Kousuke Saruta] Changed Shutdown hook priority to 20
2f0aee3 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark 
into SPARK-3410
4c5cb93 [Kousuke Saruta] Modified the priority for AM's shutdown hook
217d1a4 [Kousuke Saruta] Removed unused import statements
717aba2 [Kousuke Saruta] Modified ApplicationMaster to make to keep the 
priority of shutdown hook for ApplicationMaster higher than the priority of 
shutdown hook for HDFS


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc146444
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc146444
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc146444

Branch: refs/heads/master
Commit: cc14644460872efb344e8d895859d70213a40840
Parents: f493f79
Author: Kousuke Saruta saru...@oss.nttdata.co.jp
Authored: Mon Sep 15 08:53:58 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Sep 15 08:53:58 2014 -0500

--
 .../apache/spark/deploy/yarn/ApplicationMaster.scala   | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/cc146444/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 735d772..cde5fff 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -21,12 +21,8 @@ import java.io.IOException
 import java.net.Socket
 import java.util.concurrent.atomic.AtomicReference
 
-import scala.collection.JavaConversions._
-import scala.util.Try
-
 import akka.actor._
 import akka.remote._
-import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.util.ShutdownHookManager
 import org.apache.hadoop.yarn.api._
@@ -107,8 +103,11 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 }
   }
 }
-    // Use priority 30 as it's higher than HDFS. It's the same priority MapReduce is using.
-    ShutdownHookManager.get().addShutdownHook(cleanupHook, 30)
+
+    // Use higher priority than FileSystem.
+    assert(ApplicationMaster.SHUTDOWN_HOOK_PRIORITY > FileSystem.SHUTDOWN_HOOK_PRIORITY)
+    ShutdownHookManager
+      .get().addShutdownHook(cleanupHook, ApplicationMaster.SHUTDOWN_HOOK_PRIORITY)
 
 // Call this to force generation of secret so it gets populated into the
 // Hadoop UGI. This has to happen before the startUserClass which does a
@@ -407,6 +406,8 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 
 object ApplicationMaster extends Logging {
 
+  val SHUTDOWN_HOOK_PRIORITY: Int = 30
+
   private var master: ApplicationMaster = _
 
   def main(args: Array[String]) = {





git commit: SPARK-3014. Log a more informative messages in a couple failure scenario...

2014-09-12 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 15a564598 - 1d767967e


SPARK-3014. Log a more informative messages in a couple failure scenario...

...s

Author: Sandy Ryza sa...@cloudera.com

Closes #1934 from sryza/sandy-spark-3014 and squashes the following commits:

ae19cc1 [Sandy Ryza] SPARK-3014. Log a more informative messages in a couple 
failure scenarios


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d767967
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d767967
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d767967

Branch: refs/heads/master
Commit: 1d767967e925f1d727957c2d43383ef6ad2c5d5e
Parents: 15a5645
Author: Sandy Ryza sa...@cloudera.com
Authored: Fri Sep 12 16:48:28 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Sep 12 16:48:28 2014 -0500

--
 core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala  | 6 --
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 6 ++
 2 files changed, 6 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1d767967/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 0fdb5ae..5ed3575 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.{File, PrintStream}
-import java.lang.reflect.InvocationTargetException
+import java.lang.reflect.{Modifier, InvocationTargetException}
 import java.net.URL
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, Map}
@@ -323,7 +323,9 @@ object SparkSubmit {
 }
 
     val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass)
-
+    if (!Modifier.isStatic(mainMethod.getModifiers)) {
+      throw new IllegalStateException("The main method in the given main class must be static")
+    }
 try {
   mainMethod.invoke(null, childArgs.toArray)
 } catch {

http://git-wip-us.apache.org/repos/asf/spark/blob/1d767967/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 878b6db..735d772 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -283,11 +283,9 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 }
 
     val sparkContext = sparkContextRef.get()
-    assert(sparkContext != null || count >= numTries)
     if (sparkContext == null) {
-      logError(
-        "Unable to retrieve sparkContext inspite of waiting for %d, numTries = %d".format(
-          count * waitTime, numTries))
+      logError(("SparkContext did not initialize after waiting for %d ms. Please check earlier"
+        + " log output for errors. Failing the application.").format(numTries * waitTime))
 }
 sparkContext
   }





git commit: [SPARK-2140] Updating heap memory calculation for YARN stable and alpha.

2014-09-11 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 e51ce9a55 - 06fb2d057


[SPARK-2140] Updating heap memory calculation for YARN stable and alpha.

Updated pull request, reflecting YARN stable and alpha states. I am getting 
intermittent test failures on my own test infrastructure. Is that tracked 
anywhere yet?

Author: Chris Cope cc...@resilientscience.com

Closes #2253 from copester/master and squashes the following commits:

5ad89da [Chris Cope] [SPARK-2140] Removing calculateAMMemory functions since 
they are no longer needed.
52b4e45 [Chris Cope] [SPARK-2140] Updating heap memory calculation for YARN 
stable and alpha.

(cherry picked from commit ed1980ffa9ccb87d76694ba910ef22df034bca49)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06fb2d05
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06fb2d05
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06fb2d05

Branch: refs/heads/branch-1.1
Commit: 06fb2d057beb50e9b690bf8b6d5bb7bdb16d8546
Parents: e51ce9a
Author: Chris Cope cc...@resilientscience.com
Authored: Thu Sep 11 08:13:07 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Sep 11 08:13:27 2014 -0500

--
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala| 8 
 .../scala/org/apache/spark/deploy/yarn/ClientBase.scala | 4 +---
 .../org/apache/spark/deploy/yarn/ClientBaseSuite.scala  | 3 ---
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala| 9 -
 4 files changed, 1 insertion(+), 23 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 9be7854..3607eed 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -102,14 +102,6 @@ class Client(clientArgs: ClientArguments, hadoopConf: 
Configuration, spConf: Spa
 appContext
   }
 
-  def calculateAMMemory(newApp: GetNewApplicationResponse): Int = {
-val minResMemory = newApp.getMinimumResourceCapability().getMemory()
-val amMemory = ((args.amMemory / minResMemory) * minResMemory) +
-  ((if ((args.amMemory % minResMemory) == 0) 0 else minResMemory) -
-  memoryOverhead)
-amMemory
-  }
-
   def setupSecurityToken(amContainer: ContainerLaunchContext) = {
 // Setup security tokens.
 val dob = new DataOutputBuffer()

http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 3897b3a..6da3b16 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -309,8 +309,6 @@ trait ClientBase extends Logging {
 retval.toString
   }
 
-  def calculateAMMemory(newApp: GetNewApplicationResponse): Int
-
   def setupSecurityToken(amContainer: ContainerLaunchContext)
 
   def createContainerLaunchContext(
@@ -353,7 +351,7 @@ trait ClientBase extends Logging {
 }
 amContainer.setEnvironment(env)
 
-val amMemory = calculateAMMemory(newApp)
+val amMemory = args.amMemory
 
 val javaOpts = ListBuffer[String]()
 

http://git-wip-us.apache.org/repos/asf/spark/blob/06fb2d05/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
--
diff --git 
a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
index 68cc289..5480eca 100644
--- 
a/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
+++ 
b/yarn/common/src/test/scala/org/apache/spark/deploy/yarn/ClientBaseSuite.scala
@@ -238,9 +238,6 @@ class ClientBaseSuite extends FunSuite with Matchers {
   val sparkConf: SparkConf,
   val yarnConf: YarnConfiguration) extends ClientBase {
 
-override def calculateAMMemory(newApp: GetNewApplicationResponse): Int =
-  throw new UnsupportedOperationException()
-
 override def setupSecurityToken(amContainer: ContainerLaunchContext): Unit 
=
   throw new UnsupportedOperationException()
 


git commit: [SPARK-3286] - Cannot view ApplicationMaster UI when Yarn’s url scheme i...

2014-09-10 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master b734ed0c2 - 6f7a76838


[SPARK-3286] - Cannot view ApplicationMaster UI when Yarn’s url scheme i...

...s https
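
The alpha API expects a tracking URL without a scheme, so the patch strips it with
java.net.URI. A quick illustration (the address below is made up):

    import java.net.URI

    val uiAddress = "https://rm-host.example.com:8090/proxy/application_1_0001"
    // getAuthority drops the scheme and path, leaving host:port.
    assert(new URI(uiAddress).getAuthority == "rm-host.example.com:8090")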

Author: Benoy Antony be...@apache.org

Closes #2276 from benoyantony/SPARK-3286 and squashes the following commits:

c3d51ee [Benoy Antony] Use address with scheme, but Allpha version removes the 
scheme
e82f94e [Benoy Antony] Use address with scheme, but Allpha version removes the 
scheme
92127c9 [Benoy Antony] rebasing from master
450c536 [Benoy Antony] [SPARK-3286] - Cannot view ApplicationMaster UI when 
Yarn’s url scheme is https
f060c02 [Benoy Antony] [SPARK-3286] - Cannot view ApplicationMaster UI when 
Yarn’s url scheme is https


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f7a7683
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f7a7683
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f7a7683

Branch: refs/heads/master
Commit: 6f7a76838f15687583e3b0ab43309a3c079368c4
Parents: b734ed0
Author: Benoy Antony be...@apache.org
Authored: Wed Sep 10 11:59:39 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Sep 10 11:59:39 2014 -0500

--
 .../scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala| 4 +++-
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6f7a7683/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala
index ad27a9a..fc30953 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClientImpl.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.deploy.yarn
 
 import scala.collection.{Map, Set}
+import java.net.URI;
 
 import org.apache.hadoop.net.NetUtils
 import org.apache.hadoop.yarn.api._
@@ -97,7 +98,8 @@ private class YarnRMClientImpl(args: 
ApplicationMasterArguments) extends YarnRMC
 // Users can then monitor stderr/stdout on that node if required.
 appMasterRequest.setHost(Utils.localHostName())
 appMasterRequest.setRpcPort(0)
-appMasterRequest.setTrackingUrl(uiAddress)
+    //remove the scheme from the url if it exists since Hadoop does not expect scheme
+    appMasterRequest.setTrackingUrl(new URI(uiAddress).getAuthority())
 resourceManager.registerApplicationMaster(appMasterRequest)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/6f7a7683/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index a879c83..5756263 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -189,7 +189,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments,
 if (sc == null) {
-      finish(FinalApplicationStatus.FAILED, "Timed out waiting for SparkContext.")
 } else {
-  registerAM(sc.ui.appUIHostPort, securityMgr)
+  registerAM(sc.ui.appUIAddress, securityMgr)
   try {
 userThread.join()
   } finally {





git commit: SPARK-1713. Use a thread pool for launching executors.

2014-09-10 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 26503fdf2 - 1f4a648d4


SPARK-1713. Use a thread pool for launching executors.

This patch copies the approach used in the MapReduce application master for 
launching containers.
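
A condensed sketch of the launcher pool the diff below sets up: at most maxThreads daemon
threads, an unbounded work queue (so the nominal max pool size is never reached), and idle
core threads allowed to time out. The factory method name is mine; the construction mirrors
the patch and assumes Guava on the classpath:

    import java.util.concurrent.{LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit}
    import com.google.common.util.concurrent.ThreadFactoryBuilder

    def containerLauncherPool(maxThreads: Int): ThreadPoolExecutor = {
      val pool = new ThreadPoolExecutor(
        maxThreads, Integer.MAX_VALUE,
        1, TimeUnit.MINUTES,
        new LinkedBlockingQueue[Runnable](),
        new ThreadFactoryBuilder()
          .setNameFormat("ContainerLauncher #%d").setDaemon(true).build())
      pool.allowCoreThreadTimeOut(true)
      pool
    }

    // Each ExecutorRunnable is then submitted to the pool instead of
    // spawning a dedicated Thread: launcherPool.execute(executorRunnable)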

Author: Sandy Ryza sa...@cloudera.com

Closes #663 from sryza/sandy-spark-1713 and squashes the following commits:

036550d [Sandy Ryza] SPARK-1713. [YARN] Use a threadpool for launching executor 
containers


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f4a648d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f4a648d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f4a648d

Branch: refs/heads/master
Commit: 1f4a648d4e30e837d6cf3ea8de1808e2254ad70b
Parents: 26503fd
Author: Sandy Ryza sa...@cloudera.com
Authored: Wed Sep 10 14:34:24 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Sep 10 14:34:24 2014 -0500

--
 docs/running-on-yarn.md   |  7 +++
 .../org/apache/spark/deploy/yarn/YarnAllocator.scala  | 14 --
 2 files changed, 19 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1f4a648d/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 943f06b..d8b22f3 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -125,6 +125,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
      the environment of the executor launcher. 
   </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.containerLauncherMaxThreads</code></td>
+  <td>25</td>
+  <td>
+    The maximum number of threads to use in the application master for launching executor containers.
+  </td>
+</tr>
 </table>
 
 # Launching Spark on YARN

http://git-wip-us.apache.org/repos/asf/spark/blob/1f4a648d/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
index 02b9a81..0b8744f 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy.yarn
 
 import java.util.{List = JList}
-import java.util.concurrent.ConcurrentHashMap
+import java.util.concurrent._
 import java.util.concurrent.atomic.AtomicInteger
 
 import scala.collection.JavaConversions._
@@ -32,6 +32,8 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf, 
SparkEnv}
 import org.apache.spark.scheduler.{SplitInfo, TaskSchedulerImpl}
 import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder
+
 object AllocationType extends Enumeration {
   type AllocationType = Value
   val HOST, RACK, ANY = Value
@@ -95,6 +97,14 @@ private[yarn] abstract class YarnAllocator(
   protected val (preferredHostToCount, preferredRackToCount) =
 generateNodeToWeight(conf, preferredNodes)
 
+  private val launcherPool = new ThreadPoolExecutor(
+    // max pool size of Integer.MAX_VALUE is ignored because we use an unbounded queue
+    sparkConf.getInt("spark.yarn.containerLauncherMaxThreads", 25), Integer.MAX_VALUE,
+    1, TimeUnit.MINUTES,
+    new LinkedBlockingQueue[Runnable](),
+    new ThreadFactoryBuilder().setNameFormat("ContainerLauncher #%d").setDaemon(true).build())
+  launcherPool.allowCoreThreadTimeOut(true)
+
   def getNumExecutorsRunning: Int = numExecutorsRunning.intValue
 
   def getNumExecutorsFailed: Int = numExecutorsFailed.intValue
@@ -283,7 +293,7 @@ private[yarn] abstract class YarnAllocator(
 executorMemory,
 executorCores,
 securityMgr)
-  new Thread(executorRunnable).start()
+  launcherPool.execute(executorRunnable)
 }
   }
   logDebug(





git commit: [SPARK-3260] yarn - pass acls along with executor launch

2014-09-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 6a37ed838 - 51b53a758


[SPARK-3260] yarn - pass acls along with executor launch

Pass along the acl settings when we launch a container so that they can be 
applied to viewing the logs on a running NodeManager.
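
The ACLs end up on the ContainerLaunchContext as a map from access type to a list of users.
A rough sketch of that shape, using Hadoop's ApplicationAccessType; the helper name and the
user lists are mine, not Spark's:

    import scala.collection.JavaConversions.mapAsJavaMap
    import org.apache.hadoop.yarn.api.records.ApplicationAccessType

    // Who may view and who may modify the application on the NodeManager/RM UIs.
    def applicationAcls(viewUsers: Seq[String], modifyUsers: Seq[String])
        : java.util.Map[ApplicationAccessType, String] =
      mapAsJavaMap(Map(
        ApplicationAccessType.VIEW_APP -> viewUsers.mkString(","),
        ApplicationAccessType.MODIFY_APP -> modifyUsers.mkString(",")))

    // e.g. ctx.setApplicationACLs(applicationAcls(Seq("alice"), Seq("alice", "ops")))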

Author: Thomas Graves tgra...@apache.org

Closes #2185 from tgravescs/SPARK-3260 and squashes the following commits:

6f94b5a [Thomas Graves] make unit test more robust
28b9dd3 [Thomas Graves] yarn - pass acls along with executor launch


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51b53a75
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51b53a75
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51b53a75

Branch: refs/heads/master
Commit: 51b53a758c85f2e20ad9bd73ed815fcfa9c7180b
Parents: 6a37ed8
Author: Thomas Graves tgra...@apache.org
Authored: Fri Sep 5 09:54:40 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Sep 5 09:54:40 2014 -0500

--
 .../spark/deploy/yarn/ExecutorRunnable.scala|  7 +-
 .../deploy/yarn/YarnAllocationHandler.scala |  7 +-
 .../spark/deploy/yarn/YarnRMClientImpl.scala|  7 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   | 13 ++--
 .../apache/spark/deploy/yarn/ClientBase.scala   |  6 +-
 .../spark/deploy/yarn/YarnAllocator.scala   | 10 +--
 .../apache/spark/deploy/yarn/YarnRMClient.scala |  5 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 11 ++-
 .../deploy/yarn/YarnSparkHadoopUtilSuite.scala  | 76 +++-
 .../spark/deploy/yarn/ExecutorRunnable.scala|  7 +-
 .../deploy/yarn/YarnAllocationHandler.scala |  7 +-
 .../spark/deploy/yarn/YarnRMClientImpl.scala|  7 +-
 12 files changed, 129 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/51b53a75/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 7dae248..10cbeb8 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration
 import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.{Apps, ConverterUtils, Records, ProtoUtils}
 
-import org.apache.spark.{SparkConf, Logging}
+import org.apache.spark.{SecurityManager, SparkConf, Logging}
 
 
 class ExecutorRunnable(
@@ -46,7 +46,8 @@ class ExecutorRunnable(
 slaveId: String,
 hostname: String,
 executorMemory: Int,
-executorCores: Int)
+executorCores: Int,
+securityMgr: SecurityManager)
   extends Runnable with ExecutorRunnableUtil with Logging {
 
   var rpc: YarnRPC = YarnRPC.create(conf)
@@ -86,6 +87,8 @@ class ExecutorRunnable(
     logInfo("Setting up executor with commands: " + commands)
 ctx.setCommands(commands)
 
+    ctx.setApplicationACLs(YarnSparkHadoopUtil.getApplicationAclsForYarn(securityMgr))
+
 // Send the start request to the ContainerManager
 val startReq = Records.newRecord(classOf[StartContainerRequest])
 .asInstanceOf[StartContainerRequest]

http://git-wip-us.apache.org/repos/asf/spark/blob/51b53a75/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 9f9e16c..85d6274 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{ArrayBuffer, HashMap}
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.scheduler.SplitInfo
 
 import org.apache.hadoop.conf.Configuration
@@ -41,8 +41,9 @@ private[yarn] class YarnAllocationHandler(
 resourceManager: AMRMProtocol,
 appAttemptId: ApplicationAttemptId,
 args: ApplicationMasterArguments,
-preferredNodes: collection.Map[String, collection.Set[SplitInfo]])
-  extends YarnAllocator(conf, sparkConf, args, preferredNodes) {
+preferredNodes: collection.Map[String, collection.Set[SplitInfo]],
+securityMgr: SecurityManager)
+  extends YarnAllocator(conf, sparkConf, args, preferredNodes, securityMgr) 

git commit: [SPARK-3375] spark on yarn container allocation issues

2014-09-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 51b53a758 -> 62c557609


[SPARK-3375] spark on yarn container allocation issues

If yarn doesn't get the containers immediately it stops asking for them and the
yarn application hangs, never getting any executors.

The issue here is that we are sending the number of containers as 0 after we
send the original request of X. On the yarn side this clears out the original
request.

For a ping we should just send empty asks.
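A minimal, self-contained sketch of that behaviour, independent of the YARN API; Ask and buildAsks are illustrative names, not Spark's API (the real change is in the diff below).

// Illustrative only: an empty ask acts as a pure heartbeat and does not clear
// the request that was already registered with the ResourceManager.
final case class Ask(hostPreference: Option[String], count: Int)

def buildAsks(needed: Int, preferredHostToCount: Map[String, Int]): Seq[Ask] = {
  if (needed <= 0) {
    Seq.empty                          // ping only: send no zero-sized asks
  } else if (preferredHostToCount.isEmpty) {
    Seq(Ask(None, needed))             // no locality preference: one ANY-host ask
  } else {
    preferredHostToCount.toSeq.map { case (host, n) => Ask(Some(host), n) }
  }
}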

Author: Thomas Graves tgra...@apache.org

Closes #2275 from tgravescs/SPARK-3375 and squashes the following commits:

74b6820 [Thomas Graves] send empty resource requests when we aren't asking for 
containers


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62c55760
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62c55760
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62c55760

Branch: refs/heads/master
Commit: 62c557609929982eeec170fe12f810bedfcf97f2
Parents: 51b53a7
Author: Thomas Graves tgra...@apache.org
Authored: Fri Sep 5 09:56:22 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Fri Sep 5 09:56:22 2014 -0500

--
 .../spark/deploy/yarn/YarnAllocationHandler.scala  | 13 +++--
 .../spark/deploy/yarn/YarnAllocationHandler.scala  |  8 +---
 2 files changed, 12 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/62c55760/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index 85d6274..5a1b42c 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -51,12 +51,13 @@ private[yarn] class YarnAllocationHandler(
   override protected def allocateContainers(count: Int): YarnAllocateResponse = {
     var resourceRequests: List[ResourceRequest] = null
 
-    // default.
-    if (count <= 0 || preferredHostToCount.isEmpty) {
-      logDebug("numExecutors: " + count + ", host preferences: " +
-        preferredHostToCount.isEmpty)
-      resourceRequests = List(createResourceRequest(
-        AllocationType.ANY, null, count, YarnSparkHadoopUtil.RM_REQUEST_PRIORITY))
+    logDebug("numExecutors: " + count)
+    if (count <= 0) {
+      resourceRequests = List()
+    } else if (preferredHostToCount.isEmpty) {
+      logDebug("host preferences is empty")
+      resourceRequests = List(createResourceRequest(
+        AllocationType.ANY, null, count, YarnSparkHadoopUtil.RM_REQUEST_PRIORITY))
 } else {
   // request for all hosts in preferred nodes and for numExecutors -
   // candidates.size, request by default allocation policy.

http://git-wip-us.apache.org/repos/asf/spark/blob/62c55760/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
index c887cb5..5438f15 100644
--- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
+++ 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala
@@ -88,9 +88,11 @@ private[yarn] class YarnAllocationHandler(
 
   private def addResourceRequests(numExecutors: Int) {
     val containerRequests: List[ContainerRequest] =
-      if (numExecutors <= 0 || preferredHostToCount.isEmpty) {
-        logDebug("numExecutors: " + numExecutors + ", host preferences: " +
-          preferredHostToCount.isEmpty)
+      if (numExecutors <= 0) {
+        logDebug("numExecutors: " + numExecutors)
+        List()
+      } else if (preferredHostToCount.isEmpty) {
+        logDebug("host preferences is empty")
         createResourceRequests(
           AllocationType.ANY,
           resource = null,





git commit: [SPARK-3347] [yarn] Fix yarn-alpha compilation.

2014-09-02 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 8f1f9aaf4 -> 066f31a6b


[SPARK-3347] [yarn] Fix yarn-alpha compilation.

Missing import. Oops.

Author: Marcelo Vanzin van...@cloudera.com

Closes #2236 from vanzin/SPARK-3347 and squashes the following commits:

594fc39 [Marcelo Vanzin] [SPARK-3347] [yarn] Fix yarn-alpha compilation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/066f31a6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/066f31a6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/066f31a6

Branch: refs/heads/master
Commit: 066f31a6b213121441fc9618abd5bae4a706a215
Parents: 8f1f9aa
Author: Marcelo Vanzin van...@cloudera.com
Authored: Tue Sep 2 13:33:23 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Sep 2 13:33:23 2014 -0500

--
 yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/066f31a6/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 12f1cd3..10fc39b 100644
--- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.{Apps, Records}
 
 import org.apache.spark.{Logging, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
 
 /**
  * Version of [[org.apache.spark.deploy.yarn.ClientBase]] tailored to YARN's 
alpha API.





[1/2] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.

2014-08-27 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 6f671d04f -> b92d823ad


http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
index 3474112..d162b4c 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala
@@ -19,22 +19,21 @@ package org.apache.spark.scheduler.cluster
 
 import org.apache.spark._
 import org.apache.hadoop.conf.Configuration
-import org.apache.spark.deploy.yarn.YarnAllocationHandler
+import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil
 import org.apache.spark.scheduler.TaskSchedulerImpl
 import org.apache.spark.util.Utils
 
 /**
- *
- * This scheduler launches executors through Yarn - by calling into Client to launch ExecutorLauncher as AM.
+ * This scheduler launches executors through Yarn - by calling into Client to launch the Spark AM.
  */
-private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) {
+private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration)
+  extends TaskSchedulerImpl(sc) {
 
   def this(sc: SparkContext) = this(sc, new Configuration())
 
   // By default, rack is unknown
   override def getRackForHost(hostPort: String): Option[String] = {
 val host = Utils.parseHostPort(hostPort)._1
-val retval = YarnAllocationHandler.lookupRack(conf, host)
-if (retval != null) Some(retval) else None
+Option(YarnSparkHadoopUtil.lookupRack(conf, host))
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index 833e249..a5f537d 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -19,7 +19,7 @@ package org.apache.spark.scheduler.cluster
 
 import org.apache.hadoop.yarn.api.records.{ApplicationId, YarnApplicationState}
 import org.apache.spark.{SparkException, Logging, SparkContext}
-import org.apache.spark.deploy.yarn.{Client, ClientArguments, ExecutorLauncher, YarnSparkHadoopUtil}
+import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil}
 import org.apache.spark.scheduler.TaskSchedulerImpl
 
 import scala.collection.mutable.ArrayBuffer
@@ -60,10 +60,7 @@ private[spark] class YarnClientSchedulerBackend(
 
 val argsArrayBuf = new ArrayBuffer[String]()
 argsArrayBuf += (
-      "--class", "notused",
-      "--jar", null, // The primary jar will be added dynamically in SparkContext.
-      "--args", hostport,
-      "--am-class", classOf[ExecutorLauncher].getName
+      "--args", hostport
 )
 
 // process any optional arguments, given either as environment variables

http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
index 9aeca4a..69f4022 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala
@@ -18,16 +18,17 @@
 package org.apache.spark.scheduler.cluster
 
 import org.apache.spark._
-import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler}
+import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil}
 import org.apache.spark.scheduler.TaskSchedulerImpl
 import org.apache.spark.util.Utils
 import org.apache.hadoop.conf.Configuration
 
 /**
- *
- * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of ApplicationMaster, etc is done
+ * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of
+ * ApplicationMaster, etc is done
  */
-private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) {
+private[spark] 

[2/2] git commit: [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.

2014-08-27 Thread tgraves
[SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.

This change modifies the Yarn module so that all the logic related
to running the ApplicationMaster is localized. Instead of, previously,
4 different classes with mostly identical code, now we have:

- A single, shared ApplicationMaster class, which can operate both in
  client and cluster mode, and substitutes the old ApplicationMaster
  (for cluster mode) and ExecutorLauncher (for client mode).

The benefit here is that all different execution modes for all supported
yarn versions use the same shared code for monitoring executor allocation,
setting up configuration, and monitoring the process's lifecycle.

- A new YarnRMClient interface, which defines basic RM functionality needed
  by the ApplicationMaster. This interface has concrete implementations for
  each supported Yarn version.

- A new YarnAllocator interface, which just abstracts the existing interface
  of the YarnAllocationHandler class. This is to avoid having to touch the
  allocator code too much in this change, although it might benefit from a
  similar effort in the future.

The end result is much easier to understand code, with much less duplication,
making it much easier to fix bugs, add features, and test everything knowing
that all supported versions will behave the same.
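A rough sketch of the shape described above. The trait and method names below are placeholders for illustration only (apart from the executor counters, which appear in the allocator diffs elsewhere in this thread); they are not the actual Spark API.

// Placeholder sketch of the split: one shared ApplicationMaster plus small
// per-Yarn-version abstractions it talks to.
trait YarnRMClient {
  def register(uiAddress: String): YarnAllocator        // version-specific RM registration
  def unregister(finalStatus: String, diagnostics: String): Unit
}

trait YarnAllocator {
  def allocateResources(): Unit                         // ask the RM for containers
  def getNumExecutorsRunning: Int
  def getNumExecutorsFailed: Int
}

// Shared client/cluster-mode logic lives in one place and depends only on the traits.
class SharedApplicationMaster(rmClient: YarnRMClient) {
  def run(uiAddress: String): Unit = {
    val allocator = rmClient.register(uiAddress)
    allocator.allocateResources()
    // ... monitor executor allocation, report status, clean up the staging dir, etc.
  }
}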

Author: Marcelo Vanzin van...@cloudera.com

Closes #2020 from vanzin/SPARK-2933 and squashes the following commits:

3bbf3e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933
ff389ed [Marcelo Vanzin] Do not interrupt reporter thread from within itself.
3a8ed37 [Marcelo Vanzin] Remove stale comment.
0f5142c [Marcelo Vanzin] Review feedback.
41f8c8a [Marcelo Vanzin] Fix app status reporting.
c0794be [Marcelo Vanzin] Correctly clean up staging directory.
92770cc [Marcelo Vanzin] Merge branch 'master' into SPARK-2933
ecaf332 [Marcelo Vanzin] Small fix to shutdown code.
f02d3f8 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933
f581122 [Marcelo Vanzin] Review feedback.
557fdeb [Marcelo Vanzin] Cleanup a couple more constants.
be6068d [Marcelo Vanzin] Restore shutdown hook to clean up staging dir.
5150993 [Marcelo Vanzin] Some more cleanup.
b6289ab [Marcelo Vanzin] Move cluster/client code to separate methods.
ecb23cd [Marcelo Vanzin] More trivial cleanup.
34f1e63 [Marcelo Vanzin] Fix some questionable error handling.
5657c7d [Marcelo Vanzin] Finish app if SparkContext initialization times out.
0e4be3d [Marcelo Vanzin] Keep ExecutorLauncher as the main class for 
client-mode AM.
91beabb [Marcelo Vanzin] Fix UI filter registration.
8c72239 [Marcelo Vanzin] Trivial cleanups.
99a52d5 [Marcelo Vanzin] Changes to the yarn-alpha project to use common AM 
code.
848ca6d [Marcelo Vanzin] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b92d823a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b92d823a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b92d823a

Branch: refs/heads/master
Commit: b92d823ad13f6fcc325eeb99563bea543871c6aa
Parents: 6f671d0
Author: Marcelo Vanzin van...@cloudera.com
Authored: Wed Aug 27 11:02:04 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Aug 27 11:02:04 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 453 ---
 .../spark/deploy/yarn/ExecutorLauncher.scala| 315 -
 .../deploy/yarn/YarnAllocationHandler.scala | 192 ++--
 .../spark/deploy/yarn/YarnRMClientImpl.scala| 103 +
 .../spark/deploy/yarn/ApplicationMaster.scala   | 430 ++
 .../yarn/ApplicationMasterArguments.scala   |  26 +-
 .../spark/deploy/yarn/ClientArguments.scala |   9 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   |  54 ++-
 .../spark/deploy/yarn/YarnAllocator.scala   |  34 ++
 .../apache/spark/deploy/yarn/YarnRMClient.scala |  67 +++
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |  51 +++
 .../cluster/YarnClientClusterScheduler.scala|  11 +-
 .../cluster/YarnClientSchedulerBackend.scala|   7 +-
 .../cluster/YarnClusterScheduler.scala  |  17 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   | 413 -
 .../spark/deploy/yarn/ExecutorLauncher.scala| 276 ---
 .../deploy/yarn/YarnAllocationHandler.scala | 196 ++--
 .../spark/deploy/yarn/YarnRMClientImpl.scala|  76 
 18 files changed, 892 insertions(+), 1838 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 

git commit: [SPARK-3072] YARN - Exit when reach max number failed executors

2014-08-19 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master cd0720ca7 -> 7eb9cbc27


[SPARK-3072] YARN - Exit when reach max number failed executors

In some cases on hadoop 2.x the spark application master doesn't properly exit
and hangs around for 10 minutes after it's really done.  We should make sure it
exits properly and stops the driver.
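In essence, the fix has the following shape; this is a hedged, illustrative sketch only, and the actual change is the checkNumExecutorsFailed helper in the diff below.

// Sketch: once the failure limit is hit, finish the AM *and* stop the driver
// so the application does not linger after it is really done.
def checkFailures(failed: Int, maxFailures: Int)
                 (finishApp: String => Unit, stopDriver: () => Unit): Unit = {
  if (failed >= maxFailures) {
    finishApp("max number of executor failures reached")
    stopDriver()
  }
}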

Author: Thomas Graves tgra...@apache.org

Closes #2022 from tgravescs/SPARK-3072 and squashes the following commits:

665701d [Thomas Graves] Exit when reach max number failed executors


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7eb9cbc2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7eb9cbc2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7eb9cbc2

Branch: refs/heads/master
Commit: 7eb9cbc273d758522e787fcb2ef68ef65911475f
Parents: cd0720c
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 19 09:40:31 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 19 09:40:31 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 33 +---
 .../spark/deploy/yarn/ExecutorLauncher.scala|  5 +--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 16 +++---
 .../spark/deploy/yarn/ExecutorLauncher.scala|  5 +--
 4 files changed, 40 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7eb9cbc2/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 62b5c3b..46a01f5 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -267,12 +267,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
       // TODO: This is a bit ugly. Can we make it nicer?
       // TODO: Handle container failure
 
-      // Exists the loop if the user thread exits.
-      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
-        if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-          finishApplicationMaster(FinalApplicationStatus.FAILED,
-            "max number of executor failures reached")
-        }
+      // Exits the loop if the user thread exits.
+      while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive
+          && !isFinished) {
+        checkNumExecutorsFailed()
         yarnAllocator.allocateContainers(
           math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 0))
         Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL)
@@ -303,11 +301,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
 
     val t = new Thread {
       override def run() {
-        while (userThread.isAlive) {
-          if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-            finishApplicationMaster(FinalApplicationStatus.FAILED,
-              "max number of executor failures reached")
-          }
+        while (userThread.isAlive && !isFinished) {
+          checkNumExecutorsFailed()
           val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning
           if (missingExecutorCount > 0) {
             logInfo("Allocating %d containers to make up for (potentially) lost containers.
@@ -327,6 +322,22 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration,
     t
   }
 
+  private def checkNumExecutorsFailed() {
+    if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
+      logInfo("max number of executor failures reached")
+      finishApplicationMaster(FinalApplicationStatus.FAILED,
+        "max number of executor failures reached")
+      // make sure to stop the user thread
+      val sparkContext = ApplicationMaster.sparkContextRef.get()
+      if (sparkContext != null) {
+        logInfo("Invoking sc stop from checkNumExecutorsFailed")
+        sparkContext.stop()
+      } else {
+        logError("sparkContext is null when should shutdown")
+      }
+    }
+  }
+
   private def sendProgress() {
     logDebug("Sending progress")
     // Simulated with an allocate request with no nodes requested ...

http://git-wip-us.apache.org/repos/asf/spark/blob/7eb9cbc2/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala 

git commit: SPARK-1528 - spark on yarn, add support for accessing remote HDFS

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master e87075df9 -> 2c0f705e2


SPARK-1528 - spark on yarn, add support for accessing remote HDFS

Add a config (spark.yarn.access.namenodes) to allow applications running on
yarn to access other secure HDFS clusters.  The user just specifies the namenodes
of the other clusters and we get Tokens for those and ship them with the spark
application.
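A minimal sketch of how an application might opt in; the config key and the example namenode URIs come from the docs change below, while the app name is a placeholder.

import org.apache.spark.SparkConf

object RemoteHdfsAccessExample {
  // List every additional secure HDFS cluster the job needs to access.
  val conf: SparkConf = new SparkConf()
    .setAppName("cross-cluster-job") // placeholder name
    .set("spark.yarn.access.namenodes", "hdfs://nn1.com:8032,hdfs://nn2.com:8032")
}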

Author: Thomas Graves tgra...@apache.org

Closes #1159 from tgravescs/spark-1528 and squashes the following commits:

ddbcd16 [Thomas Graves] review comments
0ac8501 [Thomas Graves] SPARK-1528 - add support for accessing remote HDFS


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c0f705e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c0f705e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c0f705e

Branch: refs/heads/master
Commit: 2c0f705e26ca3dfc43a1e9a0722c0e57f67c970a
Parents: e87075d
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 12:48:26 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 12:48:26 2014 -0500

--
 docs/running-on-yarn.md |  7 +++
 .../apache/spark/deploy/yarn/ClientBase.scala   | 56 ++--
 .../spark/deploy/yarn/ClientBaseSuite.scala | 55 ++-
 3 files changed, 101 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2c0f705e/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 0362f5a..573930d 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -106,6 +106,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
     set this configuration to hdfs:///some/path.
   </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.access.namenodes</code></td>
+  <td>(none)</td>
+  <td>
+    A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters.
+  </td>
+</tr>
 </table>
 
 # Launching Spark on YARN

http://git-wip-us.apache.org/repos/asf/spark/blob/2c0f705e/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index b7e8636..ed8f56a 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -29,7 +29,7 @@ import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission
 import org.apache.hadoop.mapred.Master
 import org.apache.hadoop.mapreduce.MRJobConfig
-import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
@@ -191,23 +191,11 @@ trait ClientBase extends Logging {
     // Upload Spark and the application JAR to the remote file system if necessary. Add them as
     // local resources to the application master.
     val fs = FileSystem.get(conf)
-
-    val delegTokenRenewer = Master.getMasterPrincipal(conf)
-    if (UserGroupInformation.isSecurityEnabled()) {
-      if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
-        val errorMessage = "Can't get Master Kerberos principal for use as renewer"
-        logError(errorMessage)
-        throw new SparkException(errorMessage)
-      }
-    }
     val dst = new Path(fs.getHomeDirectory(), appStagingDir)
-    val replication = sparkConf.getInt("spark.yarn.submit.file.replication", 3).toShort
-
-    if (UserGroupInformation.isSecurityEnabled()) {
-      val dstFs = dst.getFileSystem(conf)
-      dstFs.addDelegationTokens(delegTokenRenewer, credentials)
-    }
+    val nns = ClientBase.getNameNodesToAccess(sparkConf) + dst
+    ClientBase.obtainTokensForNamenodes(nns, conf, credentials)
 
+    val replication = sparkConf.getInt("spark.yarn.submit.file.replication", 3).toShort
     val localResources = HashMap[String, LocalResource]()
     FileSystem.mkdirs(fs, dst, new FsPermission(STAGING_DIR_PERMISSION))
 
@@ -614,4 +602,40 @@ object ClientBase extends Logging {
 

git commit: SPARK-1528 - spark on yarn, add support for accessing remote HDFS

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 b92a45058 -> 6c0c65fc8


SPARK-1528 - spark on yarn, add support for accessing remote HDFS

Add a config (spark.yarn.access.namenodes) to allow applications running on
yarn to access other secure HDFS clusters.  The user just specifies the namenodes
of the other clusters and we get Tokens for those and ship them with the spark
application.

Author: Thomas Graves tgra...@apache.org

Closes #1159 from tgravescs/spark-1528 and squashes the following commits:

ddbcd16 [Thomas Graves] review comments
0ac8501 [Thomas Graves] SPARK-1528 - add support for accessing remote HDFS

(cherry picked from commit 2c0f705e26ca3dfc43a1e9a0722c0e57f67c970a)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c0c65fc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c0c65fc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c0c65fc

Branch: refs/heads/branch-1.1
Commit: 6c0c65fc85677ab2cae819a546ea50ed660994c3
Parents: b92a450
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 12:48:26 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 12:48:51 2014 -0500

--
 docs/running-on-yarn.md |  7 +++
 .../apache/spark/deploy/yarn/ClientBase.scala   | 56 ++--
 .../spark/deploy/yarn/ClientBaseSuite.scala | 55 ++-
 3 files changed, 101 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6c0c65fc/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 0362f5a..573930d 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -106,6 +106,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
     set this configuration to hdfs:///some/path.
   </td>
 </tr>
+<tr>
+  <td><code>spark.yarn.access.namenodes</code></td>
+  <td>(none)</td>
+  <td>
+    A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters.
+  </td>
+</tr>
 </table>
 
 # Launching Spark on YARN

http://git-wip-us.apache.org/repos/asf/spark/blob/6c0c65fc/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index b7e8636..ed8f56a 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -29,7 +29,7 @@ import org.apache.hadoop.fs._
 import org.apache.hadoop.fs.permission.FsPermission
 import org.apache.hadoop.mapred.Master
 import org.apache.hadoop.mapreduce.MRJobConfig
-import org.apache.hadoop.security.UserGroupInformation
+import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
@@ -191,23 +191,11 @@ trait ClientBase extends Logging {
     // Upload Spark and the application JAR to the remote file system if necessary. Add them as
     // local resources to the application master.
     val fs = FileSystem.get(conf)
-
-    val delegTokenRenewer = Master.getMasterPrincipal(conf)
-    if (UserGroupInformation.isSecurityEnabled()) {
-      if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
-        val errorMessage = "Can't get Master Kerberos principal for use as renewer"
-        logError(errorMessage)
-        throw new SparkException(errorMessage)
-      }
-    }
     val dst = new Path(fs.getHomeDirectory(), appStagingDir)
-    val replication = sparkConf.getInt("spark.yarn.submit.file.replication", 3).toShort
-
-    if (UserGroupInformation.isSecurityEnabled()) {
-      val dstFs = dst.getFileSystem(conf)
-      dstFs.addDelegationTokens(delegTokenRenewer, credentials)
-    }
+    val nns = ClientBase.getNameNodesToAccess(sparkConf) + dst
+    ClientBase.obtainTokensForNamenodes(nns, conf, credentials)
 
+    val replication = sparkConf.getInt("spark.yarn.submit.file.replication", 3).toShort
     val localResources = HashMap[String, LocalResource]()
 

git commit: SPARK-1890 and SPARK-1891- add admin and modify acls

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 2c0f705e2 -> 1ca23


SPARK-1890 and SPARK-1891- add admin and modify acls

It was easier to combine these 2 JIRAs since they touch many of the same places.
This pr adds the following:

- adds modify acls
- adds admin acls (list of admins/users that get added to both view and modify
acls)
- modify Kill button on UI to take modify acls into account
- changes config name of spark.ui.acls.enable to spark.acls.enable since I
chose poorly in the original name. We keep backwards compatibility so people can
still use spark.ui.acls.enable. The acls should apply to any web ui as well as
any CLI interfaces.
- send view and modify acls information on to YARN so that YARN interfaces can
use them (yarn cli for killing applications, for example); a minimal config
sketch follows below.
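A minimal config sketch, assuming the keys introduced or kept by this change; the user names are placeholders.

import org.apache.spark.SparkConf

object AclConfigSketch {
  // Keys come from this commit; the user names are placeholders.
  val conf: SparkConf = new SparkConf()
    .set("spark.acls.enable", "true")      // new name; spark.ui.acls.enable still honored
    .set("spark.admin.acls", "alice,bob")  // admins are added to both view and modify acls
    .set("spark.ui.view.acls", "carol")    // extra users allowed to view the web UI
    .set("spark.modify.acls", "dave")      // extra users allowed to modify (e.g. kill) the app
}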

Author: Thomas Graves tgra...@apache.org

Closes #1196 from tgravescs/SPARK-1890 and squashes the following commits:

8292eb1 [Thomas Graves] review comments
b92ec89 [Thomas Graves] remove unneeded variable from applistener
4c765f4 [Thomas Graves] Add in admin acls
72eb0ac [Thomas Graves] Add modify acls


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1ca2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1ca2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1ca2

Branch: refs/heads/master
Commit: 1ca23d3aa40423d658cfbf2c956ad415a6b1
Parents: 2c0f705
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 12:52:52 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 12:52:52 2014 -0500

--
 .../org/apache/spark/SecurityManager.scala  | 107 ---
 .../deploy/history/FsHistoryProvider.scala  |   4 +-
 .../scheduler/ApplicationEventListener.scala|   4 +-
 .../apache/spark/ui/jobs/JobProgressTab.scala   |   2 +-
 .../org/apache/spark/SecurityManagerSuite.scala |  83 --
 docs/configuration.md   |  27 -
 docs/security.md|   7 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   |   9 +-
 8 files changed, 206 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1ca2/core/src/main/scala/org/apache/spark/SecurityManager.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala 
b/core/src/main/scala/org/apache/spark/SecurityManager.scala
index 74aa441..25c2c9f 100644
--- a/core/src/main/scala/org/apache/spark/SecurityManager.scala
+++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -41,10 +41,19 @@ import org.apache.spark.deploy.SparkHadoopUtil
  * secure the UI if it has data that other users should not be allowed to see. The javax
  * servlet filter specified by the user can authenticate the user and then once the user
  * is logged in, Spark can compare that user versus the view acls to make sure they are
- * authorized to view the UI. The configs 'spark.ui.acls.enable' and 'spark.ui.view.acls'
+ * authorized to view the UI. The configs 'spark.acls.enable' and 'spark.ui.view.acls'
  * control the behavior of the acls. Note that the person who started the application
  * always has view access to the UI.
  *
+ * Spark has a set of modify acls (`spark.modify.acls`) that controls which users have permission
+ * to modify a single application. This would include things like killing the application. By
+ * default the person who started the application has modify access. For modify access through
+ * the UI, you must have a filter that does authentication in place for the modify acls to work
+ * properly.
+ *
+ * Spark also has a set of admin acls (`spark.admin.acls`) which is a set of users/administrators
+ * who always have permission to view or modify the Spark application.
+ *
  * Spark does not currently support encryption after authentication.
  *
  * At this point spark has multiple communication protocols that need to be secured and
@@ -137,18 +146,32 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {
   private val sparkSecretLookupKey = "sparkCookie"
 
   private val authOn = sparkConf.getBoolean("spark.authenticate", false)
-  private var uiAclsOn = sparkConf.getBoolean("spark.ui.acls.enable", false)
+  // keep spark.ui.acls.enable for backwards compatibility with 1.0
+  private var aclsOn = sparkConf.getOption("spark.acls.enable").getOrElse(
+    sparkConf.get("spark.ui.acls.enable", "false")).toBoolean
+
+  // admin acls should be set before view or modify acls
+  private var adminAcls: Set[String] =
+    stringToSet(sparkConf.get("spark.admin.acls", ""))
 
   private var viewAcls: Set[String] = _
+
+  // list of users who have permission to modify the application. This should
+  // apply to both

git commit: SPARK-1890 and SPARK-1891- add admin and modify acls

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 6c0c65fc8 -> e3fe6571d


SPARK-1890 and SPARK-1891- add admin and modify acls

It was easier to combine these 2 JIRAs since they touch many of the same places.
This pr adds the following:

- adds modify acls
- adds admin acls (list of admins/users that get added to both view and modify
acls)
- modify Kill button on UI to take modify acls into account
- changes config name of spark.ui.acls.enable to spark.acls.enable since I
chose poorly in the original name. We keep backwards compatibility so people can
still use spark.ui.acls.enable. The acls should apply to any web ui as well as
any CLI interfaces.
- send view and modify acls information on to YARN so that YARN interfaces can
use them (yarn cli for killing applications, for example).

Author: Thomas Graves tgra...@apache.org

Closes #1196 from tgravescs/SPARK-1890 and squashes the following commits:

8292eb1 [Thomas Graves] review comments
b92ec89 [Thomas Graves] remove unneeded variable from applistener
4c765f4 [Thomas Graves] Add in admin acls
72eb0ac [Thomas Graves] Add modify acls

(cherry picked from commit 1ca23d3aa40423d658cfbf2c956ad415a6b1)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3fe6571
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3fe6571
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3fe6571

Branch: refs/heads/branch-1.1
Commit: e3fe6571decfdc406ec6d505fd92f9f2b85a618c
Parents: 6c0c65f
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 12:52:52 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 12:53:05 2014 -0500

--
 .../org/apache/spark/SecurityManager.scala  | 107 ---
 .../deploy/history/FsHistoryProvider.scala  |   4 +-
 .../scheduler/ApplicationEventListener.scala|   4 +-
 .../apache/spark/ui/jobs/JobProgressTab.scala   |   2 +-
 .../org/apache/spark/SecurityManagerSuite.scala |  83 --
 docs/configuration.md   |  27 -
 docs/security.md|   7 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   |   9 +-
 8 files changed, 206 insertions(+), 37 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e3fe6571/core/src/main/scala/org/apache/spark/SecurityManager.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SecurityManager.scala 
b/core/src/main/scala/org/apache/spark/SecurityManager.scala
index 74aa441..25c2c9f 100644
--- a/core/src/main/scala/org/apache/spark/SecurityManager.scala
+++ b/core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -41,10 +41,19 @@ import org.apache.spark.deploy.SparkHadoopUtil
  * secure the UI if it has data that other users should not be allowed to see. The javax
  * servlet filter specified by the user can authenticate the user and then once the user
  * is logged in, Spark can compare that user versus the view acls to make sure they are
- * authorized to view the UI. The configs 'spark.ui.acls.enable' and 'spark.ui.view.acls'
+ * authorized to view the UI. The configs 'spark.acls.enable' and 'spark.ui.view.acls'
  * control the behavior of the acls. Note that the person who started the application
  * always has view access to the UI.
  *
+ * Spark has a set of modify acls (`spark.modify.acls`) that controls which users have permission
+ * to modify a single application. This would include things like killing the application. By
+ * default the person who started the application has modify access. For modify access through
+ * the UI, you must have a filter that does authentication in place for the modify acls to work
+ * properly.
+ *
+ * Spark also has a set of admin acls (`spark.admin.acls`) which is a set of users/administrators
+ * who always have permission to view or modify the Spark application.
+ *
  * Spark does not currently support encryption after authentication.
  *
  * At this point spark has multiple communication protocols that need to be secured and
@@ -137,18 +146,32 @@ private[spark] class SecurityManager(sparkConf: SparkConf) extends Logging {
   private val sparkSecretLookupKey = "sparkCookie"
 
   private val authOn = sparkConf.getBoolean("spark.authenticate", false)
-  private var uiAclsOn = sparkConf.getBoolean("spark.ui.acls.enable", false)
+  // keep spark.ui.acls.enable for backwards compatibility with 1.0
+  private var aclsOn = sparkConf.getOption("spark.acls.enable").getOrElse(
+    sparkConf.get("spark.ui.acls.enable", "false")).toBoolean
+
+  // admin acls should be set before view or modify acls
+  private var adminAcls: Set[String] =
+    stringToSet(sparkConf.get("spark.admin.acls", ""))
 
   private var 

git commit: SPARK-1680: use configs for specifying environment variables on YARN

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 74f82c71b -> 41e0a21b2


SPARK-1680: use configs for specifying environment variables on YARN

Note that this also documents spark.executorEnv.*  which to me means it's
public.  If we don't want that please speak up.
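A minimal sketch of the new config style; the config prefixes come from the docs change below, and the JAVA_HOME=/jdk64 and FOO=bar values simply reuse the example values from the removed SPARK_YARN_USER_ENV documentation.

import org.apache.spark.SparkConf

object YarnEnvConfigSketch {
  val conf: SparkConf = new SparkConf()
    .set("spark.yarn.appMasterEnv.JAVA_HOME", "/jdk64") // env var for the YARN ApplicationMaster
    .set("spark.executorEnv.FOO", "bar")                // env var for every executor process
}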

Author: Thomas Graves tgra...@apache.org

Closes #1512 from tgravescs/SPARK-1680 and squashes the following commits:

11525df [Thomas Graves] more doc changes
553bad0 [Thomas Graves] fix documentation
152bf7c [Thomas Graves] fix docs
5382326 [Thomas Graves] try fix docs
32f86a4 [Thomas Graves] use configs for specifying environment variables on YARN


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41e0a21b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41e0a21b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41e0a21b

Branch: refs/heads/master
Commit: 41e0a21b22ccd2788dc079790788e505b0d4e37d
Parents: 74f82c7
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 15:57:32 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 15:57:32 2014 -0500

--
 docs/configuration.md   |  8 +++
 docs/running-on-yarn.md | 22 +++-
 .../apache/spark/deploy/yarn/ClientBase.scala   | 13 
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  6 +-
 4 files changed, 43 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/41e0a21b/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 25adea2..5e7556c 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -206,6 +206,14 @@ Apart from these, the following properties are also available, and may be useful
     used during aggregation goes above this amount, it will spill the data into disks.
   </td>
 </tr>
+<tr>
+  <td><code>spark.executorEnv.[EnvironmentVariableName]</code></td>
+  <td>(none)</td>
+  <td>
+    Add the environment variable specified by <code>EnvironmentVariableName</code> to the Executor
+    process. The user can specify multiple of these to set multiple environment variables.
+  </td>
+</tr>
 </table>
 
  Shuffle Behavior

http://git-wip-us.apache.org/repos/asf/spark/blob/41e0a21b/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 573930d..9bc20db 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -17,10 +17,6 @@ To build Spark yourself, refer to the [building with Maven guide](building-with-
 
 Most of the configs are the same for Spark on YARN as for other deployment modes. See the [configuration page](configuration.html) for more information on those.  These are configs that are specific to Spark on YARN.
 
- Environment Variables
-
-* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV=JAVA_HOME=/jdk64,FOO=bar`.
-
  Spark Properties
 
 <table class="table">
@@ -110,7 +106,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
   <td><code>spark.yarn.access.namenodes</code></td>
   <td>(none)</td>
   <td>
-    A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters.
+    A list of secure HDFS namenodes your Spark application is going to access. For
+    example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`.
+    The Spark application must have access to the namenodes listed and Kerberos must
+    be properly configured to be able to access them (either in the same realm or in
+    a trusted realm). Spark acquires security tokens for each of the namenodes so that
+    the Spark application can access those remote HDFS clusters.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.appMasterEnv.[EnvironmentVariableName]</code></td>
+  <td>(none)</td>
+  <td>
+     Add the environment variable specified by <code>EnvironmentVariableName</code> to the
+     Application Master process launched on YARN. The user can specify multiple of
+     these to set multiple environment variables. In yarn-cluster mode this controls
+     the environment of the SPARK driver and in yarn-client mode it only controls
+     the environment of the executor

git commit: SPARK-1680: use configs for specifying environment variables on YARN

2014-08-05 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.1 46b698307 -> 7b798e10e


SPARK-1680: use configs for specifying environment variables on YARN

Note that this also documents spark.executorEnv.*  which to me means it's
public.  If we don't want that please speak up.

Author: Thomas Graves tgra...@apache.org

Closes #1512 from tgravescs/SPARK-1680 and squashes the following commits:

11525df [Thomas Graves] more doc changes
553bad0 [Thomas Graves] fix documentation
152bf7c [Thomas Graves] fix docs
5382326 [Thomas Graves] try fix docs
32f86a4 [Thomas Graves] use configs for specifying environment variables on YARN

(cherry picked from commit 41e0a21b22ccd2788dc079790788e505b0d4e37d)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b798e10
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b798e10
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b798e10

Branch: refs/heads/branch-1.1
Commit: 7b798e10e214cd407d3399e2cab9e3789f9a929e
Parents: 46b6983
Author: Thomas Graves tgra...@apache.org
Authored: Tue Aug 5 15:57:32 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Aug 5 15:57:42 2014 -0500

--
 docs/configuration.md   |  8 +++
 docs/running-on-yarn.md | 22 +++-
 .../apache/spark/deploy/yarn/ClientBase.scala   | 13 
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  6 +-
 4 files changed, 43 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7b798e10/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 1333465..6ae453d 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -206,6 +206,14 @@ Apart from these, the following properties are also available, and may be useful
     used during aggregation goes above this amount, it will spill the data into disks.
   </td>
 </tr>
+<tr>
+  <td><code>spark.executorEnv.[EnvironmentVariableName]</code></td>
+  <td>(none)</td>
+  <td>
+    Add the environment variable specified by <code>EnvironmentVariableName</code> to the Executor
+    process. The user can specify multiple of these to set multiple environment variables.
+  </td>
+</tr>
 </table>
 
  Shuffle Behavior

http://git-wip-us.apache.org/repos/asf/spark/blob/7b798e10/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 573930d..9bc20db 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -17,10 +17,6 @@ To build Spark yourself, refer to the [building with Maven guide](building-with-
 
 Most of the configs are the same for Spark on YARN as for other deployment modes. See the [configuration page](configuration.html) for more information on those.  These are configs that are specific to Spark on YARN.
 
- Environment Variables
-
-* `SPARK_YARN_USER_ENV`, to add environment variables to the Spark processes launched on YARN. This can be a comma separated list of environment variables, e.g. `SPARK_YARN_USER_ENV=JAVA_HOME=/jdk64,FOO=bar`.
-
  Spark Properties
 
 <table class="table">
@@ -110,7 +106,23 @@ Most of the configs are the same for Spark on YARN as for other deployment modes
   <td><code>spark.yarn.access.namenodes</code></td>
   <td>(none)</td>
   <td>
-    A list of secure HDFS namenodes your Spark application is going to access. For example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`. The Spark application must have access to the namenodes listed and Kerberos must be properly configured to be able to access them (either in the same realm or in a trusted realm). Spark acquires security tokens for each of the namenodes so that the Spark application can access those remote HDFS clusters.
+    A list of secure HDFS namenodes your Spark application is going to access. For
+    example, `spark.yarn.access.namenodes=hdfs://nn1.com:8032,hdfs://nn2.com:8032`.
+    The Spark application must have access to the namenodes listed and Kerberos must
+    be properly configured to be able to access them (either in the same realm or in
+    a trusted realm). Spark acquires security tokens for each of the namenodes so that
+    the Spark application can access those remote HDFS clusters.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.appMasterEnv.[EnvironmentVariableName]</code></td>
+  <td>(none)</td>
+  <td>
+     Add the environment variable specified by <code>EnvironmentVariableName</code> to the
+     Application Master process launched on YARN. The user can specify multiple of
+     these to set multiple environment variables. In yarn-cluster mode this controls

git commit: SPARK-2150: Provide direct link to finished application UI in yarn resou...

2014-07-24 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 42dfab7d3 -> 46e224aaa


SPARK-2150: Provide direct link to finished application UI in yarn resou...

...rce manager UI

Use the event logger directory to provide a direct link to finished
application UI in yarn resourcemanager UI.

Author: Rahul Singhal rahul.sing...@guavus.com

Closes #1094 from rahulsinghaliitd/SPARK-2150 and squashes the following 
commits:

95f230c [Rahul Singhal] SPARK-2150: Provide direct link to finished application 
UI in yarn resource manager UI


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/46e224aa
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/46e224aa
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/46e224aa

Branch: refs/heads/master
Commit: 46e224aaa26df4b232c5176e98472a902862b76c
Parents: 42dfab7
Author: Rahul Singhal rahul.sing...@guavus.com
Authored: Thu Jul 24 09:31:04 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jul 24 09:31:04 2014 -0500

--
 .../spark/deploy/history/FsHistoryProvider.scala|  3 ++-
 .../apache/spark/deploy/history/HistoryPage.scala   |  2 +-
 .../apache/spark/deploy/history/HistoryServer.scala |  4 +++-
 .../org/apache/spark/deploy/master/Master.scala | 11 +++
 .../spark/scheduler/EventLoggingListener.scala  |  7 +++
 .../spark/deploy/yarn/ApplicationMaster.scala   |  4 +++-
 .../apache/spark/deploy/yarn/ExecutorLauncher.scala |  2 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 16 
 .../cluster/YarnClientSchedulerBackend.scala|  3 ++-
 .../spark/deploy/yarn/ApplicationMaster.scala   |  5 +++--
 .../apache/spark/deploy/yarn/ExecutorLauncher.scala |  2 +-
 11 files changed, 46 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index a8c9ac0..01e7065 100644
--- 
a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ 
b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -169,7 +169,8 @@ private[history] class FsHistoryProvider(conf: SparkConf) extends ApplicationHis
     val ui: SparkUI = if (renderUI) {
         val conf = this.conf.clone()
         val appSecManager = new SecurityManager(conf)
-        new SparkUI(conf, appSecManager, replayBus, appId, "/history/" + appId)
+        new SparkUI(conf, appSecManager, replayBus, appId,
+          HistoryServer.UI_PATH_PREFIX + s"/$appId")
         // Do not call ui.bind() to avoid creating a new server for each application
       } else {
         null

http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
index a958c83..d7a3e3f 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala
@@ -75,7 +75,7 @@ private[spark] class HistoryPage(parent: HistoryServer) extends WebUIPage() {
     "Last Updated")
 
   private def appRow(info: ApplicationHistoryInfo): Seq[Node] = {
-    val uiAddress = "/history/" + info.id
+    val uiAddress = HistoryServer.UI_PATH_PREFIX + s"/${info.id}"
     val startTime = UIUtils.formatDate(info.startTime)
     val endTime = UIUtils.formatDate(info.endTime)
     val duration = UIUtils.formatDuration(info.endTime - info.startTime)

http://git-wip-us.apache.org/repos/asf/spark/blob/46e224aa/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index 56b38dd..cacb9da 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -114,7 +114,7 @@ class HistoryServer(
     attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static"))
 
     val contextHandler = new ServletContextHandler
-    contextHandler.setContextPath("/history")
+    contextHandler.setContextPath(HistoryServer.UI_PATH_PREFIX)
     contextHandler.addServlet(new ServletHolder(loaderServlet), "/*")
 

git commit: [SPARK-2037]: yarn client mode doesn't support spark.yarn.max.executor.failures

2014-07-24 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c960b5051 -> 323a83c52


[SPARK-2037]: yarn client mode doesn't support spark.yarn.max.executor.failures

Author: GuoQiang Li wi...@qq.com

Closes #1180 from witgo/SPARK-2037 and squashes the following commits:

3d52411 [GuoQiang Li] review commit
7058f4d [GuoQiang Li] Correctly stop SparkContext
6d0561f [GuoQiang Li] Fix: yarn client mode doesn't support 
spark.yarn.max.executor.failures


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/323a83c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/323a83c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/323a83c5

Branch: refs/heads/master
Commit: 323a83c5235f9289cd9526491d62365df96a429b
Parents: c960b50
Author: GuoQiang Li wi...@qq.com
Authored: Thu Jul 24 14:46:10 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jul 24 14:46:10 2014 -0500

--
 .../spark/deploy/yarn/ExecutorLauncher.scala| 80 +---
 .../cluster/YarnClientSchedulerBackend.scala| 28 +++
 .../spark/deploy/yarn/ExecutorLauncher.scala| 45 ---
 3 files changed, 115 insertions(+), 38 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/323a83c5/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
index d232c18..184e2ad 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
@@ -28,7 +28,6 @@ import org.apache.hadoop.yarn.ipc.YarnRPC
 import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
 import akka.actor._
 import akka.remote._
-import akka.actor.Terminated
 import org.apache.spark.{Logging, SecurityManager, SparkConf}
 import org.apache.spark.util.{Utils, AkkaUtils}
 import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend
@@ -57,10 +56,17 @@ class ExecutorLauncher(args: ApplicationMasterArguments, 
conf: Configuration, sp
   private val yarnConf: YarnConfiguration = new YarnConfiguration(conf)
 
   private var yarnAllocator: YarnAllocationHandler = _
-  private var driverClosed:Boolean = false
+
+  private var driverClosed: Boolean = false
+  private var isFinished: Boolean = false
+  private var registered: Boolean = false
+
+  // Default to numExecutors * 2, with minimum of 3
+  private val maxNumExecutorFailures = 
sparkConf.getInt("spark.yarn.max.executor.failures",
+sparkConf.getInt("spark.yarn.max.worker.failures", 
math.max(args.numExecutors * 2, 3)))
 
   val securityManager = new SecurityManager(sparkConf)
-  val actorSystem : ActorSystem = AkkaUtils.createActorSystem("sparkYarnAM", 
Utils.localHostName, 0,
+  val actorSystem: ActorSystem = AkkaUtils.createActorSystem("sparkYarnAM", 
Utils.localHostName, 0,
 conf = sparkConf, securityManager = securityManager)._1
   var actor: ActorRef = _
 
@@ -97,23 +103,26 @@ class ExecutorLauncher(args: ApplicationMasterArguments, 
conf: Configuration, sp
 appAttemptId = getApplicationAttemptId()
 resourceManager = registerWithResourceManager()
 
-val appMasterResponse: RegisterApplicationMasterResponse = 
registerApplicationMaster()
-
-// Compute number of threads for akka
-val minimumMemory = 
appMasterResponse.getMinimumResourceCapability().getMemory()
-
-if (minimumMemory > 0) {
-  val mem = args.executorMemory + 
sparkConf.getInt("spark.yarn.executor.memoryOverhead",
-YarnAllocationHandler.MEMORY_OVERHEAD)
-  val numCore = (mem  / minimumMemory) + (if (0 != (mem % minimumMemory)) 
1 else 0)
-
-  if (numCore > 0) {
-// do not override - hits 
https://issues.apache.org/jira/browse/HADOOP-8406
-// TODO: Uncomment when hadoop is on a version which has this fixed.
-// args.workerCores = numCore
+synchronized {
+  if (!isFinished) {
+val appMasterResponse: RegisterApplicationMasterResponse = 
registerApplicationMaster()
+// Compute number of threads for akka
+val minimumMemory = 
appMasterResponse.getMinimumResourceCapability().getMemory()
+
+if (minimumMemory > 0) {
+  val mem = args.executorMemory + 
sparkConf.getInt("spark.yarn.executor.memoryOverhead",
+YarnAllocationHandler.MEMORY_OVERHEAD)
+  val numCore = (mem  / minimumMemory) + (if (0 != (mem % 
minimumMemory)) 1 else 0)
+
+  if (numCore > 0) {
+// do not override - hits 
https://issues.apache.org/jira/browse/HADOOP-8406
+// TODO: Uncomment when hadoop is on a version which has this 

git commit: [YARN][SPARK-2606]:In some cases, the spark UI pages display incorrect

2014-07-22 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 5f7b99168 -> ddadf1b00


[YARN][SPARK-2606]:In some cases,the spark UI pages display incorrect

The issue is caused by #1112.

Author: GuoQiang Li wi...@qq.com

Closes #1501 from witgo/webui_style and squashes the following commits:

4b34998 [GuoQiang Li] In some cases, pages display incorrect in WebUI


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ddadf1b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ddadf1b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ddadf1b0

Branch: refs/heads/master
Commit: ddadf1b00470b9d7bf7386dacf198d41407a0a2b
Parents: 5f7b991
Author: GuoQiang Li wi...@qq.com
Authored: Tue Jul 22 20:34:40 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Jul 22 20:34:40 2014 -0500

--
 core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ddadf1b0/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala 
b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
index e07aa2e..715cc2f 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala
@@ -149,7 +149,7 @@ private[spark] object UIUtils extends Logging {
 
   def prependBaseUri(basePath: String = "", resource: String = "") = uiRoot + 
basePath + resource
 
-  val commonHeaderNodes = {
+  def commonHeaderNodes = {
 <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
 <link rel="stylesheet" href={prependBaseUri("/static/bootstrap.min.css")}
   type="text/css" />



git commit: SPARK-1707. Remove unnecessary 3 second sleep in YarnClusterScheduler

2014-07-21 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master cd273a238 -> f89cf65d7


SPARK-1707. Remove unnecessary 3 second sleep in YarnClusterScheduler

Author: Sandy Ryza sa...@cloudera.com

Closes #634 from sryza/sandy-spark-1707 and squashes the following commits:

2f6e358 [Sandy Ryza] Default min registered executors ratio to .8 for YARN
354c630 [Sandy Ryza] Remove outdated comments
c744ef3 [Sandy Ryza] Take out waitForInitialAllocations
2a4329b [Sandy Ryza] SPARK-1707. Remove unnecessary 3 second sleep in 
YarnClusterScheduler


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f89cf65d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f89cf65d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f89cf65d

Branch: refs/heads/master
Commit: f89cf65d7aced0bb387c05586f9f51cb29865022
Parents: cd273a2
Author: Sandy Ryza sa...@cloudera.com
Authored: Mon Jul 21 13:15:46 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Jul 21 13:15:46 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 39 --
 .../cluster/YarnClientClusterScheduler.scala| 10 -
 .../cluster/YarnClientSchedulerBackend.scala|  5 +++
 .../cluster/YarnClusterScheduler.scala  |  8 +---
 .../cluster/YarnClusterSchedulerBackend.scala   |  5 +++
 .../spark/deploy/yarn/ApplicationMaster.scala   | 43 
 6 files changed, 11 insertions(+), 99 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f89cf65d/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 062f946..3ec3648 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -255,10 +255,6 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 sparkContext.getConf)
 }
   }
-} finally {
-  // in case of exceptions, etc - ensure that count is atleast 
ALLOCATOR_LOOP_WAIT_COUNT :
-  // so that the loop (in ApplicationMaster.sparkContextInitialized) breaks
-  
ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT)
 }
   }
 
@@ -277,13 +273,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 }
 yarnAllocator.allocateContainers(
   math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 
0))
-ApplicationMaster.incrementAllocatorLoop(1)
 Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL)
   }
-} finally {
-  // In case of exceptions, etc - ensure that count is at least 
ALLOCATOR_LOOP_WAIT_COUNT,
-  // so that the loop in ApplicationMaster#sparkContextInitialized() 
breaks.
-  
ApplicationMaster.incrementAllocatorLoop(ApplicationMaster.ALLOCATOR_LOOP_WAIT_COUNT)
 }
 logInfo("All executors have launched.")
 
@@ -411,24 +402,10 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 }
 
 object ApplicationMaster extends Logging {
-  // Number of times to wait for the allocator loop to complete.
-  // Each loop iteration waits for 100ms, so maximum of 3 seconds.
-  // This is to ensure that we have reasonable number of containers before we 
start
   // TODO: Currently, task to container is computed once (TaskSetManager) - 
which need not be
   // optimal as more containers are available. Might need to handle this 
better.
-  private val ALLOCATOR_LOOP_WAIT_COUNT = 30
   private val ALLOCATE_HEARTBEAT_INTERVAL = 100
 
-  def incrementAllocatorLoop(by: Int) {
-val count = yarnAllocatorLoop.getAndAdd(by)
-if (count >= ALLOCATOR_LOOP_WAIT_COUNT) {
-  yarnAllocatorLoop.synchronized {
-// to wake threads off wait ...
-yarnAllocatorLoop.notifyAll()
-  }
-}
-  }
-
   private val applicationMasters = new 
CopyOnWriteArrayList[ApplicationMaster]()
 
   def register(master: ApplicationMaster) {
@@ -437,7 +414,6 @@ object ApplicationMaster extends Logging {
 
   val sparkContextRef: AtomicReference[SparkContext] =
 new AtomicReference[SparkContext](null /* initialValue */)
-  val yarnAllocatorLoop: AtomicInteger = new AtomicInteger(0)
 
   def sparkContextInitialized(sc: SparkContext): Boolean = {
 var modified = false
@@ -472,21 +448,6 @@ object ApplicationMaster extends Logging {
 modified
   }
 
-
-  /**
-   * Returns when we've either
-   *  1) received all the requested executors,
-   *  2) waited 

git commit: SPARK-1291: Link the spark UI to RM ui in yarn-client mode

2014-07-15 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 9dd635eb5 -> 72ea56da8


SPARK-1291: Link the spark UI to RM ui in yarn-client mode

Author: witgo wi...@qq.com

Closes #1112 from witgo/SPARK-1291 and squashes the following commits:

6022bcd [witgo] review commit
1fbb925 [witgo] add addAmIpFilter to yarn alpha
210299c [witgo] review commit
1b92a07 [witgo] review commit
6896586 [witgo] Add comments to addWebUIFilter
3e9630b [witgo] review commit
142ee29 [witgo] review commit
1fe7710 [witgo] Link the spark UI to RM ui in yarn-client mode


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/72ea56da
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/72ea56da
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/72ea56da

Branch: refs/heads/master
Commit: 72ea56da8e383c61c6f18eeefef03b9af00f5158
Parents: 9dd635e
Author: witgo wi...@qq.com
Authored: Tue Jul 15 13:52:56 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Jul 15 13:52:56 2014 -0500

--
 .../cluster/CoarseGrainedClusterMessage.scala   |  3 +++
 .../cluster/CoarseGrainedSchedulerBackend.scala | 18 +++
 .../scala/org/apache/spark/ui/UIUtils.scala | 11 +-
 .../spark/deploy/yarn/ExecutorLauncher.scala| 22 ---
 .../cluster/YarnClientSchedulerBackend.scala|  1 +
 .../spark/deploy/yarn/ExecutorLauncher.scala| 23 +---
 6 files changed, 71 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/72ea56da/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala
index 318e165..6abf6d9 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala
@@ -66,4 +66,7 @@ private[spark] object CoarseGrainedClusterMessages {
 
   case class RemoveExecutor(executorId: String, reason: String) extends 
CoarseGrainedClusterMessage
 
+  case class AddWebUIFilter(filterName:String, filterParams: String, proxyBase 
:String)
+extends CoarseGrainedClusterMessage
+
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/72ea56da/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index 0f5545e..9f085ee 100644
--- 
a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ 
b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -31,6 +31,7 @@ import org.apache.spark.{SparkEnv, Logging, SparkException, 
TaskState}
 import org.apache.spark.scheduler.{SchedulerBackend, SlaveLost, 
TaskDescription, TaskSchedulerImpl, WorkerOffer}
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._
 import org.apache.spark.util.{SerializableBuffer, AkkaUtils, Utils}
+import org.apache.spark.ui.JettyUtils
 
 /**
  * A scheduler backend that waits for coarse grained executors to connect to 
it through Akka.
@@ -136,6 +137,9 @@ class CoarseGrainedSchedulerBackend(scheduler: 
TaskSchedulerImpl, actorSystem: A
 removeExecutor(executorId, reason)
 sender ! true
 
+  case AddWebUIFilter(filterName, filterParams, proxyBase) =>
+addWebUIFilter(filterName, filterParams, proxyBase)
+sender ! true
   case DisassociatedEvent(_, address, _) =>
 addressToExecutorId.get(address).foreach(removeExecutor(_,
   "remote Akka client disassociated"))
@@ -276,6 +280,20 @@ class CoarseGrainedSchedulerBackend(scheduler: 
TaskSchedulerImpl, actorSystem: A
 }
 false
   }
+
+  // Add filters to the SparkUI
+  def addWebUIFilter(filterName: String, filterParams: String, proxyBase: 
String) {
+if (proxyBase != null && proxyBase.nonEmpty) {
+  System.setProperty("spark.ui.proxyBase", proxyBase)
+}
+
+if (Seq(filterName, filterParams).forall(t => t != null && t.nonEmpty)) {
+  logInfo(s"Add WebUI Filter. $filterName, $filterParams, $proxyBase")
+  conf.set("spark.ui.filters", filterName)
+  conf.set(s"spark.$filterName.params", filterParams)
+  JettyUtils.addFilters(scheduler.sc.ui.getHandlers, conf)
+}
+  }
 }
 
 private[spark] object CoarseGrainedSchedulerBackend {


git commit: [SPARK-1946] Submit tasks after (configured ratio) executors have been registered

2014-07-14 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master d60b09bb6 -> 3dd8af7a6


[SPARK-1946] Submit tasks after (configured ratio) executors have been 
registered

Because submitting tasks and registering executors are asynchronous, in most 
situations the tasks of early stages run without preferred locality.

A simple workaround is to sleep for a few seconds in the application, so that 
executors have enough time to register.

The PR adds 2 configuration properties to make the TaskScheduler submit tasks only 
after a configured fraction of executors have been registered.

\# Submit tasks only after (registered executors / total executors) reaches the 
configured ratio; default value is 0
spark.scheduler.minRegisteredExecutorsRatio = 0.8

\# Even if minRegisteredExecutorsRatio has not been reached, submit tasks once 
maxRegisteredWaitingTime (milliseconds) has elapsed; default value is 30000
spark.scheduler.maxRegisteredExecutorsWaitingTime = 5000
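
For illustration only, a minimal sketch of setting these two properties through SparkConf. The property names come from the description above; the app name and values (80% ratio, 5-second cap) are made up for this example, and the master is expected to be supplied externally (e.g. by spark-submit):

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Illustrative values only: wait until 80% of the requested executors have
// registered, but no longer than 5 seconds, before the first tasks are submitted.
val conf = new SparkConf()
  .setAppName("registration-ratio-example")
  .set("spark.scheduler.minRegisteredExecutorsRatio", "0.8")
  .set("spark.scheduler.maxRegisteredExecutorsWaitingTime", "5000")

val sc = new SparkContext(conf)
```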

Author: li-zhihui zhihui...@intel.com

Closes #900 from li-zhihui/master and squashes the following commits:

b9f8326 [li-zhihui] Add logs  edit docs
1ac08b1 [li-zhihui] Add new configs to user docs
22ead12 [li-zhihui] Move waitBackendReady to postStartHook
c6f0522 [li-zhihui] Bug fix: numExecutors wasn't set  use constant 
DEFAULT_NUMBER_EXECUTORS
4d6d847 [li-zhihui] Move waitBackendReady to TaskSchedulerImpl.start  some 
code refactor
0ecee9a [li-zhihui] Move waitBackendReady from DAGScheduler.submitStage to 
TaskSchedulerImpl.submitTasks
4261454 [li-zhihui] Add docs for new configs  code style
ce0868a [li-zhihui] Code style, rename configuration property name of 
minRegisteredRatio  maxRegisteredWaitingTime
6cfb9ec [li-zhihui] Code style, revert default minRegisteredRatio of yarn to 0, 
driver get --num-executors in yarn/alpha
812c33c [li-zhihui] Fix driver lost --num-executors option in yarn-cluster mode
e7b6272 [li-zhihui] support yarn-cluster
37f7dc2 [li-zhihui] support yarn mode(percentage style)
3f8c941 [li-zhihui] submit stage after (configured ratio of) executors have 
been registered


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3dd8af7a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3dd8af7a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3dd8af7a

Branch: refs/heads/master
Commit: 3dd8af7a6623201c28231f4b71f59ea4e9ae29bf
Parents: d60b09b
Author: li-zhihui zhihui...@intel.com
Authored: Mon Jul 14 15:32:49 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Jul 14 15:32:49 2014 -0500

--
 .../scala/org/apache/spark/SparkContext.scala   | 11 +-
 .../spark/scheduler/SchedulerBackend.scala  |  1 +
 .../spark/scheduler/TaskSchedulerImpl.scala | 15 
 .../cluster/CoarseGrainedSchedulerBackend.scala | 29 ++
 .../cluster/SparkDeploySchedulerBackend.scala   |  1 +
 docs/configuration.md   | 19 ++
 .../spark/deploy/yarn/ApplicationMaster.scala   |  1 +
 .../yarn/ApplicationMasterArguments.scala   |  6 ++-
 .../cluster/YarnClientClusterScheduler.scala|  2 +
 .../cluster/YarnClientSchedulerBackend.scala|  1 +
 .../cluster/YarnClusterScheduler.scala  |  2 +
 .../cluster/YarnClusterSchedulerBackend.scala   | 40 
 .../spark/deploy/yarn/ApplicationMaster.scala   |  1 +
 13 files changed, 127 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3dd8af7a/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 8819e73..8052499 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1531,7 +1531,16 @@ object SparkContext extends Logging {
 throw new SparkException("YARN mode not available ?", e)
   }
 }
-val backend = new CoarseGrainedSchedulerBackend(scheduler, 
sc.env.actorSystem)
+val backend = try {
+  val clazz =
+
Class.forName("org.apache.spark.scheduler.cluster.YarnClusterSchedulerBackend")
+  val cons = clazz.getConstructor(classOf[TaskSchedulerImpl], 
classOf[SparkContext])
+  cons.newInstance(scheduler, 
sc).asInstanceOf[CoarseGrainedSchedulerBackend]
+} catch {
+  case e: Exception => {
+throw new SparkException("YARN mode not available ?", e)
+  }
+}
 scheduler.initialize(backend)
 scheduler
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3dd8af7a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala
--
diff --git 

git commit: SPARK-2400 : fix spark.yarn.max.executor.failures explaination

2014-07-08 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master c8a2313cd -> b520b6453


SPARK-2400 : fix spark.yarn.max.executor.failures explaination

According to
```scala
  private val maxNumExecutorFailures = 
sparkConf.getInt("spark.yarn.max.executor.failures",
sparkConf.getInt("spark.yarn.max.worker.failures", 
math.max(args.numExecutors * 2, 3)))
```
the default value should be numExecutors * 2, with a minimum of 3, which is the same 
as for the config
`spark.yarn.max.worker.failures`
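
For illustration, a tiny sketch that just restates the default formula above (the helper name is hypothetical, not part of Spark):

```scala
// Hypothetical helper that restates the documented default:
// allow numExecutors * 2 failures, but never fewer than 3.
def defaultMaxExecutorFailures(numExecutors: Int): Int =
  math.max(numExecutors * 2, 3)

// Sample values: 1 executor -> 3 allowed failures, 10 executors -> 20.
assert(defaultMaxExecutorFailures(1) == 3)
assert(defaultMaxExecutorFailures(10) == 20)
```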

Author: CrazyJvm crazy...@gmail.com

Closes #1282 from CrazyJvm/yarn-doc and squashes the following commits:

1a5f25b [CrazyJvm] remove deprecated config
c438aec [CrazyJvm] fix style
86effa6 [CrazyJvm] change expression
211f130 [CrazyJvm] fix html tag
2900d23 [CrazyJvm] fix style
a4b2e27 [CrazyJvm] fix configuration spark.yarn.max.executor.failures


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b520b645
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b520b645
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b520b645

Branch: refs/heads/master
Commit: b520b6453ed76926108e0bdd56114d16e1d86850
Parents: c8a2313
Author: CrazyJvm crazy...@gmail.com
Authored: Tue Jul 8 13:55:42 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Jul 8 13:55:42 2014 -0500

--
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b520b645/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 5d8d603..0362f5a 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -55,7 +55,7 @@ Most of the configs are the same for Spark on YARN as for 
other deployment modes
 </tr>
 <tr>
   <td><code>spark.yarn.max.executor.failures</code></td>
-  <td>2*numExecutors</td>
+  <td>numExecutors * 2, with minimum of 3</td>
   <td>
 The maximum number of executor failures before failing the application.
   </td>



svn commit: r1606091 - in /hadoop/common/tags: release-0.23.11-rc0/ release-0.23.11/

2014-06-27 Thread tgraves
Author: tgraves
Date: Fri Jun 27 13:25:29 2014
New Revision: 1606091

URL: http://svn.apache.org/r1606091
Log:
Hadoop 0.23.11 release.

Added:
hadoop/common/tags/release-0.23.11/   (props changed)
  - copied from r1606090, hadoop/common/tags/release-0.23.11-rc0/
Removed:
hadoop/common/tags/release-0.23.11-rc0/

Propchange: hadoop/common/tags/release-0.23.11/
--
--- svn:ignore (added)
+++ svn:ignore Fri Jun 27 13:25:29 2014
@@ -0,0 +1,5 @@
+.classpath
+.git
+.project
+.settings
+target

Propchange: hadoop/common/tags/release-0.23.11/
--
--- svn:mergeinfo (added)
+++ svn:mergeinfo Fri Jun 27 13:25:29 2014
@@ -0,0 +1 @@
+/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409




git commit: Remove use of spark.worker.instances

2014-06-26 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 47f8829e0 -> 2d3080855


Remove use of spark.worker.instances

spark.worker.instances was added as part of this commit: 
https://github.com/apache/spark/commit/1617816090e7b20124a512a43860a21232ebf511

My understanding is that SPARK_WORKER_INSTANCES is supported for backwards 
compatibility,
but spark.worker.instances is never used (SparkSubmit.scala sets 
spark.executor.instances) so should
not have been added.

@sryza @pwendell @tgravescs LMK if I'm understanding this correctly
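
For illustration, a minimal sketch of the configuration path described above, assuming the standard SparkConf API; the app name and executor count are made up:

```scala
import org.apache.spark.SparkConf

// Illustrative only: the executor count is requested through
// spark.executor.instances (which is what spark-submit sets);
// spark.worker.instances is never read.
val conf = new SparkConf()
  .setAppName("executor-instances-example")
  .set("spark.executor.instances", "4") // hypothetical value
```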

Author: Kay Ousterhout kayousterh...@gmail.com

Closes #1214 from kayousterhout/yarn_config and squashes the following commits:

3d7c491 [Kay Ousterhout] Remove use of spark.worker.instances

(cherry picked from commit 48a82a827c99526b165c78d7e88faec43568a37a)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d308085
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d308085
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d308085

Branch: refs/heads/branch-1.0
Commit: 2d308085558e9a62147bc3e1761cf3a38f5b6fb4
Parents: 47f8829
Author: Kay Ousterhout kayousterh...@gmail.com
Authored: Thu Jun 26 08:20:27 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jun 26 08:20:59 2014 -0500

--
 .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2d308085/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
index e01ed5a..709871c 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala
@@ -63,7 +63,7 @@ private[spark] class YarnClientSchedulerBackend(
 // variables.
 List(("--driver-memory", "SPARK_MASTER_MEMORY", "spark.master.memory"),
   ("--driver-memory", "SPARK_DRIVER_MEMORY", "spark.driver.memory"),
-  ("--num-executors", "SPARK_WORKER_INSTANCES", "spark.worker.instances"),
+  ("--num-executors", "SPARK_WORKER_INSTANCES", 
"spark.executor.instances"),
   ("--num-executors", "SPARK_EXECUTOR_INSTANCES", 
"spark.executor.instances"),
   ("--executor-memory", "SPARK_WORKER_MEMORY", "spark.executor.memory"),
   ("--executor-memory", "SPARK_EXECUTOR_MEMORY", "spark.executor.memory"),



git commit: [SPARK-2051]In yarn.ClientBase spark.yarn.dist.* do not work

2014-06-19 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 67fca189c -> bce0897bc


[SPARK-2051]In yarn.ClientBase spark.yarn.dist.* do not work

Author: witgo wi...@qq.com

Closes #969 from witgo/yarn_ClientBase and squashes the following commits:

8117765 [witgo] review commit
3bdbc52 [witgo] Merge branch 'master' of https://github.com/apache/spark into 
yarn_ClientBase
5261b6c [witgo] fix sys.props.get(SPARK_YARN_DIST_FILES)
e3c1107 [witgo] update docs
b6a9aa1 [witgo] merge master
c8b4554 [witgo] review commit
2f48789 [witgo] Merge branch 'master' of https://github.com/apache/spark into 
yarn_ClientBase
8d7b82f [witgo] Merge branch 'master' of https://github.com/apache/spark into 
yarn_ClientBase
1048549 [witgo] remove Utils.resolveURIs
871f1db [witgo] add spark.yarn.dist.* documentation
41bce59 [witgo] review commit
35d6fa0 [witgo] move to ClientArguments
55d72fc [witgo] Merge branch 'master' of https://github.com/apache/spark into 
yarn_ClientBase
9cdff16 [witgo] review commit
8bc2f4b [witgo] review commit
20e667c [witgo] Merge branch 'master' into yarn_ClientBase
0961151 [witgo] merge master
ce609fc [witgo] Merge branch 'master' into yarn_ClientBase
8362489 [witgo] yarn.ClientBase spark.yarn.dist.* do not work


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bce0897b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bce0897b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bce0897b

Branch: refs/heads/master
Commit: bce0897bc6b0fc9bca5444dbe3a9e75523ad7481
Parents: 67fca18
Author: witgo wi...@qq.com
Authored: Thu Jun 19 12:11:26 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jun 19 12:11:26 2014 -0500

--
 docs/running-on-yarn.md | 20 +---
 .../spark/deploy/yarn/ClientArguments.scala | 15 +--
 .../apache/spark/deploy/yarn/ClientBase.scala   |  3 ++-
 .../cluster/YarnClientSchedulerBackend.scala|  4 +---
 4 files changed, 33 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bce0897b/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 4243ef4..fecd8f2 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -68,15 +68,29 @@ Most of the configs are the same for Spark on YARN as for 
other deployment modes
   </td>
 </tr>
 <tr>
-  <td><code>spark.yarn.executor.memoryOverhead</code></td>
-  <td>384</code></td>
+  <td><code>spark.yarn.dist.archives</code></td>
+  <td>(none)</td>
+  <td>
+Comma separated list of archives to be extracted into the working 
directory of each executor.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.yarn.dist.files</code></td>
+  <td>(none)</td>
+  <td>
+Comma-separated list of files to be placed in the working directory of 
each executor.
+  <td>
+</tr>
+<tr>
+ <td><code>spark.yarn.executor.memoryOverhead</code></td>
+  <td>384</td>
   <td>
 The amount of off heap memory (in megabytes) to be allocated per executor. 
This is memory that accounts for things like VM overheads, interned strings, 
other native overheads, etc.
   </td>
 </tr>
 <tr>
   <td><code>spark.yarn.driver.memoryOverhead</code></td>
-  <td>384</code></td>
+  <td>384</td>
   <td>
 The amount of off heap memory (in megabytes) to be allocated per driver. 
This is memory that accounts for things like VM overheads, interned strings, 
other native overheads, etc.
   </td>

http://git-wip-us.apache.org/repos/asf/spark/blob/bce0897b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
index fd3ef9e..62f9b3cf 100644
--- 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
+++ 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala
@@ -21,8 +21,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.scheduler.InputFormatInfo
-import org.apache.spark.util.IntParam
-import org.apache.spark.util.MemoryParam
+import org.apache.spark.util.{Utils, IntParam, MemoryParam}
 
 
 // TODO: Add code and support for ensuring that yarn resource 'tasks' are 
location aware !
@@ -45,6 +44,18 @@ class ClientArguments(val args: Array[String], val 
sparkConf: SparkConf) {
 
   parseArgs(args.toList)
 
+  // env variable SPARK_YARN_DIST_ARCHIVES/SPARK_YARN_DIST_FILES set in 
yarn-client then
+  // it should default to hdfs://
+  files = Option(files).getOrElse(sys.env.get("SPARK_YARN_DIST_FILES").orNull)
+  archives = 
Option(archives).getOrElse(sys.env.get("SPARK_YARN_DIST_ARCHIVES").orNull)
+
+  // 

svn commit: r1603642 - in /hadoop/common/branches/branch-0.23/hadoop-hdfs-project: hadoop-hdfs-httpfs/pom.xml hadoop-hdfs/CHANGES.txt hadoop-hdfs/pom.xml hadoop-hdfs/src/test/aop/build/aop.xml pom.xml

2014-06-18 Thread tgraves
Author: tgraves
Date: Wed Jun 18 21:47:05 2014
New Revision: 1603642

URL: http://svn.apache.org/r1603642
Log:
Preparing for 0.23.12 development

Modified:

hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml

hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml

hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml

Modified: 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
 Wed Jun 18 21:47:05 2014
@@ -22,12 +22,12 @@
   <parent>
 <groupId>org.apache.hadoop</groupId>
 <artifactId>hadoop-project</artifactId>
-<version>0.23.11-SNAPSHOT</version>
+<version>0.23.12-SNAPSHOT</version>
 <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-hdfs-httpfs</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.12-SNAPSHOT</version>
   <packaging>war</packaging>

   <name>Apache Hadoop HttpFS</name>

Modified: 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1603642r1=1603641r2=1603642view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt 
(original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt 
Wed Jun 18 21:47:05 2014
@@ -1,6 +1,18 @@
 Hadoop HDFS Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.12 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml 
(original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/pom.xml 
Wed Jun 18 21:47:05 2014
@@ -20,12 +20,12 @@
   <parent>
 <groupId>org.apache.hadoop</groupId>
 <artifactId>hadoop-project-dist</artifactId>
-<version>0.23.11-SNAPSHOT</version>
+<version>0.23.12-SNAPSHOT</version>
 <relativePath>../../hadoop-project-dist</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-hdfs</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.12-SNAPSHOT</version>
   <description>Apache Hadoop HDFS</description>
   <name>Apache Hadoop HDFS</name>
   <packaging>jar</packaging>

Modified: 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
 Wed Jun 18 21:47:05 2014
@@ -21,7 +21,7 @@
   <property name="aspectversion" value="1.6.5"/>
   <!-- TODO this has to be changed synchronously with build.xml version 
prop.-->
   <!-- this workarounds of test-patch setting its own 'version' -->
-  <property name="project.version" value="0.23.11-SNAPSHOT"/>
+  <property name="project.version" value="0.23.12-SNAPSHOT"/>

   <!-- Properties common for all fault injections -->
   <property name="build-fi.dir" value="${basedir}/build-fi"/>

Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/pom.xml Wed Jun 18 
21:47:05 2014
@@ -20,12 +20,12 @@
   parent
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-project/artifactId
-version0.23.11

svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project: hadoop-hdfs-httpfs/pom.xml hadoop-hdfs/CHANGES.txt hadoop-hdfs/pom.xml hadoop-hdfs/src/test/aop/build/aop.xml pom.

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:17:43 2014
New Revision: 1603694

URL: http://svn.apache.org/r1603694
Log:
Preparing for release 0.23.11

Modified:

hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -22,12 +22,12 @@
   <parent>
 <groupId>org.apache.hadoop</groupId>
 <artifactId>hadoop-project</artifactId>
-<version>0.23.11-SNAPSHOT</version>
+<version>0.23.11</version>
 <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-hdfs-httpfs</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <packaging>war</packaging>

   <name>Apache Hadoop HttpFS</name>

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
 Thu Jun 19 01:17:43 2014
@@ -1,6 +1,6 @@
 Hadoop HDFS Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml 
(original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/pom.xml 
Thu Jun 19 01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
 <groupId>org.apache.hadoop</groupId>
 <artifactId>hadoop-project-dist</artifactId>
-<version>0.23.11-SNAPSHOT</version>
+<version>0.23.11</version>
 <relativePath>../../hadoop-project-dist</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-hdfs</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <description>Apache Hadoop HDFS</description>
   <name>Apache Hadoop HDFS</name>
   <packaging>jar</packaging>

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/build/aop.xml
 Thu Jun 19 01:17:43 2014
@@ -21,7 +21,7 @@
   <property name="aspectversion" value="1.6.5"/>
   <!-- TODO this has to be changed synchronously with build.xml version 
prop.-->
   <!-- this workarounds of test-patch setting its own 'version' -->
-  <property name="project.version" value="0.23.11-SNAPSHOT"/>
+  <property name="project.version" value="0.23.11"/>

   <!-- Properties common for all fault injections -->
   <property name="build-fi.dir" value="${basedir}/build-fi"/>

Modified: hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml (original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-hdfs-project/pom.xml Thu Jun 
19 01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
 <groupId>org.apache.hadoop</groupId>
 <artifactId>hadoop-project</artifactId>
-<version>0.23.11-SNAPSHOT</version>
+<version>0.23.11</version>
 <relativePath>../hadoop-project</relativePath>
   </parent>

svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/ hadoop-mapreduce-client/hadoop-mapreduce-client-app/ hadoop-mapreduce-client/hado

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:17:43 2014
New Revision: 1603694

URL: http://svn.apache.org/r1603694
Log:
Preparing for release 0.23.11

Modified:
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/pom.xml

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt 
(original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/CHANGES.txt 
Thu Jun 19 01:17:43 2014
@@ -1,6 +1,6 @@
 Hadoop MapReduce Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -19,12 +19,12 @@
   parent
 artifactIdhadoop-mapreduce-client/artifactId
 groupIdorg.apache.hadoop/groupId
-version0.23.11-SNAPSHOT/version
+version0.23.11/version
   /parent
   modelVersion4.0.0/modelVersion
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-mapreduce-client-app/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.11/version
   namehadoop-mapreduce-client-app/name
 
   properties

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -19,12 +19,12 @@
   parent
 artifactIdhadoop-mapreduce-client/artifactId
 groupIdorg.apache.hadoop/groupId
-version0.23.11-SNAPSHOT/version
+version0.23.11/version
   /parent
   modelVersion4.0.0/modelVersion
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-mapreduce-client-common/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.11/version
   namehadoop-mapreduce-client-common/name
 
   properties

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-mapreduce-project

svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11/hadoop-yarn-project: ./ hadoop-yarn/ hadoop-yarn/hadoop-yarn-api/ hadoop-yarn/hadoop-yarn-applications/ hadoop-yarn/hadoop-yarn-applica

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:17:43 2014
New Revision: 1603694

URL: http://svn.apache.org/r1603694
Log:
Preparing for release 0.23.11

Modified:
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/pom.xml

Modified: hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt?rev=1603694r1=1603693r2=1603694view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt 
(original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/CHANGES.txt Thu 
Jun 19 01:17:43 2014
@@ -1,6 +1,6 @@
 Hadoop YARN Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -19,12 +19,12 @@
   parent
 artifactIdhadoop-yarn/artifactId
 groupIdorg.apache.hadoop/groupId
-version0.23.11-SNAPSHOT/version
+version0.23.11/version
   /parent
   modelVersion4.0.0/modelVersion
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-yarn-api/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.11/version
   namehadoop-yarn-api/name
 
   properties

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml?rev=1603694r1=1603693r2=1603694view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -19,12 +19,12 @@
   parent
 artifactIdhadoop-yarn-applications/artifactId
 groupIdorg.apache.hadoop/groupId
-version0.23.11-SNAPSHOT/version
+version0.23.11/version
   /parent
   modelVersion4.0.0/modelVersion
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-yarn-applications-distributedshell/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.11/version
   namehadoop-yarn-applications-distributedshell/name
 
   properties

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml?rev=1603694r1=1603693r2=1603694view=diff

svn commit: r1603641 - /hadoop/common/branches/branch-0.23.11/

2014-06-18 Thread tgraves
Author: tgraves
Date: Wed Jun 18 21:43:10 2014
New Revision: 1603641

URL: http://svn.apache.org/r1603641
Log:
Branching for 0.23.11 releases

Added:
hadoop/common/branches/branch-0.23.11/   (props changed)
  - copied from r1603640, hadoop/common/branches/branch-0.23/

Propchange: hadoop/common/branches/branch-0.23.11/
--
--- svn:ignore (added)
+++ svn:ignore Wed Jun 18 21:43:10 2014
@@ -0,0 +1,5 @@
+.classpath
+.git
+.project
+.settings
+target

Propchange: hadoop/common/branches/branch-0.23.11/
--
--- svn:mergeinfo (added)
+++ svn:mergeinfo Wed Jun 18 21:43:10 2014
@@ -0,0 +1 @@
+/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409




svn commit: r1603642 - in /hadoop/common/branches/branch-0.23: ./ hadoop-assemblies/ hadoop-client/ hadoop-dist/ hadoop-minicluster/ hadoop-project-dist/ hadoop-project/ hadoop-tools/ hadoop-tools/had

2014-06-18 Thread tgraves
Author: tgraves
Date: Wed Jun 18 21:47:05 2014
New Revision: 1603642

URL: http://svn.apache.org/r1603642
Log:
Preparing for 0.23.12 development

Modified:
hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml
hadoop/common/branches/branch-0.23/hadoop-client/pom.xml
hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml
hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml
hadoop/common/branches/branch-0.23/hadoop-project-dist/pom.xml
hadoop/common/branches/branch-0.23/hadoop-project/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-archives/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-datajoin/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-distcp/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-extras/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-gridmix/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-pipes/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-rumen/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-streaming/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/hadoop-tools-dist/pom.xml
hadoop/common/branches/branch-0.23/hadoop-tools/pom.xml
hadoop/common/branches/branch-0.23/pom.xml

Modified: hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-assemblies/pom.xml Wed Jun 18 
21:47:05 2014
@@ -23,12 +23,12 @@
   parent
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-project/artifactId
-version0.23.11-SNAPSHOT/version
+version0.23.12-SNAPSHOT/version
 relativePath../hadoop-project/relativePath
   /parent
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-assemblies/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.12-SNAPSHOT/version
   nameApache Hadoop Assemblies/name
   descriptionApache Hadoop Assemblies/description
 

Modified: hadoop/common/branches/branch-0.23/hadoop-client/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-client/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-client/pom.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-client/pom.xml Wed Jun 18 
21:47:05 2014
@@ -18,12 +18,12 @@
   parent
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-project/artifactId
-version0.23.11-SNAPSHOT/version
+version0.23.12-SNAPSHOT/version
 relativePath../hadoop-project/relativePath
   /parent
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-client/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.12-SNAPSHOT/version
   packagingjar/packaging
 
   descriptionApache Hadoop Client/description

Modified: hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-dist/pom.xml Wed Jun 18 21:47:05 
2014
@@ -20,12 +20,12 @@
   parent
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-project/artifactId
-version0.23.11-SNAPSHOT/version
+version0.23.12-SNAPSHOT/version
 relativePath../hadoop-project/relativePath
   /parent
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-dist/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.12-SNAPSHOT/version
   descriptionApache Hadoop Distribution/description
   nameApache Hadoop Distribution/name
   packagingjar/packaging

Modified: hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml (original)
+++ hadoop/common/branches/branch-0.23/hadoop-minicluster/pom.xml Wed Jun 18 
21:47:05 2014
@@ -18,12 +18,12 @@
   parent
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-project/artifactId
-version0.23.11-SNAPSHOT/version
+version0.23.12-SNAPSHOT/version
 relativePath../hadoop-project/relativePath
   /parent
   groupIdorg.apache.hadoop/groupId
   artifactIdhadoop-minicluster/artifactId
-  version0.23.11-SNAPSHOT/version
+  version0.23.12-SNAPSHOT/version
   packagingjar/packaging
 
   descriptionApache Hadoop Mini

svn commit: r1603642 - in /hadoop/common/branches/branch-0.23/hadoop-common-project: hadoop-annotations/pom.xml hadoop-auth-examples/pom.xml hadoop-auth/pom.xml hadoop-common/CHANGES.txt hadoop-common

2014-06-18 Thread tgraves
Author: tgraves
Date: Wed Jun 18 21:47:05 2014
New Revision: 1603642

URL: http://svn.apache.org/r1603642
Log:
Preparing for 0.23.12 development

Modified:

hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml

hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml

hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt

hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml
hadoop/common/branches/branch-0.23/hadoop-common-project/pom.xml

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml?rev=1603642r1=1603641r2=1603642view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-annotations/pom.xml
 Wed Jun 18 21:47:05 2014
@@ -21,12 +21,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.12-SNAPSHOT</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-annotations</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.12-SNAPSHOT</version>
   <description>Apache Hadoop Annotations</description>
   <name>Apache Hadoop Annotations</name>
   <packaging>jar</packaging>

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml?rev=1603642&r1=1603641&r2=1603642&view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth-examples/pom.xml
 Wed Jun 18 21:47:05 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.12-SNAPSHOT</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-auth-examples</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.12-SNAPSHOT</version>
   <packaging>war</packaging>
 
   <name>Apache Hadoop Auth Examples</name>

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml?rev=1603642&r1=1603641&r2=1603642&view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml 
(original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-auth/pom.xml 
Wed Jun 18 21:47:05 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.12-SNAPSHOT</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-auth</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.12-SNAPSHOT</version>
   <packaging>jar</packaging>
 
   <name>Apache Hadoop Auth</name>

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1603642&r1=1603641&r2=1603642&view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
 (original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/CHANGES.txt
 Wed Jun 18 21:47:05 2014
@@ -1,6 +1,18 @@
 Hadoop Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.12 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml?rev=1603642&r1=1603641&r2=1603642&view=diff
==
--- 
hadoop/common/branches/branch-0.23/hadoop-common-project/hadoop-common/pom.xml 
(original)
+++ 
hadoop/common/branches/branch-0.23/hadoop-common-project

svn commit: r1603694 - in /hadoop/common/branches/branch-0.23.11: ./ hadoop-assemblies/ hadoop-client/ hadoop-dist/ hadoop-minicluster/ hadoop-project-dist/ hadoop-project/ hadoop-tools/ hadoop-tools/

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:17:43 2014
New Revision: 1603694

URL: http://svn.apache.org/r1603694
Log:
Preparing for release 0.23.11

Modified:
hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-project-dist/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-project/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-archives/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-datajoin/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-distcp/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-extras/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-gridmix/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-pipes/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-rumen/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-streaming/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/hadoop-tools-dist/pom.xml
hadoop/common/branches/branch-0.23.11/hadoop-tools/pom.xml
hadoop/common/branches/branch-0.23.11/pom.xml

Modified: hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml (original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-assemblies/pom.xml Thu Jun 19 
01:17:43 2014
@@ -23,12 +23,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-assemblies</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <name>Apache Hadoop Assemblies</name>
   <description>Apache Hadoop Assemblies</description>
 

Modified: hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml (original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-client/pom.xml Thu Jun 19 
01:17:43 2014
@@ -18,12 +18,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-client</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <packaging>jar</packaging>
 
   <description>Apache Hadoop Client</description>

Modified: hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml (original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-dist/pom.xml Thu Jun 19 
01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-dist</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <description>Apache Hadoop Distribution</description>
   <name>Apache Hadoop Distribution</name>
   <packaging>jar</packaging>

Modified: hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml (original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-minicluster/pom.xml Thu Jun 19 
01:17:43 2014
@@ -18,12 +18,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-minicluster</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <packaging>jar</packaging>

svn commit: r1603694 [3/3] - in /hadoop/common/branches/branch-0.23.11/hadoop-common-project: ./ hadoop-annotations/ hadoop-auth-examples/ hadoop-auth/ hadoop-common/ hadoop-common/src/main/docs/

2014-06-18 Thread tgraves
Modified: hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml 
(original)
+++ hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml Thu Jun 
19 01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-common-project</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <description>Apache Hadoop Common Project</description>
   <name>Apache Hadoop Common Project</name>
   <packaging>pom</packaging>




svn commit: r1603694 [1/3] - in /hadoop/common/branches/branch-0.23.11/hadoop-common-project: ./ hadoop-annotations/ hadoop-auth-examples/ hadoop-auth/ hadoop-common/ hadoop-common/src/main/docs/

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:17:43 2014
New Revision: 1603694

URL: http://svn.apache.org/r1603694
Log:
Preparing for release 0.23.11

Modified:

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml

hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html
hadoop/common/branches/branch-0.23.11/hadoop-common-project/pom.xml

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-annotations/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -21,12 +21,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-annotations</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <description>Apache Hadoop Annotations</description>
   <name>Apache Hadoop Annotations</name>
   <packaging>jar</packaging>

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth-examples/pom.xml
 Thu Jun 19 01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-auth-examples</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <packaging>war</packaging>
 
   <name>Apache Hadoop Auth Examples</name>

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml 
(original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-auth/pom.xml 
Thu Jun 19 01:17:43 2014
@@ -20,12 +20,12 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>0.23.11-SNAPSHOT</version>
+    <version>0.23.11</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <groupId>org.apache.hadoop</groupId>
   <artifactId>hadoop-auth</artifactId>
-  <version>0.23.11-SNAPSHOT</version>
+  <version>0.23.11</version>
   <packaging>jar</packaging>
 
   <name>Apache Hadoop Auth</name>

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/CHANGES.txt
 Thu Jun 19 01:17:43 2014
@@ -1,6 +1,6 @@
 Hadoop Change Log
 
-Release 0.23.11 - UNRELEASED
+Release 0.23.11 - 2014-06-26
 
   INCOMPATIBLE CHANGES
 

Modified: 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml?rev=1603694&r1=1603693&r2=1603694&view=diff
==
--- 
hadoop/common/branches/branch-0.23.11/hadoop-common-project/hadoop-common/pom.xml
 (original)
+++ 
hadoop/common/branches/branch-0.23.11/hadoop-common

svn commit: r1603696 - /hadoop/common/tags/release-0.23.11-rc0/

2014-06-18 Thread tgraves
Author: tgraves
Date: Thu Jun 19 01:32:25 2014
New Revision: 1603696

URL: http://svn.apache.org/r1603696
Log:
Hadoop 0.23.11-rc0 release.

Added:
hadoop/common/tags/release-0.23.11-rc0/   (props changed)
  - copied from r1603695, hadoop/common/branches/branch-0.23.11/

Propchange: hadoop/common/tags/release-0.23.11-rc0/
--
--- svn:ignore (added)
+++ svn:ignore Thu Jun 19 01:32:25 2014
@@ -0,0 +1,5 @@
+.classpath
+.git
+.project
+.settings
+target

Propchange: hadoop/common/tags/release-0.23.11-rc0/
--
--- svn:mergeinfo (added)
+++ svn:mergeinfo Thu Jun 19 01:32:25 2014
@@ -0,0 +1 @@
+/hadoop/common/trunk:1161777,1161781,1162188,1162421,1162491,1162499,1162613,1162928,1162954,1162979,1163050,1163069,1163490,1163768,1163852,1163858,1163981,1164255,1164301,1164339,1166009,1166402,1167001,1167383,1167662,1170085,1170379,1170459,1171297,1172916,1173402,1176550,1177487,1177531,1177859,1177864,1182189,1182205,1182214,1189613,1189932,1189982,1195575,1196113,1196129,1196676,1197801,1199024,1201991,1204114,1204117,1204122,1204124,1204129,1204131,1204177,1204370,1204376,1204388,1205260,1205697,1206786,1206830,1207694,1208153,1208313,1212021,1212062,1212073,1212084,1213537,1213586,1213592-1213593,1213954,1214046,1220510,1221348,1225114,1225192,1225456,1225489,1225591,1226211,1226239,1226350,1227091,1227165,1227423,1227964,1229347,1230398,1231569,1231572,1231627,1231640,1233605,1234555,1235135,1235137,1235956,1236456,1239752,1240897,1240928,1243065,1243104,1244766,1245751,1245762,1293419,1304099,1351818,1373683,1382409




git commit: [SPARK-2080] Yarn: report HS URL in client mode, correct user in cluster mode.

2014-06-12 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 83c226d45 -> ecde5b837


[SPARK-2080] Yarn: report HS URL in client mode, correct user in cluster mode.

Yarn client mode was not setting the app's tracking URL to the
History Server's URL when configured by the user. Now client mode
behaves the same as cluster mode.

In SparkContext.scala, the user.name system property had precedence
over the SPARK_USER environment variable. This means that SPARK_USER
was never used, since user.name is always set by the JVM. In Yarn
cluster mode, this means the application always reported itself as
being run by user yarn (or whatever user was running the Yarn NM).
One could argue that the correct fix would be to use UGI.getCurrentUser()
here, but at least for Yarn that will match what SPARK_USER is set
to.
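
The precedence change boils down to consulting the environment first and only
falling back to the JVM property. A standalone sketch (the object and method
names here are invented for illustration; only SPARK_USER and user.name come
from the change itself):

  object SparkUserSketch {
    // Hypothetical helper mirroring the env-var-first lookup described above.
    def resolveSparkUser(unknownUser: String = "<unknown>"): String =
      Option(System.getenv("SPARK_USER"))                // set for the submitting user in cluster mode
        .orElse(Option(System.getProperty("user.name"))) // JVM user, e.g. "yarn" on the NM
        .getOrElse(unknownUser)

    def main(args: Array[String]): Unit =
      println("SPARK_USER resolves to: " + resolveSparkUser())
  }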

Author: Marcelo Vanzin van...@cloudera.com

This patch had conflicts when merged, resolved by
Committer: Thomas Graves tgra...@apache.org

Closes #1002 from vanzin/yarn-client-url and squashes the following commits:

4046e04 [Marcelo Vanzin] Set HS link in yarn-alpha also.
4c692d9 [Marcelo Vanzin] Yarn: report HS URL in client mode, correct user in 
cluster mode.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ecde5b83
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ecde5b83
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ecde5b83

Branch: refs/heads/master
Commit: ecde5b837534b11d365fcab78089820990b815cf
Parents: 83c226d
Author: Marcelo Vanzin van...@cloudera.com
Authored: Thu Jun 12 16:19:36 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Jun 12 16:19:36 2014 -0500

--
 core/src/main/scala/org/apache/spark/SparkContext.scala| 2 +-
 .../scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala  | 1 +
 .../scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala  | 6 +++---
 3 files changed, 5 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 8bdaf0b..df15186 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -297,7 +297,7 @@ class SparkContext(config: SparkConf) extends Logging {
 
   // Set SPARK_USER for user who is running SparkContext.
   val sparkUser = Option {
-    Option(System.getProperty("user.name")).getOrElse(System.getenv("SPARK_USER"))
+    Option(System.getenv("SPARK_USER")).getOrElse(System.getProperty("user.name"))
   }.getOrElse {
     SparkContext.SPARK_UNKNOWN_USER
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
index a3bd915..b6ecae1 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
@@ -271,6 +271,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, 
conf: Configuration, sp
   .asInstanceOf[FinishApplicationMasterRequest]
 finishReq.setAppAttemptId(appAttemptId)
 finishReq.setFinishApplicationStatus(status)
+    finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", ""))
 resourceManager.finishApplicationMaster(finishReq)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ecde5b83/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
index 4f8854a..f71ad03 100644
--- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
+++ 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ExecutorLauncher.scala
@@ -115,7 +115,7 @@ class ExecutorLauncher(args: ApplicationMasterArguments, 
conf: Configuration, sp
 val interval = math.min(timeoutInterval / 2, schedulerInterval)
 
 reporterThread = launchReporterThread(interval)
-
+
 
 // Wait for the reporter thread to Finish.
 reporterThread.join()
@@ -134,12 +134,12 @@ class ExecutorLauncher(args: ApplicationMasterArguments, 
conf: Configuration, sp
     // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X
 

git commit: SPARK-1639. Tidy up some Spark on YARN code

2014-06-11 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 6e1193031 -> 2a4225dd9


SPARK-1639. Tidy up some Spark on YARN code

This contains a bunch of small tidyings of the Spark on YARN code.

I focused on the yarn stable code.  @tgravescs, let me know if you'd like me to 
make these for the alpha code as well.

Author: Sandy Ryza sa...@cloudera.com

Closes #561 from sryza/sandy-spark-1639 and squashes the following commits:

72b6a02 [Sandy Ryza] Fix comment and set name on driver thread
c2190b2 [Sandy Ryza] SPARK-1639. Tidy up some Spark on YARN code


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2a4225dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2a4225dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2a4225dd

Branch: refs/heads/master
Commit: 2a4225dd91d3f735625bb6bae6fca8fd06ca
Parents: 6e11930
Author: Sandy Ryza sa...@cloudera.com
Authored: Wed Jun 11 07:57:28 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Wed Jun 11 07:57:28 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   |  16 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   |  38 ++--
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  28 +--
 .../cluster/YarnClusterScheduler.scala  |  10 +-
 .../spark/deploy/yarn/ApplicationMaster.scala   | 197 +--
 .../org/apache/spark/deploy/yarn/Client.scala   |  10 +-
 .../spark/deploy/yarn/ExecutorLauncher.scala|  40 ++--
 7 files changed, 161 insertions(+), 178 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2a4225dd/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 8f0ecb8..1cc9c33 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -277,7 +277,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 yarnAllocator.allocateContainers(
   math.max(args.numExecutors - yarnAllocator.getNumExecutorsRunning, 
0))
 ApplicationMaster.incrementAllocatorLoop(1)
-Thread.sleep(100)
+Thread.sleep(ApplicationMaster.ALLOCATE_HEARTBEAT_INTERVAL)
   }
 } finally {
   // In case of exceptions, etc - ensure that count is at least 
ALLOCATOR_LOOP_WAIT_COUNT,
@@ -416,6 +416,7 @@ object ApplicationMaster {
   // TODO: Currently, task to container is computed once (TaskSetManager) - 
which need not be
   // optimal as more containers are available. Might need to handle this 
better.
   private val ALLOCATOR_LOOP_WAIT_COUNT = 30
+  private val ALLOCATE_HEARTBEAT_INTERVAL = 100
 
   def incrementAllocatorLoop(by: Int) {
 val count = yarnAllocatorLoop.getAndAdd(by)
@@ -467,13 +468,22 @@ object ApplicationMaster {
   })
 }
 
-// Wait for initialization to complete and atleast 'some' nodes can get 
allocated.
+modified
+  }
+
+
+  /**
+   * Returns when we've either
+   *  1) received all the requested executors,
+   *  2) waited ALLOCATOR_LOOP_WAIT_COUNT * ALLOCATE_HEARTBEAT_INTERVAL ms,
+   *  3) hit an error that causes us to terminate trying to get containers.
+   */
+  def waitForInitialAllocations() {
 yarnAllocatorLoop.synchronized {
       while (yarnAllocatorLoop.get() <= ALLOCATOR_LOOP_WAIT_COUNT) {
 yarnAllocatorLoop.wait(1000L)
   }
 }
-modified
   }
 
   def main(argStrings: Array[String]) {

http://git-wip-us.apache.org/repos/asf/spark/blob/2a4225dd/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 801e8b3..29a3568 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -19,7 +19,6 @@ package org.apache.spark.deploy.yarn
 
 import java.io.File
 import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException}
-import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{HashMap, ListBuffer, Map}
@@ -37,7 +36,7 @@ import 
org.apache.hadoop.yarn.api.ApplicationConstants.Environment
 import org.apache.hadoop.yarn.api.protocolrecords._
 import org.apache.hadoop.yarn.api.records._
 import org.apache.hadoop.yarn.conf.YarnConfiguration
-import 

git commit: [SPARK-1978] In some cases, spark-yarn does not automatically restart the failed container

2014-06-10 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master a9a461c59 -> 884ca718b


[SPARK-1978] In some cases, spark-yarn does not automatically restart the 
failed container

Author: witgo wi...@qq.com

Closes #921 from witgo/allocateExecutors and squashes the following commits:

bc3aa66 [witgo] review commit
8800eba [witgo] Merge branch 'master' of https://github.com/apache/spark into 
allocateExecutors
32ac7af [witgo] review commit
056b8c7 [witgo] Merge branch 'master' of https://github.com/apache/spark into 
allocateExecutors
04c6f7e [witgo] Merge branch 'master' into allocateExecutors
aff827c [witgo] review commit
5c376e0 [witgo] Merge branch 'master' of https://github.com/apache/spark into 
allocateExecutors
1faf4f4 [witgo] Merge branch 'master' into allocateExecutors
3c464bd [witgo] add time limit to allocateExecutors
e00b656 [witgo] In some cases, yarn does not automatically restart the container
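
The core of the fix, visible in the diff below, is that the allocation loop now
tops up resource requests whenever running plus pending executors fall short of
the target, instead of only requesting once up front. The bookkeeping, as a toy
sketch (the ToyAllocator trait is invented; only the arithmetic mirrors the
patch):

  trait ToyAllocator {
    def running: Int
    def pending: Int
    def addResourceRequests(n: Int): Unit
  }

  object MissingExecutorSketch {
    // Request containers for executors that are neither running nor already requested.
    def topUp(target: Int, allocator: ToyAllocator): Int = {
      val missing = target - allocator.running - allocator.pending
      if (missing > 0) allocator.addResourceRequests(missing)
      math.max(missing, 0)
    }
  }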


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/884ca718
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/884ca718
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/884ca718

Branch: refs/heads/master
Commit: 884ca718b24f0bbe93358f2a366463b4e4d31f49
Parents: a9a461c
Author: witgo wi...@qq.com
Authored: Tue Jun 10 10:34:57 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Jun 10 10:34:57 2014 -0500

--
 .../spark/deploy/yarn/ApplicationMaster.scala   | 39 +++-
 .../spark/deploy/yarn/ExecutorLauncher.scala| 22 ++-
 2 files changed, 34 insertions(+), 27 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/884ca718/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index c1dfe3f..33a60d9 100644
--- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -252,15 +252,12 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
     try {
       logInfo("Allocating " + args.numExecutors + " executors.")
       // Wait until all containers have finished
-      // TODO: This is a bit ugly. Can we make it nicer?
-      // TODO: Handle container failure
       yarnAllocator.addResourceRequests(args.numExecutors)
+      yarnAllocator.allocateResources()
       // Exits the loop if the user thread exits.
       while (yarnAllocator.getNumExecutorsRunning < args.numExecutors && userThread.isAlive) {
-        if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-          finishApplicationMaster(FinalApplicationStatus.FAILED,
-            "max number of executor failures reached")
-        }
+        checkNumExecutorsFailed()
+        allocateMissingExecutor()
         yarnAllocator.allocateResources()
         ApplicationMaster.incrementAllocatorLoop(1)
         Thread.sleep(100)
@@ -289,23 +286,31 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 }
   }
 
+  private def allocateMissingExecutor() {
+    val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning -
+      yarnAllocator.getNumPendingAllocate
+    if (missingExecutorCount > 0) {
+      logInfo("Allocating %d containers to make up for (potentially) lost containers".
+        format(missingExecutorCount))
+      yarnAllocator.addResourceRequests(missingExecutorCount)
+    }
+  }
+
+  private def checkNumExecutorsFailed() {
+    if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
+      finishApplicationMaster(FinalApplicationStatus.FAILED,
+        "max number of executor failures reached")
+    }
+  }
+
   private def launchReporterThread(_sleepTime: Long): Thread = {
     val sleepTime = if (_sleepTime <= 0) 0 else _sleepTime
 
 val t = new Thread {
   override def run() {
 while (userThread.isAlive) {
-          if (yarnAllocator.getNumExecutorsFailed >= maxNumExecutorFailures) {
-            finishApplicationMaster(FinalApplicationStatus.FAILED,
-              "max number of executor failures reached")
-          }
-          val missingExecutorCount = args.numExecutors - yarnAllocator.getNumExecutorsRunning -
-            yarnAllocator.getNumPendingAllocate
-          if (missingExecutorCount > 0) {
-            logInfo("Allocating %d containers to make up for (potentially) lost containers".
-              format(missingExecutorCount))
-            yarnAllocator.addResourceRequests(missingExecutorCount)
-          }
+          checkNumExecutorsFailed()
+  

git commit: [SPARK-1522] : YARN ClientBase throws a NPE if there is no YARN Application CP

2014-06-09 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 6cf335d79 -> e27344768


[SPARK-1522] : YARN ClientBase throws a NPE if there is no YARN Application CP

The current implementation of ClientBase.getDefaultYarnApplicationClasspath inspects
the MRJobConfig class for the field DEFAULT_YARN_APPLICATION_CLASSPATH when it should
really be looking at YarnConfiguration. If the application configuration has no
yarn.application.classpath defined, an NPE is thrown.
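
A minimal sketch of the safer lookup order, assuming hadoop-yarn-api on the
classpath (the object and method names are made up; YARN_APPLICATION_CLASSPATH
and DEFAULT_YARN_APPLICATION_CLASSPATH are the real YarnConfiguration members):

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.yarn.conf.YarnConfiguration
  import scala.util.Try

  object YarnClasspathSketch {
    // Prefer the configured classpath, fall back to YarnConfiguration's default,
    // and never dereference a missing value.
    def yarnAppClasspath(conf: Configuration): Seq[String] =
      Option(conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH))
        .map(_.toSeq)
        .orElse(defaultYarnAppClasspath)
        .getOrElse(Seq.empty)

    private def defaultYarnAppClasspath: Option[Seq[String]] =
      Try {
        val field = classOf[YarnConfiguration].getField("DEFAULT_YARN_APPLICATION_CLASSPATH")
        field.get(null).asInstanceOf[Array[String]].toSeq
      }.toOption
  }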

Additional Changes include:
* Test Suite for ClientBase added

[ticket: SPARK-1522] : https://issues.apache.org/jira/browse/SPARK-1522

Author  : bernardo.gomezpala...@gmail.com
Testing : SPARK_HADOOP_VERSION=2.3.0 SPARK_YARN=true ./sbt/sbt test

Author: Bernardo Gomez Palacio bernardo.gomezpala...@gmail.com

Closes #433 from berngp/feature/SPARK-1522 and squashes the following commits:

2c2e118 [Bernardo Gomez Palacio] [SPARK-1522]: YARN ClientBase throws a NPE if 
there is no YARN Application specific CP


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2734476
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2734476
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2734476

Branch: refs/heads/master
Commit: e273447684779a18bd61d733bfe7958b78657ffd
Parents: 6cf335d
Author: Bernardo Gomez Palacio bernardo.gomezpala...@gmail.com
Authored: Mon Jun 9 16:14:54 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Mon Jun 9 16:14:54 2014 -0500

--
 .../apache/spark/deploy/yarn/ClientBase.scala   |  89 +--
 .../spark/deploy/yarn/ClientBaseSuite.scala | 112 +++
 2 files changed, 167 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e2734476/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index aeb3f00..4b5e0ef 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable.{HashMap, ListBuffer, Map}
+import scala.util.{Try, Success, Failure}
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs._
@@ -378,7 +379,7 @@ trait ClientBase extends Logging {
   }
 }
 
-object ClientBase {
+object ClientBase extends Logging {
   val SPARK_JAR: String = "__spark__.jar"
   val APP_JAR: String = "__app__.jar"
   val LOG4J_PROP: String = "log4j.properties"
@@ -388,37 +389,47 @@ object ClientBase {
 
   def getSparkJar = sys.env.get("SPARK_JAR").getOrElse(SparkContext.jarOfClass(this.getClass).head)
 
-  // Based on code from org.apache.hadoop.mapreduce.v2.util.MRApps
-  def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) {
-    val classpathEntries = Option(conf.getStrings(
-      YarnConfiguration.YARN_APPLICATION_CLASSPATH)).getOrElse(
-        getDefaultYarnApplicationClasspath())
-    if (classpathEntries != null) {
-      for (c <- classpathEntries) {
-        YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
-          File.pathSeparator)
-      }
+  def populateHadoopClasspath(conf: Configuration, env: HashMap[String, String]) = {
+    val classPathElementsToAdd = getYarnAppClasspath(conf) ++ getMRAppClasspath(conf)
+    for (c <- classPathElementsToAdd.flatten) {
+      YarnSparkHadoopUtil.addToEnvironment(
+        env,
+        Environment.CLASSPATH.name,
+        c.trim,
+        File.pathSeparator)
     }
+    classPathElementsToAdd
+  }
 
-    val mrClasspathEntries = Option(conf.getStrings(
-      "mapreduce.application.classpath")).getOrElse(
-        getDefaultMRApplicationClasspath())
-    if (mrClasspathEntries != null) {
-      for (c <- mrClasspathEntries) {
-        YarnSparkHadoopUtil.addToEnvironment(env, Environment.CLASSPATH.name, c.trim,
-          File.pathSeparator)
-      }
-    }
+  private def getYarnAppClasspath(conf: Configuration): Option[Seq[String]] =
+    Option(conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH)) match {
+      case Some(s) => Some(s.toSeq)
+      case None => getDefaultYarnApplicationClasspath
   }
 
-  def getDefaultYarnApplicationClasspath(): Array[String] = {
-    try {
-      val field = classOf[MRJobConfig].getField("DEFAULT_YARN_APPLICATION_CLASSPATH")
-      field.get(null).asInstanceOf[Array[String]]
-    } catch {
-      case err: NoSuchFieldError => null
-      case err: NoSuchFieldException => null
+  private def 

git commit: SPARK-1557 Set permissions on event log files/directories

2014-04-29 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 9a1184a8a -> 8db0f7e28


SPARK-1557 Set permissions on event log files/directories

This adds minimal setting of event log directory/file permissions. To have a
secure environment the user must manually create the top-level event log
directory and set its permissions; we can add logic to do that automatically
later if we want.
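
For reference, restricting a log directory to rwxrwx--- (0770) with the Hadoop
FileSystem API looks roughly like the sketch below (the object name and the
directory argument are illustrative; FsPermission.createImmutable and
setPermission are the calls the change relies on):

  import org.apache.hadoop.conf.Configuration
  import org.apache.hadoop.fs.Path
  import org.apache.hadoop.fs.permission.FsPermission

  object EventLogPermissionsSketch {
    // Create the directory if needed, then tighten it to 0770.
    def ensureRestrictedDir(conf: Configuration, dir: String): Unit = {
      val perm = FsPermission.createImmutable(Integer.parseInt("770", 8).toShort)
      val path = new Path(dir)
      val fs = path.getFileSystem(conf)
      if (!fs.exists(path)) fs.mkdirs(path)
      if (fs.getFileStatus(path).getPermission.toShort != perm.toShort) {
        fs.setPermission(path, perm)
      }
    }
  }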

Author: Thomas Graves tgra...@apache.org

Closes #538 from tgravescs/SPARK-1557 and squashes the following commits:

e471d8e [Thomas Graves] rework
d8b6620 [Thomas Graves] update use of octal
3ca9b79 [Thomas Graves] Updated based on comments
5a09709 [Thomas Graves] add in missing import
3150ed6 [Thomas Graves] SPARK-1557 Set permissions on event log 
files/directories


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8db0f7e2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8db0f7e2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8db0f7e2

Branch: refs/heads/master
Commit: 8db0f7e28f5f0330a3344705ff48d8e7b97c383f
Parents: 9a1184a
Author: Thomas Graves tgra...@apache.org
Authored: Tue Apr 29 09:19:48 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Tue Apr 29 09:19:48 2014 -0500

--
 .../spark/scheduler/EventLoggingListener.scala  |  6 +-
 .../org/apache/spark/util/FileLogger.scala  | 22 +++-
 docs/security.md|  2 ++
 3 files changed, 24 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8db0f7e2/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala 
b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
index 2fe65cd..d822a8e 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala
@@ -21,6 +21,7 @@ import scala.collection.mutable
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hadoop.fs.permission.FsPermission
 import org.json4s.jackson.JsonMethods._
 
 import org.apache.spark.{Logging, SparkConf, SparkContext}
@@ -54,7 +55,7 @@ private[spark] class EventLoggingListener(
 
   private val logger =
 new FileLogger(logDir, conf, hadoopConfiguration, outputBufferSize, 
shouldCompress,
-  shouldOverwrite)
+  shouldOverwrite, Some(LOG_FILE_PERMISSIONS))
 
   /**
* Begin logging events.
@@ -124,6 +125,9 @@ private[spark] object EventLoggingListener extends Logging {
   val SPARK_VERSION_PREFIX = "SPARK_VERSION_"
   val COMPRESSION_CODEC_PREFIX = "COMPRESSION_CODEC_"
   val APPLICATION_COMPLETE = "APPLICATION_COMPLETE"
+  val LOG_FILE_PERMISSIONS: FsPermission =
+    FsPermission.createImmutable(Integer.parseInt("770", 8).toShort)
+
 
   // A cache for compression codecs to avoid creating the same codec many times
   private val codecMap = new mutable.HashMap[String, CompressionCodec]

http://git-wip-us.apache.org/repos/asf/spark/blob/8db0f7e2/core/src/main/scala/org/apache/spark/util/FileLogger.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala 
b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
index 1ed3b70..0965e0f 100644
--- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala
+++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
@@ -24,6 +24,7 @@ import java.util.Date
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path}
+import org.apache.hadoop.fs.permission.FsPermission
 
 import org.apache.spark.{Logging, SparkConf}
 import org.apache.spark.io.CompressionCodec
@@ -42,7 +43,8 @@ private[spark] class FileLogger(
 hadoopConfiguration: Configuration,
 outputBufferSize: Int = 8 * 1024, // 8 KB
 compress: Boolean = false,
-overwrite: Boolean = true)
+overwrite: Boolean = true,
+dirPermissions: Option[FsPermission] = None)
   extends Logging {
 
   private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
@@ -79,16 +81,25 @@ private[spark] class FileLogger(
 if (!fileSystem.mkdirs(path)) {
       throw new IOException("Error in creating log directory: %s".format(logDir))
 }
+if (dirPermissions.isDefined) {
+  val fsStatus = fileSystem.getFileStatus(path)
+  if (fsStatus.getPermission().toShort() != dirPermissions.get.toShort) {
+fileSystem.setPermission(path, dirPermissions.get)
+  }
+}
   }
 
   /**
* Create a new writer for the file identified by the given path.
+   * If 

svn commit: r1588283 - in /hadoop/common/branches/branch-2/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

2014-04-17 Thread tgraves
Author: tgraves
Date: Thu Apr 17 15:25:27 2014
New Revision: 1588283

URL: http://svn.apache.org/r1588283
Log:
Merge 1588281 to branch-2. YARN-1931. Private API change in YARN-1824 in 2.4 
broke compatibility with previous releases (Sandy Ryza via tgraves)

Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt

hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1588283&r1=1588282&r2=1588283&view=diff
==
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Thu Apr 17 
15:25:27 2014
@@ -114,6 +114,9 @@ Release 2.4.1 - UNRELEASED
 YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling
 Disconnected event from ZK. (Karthik Kambatla via jianhe)
 
+YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility 
+with previous releases (Sandy Ryza via tgraves)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

Modified: 
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588283&r1=1588282&r2=1588283&view=diff
==
--- 
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 (original)
+++ 
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 Thu Apr 17 15:25:27 2014
@@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 
 /**
- * Yarn application related utilities
+ * Yarn internal application-related utilities
  */
 @Private
 public class Apps {
@@ -97,6 +97,17 @@ public class Apps {
   }
 }
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void setEnvFromInputString(Map<String, String> env,
+  String envString) {
+setEnvFromInputString(env, envString, File.pathSeparator);
+  }
 
   @Public
   @Unstable
@@ -112,6 +123,18 @@ public class Apps {
 environment.put(StringInterner.weakIntern(variable), 
 StringInterner.weakIntern(val));
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void addToEnvironment(
+      Map<String, String> environment,
+  String variable, String value) {
+addToEnvironment(environment, variable, value, File.pathSeparator);
+  }
 
   public static String crossPlatformify(String var) {
 return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var




svn commit: r1588281 - in /hadoop/common/trunk/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

2014-04-17 Thread tgraves
Author: tgraves
Date: Thu Apr 17 15:23:22 2014
New Revision: 1588281

URL: http://svn.apache.org/r1588281
Log:
YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility with 
previous releases (Sandy Ryza via tgraves)
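
The compatibility point is that code compiled against the pre-2.4 three-argument
helpers keeps linking once the deprecated overloads below are restored. A small
usage sketch (Scala for brevity; the map contents are made up):

  import java.io.File
  import java.util.{HashMap => JHashMap}
  import org.apache.hadoop.yarn.util.Apps

  object AppsCompatSketch {
    def main(args: Array[String]): Unit = {
      val env = new JHashMap[String, String]()
      // Old shape that downstream frameworks such as Spark and Tez had been calling:
      Apps.addToEnvironment(env, "CLASSPATH", "/opt/extra/*")
      // Newer shape with the separator made explicit:
      Apps.addToEnvironment(env, "CLASSPATH", "/opt/more/*", File.pathSeparator)
      println(env)
    }
  }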

Modified:
hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt

hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

Modified: hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt?rev=1588281&r1=1588280&r2=1588281&view=diff
==
--- hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-yarn-project/CHANGES.txt Thu Apr 17 15:23:22 2014
@@ -129,6 +129,9 @@ Release 2.4.1 - UNRELEASED
 YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling
 Disconnected event from ZK. (Karthik Kambatla via jianhe)
 
+YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility 
+with previous releases (Sandy Ryza via tgraves)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

Modified: 
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588281&r1=1588280&r2=1588281&view=diff
==
--- 
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 (original)
+++ 
hadoop/common/trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 Thu Apr 17 15:23:22 2014
@@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 
 /**
- * Yarn application related utilities
+ * Yarn internal application-related utilities
  */
 @Private
 public class Apps {
@@ -97,6 +97,17 @@ public class Apps {
   }
 }
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void setEnvFromInputString(Map<String, String> env,
+  String envString) {
+setEnvFromInputString(env, envString, File.pathSeparator);
+  }
 
   @Public
   @Unstable
@@ -112,6 +123,18 @@ public class Apps {
 environment.put(StringInterner.weakIntern(variable), 
 StringInterner.weakIntern(val));
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void addToEnvironment(
+      Map<String, String> environment,
+  String variable, String value) {
+addToEnvironment(environment, variable, value, File.pathSeparator);
+  }
 
   public static String crossPlatformify(String var) {
 return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var




svn commit: r1588287 - in /hadoop/common/branches/branch-2.4/hadoop-yarn-project: CHANGES.txt hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

2014-04-17 Thread tgraves
Author: tgraves
Date: Thu Apr 17 15:31:58 2014
New Revision: 1588287

URL: http://svn.apache.org/r1588287
Log:
Merge 1588281 to branch-2.4. YARN-1931. Private API change in YARN-1824 in 2.4 
broke compatibility with previous releases (Sandy Ryza via tgraves)

Modified:
hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt

hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java

Modified: hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt?rev=1588287&r1=1588286&r2=1588287&view=diff
==
--- hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.4/hadoop-yarn-project/CHANGES.txt Thu Apr 
17 15:31:58 2014
@@ -61,6 +61,9 @@ Release 2.4.1 - UNRELEASED
 YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling
 Disconnected event from ZK. (Karthik Kambatla via jianhe)
 
+YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility 
+with previous releases (Sandy Ryza via tgraves)
+
 Release 2.4.0 - 2014-04-07 
 
   INCOMPATIBLE CHANGES

Modified: 
hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
URL: 
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java?rev=1588287&r1=1588286&r2=1588287&view=diff
==
--- 
hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 (original)
+++ 
hadoop/common/branches/branch-2.4/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
 Thu Apr 17 15:31:58 2014
@@ -38,7 +38,7 @@ import org.apache.hadoop.yarn.api.record
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 
 /**
- * Yarn application related utilities
+ * Yarn internal application-related utilities
  */
 @Private
 public class Apps {
@@ -97,6 +97,17 @@ public class Apps {
   }
 }
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void setEnvFromInputString(Map<String, String> env,
+  String envString) {
+setEnvFromInputString(env, envString, File.pathSeparator);
+  }
 
   @Public
   @Unstable
@@ -112,6 +123,18 @@ public class Apps {
 environment.put(StringInterner.weakIntern(variable), 
 StringInterner.weakIntern(val));
   }
+  
+  /**
+   * This older version of this method is kept around for compatibility
+   * because downstream frameworks like Spark and Tez have been using it.
+   * Downstream frameworks are expected to move off of it.
+   */
+  @Deprecated
+  public static void addToEnvironment(
+      Map<String, String> environment,
+  String variable, String value) {
+addToEnvironment(environment, variable, value, File.pathSeparator);
+  }
 
   public static String crossPlatformify(String var) {
 return ApplicationConstants.PARAMETER_EXPANSION_LEFT + var




git commit: [SPARK-1395] Allow local: URIs to work on Yarn.

2014-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 b3ad707c4 -> a83a794f1


[SPARK-1395] Allow local: URIs to work on Yarn.

This only works for the three paths defined in the environment
(SPARK_JAR, SPARK_YARN_APP_JAR and SPARK_LOG4J_CONF).

Tested by running SparkPi with local: and file: URIs against Yarn cluster (no 
upload shows up in logs in the local case).
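
The scheme check this relies on can be sketched as follows (the helper is
invented; only the convention that a local: URI is already present on every
node and is therefore not uploaded comes from the change):

  import java.net.URI

  object LocalUriSketch {
    // Decide whether a resource has to be shipped to the cluster.
    def needsUpload(rawPath: String): Boolean =
      Option(new URI(rawPath).getScheme) match {
        case Some("local") => false  // use getPath() directly on each node
        case _             => true   // file:, hdfs:, or schemeless paths get distributed
      }

    def main(args: Array[String]): Unit =
      Seq("local:/opt/spark/lib/spark-assembly.jar", "hdfs:///user/foo/app.jar", "/tmp/app.jar")
        .foreach(p => println(p + " needsUpload=" + needsUpload(p)))
  }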

Author: Marcelo Vanzin van...@cloudera.com

Closes #303 from vanzin/yarn-local and squashes the following commits:

82219c1 [Marcelo Vanzin] [SPARK-1395] Allow local: URIs to work on Yarn.

(cherry picked from commit 69047506bf97e6e37e4079c87cb0327d3760ac41)
Signed-off-by: Thomas Graves tgra...@apache.org


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a83a794f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a83a794f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a83a794f

Branch: refs/heads/branch-1.0
Commit: a83a794f1accc616cfccde78af44c5cbf066c647
Parents: b3ad707
Author: Marcelo Vanzin van...@cloudera.com
Authored: Thu Apr 17 10:29:38 2014 -0500
Committer: Thomas Graves tgra...@apache.org
Committed: Thu Apr 17 10:33:15 2014 -0500

--
 .../org/apache/spark/deploy/SparkSubmit.scala   |   4 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   | 190 +--
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  17 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   6 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 6 files changed, 142 insertions(+), 79 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index e05fbfe..e5d593c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.{PrintStream, File}
-import java.net.URL
+import java.net.{URI, URL}
 
 import org.apache.spark.executor.ExecutorURLClassLoader
 
@@ -216,7 +216,7 @@ object SparkSubmit {
   }
 
   private def addJarToClasspath(localJar: String, loader: 
ExecutorURLClassLoader) {
-val localJarFile = new File(localJar)
+val localJarFile = new File(new URI(localJar).getPath())
 if (!localJarFile.exists()) {
       printWarning(s"Jar $localJar does not exist, skipping.")
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 3469b7d..7dae248 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -82,7 +82,7 @@ class ExecutorRunnable(
 ctx.setContainerTokens(ByteBuffer.wrap(dob.getData()))
 
 val commands = prepareCommand(masterAddress, slaveId, hostname, 
executorMemory, executorCores,
-  localResources.contains(ClientBase.LOG4J_PROP))
+  localResources)
 logInfo(Setting up executor with commands:  + commands)
 ctx.setCommands(commands)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 628dd98..566de71 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy.yarn
 
 import java.io.File
-import java.net.{InetAddress, UnknownHostException, URI}
+import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException}
 import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
@@ -209,53 +209,35 @@ trait ClientBase extends Logging {
 
     Map(
       ClientBase.SPARK_JAR -> System.getenv("SPARK_JAR"), ClientBase.APP_JAR -> args.userJar,
-      ClientBase.LOG4J_PROP -> System.getenv("SPARK_LOG4J_CONF")
+      ClientBase.LOG4J_PROP -> System.getenv(ClientBase.LOG4J_CONF_ENV_KEY)
     ).foreach { case(destName, _localPath) =>
       val localPath: String = if 
