Repository: spark Updated Branches: refs/heads/branch-1.0 00a3ccc7c -> 866b03ef4
Spark 1490 Add kerberos support to the HistoryServer Here I've added the ability for the History server to login from a kerberos keytab file so that the history server can be run as a super user and stay up for a long period of time while reading the history files from HDFS. Author: Thomas Graves <[email protected]> Closes #513 from tgravescs/SPARK-1490 and squashes the following commits: e204a99 [Thomas Graves] remove extra logging 5418daa [Thomas Graves] fix typo in config 0076b99 [Thomas Graves] Update docs 4d76545 [Thomas Graves] SPARK-1490 Add kerberos support to the HistoryServer (cherry picked from commit bd375094a1480b0ff9c16ab8ddd2dba8731506df) Signed-off-by: Patrick Wendell <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/866b03ef Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/866b03ef Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/866b03ef Branch: refs/heads/branch-1.0 Commit: 866b03ef4d27b2160563b58d577de29ba6eb4442 Parents: 00a3ccc Author: Thomas Graves <[email protected]> Authored: Thu Apr 24 11:15:12 2014 -0700 Committer: Patrick Wendell <[email protected]> Committed: Thu Apr 24 11:16:47 2014 -0700 ---------------------------------------------------------------------- .../apache/spark/deploy/SparkHadoopUtil.scala | 4 ++++ .../spark/deploy/history/HistoryServer.scala | 16 +++++++++++++ docs/monitoring.md | 24 ++++++++++++++++++++ 3 files changed, 44 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 9bdbfb3..498fcc5 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -75,6 +75,10 @@ class SparkHadoopUtil { def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null } + def loginUserFromKeytab(principalName: String, keytabFilename: String) { + UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename) + } + } object SparkHadoopUtil { http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index b8f5623..d7a3246 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.spark.{Logging, SecurityManager, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.scheduler._ import org.apache.spark.ui.{WebUI, SparkUI} import org.apache.spark.ui.JettyUtils._ @@ -257,6 +258,7 @@ object HistoryServer { val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR def main(argStrings: Array[String]) { + initSecurity() val args = new HistoryServerArguments(argStrings) val securityManager = new SecurityManager(conf) val server = new HistoryServer(args.logDir, securityManager, conf) @@ -266,6 +268,20 @@ object HistoryServer { while(true) { Thread.sleep(Int.MaxValue) } server.stop() } + + def initSecurity() { + // If we are accessing HDFS and it has security enabled (Kerberos), we have to login + // from a keytab file so that we can access HDFS beyond the kerberos ticket expiration. 
+ // As long as it is using Hadoop rpc (hdfs://), a relogin will automatically + // occur from the keytab. + if (conf.getBoolean("spark.history.kerberos.enabled", false)) { + // if you have enabled kerberos the following 2 params must be set + val principalName = conf.get("spark.history.kerberos.principal") + val keytabFilename = conf.get("spark.history.kerberos.keytab") + SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename) + } + } + } http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/docs/monitoring.md ---------------------------------------------------------------------- diff --git a/docs/monitoring.md b/docs/monitoring.md index 144be3d..347a9b1 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -91,6 +91,30 @@ represents an application's event logs. This creates a web interface at The port to which the web interface of the history server binds. </td> </tr> + <tr> + <td>spark.history.kerberos.enabled</td> + <td>false</td> + <td> + Indicates whether the history server should use kerberos to login. This is useful + if the history server is accessing HDFS files on a secure Hadoop cluster. If this is + true it uses the configs <code>spark.history.kerberos.principal</code> and + <code>spark.history.kerberos.keytab</code>. + </td> + </tr> + <tr> + <td>spark.history.kerberos.principal</td> + <td>(none)</td> + <td> + Kerberos principal name for the History Server. + </td> + </tr> + <tr> + <td>spark.history.kerberos.keytab</td> + <td>(none)</td> + <td> + Location of the kerberos keytab file for the History Server. + </td> + </tr> </table> Note that in all of these UIs, the tables are sortable by clicking their headers,
