git commit: Spark 1490 Add kerberos support to the HistoryServer

pwendell Thu, 24 Apr 2014 11:17:52 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-1.0 00a3ccc7c -> 866b03ef4



Spark 1490 Add kerberos support to the HistoryServer

Here I've added the ability for the History server to login from a kerberos 
keytab file so that the history server can be run as a super user and stay up 
for along period of time while reading the history files from HDFS.

Author: Thomas Graves <[email protected]>

Closes #513 from tgravescs/SPARK-1490 and squashes the following commits:

e204a99 [Thomas Graves] remove extra logging
5418daa [Thomas Graves] fix typo in config
0076b99 [Thomas Graves] Update docs
4d76545 [Thomas Graves] SPARK-1490 Add kerberos support to the HistoryServer
(cherry picked from commit bd375094a1480b0ff9c16ab8ddd2dba8731506df)

Signed-off-by: Patrick Wendell <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/866b03ef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/866b03ef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/866b03ef

Branch: refs/heads/branch-1.0
Commit: 866b03ef4d27b2160563b58d577de29ba6eb4442
Parents: 00a3ccc
Author: Thomas Graves <[email protected]>
Authored: Thu Apr 24 11:15:12 2014 -0700
Committer: Patrick Wendell <[email protected]>
Committed: Thu Apr 24 11:16:47 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/deploy/SparkHadoopUtil.scala   |  4 ++++
 .../spark/deploy/history/HistoryServer.scala    | 16 +++++++++++++
 docs/monitoring.md                              | 24 ++++++++++++++++++++
 3 files changed, 44 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
index 9bdbfb3..498fcc5 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala
@@ -75,6 +75,10 @@ class SparkHadoopUtil {
 
   def getSecretKeyFromUserCredentials(key: String): Array[Byte] = { null }
 
+  def loginUserFromKeytab(principalName: String, keytabFilename: String) { 
+    UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename)
+  }
+
 }
 
 object SparkHadoopUtil {

http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
----------------------------------------------------------------------
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
index b8f5623..d7a3246 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
@@ -22,6 +22,7 @@ import scala.collection.mutable
 import org.apache.hadoop.fs.{FileStatus, Path}
 
 import org.apache.spark.{Logging, SecurityManager, SparkConf}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.scheduler._
 import org.apache.spark.ui.{WebUI, SparkUI}
 import org.apache.spark.ui.JettyUtils._
@@ -257,6 +258,7 @@ object HistoryServer {
   val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
 
   def main(argStrings: Array[String]) {
+    initSecurity()
     val args = new HistoryServerArguments(argStrings)
     val securityManager = new SecurityManager(conf)
     val server = new HistoryServer(args.logDir, securityManager, conf)
@@ -266,6 +268,20 @@ object HistoryServer {
     while(true) { Thread.sleep(Int.MaxValue) }
     server.stop()
   }
+
+  def initSecurity() {
+    // If we are accessing HDFS and it has security enabled (Kerberos), we 
have to login
+    // from a keytab file so that we can access HDFS beyond the kerberos 
ticket expiration.
+    // As long as it is using Hadoop rpc (hdfs://), a relogin will 
automatically
+    // occur from the keytab.
+    if (conf.getBoolean("spark.history.kerberos.enabled", false)) {
+      // if you have enabled kerberos the following 2 params must be set
+      val principalName = conf.get("spark.history.kerberos.principal")
+      val keytabFilename = conf.get("spark.history.kerberos.keytab")
+      SparkHadoopUtil.get.loginUserFromKeytab(principalName, keytabFilename)
+    }
+  }
+
 }
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/866b03ef/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 144be3d..347a9b1 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -91,6 +91,30 @@ represents an application's event logs. This creates a web 
interface at
       The port to which the web interface of the history server binds.
     </td>
   </tr>
+  <tr>
+    <td>spark.history.kerberos.enabled</td>
+    <td>false</td>
+    <td>
+      Indicates whether the history server should use kerberos to login. This 
is useful
+      if the history server is accessing HDFS files on a secure Hadoop 
cluster. If this is 
+      true it looks uses the configs 
<code>spark.history.kerberos.principal</code> and
+      <code>spark.history.kerberos.keytab</code>. 
+    </td>
+  </tr>
+  <tr>
+    <td>spark.history.kerberos.principal</td>
+    <td>(none)</td>
+    <td>
+      Kerberos principal name for the History Server.
+    </td>
+  </tr>
+  <tr>
+    <td>spark.history.kerberos.keytab</td>
+    <td>(none)</td>
+    <td>
+      Location of the kerberos keytab file for the History Server.
+    </td>
+  </tr>
 </table>
 
 Note that in all of these UIs, the tables are sortable by clicking their 
headers,

git commit: Spark 1490 Add kerberos support to the HistoryServer

Reply via email to