Repository: spark
Updated Branches:
  refs/heads/master ca2a780e7 -> 57accf6e3


[SPARK-22319][CORE] call loginUserFromKeytab before accessing hdfs

In `SparkSubmit`, call `loginUserFromKeytab` before attempting to make RPC 
calls to the NameNode.

I manually tested this patch by:

1. Confirming that my Spark application failed to launch with the error 
reported in https://issues.apache.org/jira/browse/SPARK-22319.
2. Applying this patch and confirming that the app no longer fails to launch, 
even when I have not manually run `kinit` on the host.

Presumably we also want integration tests for secure clusters so that we catch 
this sort of thing. I'm happy to take a shot at this if it's feasible and 
someone can point me in the right direction.

Author: Steven Rand <[email protected]>

Closes #19540 from sjrand/SPARK-22319.

Change-Id: Ic306bfe7181107fbcf92f61d75856afcb5b6f761


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57accf6e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57accf6e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57accf6e

Branch: refs/heads/master
Commit: 57accf6e3965ff69adc4408623916c5003918235
Parents: ca2a780
Author: Steven Rand <[email protected]>
Authored: Mon Oct 23 09:43:45 2017 +0800
Committer: jerryshao <[email protected]>
Committed: Mon Oct 23 09:43:45 2017 +0800

----------------------------------------------------------------------
 .../org/apache/spark/deploy/SparkSubmit.scala   | 32 ++++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/57accf6e/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index 135bbe9..b7e6d0e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -342,6 +342,22 @@ object SparkSubmit extends CommandLineUtils with Logging {
     val hadoopConf = conf.getOrElse(SparkHadoopUtil.newConfiguration(sparkConf))
     val targetDir = Utils.createTempDir()
 
+    // assure a keytab is available from any place in a JVM
+    if (clusterManager == YARN || clusterManager == LOCAL || clusterManager == MESOS) {
+      if (args.principal != null) {
+        if (args.keytab != null) {
+          require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
+          // Add keytab and principal configurations in sysProps to make them available
+          // for later use; e.g. in spark sql, the isolated class loader used to talk
+          // to HiveMetastore will use these settings. They will be set as Java system
+          // properties and then loaded by SparkConf
+          sysProps.put("spark.yarn.keytab", args.keytab)
+          sysProps.put("spark.yarn.principal", args.principal)
+          UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
+        }
+      }
+    }
+
     // Resolve glob path for different resources.
     args.jars = Option(args.jars).map(resolveGlobPaths(_, hadoopConf)).orNull
     args.files = Option(args.files).map(resolveGlobPaths(_, hadoopConf)).orNull
@@ -641,22 +657,6 @@ object SparkSubmit extends CommandLineUtils with Logging {
       }
     }
 
-    // assure a keytab is available from any place in a JVM
-    if (clusterManager == YARN || clusterManager == LOCAL || clusterManager == MESOS) {
-      if (args.principal != null) {
-        if (args.keytab != null) {
-          require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
-          // Add keytab and principal configurations in sysProps to make them available
-          // for later use; e.g. in spark sql, the isolated class loader used to talk
-          // to HiveMetastore will use these settings. They will be set as Java system
-          // properties and then loaded by SparkConf
-          sysProps.put("spark.yarn.keytab", args.keytab)
-          sysProps.put("spark.yarn.principal", args.principal)
-          UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
-        }
-      }
-    }
-
     if (clusterManager == MESOS && UserGroupInformation.isSecurityEnabled) {
       setRMPrincipal(sysProps)
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to