Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8cdd12156 -> a6af60f25


[SPARK-19038][YARN] Avoid overwriting keytab configuration in yarn-client

## What changes were proposed in this pull request?

Because yarn#client resets the `spark.yarn.keytab` configuration to point 
to the keytab's location in the distributed cache, if the user still uses the 
old `SparkConf` to create a `SparkSession` with Hive enabled, it will read the 
keytab from the path in the distributed cache. This is fine for yarn cluster 
mode, but in yarn client mode, where the driver runs outside of a container, 
it will fail to fetch the keytab.

So here we should avoid resetting this configuration in `yarn#client` and 
only overwrite it for the AM, so that `spark.yarn.keytab` yields the correct 
keytab path whether running in client mode (keytab on the local fs) or cluster 
mode (keytab in the distributed cache).

## How was this patch tested?

Verified in a secure (Kerberos-enabled) cluster.

Author: jerryshao <[email protected]>

Closes #16923 from jerryshao/SPARK-19038.

(cherry picked from commit a920a4369434c84274866a09f61e402232c3b47c)
Signed-off-by: Marcelo Vanzin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a6af60f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a6af60f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a6af60f2

Branch: refs/heads/branch-2.0
Commit: a6af60f25b3d04670123c848831f066d8998b1a8
Parents: 8cdd121
Author: jerryshao <[email protected]>
Authored: Fri Feb 24 09:31:52 2017 -0800
Committer: Marcelo Vanzin <[email protected]>
Committed: Fri Feb 24 09:32:32 2017 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala   | 4 ----
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala    | 9 ++++++---
 2 files changed, 6 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a6af60f2/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 7db51d4..7dcfdb3 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -102,10 +102,6 @@ private[hive] class HiveClientImpl(
 
     // Set up kerberos credentials for UserGroupInformation.loginUser within
     // current class loader
-    // Instead of using the spark conf of the current spark context, a new
-    // instance of SparkConf is needed for the original value of 
spark.yarn.keytab
-    // and spark.yarn.principal set in SparkSubmit, as yarn.Client resets the
-    // keytab configuration for the link name in distributed cache
     if (sparkConf.contains("spark.yarn.principal") && 
sparkConf.contains("spark.yarn.keytab")) {
       val principalName = sparkConf.get("spark.yarn.principal")
       val keytabFileName = sparkConf.get("spark.yarn.keytab")

http://git-wip-us.apache.org/repos/asf/spark/blob/a6af60f2/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index f78f9ce..151b08e 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -102,6 +102,7 @@ private[spark] class Client(
   private var principal: String = null
   private var keytab: String = null
   private var credentials: Credentials = null
+  private var amKeytabFileName: String = null
 
   private val launcherBackend = new LauncherBackend() {
     override def onStopRequest(): Unit = {
@@ -456,7 +457,7 @@ private[spark] class Client(
       logInfo("To enable the AM to login from keytab, credentials are being 
copied over to the AM" +
         " via the YARN Secure Distributed Cache.")
       val (_, localizedPath) = distribute(keytab,
-        destName = sparkConf.get(KEYTAB),
+        destName = Some(amKeytabFileName),
         appMasterOnly = true)
       require(localizedPath != null, "Keytab file already distributed.")
     }
@@ -685,6 +686,9 @@ private[spark] class Client(
       // Save Spark configuration to a file in the archive.
       val props = new Properties()
       sparkConf.getAll.foreach { case (k, v) => props.setProperty(k, v) }
+      // Override spark.yarn.key to point to the location in distributed cache 
which will be used
+      // by AM.
+      Option(amKeytabFileName).foreach { k => props.setProperty(KEYTAB.key, k) 
}
       confStream.putNextEntry(new ZipEntry(SPARK_CONF_FILE))
       val writer = new OutputStreamWriter(confStream, StandardCharsets.UTF_8)
       props.store(writer, "Spark configuration.")
@@ -1003,8 +1007,7 @@ private[spark] class Client(
       val f = new File(keytab)
       // Generate a file name that can be used for the keytab file, that does 
not conflict
       // with any user file.
-      val keytabFileName = f.getName + "-" + UUID.randomUUID().toString
-      sparkConf.set(KEYTAB.key, keytabFileName)
+      amKeytabFileName = f.getName + "-" + UUID.randomUUID().toString
       sparkConf.set(PRINCIPAL.key, principal)
     }
     // Defensive copy of the credentials


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to