Repository: spark Updated Branches: refs/heads/master 5ee216618 -> 51540c2fa
[SPARK-25372][YARN][K8S] Deprecate and generalize keytab / principal config ## What changes were proposed in this pull request? SparkSubmit already logs in the user if a keytab is provided; the only issue is that it uses the existing configs, which have "yarn" in their name. As such, the configs were changed to `spark.kerberos.keytab` and `spark.kerberos.principal`. ## How was this patch tested? Will be tested with K8S tests, but needs to be tested with Yarn - [x] K8S Secure HDFS tests - [x] Yarn Secure HDFS tests vanzin Closes #22362 from ifilonenko/SPARK-25372. Authored-by: Ilan Filonenko <i...@cornell.edu> Signed-off-by: Marcelo Vanzin <van...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51540c2f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51540c2f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51540c2f Branch: refs/heads/master Commit: 51540c2fa677658be954c820bc18ba748e4c8583 Parents: 5ee2166 Author: Ilan Filonenko <i...@cornell.edu> Authored: Wed Sep 26 17:24:52 2018 -0700 Committer: Marcelo Vanzin <van...@cloudera.com> Committed: Wed Sep 26 17:24:52 2018 -0700 ---------------------------------------------------------------------- R/pkg/R/sparkR.R | 2 ++ R/pkg/vignettes/sparkr-vignettes.Rmd | 4 ++-- core/src/main/scala/org/apache/spark/SparkConf.scala | 6 +++++- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 6 ++++-- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 10 ++++++++-- .../scala/org/apache/spark/internal/config/package.scala | 4 ++-- docs/running-on-yarn.md | 4 ++-- docs/sparkr.md | 4 ++-- .../scala/org/apache/spark/streaming/Checkpoint.scala | 2 ++ 9 files changed, 29 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/R/pkg/R/sparkR.R ---------------------------------------------------------------------- diff --git 
a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index d3a9cba..038fefa 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -626,6 +626,8 @@ sparkConfToSubmitOps[["spark.driver.extraLibraryPath"]] <- "--driver-library-pat sparkConfToSubmitOps[["spark.master"]] <- "--master" sparkConfToSubmitOps[["spark.yarn.keytab"]] <- "--keytab" sparkConfToSubmitOps[["spark.yarn.principal"]] <- "--principal" +sparkConfToSubmitOps[["spark.kerberos.keytab"]] <- "--keytab" +sparkConfToSubmitOps[["spark.kerberos.principal"]] <- "--principal" # Utility function that returns Spark Submit arguments as a string http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/R/pkg/vignettes/sparkr-vignettes.Rmd ---------------------------------------------------------------------- diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 090363c..ad93494 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -157,8 +157,8 @@ Property Name | Property group | spark-submit equivalent `spark.driver.extraClassPath` | Runtime Environment | `--driver-class-path` `spark.driver.extraJavaOptions` | Runtime Environment | `--driver-java-options` `spark.driver.extraLibraryPath` | Runtime Environment | `--driver-library-path` -`spark.yarn.keytab` | Application Properties | `--keytab` -`spark.yarn.principal` | Application Properties | `--principal` +`spark.kerberos.keytab` | Application Properties | `--keytab` +`spark.kerberos.principal` | Application Properties | `--principal` **For Windows users**: Due to different file prefixes across operating systems, to avoid the issue of potential wrong prefix, a current workaround is to specify `spark.sql.warehouse.dir` when starting the `SparkSession`. 
http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/core/src/main/scala/org/apache/spark/SparkConf.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 6c4c5c9..e0f98f1 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -726,7 +726,11 @@ private[spark] object SparkConf extends Logging { DRIVER_MEMORY_OVERHEAD.key -> Seq( AlternateConfig("spark.yarn.driver.memoryOverhead", "2.3")), EXECUTOR_MEMORY_OVERHEAD.key -> Seq( - AlternateConfig("spark.yarn.executor.memoryOverhead", "2.3")) + AlternateConfig("spark.yarn.executor.memoryOverhead", "2.3")), + KEYTAB.key -> Seq( + AlternateConfig("spark.yarn.keytab", "2.5")), + PRINCIPAL.key -> Seq( + AlternateConfig("spark.yarn.principal", "2.5")) ) /** http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index cf902db..d5f2865 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -520,6 +520,10 @@ private[spark] class SparkSubmit extends Logging { confKey = "spark.driver.extraJavaOptions"), OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, confKey = "spark.driver.extraLibraryPath"), + OptionAssigner(args.principal, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + confKey = PRINCIPAL.key), + OptionAssigner(args.keytab, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, + confKey = KEYTAB.key), // Propagate attributes for dependency resolution at the driver side OptionAssigner(args.packages, STANDALONE | MESOS, CLUSTER, confKey = 
"spark.jars.packages"), @@ -537,8 +541,6 @@ private[spark] class SparkSubmit extends Logging { OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.jars"), OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.files"), OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.dist.archives"), - OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.principal"), - OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, confKey = "spark.yarn.keytab"), // Other options OptionAssigner(args.executorCores, STANDALONE | YARN | KUBERNETES, ALL_DEPLOY_MODES, http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 0998757..4cf08a7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -199,8 +199,14 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S numExecutors = Option(numExecutors) .getOrElse(sparkProperties.get("spark.executor.instances").orNull) queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull - keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull - principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull + keytab = Option(keytab) + .orElse(sparkProperties.get("spark.kerberos.keytab")) + .orElse(sparkProperties.get("spark.yarn.keytab")) + .orNull + principal = Option(principal) + .orElse(sparkProperties.get("spark.kerberos.principal")) + .orElse(sparkProperties.get("spark.yarn.principal")) + .orNull dynamicAllocationEnabled = 
sparkProperties.get("spark.dynamicAllocation.enabled").exists("true".equalsIgnoreCase) http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/core/src/main/scala/org/apache/spark/internal/config/package.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9891b6a..7f63422 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -152,11 +152,11 @@ package object config { private[spark] val SHUFFLE_SERVICE_PORT = ConfigBuilder("spark.shuffle.service.port").intConf.createWithDefault(7337) - private[spark] val KEYTAB = ConfigBuilder("spark.yarn.keytab") + private[spark] val KEYTAB = ConfigBuilder("spark.kerberos.keytab") .doc("Location of user's keytab.") .stringConf.createOptional - private[spark] val PRINCIPAL = ConfigBuilder("spark.yarn.principal") + private[spark] val PRINCIPAL = ConfigBuilder("spark.kerberos.principal") .doc("Name of the Kerberos principal.") .stringConf.createOptional http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/docs/running-on-yarn.md ---------------------------------------------------------------------- diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index e3d67c3..687f9e4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -465,7 +465,7 @@ providers can be disabled individually by setting `spark.security.credentials.{s <table class="table"> <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> <tr> - <td><code>spark.yarn.keytab</code></td> + <td><code>spark.kerberos.keytab</code></td> <td>(none)</td> <td> The full path to the file that contains the keytab for the principal specified above. 
This keytab @@ -477,7 +477,7 @@ providers can be disabled individually by setting `spark.security.credentials.{s </td> </tr> <tr> - <td><code>spark.yarn.principal</code></td> + <td><code>spark.kerberos.principal</code></td> <td>(none)</td> <td> Principal to be used to login to KDC, while running on secure clusters. Equivalent to the http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/docs/sparkr.md ---------------------------------------------------------------------- diff --git a/docs/sparkr.md b/docs/sparkr.md index b4248e8..55e8f15 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -70,12 +70,12 @@ The following Spark driver properties can be set in `sparkConfig` with `sparkR.s <td><code>--master</code></td> </tr> <tr> - <td><code>spark.yarn.keytab</code></td> + <td><code>spark.kerberos.keytab</code></td> <td>Application Properties</td> <td><code>--keytab</code></td> </tr> <tr> - <td><code>spark.yarn.principal</code></td> + <td><code>spark.kerberos.principal</code></td> <td>Application Properties</td> <td><code>--principal</code></td> </tr> http://git-wip-us.apache.org/repos/asf/spark/blob/51540c2f/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala ---------------------------------------------------------------------- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index a882558..135430f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -59,6 +59,8 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) "spark.yarn.jars", "spark.yarn.keytab", "spark.yarn.principal", + "spark.kerberos.keytab", + "spark.kerberos.principal", "spark.ui.filters", "spark.mesos.driver.frameworkId") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For 
additional commands, e-mail: commits-h...@spark.apache.org