This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 7cf579fc8cf [SPARK-40612][CORE] Fixing the principal used for delegation token renewal on non-YARN resource managers 7cf579fc8cf is described below commit 7cf579fc8cfc8e6de3acb1d16436ca5024f9e61e Author: attilapiros <piros.attila.zs...@gmail.com> AuthorDate: Fri Sep 30 14:52:40 2022 -0700 [SPARK-40612][CORE] Fixing the principal used for delegation token renewal on non-YARN resource managers ### What changes were proposed in this pull request? When the delegation token is fetched for the first time (see the `fetchDelegationTokens()` call at `HadoopFSDelegationTokenProvider#getTokenRenewalInterval()`) the principal is the current user, but the subsequent token renewals (see `obtainDelegationTokens()`, where `getTokenRenewer()` is used to identify the principal) use a MapReduce/YARN-specific principal even on resource managers different from YARN. This PR fixes `getTokenRenewer()` to use the current user instead of `org.apache.hadoop.mapred.Master.getMasterPrincipal(hadoopConf)` when the resource manager is not YARN. The condition `(master != null && master.contains("yarn"))` is the very same as what we already have in `hadoopFSsToAccess()`. I would like to say thank you to squito, who did the investigation regarding the problem which led to this PR. ### Why are the changes needed? To avoid `org.apache.hadoop.security.AccessControlException: Permission denied.` for long running applications. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. Closes #38048 from attilapiros/SPARK-40612. 
Authored-by: attilapiros <piros.attila.zs...@gmail.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit 6484992535767ae8dc93df1c79efc66420728155) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../deploy/security/HadoopFSDelegationTokenProvider.scala | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index 3120d482f11..6ec281f5b44 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -52,8 +52,8 @@ private[deploy] class HadoopFSDelegationTokenProvider val fsToExclude = sparkConf.get(YARN_KERBEROS_FILESYSTEM_RENEWAL_EXCLUDE) .map(new Path(_).getFileSystem(hadoopConf).getUri.getHost) .toSet - val fetchCreds = fetchDelegationTokens(getTokenRenewer(hadoopConf), fileSystems, creds, - fsToExclude) + val fetchCreds = fetchDelegationTokens(getTokenRenewer(sparkConf, hadoopConf), fileSystems, + creds, fsToExclude) // Get the token renewal interval if it is not set. It will only be called once. 
if (tokenRenewalInterval == null) { @@ -88,8 +88,13 @@ private[deploy] class HadoopFSDelegationTokenProvider UserGroupInformation.isSecurityEnabled } - private def getTokenRenewer(hadoopConf: Configuration): String = { - val tokenRenewer = Master.getMasterPrincipal(hadoopConf) + private def getTokenRenewer(sparkConf: SparkConf, hadoopConf: Configuration): String = { + val master = sparkConf.get("spark.master", null) + val tokenRenewer = if (master != null && master.contains("yarn")) { + Master.getMasterPrincipal(hadoopConf) + } else { + UserGroupInformation.getCurrentUser().getUserName() + } logDebug("Delegation token renewer is: " + tokenRenewer) if (tokenRenewer == null || tokenRenewer.length() == 0) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org