[
https://issues.apache.org/jira/browse/HDDS-9442?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
István Fajth updated HDDS-9442:
-------------------------------
Description:
After an OM leader change, long-running jobs that use Delegation Tokens can no
longer use the cluster and fail.
The problem is with Delegation Token renewal, and the following exception can
be seen in the leader OM logs:
{code}
WARN org.apache.hadoop.ipc.Client: Exception encountered while connecting to
the server : javax.security.sasl.SaslException: GSS initiate failed [Caused by
GSSException: No valid credentials provided (Mechanism level: Failed to find
any Kerberos tgt)]
DEBUG org.apache.hadoop.security.UserGroupInformation:
PrivilegedActionException as:<non_om_principal> (auth:KERBEROS)
cause:java.io.IOException: javax.security.sasl.SaslException: GSS initiate
failed [Caused by GSSException: No valid credentials provided (Mechanism level:
Failed to find any Kerberos tgt)]
DEBUG org.apache.hadoop.ipc.Client: closing ipc connection to <server>:9961:
javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException:
No valid credentials provided (Mechanism level: Failed to find any Kerberos
tgt)]
java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed
[Caused by GSSException: No valid credentials provided (Mechanism level: Failed
to find any Kerberos tgt)]
at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:805)
at java.base/java.security.AccessController.doPrivileged(Native Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
at
org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:768)
at
org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:863)
at org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:430)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1678)
at org.apache.hadoop.ipc.Client.call(Client.java:1503)
at org.apache.hadoop.ipc.Client.call(Client.java:1456)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
at com.sun.proxy.$Proxy57.submitRequest(Unknown Source)
at jdk.internal.reflect.GeneratedMethodAccessor637.invoke(Unknown
Source)
at
java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:431)
at
org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:166)
at
org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:158)
at
org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:96)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:362)
at com.sun.proxy.$Proxy57.submitRequest(Unknown Source)
at
org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB.submitRequest(SCMSecurityProtocolClientSideTranslatorPB.java:108)
at
org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB.getCertificate(SCMSecurityProtocolClientSideTranslatorPB.java:260)
at
org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient.getCertificateFromScm(DefaultCertificateClient.java:342)
at
org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient.getCertificate(DefaultCertificateClient.java:297)
at
org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.verifySignature(OzoneDelegationTokenSecretManager.java:469)
at
org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.validateToken(OzoneDelegationTokenSecretManager.java:453)
at
org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.renewToken(OzoneDelegationTokenSecretManager.java:313)
at
org.apache.hadoop.ozone.om.OzoneManager.renewDelegationToken(OzoneManager.java:2314)
at
org.apache.hadoop.ozone.om.request.security.OMRenewDelegationTokenRequest.preExecute(OMRenewDelegationTokenRequest.java:86)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.processRequest(OzoneManagerProtocolServerSideTranslatorPB.java:190)
at
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitRequest(OzoneManagerProtocolServerSideTranslatorPB.java:147)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$OzoneManagerService$2.callBlockingMethod(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:989)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:917)
at java.base/java.security.AccessController.doPrivileged(Native Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2894)
Caused by: javax.security.sasl.SaslException: GSS initiate failed [Caused by
GSSException: No valid credentials provided (Mechanism level: Failed to find
any Kerberos tgt)]
at
jdk.security.jgss/com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:211)
at
org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:408)
at
org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:639)
at org.apache.hadoop.ipc.Client$Connection.access$2300(Client.java:430)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:850)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:846)
at java.base/java.security.AccessController.doPrivileged(Native Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
at
org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:846)
... 37 more
Caused by: GSSException: No valid credentials provided (Mechanism level: Failed
to find any Kerberos tgt)
at
java.security.jgss/sun.security.jgss.krb5.Krb5InitCredential.getInstance(Krb5InitCredential.java:147)
at
java.security.jgss/sun.security.jgss.krb5.Krb5MechFactory.getCredentialElement(Krb5MechFactory.java:126)
at
java.security.jgss/sun.security.jgss.krb5.Krb5MechFactory.getMechanismContext(Krb5MechFactory.java:191)
at
java.security.jgss/sun.security.jgss.GSSManagerImpl.getMechanismContext(GSSManagerImpl.java:218)
at
java.security.jgss/sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:230)
at
java.security.jgss/sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:196)
at
jdk.security.jgss/com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:192)
... 46 more
{code}
The job itself fails due to this error upon DT renewal:
{code}
INFO security.HadoopFSDelegationTokenProvider: getting token for: class
org.apache.hadoop.fs.ozone.RootedOzoneFileSystem:ofs://ozone with renewer
<non_om_principal>
{code}
was:
In case an OM Leader change happens, none of the longer running jobs that are
using Delegation Tokens can use the cluster anymore and fail. (reported by
[~pifta])
More details to be filled in later.
> Token verification from OMs at DT renew happens in the wrong login context.
> ---------------------------------------------------------------------------
>
> Key: HDDS-9442
> URL: https://issues.apache.org/jira/browse/HDDS-9442
> Project: Apache Ozone
> Issue Type: Bug
> Components: Ozone Manager, Security
> Reporter: István Fajth
> Assignee: István Fajth
> Priority: Critical
> Fix For: 1.4.0
>
>
> After an OM leader change, long-running jobs that use Delegation Tokens can no
> longer use the cluster and fail.
> The problem is with Delegation Token renewal, and the following exception can
> be seen in the leader OM logs:
> {code}
> WARN org.apache.hadoop.ipc.Client: Exception encountered while connecting to
> the server : javax.security.sasl.SaslException: GSS initiate failed [Caused
> by GSSException: No valid credentials provided (Mechanism level: Failed to
> find any Kerberos tgt)]
> DEBUG org.apache.hadoop.security.UserGroupInformation:
> PrivilegedActionException as:<non_om_principal> (auth:KERBEROS)
> cause:java.io.IOException: javax.security.sasl.SaslException: GSS initiate
> failed [Caused by GSSException: No valid credentials provided (Mechanism
> level: Failed to find any Kerberos tgt)]
> DEBUG org.apache.hadoop.ipc.Client: closing ipc connection to <server>:9961:
> javax.security.sasl.SaslException: GSS initiate failed [Caused by
> GSSException: No valid credentials provided (Mechanism level: Failed to find
> any Kerberos tgt)]
> java.io.IOException: javax.security.sasl.SaslException: GSS initiate failed
> [Caused by GSSException: No valid credentials provided (Mechanism level:
> Failed to find any Kerberos tgt)]
> at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:805)
> at java.base/java.security.AccessController.doPrivileged(Native
> Method)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at
> org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:768)
> at
> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:863)
> at
> org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:430)
> at org.apache.hadoop.ipc.Client.getConnection(Client.java:1678)
> at org.apache.hadoop.ipc.Client.call(Client.java:1503)
> at org.apache.hadoop.ipc.Client.call(Client.java:1456)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)
> at com.sun.proxy.$Proxy57.submitRequest(Unknown Source)
> at jdk.internal.reflect.GeneratedMethodAccessor637.invoke(Unknown
> Source)
> at
> java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.base/java.lang.reflect.Method.invoke(Method.java:566)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:431)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:166)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:158)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:96)
> at
> org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:362)
> at com.sun.proxy.$Proxy57.submitRequest(Unknown Source)
> at
> org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB.submitRequest(SCMSecurityProtocolClientSideTranslatorPB.java:108)
> at
> org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB.getCertificate(SCMSecurityProtocolClientSideTranslatorPB.java:260)
> at
> org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient.getCertificateFromScm(DefaultCertificateClient.java:342)
> at
> org.apache.hadoop.hdds.security.x509.certificate.client.DefaultCertificateClient.getCertificate(DefaultCertificateClient.java:297)
> at
> org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.verifySignature(OzoneDelegationTokenSecretManager.java:469)
> at
> org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.validateToken(OzoneDelegationTokenSecretManager.java:453)
> at
> org.apache.hadoop.ozone.security.OzoneDelegationTokenSecretManager.renewToken(OzoneDelegationTokenSecretManager.java:313)
> at
> org.apache.hadoop.ozone.om.OzoneManager.renewDelegationToken(OzoneManager.java:2314)
> at
> org.apache.hadoop.ozone.om.request.security.OMRenewDelegationTokenRequest.preExecute(OMRenewDelegationTokenRequest.java:86)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.processRequest(OzoneManagerProtocolServerSideTranslatorPB.java:190)
> at
> org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitRequest(OzoneManagerProtocolServerSideTranslatorPB.java:147)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$OzoneManagerService$2.callBlockingMethod(OzoneManagerProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:989)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:917)
> at java.base/java.security.AccessController.doPrivileged(Native
> Method)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2894)
> Caused by: javax.security.sasl.SaslException: GSS initiate failed [Caused by
> GSSException: No valid credentials provided (Mechanism level: Failed to find
> any Kerberos tgt)]
> at
> jdk.security.jgss/com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:211)
> at
> org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:408)
> at
> org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:639)
> at
> org.apache.hadoop.ipc.Client$Connection.access$2300(Client.java:430)
> at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:850)
> at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:846)
> at java.base/java.security.AccessController.doPrivileged(Native
> Method)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1898)
> at
> org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:846)
> ... 37 more
> Caused by: GSSException: No valid credentials provided (Mechanism level:
> Failed to find any Kerberos tgt)
> at
> java.security.jgss/sun.security.jgss.krb5.Krb5InitCredential.getInstance(Krb5InitCredential.java:147)
> at
> java.security.jgss/sun.security.jgss.krb5.Krb5MechFactory.getCredentialElement(Krb5MechFactory.java:126)
> at
> java.security.jgss/sun.security.jgss.krb5.Krb5MechFactory.getMechanismContext(Krb5MechFactory.java:191)
> at
> java.security.jgss/sun.security.jgss.GSSManagerImpl.getMechanismContext(GSSManagerImpl.java:218)
> at
> java.security.jgss/sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:230)
> at
> java.security.jgss/sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:196)
> at
> jdk.security.jgss/com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:192)
> ... 46 more
> {code}
> The job itself fails due to this error upon DT renewal:
> {code}
> INFO security.HadoopFSDelegationTokenProvider: getting token for: class
> org.apache.hadoop.fs.ozone.RootedOzoneFileSystem:ofs://ozone with renewer
> <non_om_principal>
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]