[ 
https://issues.apache.org/jira/browse/YARN-4336?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14993899#comment-14993899
 ] 

Greg Senia commented on YARN-4336:
----------------------------------

[~jlowe] I dumped stack traces below, and they seem to match what was done in 
HADOOP-10650. Do you see an issue with using my workaround in my own environment 
for now, until HWX can provide a final solution?

Seems like this could also be related...

https://issues.apache.org/jira/browse/HADOOP-12413

Stack Trace:
2015-11-06 11:25:52,313 DEBUG ipc.Server (Server.java:processOneRpc(1762)) -  
got #-33
2015-11-06 11:25:52,313 DEBUG security.SaslRpcServer 
(SaslRpcServer.java:create(174)) - Created SASL server with mechanism = DIGEST-
MD5
2015-11-06 11:25:52,314 DEBUG ipc.Server (Server.java:doSaslReply(1424)) - 
Sending sasl message state: NEGOTIATE
auths {
  method: "TOKEN"
  mechanism: "DIGEST-MD5"
  protocol: ""
  serverId: "default"
  challenge: 
"realm=\"default\",nonce=\"389ZufpXfkC6CKunYceHayMBI3KM7v3keu9nPC/b\",qop=\"auth\",charset=utf-8,algorithm=md5-sess"
}
auths {
  method: "KERBEROS"
  mechanism: "GSSAPI"
  protocol: "nm"
  serverId: "xhadoopm5d.example.com"
}

2015-11-06 11:25:52,314 DEBUG ipc.Server (Server.java:processResponse(972)) - 
Socket Reader #1 for port 8040: responding to null fro
m 157.121.72.167:64599 Call#-33 Retry#-1
2015-11-06 11:25:52,314 DEBUG ipc.Server (Server.java:processResponse(991)) - 
Socket Reader #1 for port 8040: responding to null fro
m 157.121.72.167:64599 Call#-33 Retry#-1 Wrote 212 bytes.
2015-11-06 11:25:52,343 DEBUG ipc.Server (Server.java:processOneRpc(1762)) -  
got #-33
2015-11-06 11:25:52,343 DEBUG ipc.Server (Server.java:processSaslToken(1393)) - 
Have read input token of size 246 for processing by 
saslServer.evaluateResponse()
2015-11-06 11:25:52,344 DEBUG security.SaslRpcServer 
(SaslRpcServer.java:handle(308)) - SASL server DIGEST-MD5 callback: setting pas
sword for client: testing (auth:SIMPLE)
2015-11-06 11:25:52,344 DEBUG security.SaslRpcServer 
(SaslRpcServer.java:handle(325)) - SASL server DIGEST-MD5 callback: setting can
onicalized client ID: testing
2015-11-06 11:25:52,345 DEBUG ipc.Server (Server.java:buildSaslResponse(1410)) 
- Will send SUCCESS token of size 40 from saslServer.
2015-11-06 11:25:52,345 DEBUG ipc.Server (Server.java:saslProcess(1298)) - SASL 
server context established. Negotiated QoP is auth
2015-11-06 11:25:52,345 DEBUG ipc.Server (Server.java:saslProcess(1303)) - SASL 
server successfully authenticated client: testing (a
uth:SIMPLE)
2015-11-06 11:25:52,345 INFO  ipc.Server (Server.java:saslProcess(1306)) - Auth 
successful for testing (auth:SIMPLE)
2015-11-06 11:25:52,345 DEBUG ipc.Server (Server.java:doSaslReply(1424)) - 
Sending sasl message state: SUCCESS
token: "rspauth=9bfdf3e61c489664e885d7043b352c24"

2015-11-06 11:25:52,345 DEBUG ipc.Server (Server.java:processResponse(972)) - 
Socket Reader #1 for port 8040: responding to null fro
m 157.121.72.167:64599 Call#-33 Retry#-1
2015-11-06 11:25:52,346 DEBUG ipc.Server (Server.java:processResponse(991)) - 
Socket Reader #1 for port 8040: responding to null fro
m 157.121.72.167:64599 Call#-33 Retry#-1 Wrote 64 bytes.
2015-11-06 11:25:52,357 DEBUG ipc.Server (Server.java:processOneRpc(1762)) -  
got #-3
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - java.lang.Thread.getSt
ackTrace(Thread.java:1589)
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1487)
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.security.authorize.AccessControlList.isUserInList(AccessControlList.java:252)
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.security.authorize.AccessControlList.isUserAllowed(AccessControlList.java:262)
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.security.authorize.ServiceAuthorizationManager.authorize(ServiceAuthorizationManager.java:110)
2015-11-06 11:25:52,357 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server.authorize(Server.java:2507)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server.access$3300(Server.java:135)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Connection.authorizeConnection(Server.java:1923)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Connection.processConnectionContext(Server.java:1690)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Connection.processRpcOutOfBandRequest(Server.java:1891)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Connection.processOneRpc(Server.java:1767)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Connection.readAndProcess(Server.java:1531)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Listener.doRead(Server.java:762)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Listener$Reader.doRunLoop(Server.java:636)
2015-11-06 11:25:52,358 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1488)) - 
org.apache.hadoop.ipc.Server$Listener$Reader.run(Server.java:607)
2015-11-06 11:25:52,370 WARN  security.UserGroupInformation 
(UserGroupInformation.java:getGroupNames(1495)) - No groups available for user 
testing
2015-11-06 11:25:52,370 INFO  authorize.ServiceAuthorizationManager 
(ServiceAuthorizationManager.java:authorize(118)) - Authorization successful 
for testing (auth:TOKEN) for protocol=interface 
org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocolPB
2015-11-06 11:25:52,370 DEBUG ipc.Server 
(Server.java:authorizeConnection(1925)) - Successfully authorized userInfo {
}
protocol: "org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocolPB"

2015-11-06 11:25:52,370 DEBUG ipc.Server (Server.java:processOneRpc(1762)) -  
got #0
2015-11-06 11:25:52,371 DEBUG ipc.Server (Server.java:run(2009)) - IPC Server 
handler 0 on 8040: 
org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocolPB.heartbeat 
from 157.121.72.167:64599 Call#0 Retry#0 for RpcKind RPC_PROTOCOL_BUFFER
2015-11-06 11:25:52,371 DEBUG security.UserGroupInformation 
(UserGroupInformation.java:logPrivilegedAction(1655)) - PrivilegedAction 
as:testing (auth:TOKEN) 
from:org.apache.hadoop.ipc.Server$Handler.run(Server.java:2033)




> YARN NodeManager - Container Initialization - Excessive load on NSS/LDAP
> ------------------------------------------------------------------------
>
>                 Key: YARN-4336
>                 URL: https://issues.apache.org/jira/browse/YARN-4336
>             Project: Hadoop YARN
>          Issue Type: Bug
>    Affects Versions: 2.4.0, 2.4.1, 2.6.0, 2.7.0, 2.6.1, 2.7.1
>         Environment: NSS w/ SSSD or Dell/Quest - VASD
>            Reporter: Greg Senia
>            Assignee: Greg Senia
>         Attachments: YARN-4336-tactical.txt
>
>
> Hi folks, after doing some debugging for our Unix Engineering and Active 
> Directory teams, we discovered that YARN container initialization makes a 
> call via Hadoop Common AccessControlList.java:
>   for(String group: ugi.getGroupNames()) {
>         if (groups.contains(group)) {
>           return true;
>         }
>       }
> Unfortunately, the security call that checks access for 
> "appattempt_XXXXXXXXXXXXX_XXXXX_XXXXX" will always return false, but it still 
> makes unnecessary calls to the Name Service Switch (NSS) on Linux, which calls 
> things like SSSD/Quest VASD, which in turn initiate LDAP lookups for 
> nonexistent user IDs, causing excessive load on LDAP.
> For now, our tactical workaround is as follows:
> /**
>    * Checks if a user represented by the provided {@link UserGroupInformation}
>    * is a member of the Access Control List
>    * @param ugi UserGroupInformation to check if contained in the ACL
>    * @return true if ugi is member of the list
>    */
>   public final boolean isUserInList(UserGroupInformation ugi) {
>     if (allAllowed || users.contains(ugi.getShortUserName())) {
>       return true;
>     } else {
>         String patternString = "^appattempt_\\d+_\\d+_\\d+$";
>         Pattern pattern = Pattern.compile(patternString);
>         Matcher matcher = pattern.matcher(ugi.getShortUserName());
>         boolean matches = matcher.matches();
>         if (matches) {
>               LOG.debug("Bailing !! AppAttempt Matches DONOT call UGI FOR 
> GROUPS!!");;
>               return false;
>         }
>       
>       
>       for(String group: ugi.getGroupNames()) {
>         if (groups.contains(group)) {
>           return true;
>         }
>       }
>     }
>     return false;
>   }
>   public boolean isUserAllowed(UserGroupInformation ugi) {
>     return isUserInList(ugi);
>   }
> Example of a VASD debug log showing the lookups for one task attempt (32 of them):
> One task:
> Oct 30 22:55:43 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:55:43 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:55:43 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:55:43 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:56:15 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:56:15 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:56:15 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:56:15 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:56:45 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:56:45 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:56:45 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:56:45 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:57:18 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:57:18 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:57:18 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:57:18 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:57:49 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:57:49 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:57:49 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:57:49 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:58:22 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:58:22 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:58:22 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:58:22 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:58:52 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:58:52 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:58:52 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:58:52 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:59:30 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:59:30 xhadoopm5d vasd[20741]: _vasug_user_namesearch_gc: searching 
> GC for host service domain EXNSD.EXA.EXAMPLE.COM with filter 
> (&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))
> Oct 30 22:59:30 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>
> Oct 30 22:59:30 xhadoopm5d vasd[20741]: libvas_attrs_find_uri: Searching 
> <GC://@EXNSD.EXA.EXAMPLE.COM> with 
> filter=<(&(objectCategory=Person)(samaccountname=appattempt_1446145939879_0022_000001))>,
>  base=<>, scope=<sub>



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to