[ https://issues.apache.org/jira/browse/KAFKA-7812?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16754787#comment-16754787 ]
Abhi commented on KAFKA-7812: ----------------------------- Hi [~rsivaram], Yes I am running the servers with `sun.security.jgss.native=true`. There weren't a huge number of connections but I was running a performance test when this happened so there would have been a lot of kafka message requests being sent. > Deadlock in SaslServerAuthenticator related threads > --------------------------------------------------- > > Key: KAFKA-7812 > URL: https://issues.apache.org/jira/browse/KAFKA-7812 > Project: Kafka > Issue Type: Bug > Components: core > Affects Versions: 2.0.0 > Reporter: Abhi > Priority: Major > Attachments: threaddump-100cpu-tbd-broker-5 > > > I am encountering a deadlock situation in SaslServerAuthenticator related > code path where one thread is waiting for a monitor object locked by another > thread. > +*Thread 1:*+ > "kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-0" #66 > prio=5 os_prio=0 tid=0x00007fe131e17000 nid=0x78f7 runnable > [{color:#d04437}*0x00007fde287ed000*{color}] > java.lang.Thread.State: RUNNABLE > at java.util.HashMap$TreeNode.find(HashMap.java:1865) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.find(HashMap.java:1861) > at java.util.HashMap$TreeNode.putTreeVal(HashMap.java:1979) > at java.util.HashMap.putVal(HashMap.java:637) > at java.util.HashMap.put(HashMap.java:611) > at java.util.HashSet.add(HashSet.java:219) > at javax.security.auth.Subject$ClassSet.populateSet(Subject.java:1418) > at javax.security.auth.Subject$ClassSet.<init>(Subject.java:1372) > - *{color:#f79232}locked <0x000000068893aae8>{color}* (a > java.util.Collections$SynchronizedSet) > at 
javax.security.auth.Subject.getPrivateCredentials(Subject.java:767) > at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:343) > at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:335) > at java.security.AccessController.doPrivileged(Native Method) > at sun.security.jgss.GSSUtil.searchSubject(GSSUtil.java:335) > at > sun.security.jgss.wrapper.NativeGSSFactory.getCredFromSubject(NativeGSSFactory.java:53) > at > sun.security.jgss.wrapper.NativeGSSFactory.getCredentialElement(NativeGSSFactory.java:116) > at > sun.security.jgss.GSSManagerImpl.getCredentialElement(GSSManagerImpl.java:193) > at sun.security.jgss.GSSCredentialImpl.add(GSSCredentialImpl.java:427) > at sun.security.jgss.GSSCredentialImpl.<init>(GSSCredentialImpl.java:62) > at sun.security.jgss.GSSManagerImpl.createCredential(GSSManagerImpl.java:154) > at com.sun.security.sasl.gsskerb.GssKrb5Server.<init>(GssKrb5Server.java:108) > at > com.sun.security.sasl.gsskerb.FactoryImpl.createSaslServer(FactoryImpl.java:85) > at javax.security.sasl.Sasl.createSaslServer(Sasl.java:524) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:215) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:213) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:213) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253) > at > 
org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127) > at > org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487) > at org.apache.kafka.common.network.Selector.poll(Selector.java:425) > at kafka.network.Processor.poll(SocketServer.scala:678) > at kafka.network.Processor.run(SocketServer.scala:583) > at java.lang.Thread.run(Thread.java:745) > Locked ownable synchronizers: > - None > *+Thread 2:+* > "kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-2" #68 > prio=5 os_prio=0 tid=0x00007fe131e1a800 nid=0x78f9 waiting for monitor entry > [{color:#d04437}*0x00007fde277ed000*{color}] > java.lang.Thread.State: BLOCKED (on object monitor) > at java.util.Collections$SynchronizedCollection.add(Collections.java:2035) > - {color:#f79232}*waiting to lock <0x000000068893aae8>*{color} (a > java.util.Collections$SynchronizedSet) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:206) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253) > at > org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127) > at > org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487) > at org.apache.kafka.common.network.Selector.poll(Selector.java:425) > at kafka.network.Processor.poll(SocketServer.scala:678) > at kafka.network.Processor.run(SocketServer.scala:583) > at java.lang.Thread.run(Thread.java:745) > Locked ownable synchronizers: > - None > > +*Thread 3:*+ > "kafka-network-thread-5-ListenerName(SASL_PLAINTEXT)-SASL_PLAINTEXT-1" #67 > prio=5 os_prio=0 tid=0x00007fe131e18800 nid=0x78f8 
waiting for monitor entry > [{color:#59afe1}0x00007fde27fec000{color}] > java.lang.Thread.State: BLOCKED (on object monitor) > at javax.security.auth.Subject$ClassSet.<init>(Subject.java:1372) > - *{color:#f79232}waiting to lock <0x000000068893aae8>{color}* (a > java.util.Collections$SynchronizedSet) > at javax.security.auth.Subject.getPrivateCredentials(Subject.java:767) > at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:343) > at sun.security.jgss.GSSUtil$1.run(GSSUtil.java:335) > at java.security.AccessController.doPrivileged(Native Method) > at sun.security.jgss.GSSUtil.searchSubject(GSSUtil.java:335) > at > sun.security.jgss.wrapper.NativeGSSFactory.getCredFromSubject(NativeGSSFactory.java:53) > at > sun.security.jgss.wrapper.NativeGSSFactory.getCredentialElement(NativeGSSFactory.java:116) > at > sun.security.jgss.GSSManagerImpl.getCredentialElement(GSSManagerImpl.java:193) > at sun.security.jgss.GSSCredentialImpl.add(GSSCredentialImpl.java:427) > at sun.security.jgss.GSSCredentialImpl.<init>(GSSCredentialImpl.java:62) > at sun.security.jgss.GSSManagerImpl.createCredential(GSSManagerImpl.java:154) > at com.sun.security.sasl.gsskerb.GssKrb5Server.<init>(GssKrb5Server.java:108) > at > com.sun.security.sasl.gsskerb.FactoryImpl.createSaslServer(FactoryImpl.java:85) > at javax.security.sasl.Sasl.createSaslServer(Sasl.java:524) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:215) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator$2.run(SaslServerAuthenticator.java:213) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslKerberosServer(SaslServerAuthenticator.java:213) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.createSaslServer(SaslServerAuthenticator.java:162) > at > 
org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.handleKafkaRequest(SaslServerAuthenticator.java:443) > at > org.apache.kafka.common.security.authenticator.SaslServerAuthenticator.authenticate(SaslServerAuthenticator.java:253) > at > org.apache.kafka.common.network.KafkaChannel.prepare(KafkaChannel.java:127) > at > org.apache.kafka.common.network.Selector.pollSelectionKeys(Selector.java:487) > at org.apache.kafka.common.network.Selector.poll(Selector.java:425) > at kafka.network.Processor.poll(SocketServer.scala:678) > at kafka.network.Processor.run(SocketServer.scala:583) > at java.lang.Thread.run(Thread.java:745) > Locked ownable synchronizers: > - None > > This deadlock is causing the Kafka broker to run at 100% CPU, causing exceptions > in other brokers and affecting replication and other functionality. > +output of top:+ > PID USER PR NI VIRT RES SHR S *%CPU* %MEM TIME+ COMMAND > 30857 kafkatst 20 0 14.9g 3.9g 17600 S *100.0* 12.7 2727:53 java > The connections with other brokers are stuck in CLOSE_WAIT state: > afka...@kafkatest-01.tbd[nj2]:/local/kafka> lsof -p 30857 | grep TCP > java 30857 kafkatst 88u IPv4 63730863 0t0 TCP *:34982 (LISTEN) > java 30857 kafkatst 89u IPv4 63730864 0t0 TCP *:mwkafka_staging_jmx (LISTEN) > java 30857 kafkatst 108u IPv4 67204682 0t0 TCP > kafkatest-01.tbd.xxxx.com:44540->mwkafka-zk-test-02.nyc.xxxx.com:eforward > (ESTABLISHED) > java 30857 kafkatst 288u IPv4 63728109 0t0 TCP *:XmlIpcRegSvc (LISTEN) > java 30857 kafkatst 298u IPv4 68617055 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.dr.xxxx.com:34318 > (ESTABLISHED) > java 30857 kafkatst 305u IPv4 63722102 0t0 TCP > kafkatest-01.tbd.xxxx.com:41354->mwkafka-zk-test-05.tbd.xxxx.com:eforward > (ESTABLISHED) > java 30857 kafkatst 308u IPv4 68616940 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.dr.xxxx.com:37050 > (CLOSE_WAIT) > java 30857 kafkatst 318u IPv4 63728144 0t0 TCP > 
kafkatest-01.tbd.xxxx.com:m3ap->kafkatest-01.dr.xxxx.com:XmlIpcRegSvc > (ESTABLISHED) > java 30857 kafkatst 319u IPv4 63722104 0t0 TCP > kafkatest-01.tbd.xxxx.com:50960->kafkatest-01.nyc.xxxx.com:XmlIpcRegSvc > (ESTABLISHED) > java 30857 kafkatst 320u IPv4 63722109 0t0 TCP > kafkatest-01.tbd.xxxx.com:42892->kafkatest-02.nyc.xxxx.com:XmlIpcRegSvc > (ESTABLISHED) > java 30857 kafkatst 321u IPv4 68616946 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.nyc.xxxx.com:54892 > (CLOSE_WAIT) > java 30857 kafkatst 322u IPv4 68617070 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.nyc.xxxx.com:47308 > (ESTABLISHED) > java 30857 kafkatst 323u IPv4 68617081 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.dr.xxxx.com:37060 > (ESTABLISHED) > java 30857 kafkatst 324u IPv4 68617095 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-02.nyc.xxxx.com:54904 > (ESTABLISHED) > java 30857 kafkatst 327u IPv4 68616885 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.dr.xxxx.com:34298 > (CLOSE_WAIT) > java 30857 kafkatst 328u IPv4 68616894 0t0 TCP > kafkatest-01.tbd.xxxx.com:XmlIpcRegSvc->kafkatest-01.nyc.xxxx.com:47290 > (CLOSE_WAIT) > > I have attached a full thread dump here for reference. Please check this. -- This message was sent by Atlassian JIRA (v7.6.3#76005)