[ https://issues.apache.org/jira/browse/KAFKA-6917?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jason Gustafson resolved KAFKA-6917. ------------------------------------ Resolution: Fixed > Request handler deadlocks attempting to acquire group metadata lock > ------------------------------------------------------------------- > > Key: KAFKA-6917 > URL: https://issues.apache.org/jira/browse/KAFKA-6917 > Project: Kafka > Issue Type: Task > Components: core > Affects Versions: 1.1.0, 1.0.1 > Reporter: Rajini Sivaram > Assignee: Rajini Sivaram > Priority: Blocker > Fix For: 2.0.0, 1.0.2, 1.1.1 > > > We have noticed another deadlock with the group metadata lock with version > 1.1. > {quote} > Found one Java-level deadlock: > ============================= > "executor-Heartbeat": > waiting for ownable synchronizer 0x00000005ce477080, (a > java.util.concurrent.locks.ReentrantLock$NonfairSync), > which is held by "kafka-request-handler-3" > "kafka-request-handler-3": > waiting for ownable synchronizer 0x00000005cbe7f698, (a > java.util.concurrent.locks.ReentrantLock$NonfairSync), > which is held by "kafka-request-handler-4" > "kafka-request-handler-4": > waiting for ownable synchronizer 0x00000005ce477080, (a > java.util.concurrent.locks.ReentrantLock$NonfairSync), > which is held by "kafka-request-handler-3" > Java stack information for the threads listed above: > =================================================== > "executor-Heartbeat": > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000005ce477080> (a > java.util.concurrent.locks.ReentrantLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) > at > java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) > at > java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:248) > at > kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:188) > at > kafka.coordinator.group.GroupCoordinator.onExpireHeartbeat(GroupCoordinator.scala:833) > at > kafka.coordinator.group.DelayedHeartbeat.onExpiration(DelayedHeartbeat.scala:34) > at kafka.server.DelayedOperation.run(DelayedOperation.scala:144) > at > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) > at java.util.concurrent.FutureTask.run(FutureTask.java:266) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > "kafka-request-handler-3": > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000005cbe7f698> (a > java.util.concurrent.locks.ReentrantLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) > at > java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) > at > java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:248) > at > kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:188) > at > kafka.coordinator.group.GroupMetadataManager$$anonfun$handleTxnCompletion$1.apply(GroupMetadataManager.scala:801) > at > kafka.coordinator.group.GroupMetadataManager$$anonfun$handleTxnCompletion$1.apply(GroupMetadataManager.scala:799) > at scala.collection.mutable.HashSet.foreach(HashSet.scala:78) > at > kafka.coordinator.group.GroupMetadataManager.handleTxnCompletion(GroupMetadataManager.scala:799) > at > kafka.coordinator.group.GroupCoordinator.handleTxnCompletion(GroupCoordinator.scala:496) > at > kafka.server.KafkaApis.kafka$server$KafkaApis$$maybeSendResponseCallback$1(KafkaApis.scala:1633) > at > kafka.server.KafkaApis$$anonfun$handleWriteTxnMarkersRequest$1$$anonfun$apply$19.apply(KafkaApis.scala:1691) > at > kafka.server.KafkaApis$$anonfun$handleWriteTxnMarkersRequest$1$$anonfun$apply$19.apply(KafkaApis.scala:1691) > at kafka.server.DelayedProduce.onComplete(DelayedProduce.scala:129) > at > kafka.server.DelayedOperation.forceComplete(DelayedOperation.scala:70) > at kafka.server.DelayedProduce.tryComplete(DelayedProduce.scala:111) > at > kafka.server.DelayedOperation.maybeTryComplete(DelayedOperation.scala:121) > at > kafka.server.DelayedOperationPurgatory$Watchers.tryCompleteWatched(DelayedOperation.scala:371) > at > kafka.server.DelayedOperationPurgatory.checkAndComplete(DelayedOperation.scala:277) > at > kafka.server.ReplicaManager.tryCompleteDelayedProduce(ReplicaManager.scala:294) > at > kafka.cluster.Partition.tryCompleteDelayedRequests(Partition.scala:498) > at kafka.cluster.Partition.appendRecordsToLeader(Partition.scala:592) > at > kafka.server.ReplicaManager$$anonfun$appendToLocalLog$2.apply(ReplicaManager.scala:744) > at > kafka.server.ReplicaManager$$anonfun$appendToLocalLog$2.apply(ReplicaManager.scala:728) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.Map$Map1.foreach(Map.scala:116) > at > scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > kafka.server.ReplicaManager.appendToLocalLog(ReplicaManager.scala:728) > at kafka.server.ReplicaManager.appendRecords(ReplicaManager.scala:469) > at > kafka.coordinator.group.GroupMetadataManager.appendForGroup(GroupMetadataManager.scala:285) > at > kafka.coordinator.group.GroupMetadataManager.storeOffsets(GroupMetadataManager.scala:428) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply$mcV$sp(GroupCoordinator.scala:512) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply(GroupCoordinator.scala:507) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply(GroupCoordinator.scala:507) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:250) > at > kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:188) > at > kafka.coordinator.group.GroupCoordinator.doCommitOffsets(GroupCoordinator.scala:506) > at > kafka.coordinator.group.GroupCoordinator.handleTxnCommitOffsets(GroupCoordinator.scala:460) > at > kafka.server.KafkaApis.handleTxnOffsetCommitRequest(KafkaApis.scala:1833) > at kafka.server.KafkaApis.handle(KafkaApis.scala:139) > at kafka.server.KafkaRequestHandler.run(KafkaRequestHandler.scala:69) > at java.lang.Thread.run(Thread.java:748) > "kafka-request-handler-4": > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000005ce477080> (a > java.util.concurrent.locks.ReentrantLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199) > at > java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209) > at > java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:248) > at > kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:188) > at > kafka.coordinator.group.GroupMetadataManager$$anonfun$handleTxnCompletion$1.apply(GroupMetadataManager.scala:801) > at > kafka.coordinator.group.GroupMetadataManager$$anonfun$handleTxnCompletion$1.apply(GroupMetadataManager.scala:799) > at scala.collection.mutable.HashSet.foreach(HashSet.scala:78) > at > kafka.coordinator.group.GroupMetadataManager.handleTxnCompletion(GroupMetadataManager.scala:799) > at > kafka.coordinator.group.GroupCoordinator.handleTxnCompletion(GroupCoordinator.scala:496) > at > kafka.server.KafkaApis.kafka$server$KafkaApis$$maybeSendResponseCallback$1(KafkaApis.scala:1633) > at > kafka.server.KafkaApis$$anonfun$handleWriteTxnMarkersRequest$1$$anonfun$apply$19.apply(KafkaApis.scala:1691) > at > kafka.server.KafkaApis$$anonfun$handleWriteTxnMarkersRequest$1$$anonfun$apply$19.apply(KafkaApis.scala:1691) > at kafka.server.DelayedProduce.onComplete(DelayedProduce.scala:129) > at > kafka.server.DelayedOperation.forceComplete(DelayedOperation.scala:70) > at kafka.server.DelayedProduce.tryComplete(DelayedProduce.scala:111) > at > kafka.server.DelayedOperation.maybeTryComplete(DelayedOperation.scala:121) > at > kafka.server.DelayedOperationPurgatory$Watchers.tryCompleteWatched(DelayedOperation.scala:371) > at > kafka.server.DelayedOperationPurgatory.checkAndComplete(DelayedOperation.scala:277) > at > kafka.server.ReplicaManager.tryCompleteDelayedProduce(ReplicaManager.scala:294) > at > kafka.cluster.Partition.tryCompleteDelayedRequests(Partition.scala:498) > at kafka.cluster.Partition.appendRecordsToLeader(Partition.scala:592) > at > kafka.server.ReplicaManager$$anonfun$appendToLocalLog$2.apply(ReplicaManager.scala:744) > at > kafka.server.ReplicaManager$$anonfun$appendToLocalLog$2.apply(ReplicaManager.scala:728) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.Map$Map1.foreach(Map.scala:116) > at > scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > kafka.server.ReplicaManager.appendToLocalLog(ReplicaManager.scala:728) > at kafka.server.ReplicaManager.appendRecords(ReplicaManager.scala:469) > at > kafka.coordinator.group.GroupMetadataManager.appendForGroup(GroupMetadataManager.scala:285) > at > kafka.coordinator.group.GroupMetadataManager.storeOffsets(GroupMetadataManager.scala:428) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply$mcV$sp(GroupCoordinator.scala:512) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply(GroupCoordinator.scala:507) > at > kafka.coordinator.group.GroupCoordinator$$anonfun$doCommitOffsets$1.apply(GroupCoordinator.scala:507) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:250) > at > kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:188) > at > kafka.coordinator.group.GroupCoordinator.doCommitOffsets(GroupCoordinator.scala:506) > at > kafka.coordinator.group.GroupCoordinator.handleTxnCommitOffsets(GroupCoordinator.scala:460) > at > kafka.server.KafkaApis.handleTxnOffsetCommitRequest(KafkaApis.scala:1833) > at kafka.server.KafkaApis.handle(KafkaApis.scala:139) > at kafka.server.KafkaRequestHandler.run(KafkaRequestHandler.scala:69) > at java.lang.Thread.run(Thread.java:748) > {quote} -- This message was sent by Atlassian JIRA (v7.6.3#76005)