chia7712 commented on a change in pull request #8657: URL: https://github.com/apache/kafka/pull/8657#discussion_r431209368
########## File path: core/src/main/scala/kafka/coordinator/group/DelayedJoin.scala ########## @@ -33,11 +34,40 @@ import scala.math.{max, min} */ private[group] class DelayedJoin(coordinator: GroupCoordinator, group: GroupMetadata, - rebalanceTimeout: Long) extends DelayedOperation(rebalanceTimeout, Some(group.lock)) { + rebalanceTimeout: Long) extends DelayedOperation(rebalanceTimeout, None) { - override def tryComplete(): Boolean = coordinator.tryCompleteJoin(group, forceComplete _) - override def onExpiration() = coordinator.onExpireJoin() - override def onComplete() = coordinator.onCompleteJoin(group) + /** + * The delayed requests should be completed without holding group lock so we keep those partitions and then + * complete them after releasing lock. + */ + private[group] var partitionsToComplete: scala.collection.Map[TopicPartition, LeaderHWChange] = Map.empty + + /** + * It controls the lock manually since GroupCoordinator#onCompleteJoin() invoked by onComplete() can't be within a + * group lock since GroupCoordinator#onCompleteJoin() tries to complete delayed requests. + * + */ + override def tryComplete(): Boolean = try group.inLock { + /** + * holds the group lock for both the "group.hasAllMembersJoined" check and the call to forceComplete() + */ + if (group.hasAllMembersJoined) forceComplete() + else false + } finally completeDelayedRequests() + override def onExpiration(): Unit = coordinator.onExpireJoin() + override def onComplete(): Unit = try partitionsToComplete = coordinator.onCompleteJoin(group) + finally completeDelayedRequests() + + /** + * try to complete delayed requests only if the caller does not hold the group lock. 
+ * This method is called by following cases: + * 1) tryComplete -> hold lock -> onComplete -> release lock -> completeDelayedRequests + * 2) onComplete -> completeDelayedRequests + */ + private[group] def completeDelayedRequests(): Unit = if (!group.lock.isHeldByCurrentThread) { Review comment: This is a workaround to deal with the deadlock caused by taking multiple group locks ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org