[ https://issues.apache.org/jira/browse/KAFKA-8896?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16988654#comment-16988654 ]
Francisco Juan commented on KAFKA-8896: --------------------------------------- Hello [~hachikuji], could you please provide the steps or scenario that triggers this error? We have a cluster running version 2.2.1 and it is throwing this same error +sometimes+ when there's a broker restart. We can correlate the error that you saw with an earlier error that looks like "ERROR [GroupMetadataManager brokerId=2] Error loading offsets from __consumer_offsets". This happens shortly after a broker restarts. We can't reproduce this error in a test environment, which we would like to do to verify whether an upgrade would actually fix our issue (some consumer-groups losing their offsets). Our cluster setup looks like this: {code:java} Kafka version: 2.2.1 Number of brokers: 30 Number of leader partitions: 15785 Number of consumer-groups: 1150 inter.broker.protocol.version=1.1 min.insync.replicas=2{code} Errors stack trace detail: {code:java} [2019-11-28 08:13:22,603] ERROR [KafkaApi-42] Error when handling request: clientId=enrichment-worker-kafka, correlationId=92, api=HEARTBEAT, body={group_id=enrichment-worker-importio-webhook-consumer-eu,generation_id=9877,member_id=enrichment-worker-kafka-25821b62-f36b-4e64-905b-92019e4a5493} (kafka.server.KafkaApis) java.util.NoSuchElementException: key not found: consumer-name-25821b62-f36b-4e64-905b-92019e4a5493 at scala.collection.MapLike.default(MapLike.scala:235) at scala.collection.MapLike.default$(MapLike.scala:234) at scala.collection.AbstractMap.default(Map.scala:63) at scala.collection.mutable.HashMap.apply(HashMap.scala:69) at kafka.coordinator.group.GroupMetadata.get(GroupMetadata.scala:203) at kafka.coordinator.group.GroupCoordinator.$anonfun$tryCompleteHeartbeat$1(GroupCoordinator.scala:927) at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:198) at 
kafka.coordinator.group.GroupCoordinator.tryCompleteHeartbeat(GroupCoordinator.scala:920) at kafka.coordinator.group.DelayedHeartbeat.tryComplete(DelayedHeartbeat.scala:34) at kafka.server.DelayedOperation.maybeTryComplete(DelayedOperation.scala:121) at kafka.server.DelayedOperationPurgatory$Watchers.tryCompleteWatched(DelayedOperation.scala:388) at kafka.server.DelayedOperationPurgatory.checkAndComplete(DelayedOperation.scala:294) at kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextExpiration(GroupCoordinator.scala:737) at kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextHeartbeatExpiration(GroupCoordinator.scala:730) at kafka.coordinator.group.GroupCoordinator.$anonfun$handleHeartbeat$2(GroupCoordinator.scala:486) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:198) at kafka.coordinator.group.GroupCoordinator.handleHeartbeat(GroupCoordinator.scala:451) at kafka.server.KafkaApis.handleHeartbeatRequest(KafkaApis.scala:1336) at kafka.server.KafkaApis.handle(KafkaApis.scala:120) at kafka.server.KafkaRequestHandler.run(KafkaRequestHandler.scala:69) at java.lang.Thread.run(Thread.java:748) [2019-11-28 08:13:18,175] ERROR [GroupMetadataManager brokerId=42] Error loading offsets from __consumer_offsets-24 (kafka.coordinator.group.GroupMetadataManager) java.util.NoSuchElementException: key not found: consumer-name-de868651-3166-46df-98c5-6196b9ade526 at scala.collection.MapLike.default(MapLike.scala:235) at scala.collection.MapLike.default(MapLike.scala:235) at scala.collection.MapLike.default$(MapLike.scala:234) at scala.collection.AbstractMap.default(Map.scala:63) at scala.collection.mutable.HashMap.apply(HashMap.scala:69) at kafka.coordinator.group.GroupMetadata.get(GroupMetadata.scala:203) at kafka.coordinator.group.GroupCoordinator.$anonfun$tryCompleteHeartbeat$1(GroupCoordinator.scala:927) 
at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:198) at kafka.coordinator.group.GroupCoordinator.tryCompleteHeartbeat(GroupCoordinator.scala:920) at kafka.coordinator.group.DelayedHeartbeat.tryComplete(DelayedHeartbeat.scala:34) at kafka.server.DelayedOperation.maybeTryComplete(DelayedOperation.scala:121) at kafka.server.DelayedOperationPurgatory$Watchers.tryCompleteWatched(DelayedOperation.scala:388) at kafka.server.DelayedOperationPurgatory.checkAndComplete(DelayedOperation.scala:294) at kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextExpiration(GroupCoordinator.scala:737) at kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextHeartbeatExpiration(GroupCoordinator.scala:730) at kafka.coordinator.group.GroupCoordinator.$anonfun$onGroupLoaded$3(GroupCoordinator.scala:677) at kafka.coordinator.group.GroupCoordinator.$anonfun$onGroupLoaded$3$adapted(GroupCoordinator.scala:677) at scala.collection.immutable.List.foreach(List.scala:392) at kafka.coordinator.group.GroupCoordinator.$anonfun$onGroupLoaded$1(GroupCoordinator.scala:677) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:198) at kafka.coordinator.group.GroupCoordinator.onGroupLoaded(GroupCoordinator.scala:670) at kafka.coordinator.group.GroupCoordinator.$anonfun$handleGroupImmigration$1(GroupCoordinator.scala:682) at kafka.coordinator.group.GroupCoordinator.$anonfun$handleGroupImmigration$1$adapted(GroupCoordinator.scala:682) at kafka.coordinator.group.GroupMetadataManager.$anonfun$doLoadGroupsAndOffsets$23(GroupMetadataManager.scala:646) at kafka.coordinator.group.GroupMetadataManager.$anonfun$doLoadGroupsAndOffsets$23$adapted(GroupMetadataManager.scala:641) at 
scala.collection.mutable.HashMap$$anon$2.$anonfun$foreach$3(HashMap.scala:158) at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237) at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44) at scala.collection.mutable.HashMap$$anon$2.foreach(HashMap.scala:158) at kafka.coordinator.group.GroupMetadataManager.doLoadGroupsAndOffsets(GroupMetadataManager.scala:641) at kafka.coordinator.group.GroupMetadataManager.loadGroupsAndOffsets(GroupMetadataManager.scala:500) at kafka.coordinator.group.GroupMetadataManager.$anonfun$scheduleLoadGroupAndOffsets$2(GroupMetadataManager.scala:491) at kafka.utils.KafkaScheduler.$anonfun$schedule$2(KafkaScheduler.scala:114) at kafka.utils.CoreUtils$$anon$1.run(CoreUtils.scala:63) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) {code} > NoSuchElementException after coordinator move > --------------------------------------------- > > Key: KAFKA-8896 > URL: https://issues.apache.org/jira/browse/KAFKA-8896 > Project: Kafka > Issue Type: Bug > Affects Versions: 2.2.0, 2.3.0, 2.2.1 > Reporter: Jason Gustafson > Assignee: Boyang Chen > Priority: Major > Fix For: 2.2.2, 2.3.1 > > > Caught this exception in the wild: > {code:java} > java.util.NoSuchElementException: key not found: > consumer-group-38981ebe-4361-44e7-b710-7d11f5d35639 > at scala.collection.MapLike.default(MapLike.scala:235) > at 
scala.collection.MapLike.default$(MapLike.scala:234) > at scala.collection.AbstractMap.default(Map.scala:63) > at scala.collection.mutable.HashMap.apply(HashMap.scala:69) > at kafka.coordinator.group.GroupMetadata.get(GroupMetadata.scala:214) > at > kafka.coordinator.group.GroupCoordinator.$anonfun$tryCompleteHeartbeat$1(GroupCoordinator.scala:1008) > at > scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:253) > at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:209) > at > kafka.coordinator.group.GroupCoordinator.tryCompleteHeartbeat(GroupCoordinator.scala:1001) > at > kafka.coordinator.group.DelayedHeartbeat.tryComplete(DelayedHeartbeat.scala:34) > at > kafka.server.DelayedOperation.maybeTryComplete(DelayedOperation.scala:122) > at > kafka.server.DelayedOperationPurgatory$Watchers.tryCompleteWatched(DelayedOperation.scala:391) > at > kafka.server.DelayedOperationPurgatory.checkAndComplete(DelayedOperation.scala:295) > at > kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextExpiration(GroupCoordinator.scala:802) > at > kafka.coordinator.group.GroupCoordinator.completeAndScheduleNextHeartbeatExpiration(GroupCoordinator.scala:795) > at > kafka.coordinator.group.GroupCoordinator.$anonfun$handleHeartbeat$2(GroupCoordinator.scala:543) > at > scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:253) > at kafka.coordinator.group.GroupMetadata.inLock(GroupMetadata.scala:209) > at > kafka.coordinator.group.GroupCoordinator.handleHeartbeat(GroupCoordinator.scala:516) > at kafka.server.KafkaApis.handleHeartbeatRequest(KafkaApis.scala:1617) > at kafka.server.KafkaApis.handle(KafkaApis.scala:155) {code} > > Looking at the logs, I see a coordinator change just prior to this exception. 
> The group was first unloaded as the coordinator moved to another broker and > then was loaded again as the coordinator was moved back. I am guessing that > somehow the delayed heartbeat is retaining the reference to the old > GroupMetadata instance. Not sure exactly how this can happen though. > -- This message was sent by Atlassian Jira (v8.3.4#803005)