[ https://issues.apache.org/jira/browse/KAFKA-8046?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17155988#comment-17155988 ]
Abhi commented on KAFKA-8046: ----------------------------- I also saw the same exception in kafka_2.12-2.3.0. [2020-07-11 02:50:03,621] ERROR Error while reading checkpoint file /local/kafka/data/replication-offset-checkpoint (kafka.server.LogDirFailureChannel) java.nio.charset.MalformedInputException: Input length = 1 at java.base/java.nio.charset.CoderResult.throwException(CoderResult.java:274) at java.base/sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:339) at java.base/sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178) at java.base/java.io.InputStreamReader.read(InputStreamReader.java:185) at java.base/java.io.BufferedReader.fill(BufferedReader.java:161) at java.base/java.io.BufferedReader.readLine(BufferedReader.java:326) at java.base/java.io.BufferedReader.readLine(BufferedReader.java:392) at kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:90) at kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86) at kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scala:61) at kafka.cluster.Partition.$anonfun$getOrCreateReplica$1(Partition.scala:204) at kafka.utils.Pool$$anon$1.apply(Pool.scala:61) at java.base/java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1705) at kafka.utils.Pool.getAndMaybePut(Pool.scala:60) at kafka.cluster.Partition.getOrCreateReplica(Partition.scala:198) at kafka.cluster.Partition.$anonfun$makeLeader$3(Partition.scala:376) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:237) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at scala.collection.TraversableLike.map(TraversableLike.scala:237) at 
scala.collection.TraversableLike.map$(TraversableLike.scala:230) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at kafka.cluster.Partition.$anonfun$makeLeader$1(Partition.scala:376) at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:253) at kafka.utils.CoreUtils$.inWriteLock(CoreUtils.scala:261) at kafka.cluster.Partition.makeLeader(Partition.scala:370) at kafka.server.ReplicaManager.$anonfun$makeLeaders$5(ReplicaManager.scala:1188) at scala.collection.mutable.HashMap.$anonfun$foreach$1(HashMap.scala:149) at scala.collection.mutable.HashTable.foreachEntry(HashTable.scala:237) at scala.collection.mutable.HashTable.foreachEntry$(HashTable.scala:230) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:44) at scala.collection.mutable.HashMap.foreach(HashMap.scala:149) at kafka.server.ReplicaManager.makeLeaders(ReplicaManager.scala:1186) at kafka.server.ReplicaManager.becomeLeaderOrFollower(ReplicaManager.scala:1098) at kafka.server.KafkaApis.handleLeaderAndIsrRequest(KafkaApis.scala:198) at kafka.server.KafkaApis.handle(KafkaApis.scala:115) at kafka.server.KafkaRequestHandler.run(KafkaRequestHandler.scala:69) > Shutdown broker because all log dirs in /tmp/kafka-logs have failed > ------------------------------------------------------------------- > > Key: KAFKA-8046 > URL: https://issues.apache.org/jira/browse/KAFKA-8046 > Project: Kafka > Issue Type: Bug > Affects Versions: 2.0.0 > Environment: centos 7 > Reporter: jaren > Priority: Major > > Kafka stops working every few days. Here are some of the logs. 
> ERROR Error while reading checkpoint file > /tmp/kafka-logs/cleaner-offset-checkpoint (kafka.server.LogDirFailureChannel) > java.io.FileNotFoundException: /tmp/kafka-logs/cleaner-offset-checkpoint (No > such file or directory) > at java.io.FileInputStream.open0(Native Method) > at java.io.FileInputStream.open(FileInputStream.java:195) > at java.io.FileInputStream.<init>(FileInputStream.java:138) > at > kafka.server.checkpoints.CheckpointFile.liftedTree2$1(CheckpointFile.scala:87) > at kafka.server.checkpoints.CheckpointFile.read(CheckpointFile.scala:86) > at > kafka.server.checkpoints.OffsetCheckpointFile.read(OffsetCheckpointFile.scala:61) > at > kafka.log.LogCleanerManager$$anonfun$allCleanerCheckpoints$1$$anonfun$apply$1.apply(LogCleanerManager.scala:89) > at > kafka.log.LogCleanerManager$$anonfun$allCleanerCheckpoints$1$$anonfun$apply$1.apply(LogCleanerManager.scala:87) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at > scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241) > at scala.collection.Iterator$class.foreach(Iterator.scala:891) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1334) > at scala.collection.MapLike$DefaultValuesIterable.foreach(MapLike.scala:206) > at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241) > at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104) > at > kafka.log.LogCleanerManager$$anonfun$allCleanerCheckpoints$1.apply(LogCleanerManager.scala:87) > at > kafka.log.LogCleanerManager$$anonfun$allCleanerCheckpoints$1.apply(LogCleanerManager.scala:95) > at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) > at > kafka.log.LogCleanerManager.allCleanerCheckpoints(LogCleanerManager.scala:86) > at > kafka.log.LogCleanerManager$$anonfun$grabFilthiestCompactedLog$1.apply(LogCleanerManager.scala:126) > at > kafka.log.LogCleanerManager$$anonfun$grabFilthiestCompactedLog$1.apply(LogCleanerManager.scala:123) > 
at kafka.utils.CoreUtils$.inLock(CoreUtils.scala:251) > at > kafka.log.LogCleanerManager.grabFilthiestCompactedLog(LogCleanerManager.scala:123) > at kafka.log.LogCleaner$CleanerThread.cleanOrSleep(LogCleaner.scala:296) > at kafka.log.LogCleaner$CleanerThread.doWork(LogCleaner.scala:289) > at kafka.utils.ShutdownableThread.run(ShutdownableThread.scala:82) > [2019-03-04 16:44:13,154] INFO [ReplicaManager broker=1] Stopping serving > replicas in dir /tmp/kafka-logs (kafka.server.ReplicaManager) > [2019-03-04 16:44:13,189] INFO [ReplicaFetcherManager on broker 1] Removed > fetcher for partitions > __consumer_offsets-22,FOTA_PLAIN_FORCESTOP-0,__consumer_offsets-30,OBSERVE_DEVICE- > > 0,__consumer_offsets-8,__consumer_offsets-21,__consumer_offsets-4,__consumer_offsets-27,__consumer_offsets-7,__consumer_offsets-9,__consumer_offsets-46,FOTA_DOWNLOAD_ERROR-0,__consumer_offsets- > > 25,DEVICE_DE_REGISTER-0,__consumer_offsets-35,DEVICE_REG_UPDATE-0,__consumer_offsets-41,__consumer_offsets-33,__consumer_offsets-23,__consumer_offsets-49,__consumer_offsets-47,__consumer_offsets- > > 16,__consumer_offsets-28,FOTA_IMEI_MONITOR-0,__consumer_offsets-31,__consumer_offsets-36,__consumer_offsets-42,FOTA_IMEI_MONITOR-1-0,__consumer_offsets-3,__consumer_offsets-18,DATA_TO_DEVICE- > > 0,__consumer_offsets-37,emq_notify-0,__consumer_offsets-15,__consumer_offsets-24,FOTA_PLAIN_MONITOR_FORCE-0,DEVICE_REGISTER-0,springCloudBus-0,__consumer_offsets-38,__consumer_offsets- > > 17,DEVICE_REP-0,__consumer_offsets-48,__consumer_offsets-19,__consumer_offsets-11,__consumer_offsets-13,__consumer_offsets-2,__consumer_offsets-43,__consumer_offsets-6,FOTA_STATICS_MONITOR-1- > > 0,__consumer_offsets-14,FOTA_STATICS_MONITOR-0,__consumer_offsets-20,__consumer_offsets-0,__consumer_offsets-44,__consumer_offsets-39,FOTA_STATE_CHANGE-0,__consumer_offsets-12,FOTA_UPGRADE_NOTIFY- > > 
0,__consumer_offsets-45,__consumer_offsets-1,emq_message_down-0,__consumer_offsets-5,__consumer_offsets-26,__consumer_offsets-29,emq_message-0,__consumer_offsets-34,__consumer_offsets- > > 10,__consumer_offsets-32,__consumer_offsets-40,REQUEST_DEVICE-0 > (kafka.server.ReplicaFetcherManager) > [2019-03-04 16:44:13,190] INFO [ReplicaAlterLogDirsManager on broker 1] > Removed fetcher for partitions > __consumer_offsets-22,FOTA_PLAIN_FORCESTOP-0,__consumer_offsets-30,OBSERVE_DEVICE- > > 0,__consumer_offsets-8,__consumer_offsets-21,__consumer_offsets-4,__consumer_offsets-27,__consumer_offsets-7,__consumer_offsets-9,__consumer_offsets-46,FOTA_DOWNLOAD_ERROR-0,__consumer_offsets- > > 25,DEVICE_DE_REGISTER-0,__consumer_offsets-35,DEVICE_REG_UPDATE-0,__consumer_offsets-41,__consumer_offsets-33,__consumer_offsets-23,__consumer_offsets-49,__consumer_offsets-47,__consumer_offsets- > > 16,__consumer_offsets-28,FOTA_IMEI_MONITOR-0,__consumer_offsets-31,__consumer_offsets-36,__consumer_offsets-42,FOTA_IMEI_MONITOR-1-0,__consumer_offsets-3,__consumer_offsets-18,DATA_TO_DEVICE- > > 0,__consumer_offsets-37,emq_notify-0,__consumer_offsets-15,__consumer_offsets-24,FOTA_PLAIN_MONITOR_FORCE-0,DEVICE_REGISTER-0,springCloudBus-0,__consumer_offsets-38,__consumer_offsets- > > 17,DEVICE_REP-0,__consumer_offsets-48,__consumer_offsets-19,__consumer_offsets-11,__consumer_offsets-13,__consumer_offsets-2,__consumer_offsets-43,__consumer_offsets-6,FOTA_STATICS_MONITOR-1- > > 0,__consumer_offsets-14,FOTA_STATICS_MONITOR-0,__consumer_offsets-20,__consumer_offsets-0,__consumer_offsets-44,__consumer_offsets-39,FOTA_STATE_CHANGE-0,__consumer_offsets-12,FOTA_UPGRADE_NOTIFY- > > 0,__consumer_offsets-45,__consumer_offsets-1,emq_message_down-0,__consumer_offsets-5,__consumer_offsets-26,__consumer_offsets-29,emq_message-0,__consumer_offsets-34,__consumer_offsets- > > 10,__consumer_offsets-32,__consumer_offsets-40,REQUEST_DEVICE-0 > (kafka.server.ReplicaAlterLogDirsManager) > [2019-03-04 16:44:13,263] INFO 
[ReplicaManager broker=1] Broker 1 stopped > fetcher for partitions > __consumer_offsets-22,FOTA_PLAIN_FORCESTOP-0,__consumer_offsets-30,OBSERVE_DEVICE- > > 0,__consumer_offsets-8,__consumer_offsets-21,__consumer_offsets-4,__consumer_offsets-27,__consumer_offsets-7,__consumer_offsets-9,__consumer_offsets-46,FOTA_DOWNLOAD_ERROR-0,__consumer_offsets- > > 25,DEVICE_DE_REGISTER-0,__consumer_offsets-35,DEVICE_REG_UPDATE-0,__consumer_offsets-41,__consumer_offsets-33,__consumer_offsets-23,__consumer_offsets-49,__consumer_offsets-47,__consumer_offsets- > > 16,__consumer_offsets-28,FOTA_IMEI_MONITOR-0,__consumer_offsets-31,__consumer_offsets-36,__consumer_offsets-42,FOTA_IMEI_MONITOR-1-0,__consumer_offsets-3,__consumer_offsets-18,DATA_TO_DEVICE- > > 0,__consumer_offsets-37,emq_notify-0,__consumer_offsets-15,__consumer_offsets-24,FOTA_PLAIN_MONITOR_FORCE-0,DEVICE_REGISTER-0,springCloudBus-0,__consumer_offsets-38,__consumer_offsets- > > 17,DEVICE_REP-0,__consumer_offsets-48,__consumer_offsets-19,__consumer_offsets-11,__consumer_offsets-13,__consumer_offsets-2,__consumer_offsets-43,__consumer_offsets-6,FOTA_STATICS_MONITOR-1- > > 0,__consumer_offsets-14,FOTA_STATICS_MONITOR-0,__consumer_offsets-20,__consumer_offsets-0,__consumer_offsets-44,__consumer_offsets-39,FOTA_STATE_CHANGE-0,__consumer_offsets-12,FOTA_UPGRADE_NOTIFY- > > 0,__consumer_offsets-45,__consumer_offsets-1,emq_message_down-0,__consumer_offsets-5,__consumer_offsets-26,__consumer_offsets-29,emq_message-0,__consumer_offsets-34,__consumer_offsets- > > 10,__consumer_offsets-32,__consumer_offsets-40,REQUEST_DEVICE-0 and stopped > moving logs for partitions because they are in the failed log directory > /tmp/kafka-logs. 
(kafka.server.ReplicaManager) > [2019-03-04 16:44:13,286] INFO Stopping serving logs in dir /tmp/kafka-logs > (kafka.log.LogManager) > [2019-03-04 16:44:13,364] ERROR Shutdown broker because all log dirs in > /tmp/kafka-logs have failed (kafka.log.LogManager) -- This message was sent by Atlassian Jira (v8.3.4#803005)