[ https://issues.apache.org/jira/browse/ZOOKEEPER-4813?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Andor Molnar updated ZOOKEEPER-4813: ------------------------------------ Fix Version/s: 3.9.3 (was: 3.9.2) > Make zookeeper start successfully when the last log file is dirty during the > restore progress > --------------------------------------------------------------------------------------------- > > Key: ZOOKEEPER-4813 > URL: https://issues.apache.org/jira/browse/ZOOKEEPER-4813 > Project: ZooKeeper > Issue Type: Improvement > Components: server > Affects Versions: 3.9.1 > Reporter: Yan Zhao > Assignee: Yan Zhao > Priority: Major > Labels: pull-request-available > Fix For: 3.9.3 > > Time Spent: 10m > Remaining Estimate: 0h > > When ZooKeeper restarts, it restores the data from the last valid > snapshot file and replays the txn log to append data. > But if the last log file is empty for some reason, the restore will fail, > and ZooKeeper cannot restart. > The logs are as follows: > {noformat} > 14:12:16.023 [main] INFO org.apache.zookeeper.server.persistence.SnapStream > - Invalid snapshot snapshot.188700025d87. len = 761554294, byte = 45 > 14:12:16.024 [main] INFO org.apache.zookeeper.server.persistence.FileSnap - > Reading snapshot /pulsar/data/zookeeper/version-2/snapshot.188700025a05 > 14:12:17.350 [main] INFO org.apache.zookeeper.server.DataTree - The digest > in the snapshot has digest version of 2, with zxid as 0x188700025b07, and > digest value as 510776662607117 > 14:12:17.492 [main] ERROR org.apache.zookeeper.server.quorum.QuorumPeer - > Unable to load database on disk > java.io.EOFException: null > at java.io.DataInputStream.readInt(DataInputStream.java:386) ~[?:?] 
> at > org.apache.jute.BinaryInputArchive.readInt(BinaryInputArchive.java:96) > ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileHeader.deserialize(FileHeader.java:67) > ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.inStreamCreated(FileTxnLog.java:725) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.createInputArchive(FileTxnLog.java:743) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.goToNextLog(FileTxnLog.java:711) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.next(FileTxnLog.java:792) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.fastForwardFromEdits(FileTxnSnapLog.java:361) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.lambda$restore$0(FileTxnSnapLog.java:267) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.restore(FileTxnSnapLog.java:312) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.ZKDatabase.loadDataBase(ZKDatabase.java:288) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1149) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeer.start(QuorumPeer.java:1135) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeerMain.runFromConfig(QuorumPeerMain.java:229) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > 
org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:137) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:91) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > 14:12:17.502 [main] INFO > org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider - Shutdown > executor service with timeout 1000 > 14:12:17.508 [main] INFO org.eclipse.jetty.server.AbstractConnector - > Stopped ServerConnector@2484f433{HTTP/1.1, (http/1.1)}{0.0.0.0:8000} > 14:12:17.510 [main] INFO org.eclipse.jetty.server.handler.ContextHandler - > Stopped o.e.j.s.ServletContextHandler@59a67c3a{/,null,STOPPED} > 14:12:17.515 [main] ERROR org.apache.zookeeper.server.quorum.QuorumPeerMain - > Unexpected exception, exiting abnormally > java.lang.RuntimeException: Unable to run quorum server > at > org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1204) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeer.start(QuorumPeer.java:1135) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeerMain.runFromConfig(QuorumPeerMain.java:229) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeerMain.initializeAndRun(QuorumPeerMain.java:137) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeerMain.main(QuorumPeerMain.java:91) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > Caused by: java.io.EOFException > at java.io.DataInputStream.readInt(DataInputStream.java:386) ~[?:?] 
> at > org.apache.jute.BinaryInputArchive.readInt(BinaryInputArchive.java:96) > ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileHeader.deserialize(FileHeader.java:67) > ~[org.apache.zookeeper-zookeeper-jute-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.inStreamCreated(FileTxnLog.java:725) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.createInputArchive(FileTxnLog.java:743) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.goToNextLog(FileTxnLog.java:711) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnLog$FileTxnIterator.next(FileTxnLog.java:792) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.fastForwardFromEdits(FileTxnSnapLog.java:361) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.lambda$restore$0(FileTxnSnapLog.java:267) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.persistence.FileTxnSnapLog.restore(FileTxnSnapLog.java:312) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.ZKDatabase.loadDataBase(ZKDatabase.java:288) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > at > org.apache.zookeeper.server.quorum.QuorumPeer.loadDataBase(QuorumPeer.java:1149) > ~[org.apache.zookeeper-zookeeper-3.9.1.jar:3.9.1] > ... 4 more > {noformat} > > In fact, if opening the last log file fails, we can ignore that log file. -- This message was sent by Atlassian Jira (v8.20.10#820010)