milleruntime commented on issue #2179:
URL: https://github.com/apache/accumulo/issues/2179#issuecomment-867799697
After repeating the upgrade and using CI, I saw errors, and
Accumulo was unable to recover properly.
<pre>
2021-06-24T12:48:55,826 [log.RecoveryLogsIterator] DEBUG: Opening recovery
log dir 0367b0c1-12fb-4d94-8335-061b7c9ac232
2021-06-24T12:48:55,831 [tserver.AssignmentHandler] WARN : exception trying
to assign tablet !0<;~ null
java.lang.RuntimeException: Error recovering tablet !0<;~ from log files
at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:407)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.AssignmentHandler.run(AssignmentHandler.java:160)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.ActiveAssignmentRunnable.run(ActiveAssignmentRunnable.java:63)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.htrace.wrappers.TraceRunnable.run(TraceRunnable.java:57)
~[htrace-core-3.2.0-incubating.jar:3.2.0-incubating]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
Caused by: java.io.IOException: java.lang.RuntimeException:
java.io.FileNotFoundException: Path is not a file:
/accumulo/recovery/0367b0c1-12fb-4d94-8335-061b7c9ac232/part-r-00000
at
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:90)
at
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2070)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:770)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:532)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1020)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:948)
at java.base/java.security.AccessController.doPrivileged(Native
Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1845)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2952)
at
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:540)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
... 6 more
Caused by: java.lang.RuntimeException: java.io.FileNotFoundException: Path
is not a file:
/accumulo/recovery/0367b0c1-12fb-4d94-8335-061b7c9ac232/part-r-00000
at
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:90)
at
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2070)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:770)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:532)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1020)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:948)
at java.base/java.security.AccessController.doPrivileged(Native
Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1845)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2952)
at
org.apache.accumulo.core.client.rfile.RFileScanner.iterator(RFileScanner.java:398)
~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.RecoveryLogsIterator.validateFirstKey(RecoveryLogsIterator.java:156)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.RecoveryLogsIterator.<init>(RecoveryLogsIterator.java:77)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.findMaxTabletId(SortedLogRecovery.java:107)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.findLogsThatDefineTablet(SortedLogRecovery.java:147)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.recover(SortedLogRecovery.java:291)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:538)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
... 6 more
Suppressed: java.lang.NullPointerException
at
org.apache.accumulo.core.client.rfile.RFileScannerBuilder$InputArgs.getSources(RFileScannerBuilder.java:64)
~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.core.client.rfile.RFileScanner.close(RFileScanner.java:405)
~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.RecoveryLogsIterator.validateFirstKey(RecoveryLogsIterator.java:153)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.RecoveryLogsIterator.<init>(RecoveryLogsIterator.java:77)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.findMaxTabletId(SortedLogRecovery.java:107)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.findLogsThatDefineTablet(SortedLogRecovery.java:147)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.SortedLogRecovery.recover(SortedLogRecovery.java:291)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:538)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.AssignmentHandler.run(AssignmentHandler.java:160)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.accumulo.tserver.ActiveAssignmentRunnable.run(ActiveAssignmentRunnable.java:63)
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
at
org.apache.htrace.wrappers.TraceRunnable.run(TraceRunnable.java:57)
~[htrace-core-3.2.0-incubating.jar:3.2.0-incubating]
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
~[?:?]
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
~[?:?]
at java.lang.Thread.run(Thread.java:829) [?:?]
</pre>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]