milleruntime commented on issue #2179:
URL: https://github.com/apache/accumulo/issues/2179#issuecomment-867799697


   After repeating the upgrade and using CI, I did see errors, and 
Accumulo was unable to recover properly. 
   <pre>
   2021-06-24T12:48:55,826 [log.RecoveryLogsIterator] DEBUG: Opening recovery 
log dir 0367b0c1-12fb-4d94-8335-061b7c9ac232
   2021-06-24T12:48:55,831 [tserver.AssignmentHandler] WARN : exception trying 
to assign tablet !0<;~ null
   java.lang.RuntimeException: Error recovering tablet !0<;~ from log files
           at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:407) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.AssignmentHandler.run(AssignmentHandler.java:160) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.ActiveAssignmentRunnable.run(ActiveAssignmentRunnable.java:63)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.htrace.wrappers.TraceRunnable.run(TraceRunnable.java:57) 
~[htrace-core-3.2.0-incubating.jar:3.2.0-incubating]
           at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
           at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
           at java.lang.Thread.run(Thread.java:829) [?:?]
   Caused by: java.io.IOException: java.lang.RuntimeException: 
java.io.FileNotFoundException: Path is not a file: 
/accumulo/recovery/0367b0c1-12fb-4d94-8335-061b7c9ac232/part-r-00000
           at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:90)
           at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
           at 
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
           at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2070)
           at 
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:770)
           at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
           at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
           at 
org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:532)
           at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
           at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1020)
           at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:948)
           at java.base/java.security.AccessController.doPrivileged(Native 
Method)
           at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
           at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1845)
           at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2952)
   
           at 
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:540)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           ... 6 more
   Caused by: java.lang.RuntimeException: java.io.FileNotFoundException: Path 
is not a file: 
/accumulo/recovery/0367b0c1-12fb-4d94-8335-061b7c9ac232/part-r-00000
           at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:90)
           at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
           at 
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
           at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2070)
           at 
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:770)
           at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
           at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
           at 
org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:532)
           at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
           at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1020)
           at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:948)
           at java.base/java.security.AccessController.doPrivileged(Native 
Method)
           at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
           at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1845)
           at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2952)
   
           at 
org.apache.accumulo.core.client.rfile.RFileScanner.iterator(RFileScanner.java:398)
 ~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.RecoveryLogsIterator.validateFirstKey(RecoveryLogsIterator.java:156)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.RecoveryLogsIterator.<init>(RecoveryLogsIterator.java:77)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.SortedLogRecovery.findMaxTabletId(SortedLogRecovery.java:107)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.SortedLogRecovery.findLogsThatDefineTablet(SortedLogRecovery.java:147)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.SortedLogRecovery.recover(SortedLogRecovery.java:291)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:538)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at 
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           at org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
           ... 6 more
           Suppressed: java.lang.NullPointerException
                   at 
org.apache.accumulo.core.client.rfile.RFileScannerBuilder$InputArgs.getSources(RFileScannerBuilder.java:64)
 ~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.core.client.rfile.RFileScanner.close(RFileScanner.java:405) 
~[accumulo-core-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.RecoveryLogsIterator.validateFirstKey(RecoveryLogsIterator.java:153)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.RecoveryLogsIterator.<init>(RecoveryLogsIterator.java:77)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.SortedLogRecovery.findMaxTabletId(SortedLogRecovery.java:107)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.SortedLogRecovery.findLogsThatDefineTablet(SortedLogRecovery.java:147)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.SortedLogRecovery.recover(SortedLogRecovery.java:291)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.log.TabletServerLogger.recover(TabletServerLogger.java:538)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.TabletServer.recover(TabletServer.java:1153) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.tablet.Tablet.<init>(Tablet.java:366) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.AssignmentHandler.run(AssignmentHandler.java:160) 
~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.accumulo.tserver.ActiveAssignmentRunnable.run(ActiveAssignmentRunnable.java:63)
 ~[accumulo-tserver-2.1.0-SNAPSHOT.jar:2.1.0-SNAPSHOT]
                   at 
org.apache.htrace.wrappers.TraceRunnable.run(TraceRunnable.java:57) 
~[htrace-core-3.2.0-incubating.jar:3.2.0-incubating]
                   at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
                   at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
                   at java.lang.Thread.run(Thread.java:829) [?:?]
   </pre>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to