[
https://issues.apache.org/jira/browse/HBASE-12063?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Andrew Purtell resolved HBASE-12063.
------------------------------------
Resolution: Invalid
Please contact [email protected] for troubleshooting help.
> wal.ProtobufLogWriter: Got IOException while writing trailer
> ------------------------------------------------------------
>
> Key: HBASE-12063
> URL: https://issues.apache.org/jira/browse/HBASE-12063
> Project: HBase
> Issue Type: Bug
> Components: regionserver
> Affects Versions: 0.98.0
> Reporter: wangshizhe
>
> 2014-09-23 14:57:02,161 DEBUG [RS_OPEN_META-logs05:60020-0-WAL.AsyncWriter]
> wal.FSHLog: RS_OPEN_META-logs05:60020-0-WAL.AsyncWriter interrupted while
> waiting for newer writes added to local buffer
> 2014-09-23 14:57:02,161 INFO [RS_OPEN_META-logs05:60020-0-WAL.AsyncWriter]
> wal.FSHLog: RS_OPEN_META-logs05:60020-0-WAL.AsyncWriter exiting
> 2014-09-23 14:57:02,161 DEBUG [regionserver60020] wal.FSHLog: Closing WAL
> writer in
> hdfs://logs01.yz.bj.uar.nsn:8020/apps/hbase/data/WALs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:02,161 ERROR [regionserver60020] wal.ProtobufLogWriter: Got
> IOException while writing trailer
> java.io.IOException: All datanodes 10.136.172.105:50010 are bad. Aborting...
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1128)
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:924)
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:486)
> 2014-09-23 14:57:02,163 ERROR [regionserver60020] regionserver.HRegionServer:
> Metalog close and delete failed
> java.io.IOException: All datanodes 10.136.172.105:50010 are bad. Aborting...
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.setupPipelineForAppendOrRecovery(DFSOutputStream.java:1128)
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.processDatanodeError(DFSOutputStream.java:924)
> at
> org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:486)
> 2014-09-23 14:57:02,163 DEBUG [regionserver60020-WAL.AsyncNotifier]
> wal.FSHLog: regionserver60020-WAL.AsyncNotifier interrupted while waiting for
> notification from AsyncSyncer thread
> 2014-09-23 14:57:02,163 INFO [regionserver60020-WAL.AsyncNotifier]
> wal.FSHLog: regionserver60020-WAL.AsyncNotifier exiting
> 2014-09-23 14:57:02,163 DEBUG [regionserver60020-WAL.AsyncSyncer0]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer0 interrupted while waiting for
> notification from AsyncWriter thread
> 2014-09-23 14:57:02,163 INFO [regionserver60020-WAL.AsyncSyncer0]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer0 exiting
> 2014-09-23 14:57:02,163 DEBUG [regionserver60020-WAL.AsyncSyncer1]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer1 interrupted while waiting for
> notification from AsyncWriter thread
> 2014-09-23 14:57:02,163 INFO [regionserver60020-WAL.AsyncSyncer1]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer1 exiting
> 2014-09-23 14:57:02,164 DEBUG [regionserver60020-WAL.AsyncSyncer2]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer2 interrupted while waiting for
> notification from AsyncWriter thread
> 2014-09-23 14:57:02,164 INFO [regionserver60020-WAL.AsyncSyncer2]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer2 exiting
> 2014-09-23 14:57:02,164 DEBUG [regionserver60020-WAL.AsyncSyncer3]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer3 interrupted while waiting for
> notification from AsyncWriter thread
> 2014-09-23 14:57:02,164 INFO [regionserver60020-WAL.AsyncSyncer3]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer3 exiting
> 2014-09-23 14:57:02,164 DEBUG [regionserver60020-WAL.AsyncSyncer4]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer4 interrupted while waiting for
> notification from AsyncWriter thread
> 2014-09-23 14:57:02,164 INFO [regionserver60020-WAL.AsyncSyncer4]
> wal.FSHLog: regionserver60020-WAL.AsyncSyncer4 exiting
> 2014-09-23 14:57:02,164 DEBUG [regionserver60020-WAL.AsyncWriter] wal.FSHLog:
> regionserver60020-WAL.AsyncWriter interrupted while waiting for newer writes
> added to local buffer
> 2014-09-23 14:57:02,164 INFO [regionserver60020-WAL.AsyncWriter] wal.FSHLog:
> regionserver60020-WAL.AsyncWriter exiting
> 2014-09-23 14:57:02,165 DEBUG [regionserver60020] wal.FSHLog: Closing WAL
> writer in
> hdfs://logs01.yz.bj.uar.nsn:8020/apps/hbase/data/WALs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:02,591 ERROR [regionserver60020] regionserver.HRegionServer:
> Close and delete failed
> org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException: No lease on
> /apps/hbase/data/WALs/logs05.yz.bj.uar.nsn,60020,1411454591351/logs05.yz.bj.uar.nsn%2C60020%2C1411454591351.1411454597278:
> File does not exist. [Lease. Holder:
> DFSClient_hb_rs_logs05.yz.bj.uar.nsn,60020,1411454591351_1603749335_33,
> pendingcreates: 1]
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.checkLease(FSNamesystem.java:2946)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.completeFileInternal(FSNamesystem.java:3010)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.completeFile(FSNamesystem.java:2990)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.complete(NameNodeRpcServer.java:641)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.complete(ClientNamenodeProtocolServerSideTranslatorPB.java:484)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:585)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:928)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2013)
> at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2009)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:396)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1557)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2007)
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native
> Method)
> at
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
> at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
> at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
> at
> org.apache.hadoop.hbase.RemoteExceptionHandler.decodeRemoteException(RemoteExceptionHandler.java:97)
> at
> org.apache.hadoop.hbase.RemoteExceptionHandler.checkThrowable(RemoteExceptionHandler.java:49)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.closeWAL(HRegionServer.java:1188)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:998)
> at java.lang.Thread.run(Thread.java:662)
> 2014-09-23 14:57:02,692 INFO [regionserver60020] regionserver.Leases:
> regionserver60020 closing leases
> 2014-09-23 14:57:02,692 INFO [regionserver60020] regionserver.Leases:
> regionserver60020 closed leases
> 2014-09-23 14:57:08,460 INFO [regionserver60020.leaseChecker]
> regionserver.Leases: regionserver60020.leaseChecker closing leases
> 2014-09-23 14:57:08,461 INFO [regionserver60020.leaseChecker]
> regionserver.Leases: regionserver60020.leaseChecker closed leases
> 2014-09-23 14:57:08,466 INFO [regionserver60020.periodicFlusher]
> regionserver.HRegionServer$PeriodicMemstoreFlusher:
> regionserver60020.periodicFlusher exiting
> 2014-09-23 14:57:08,467 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/replication/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:08,467 INFO [regionserver60020] util.RetryCounter: Sleeping
> 1000ms before retry #0...
> 2014-09-23 14:57:09,468 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/replication/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:09,468 INFO [regionserver60020] util.RetryCounter: Sleeping
> 2000ms before retry #1...
> 2014-09-23 14:57:11,468 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/replication/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:11,468 INFO [regionserver60020] util.RetryCounter: Sleeping
> 4000ms before retry #2...
> 2014-09-23 14:57:15,469 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/replication/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:15,469 INFO [regionserver60020] util.RetryCounter: Sleeping
> 8000ms before retry #3...
> 2014-09-23 14:57:23,469 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/replication/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:23,469 ERROR [regionserver60020]
> zookeeper.RecoverableZooKeeper: ZooKeeper getChildren failed after 4 attempts
> 2014-09-23 14:57:23,490 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:23,490 INFO [regionserver60020] util.RetryCounter: Sleeping
> 1000ms before retry #0...
> 2014-09-23 14:57:24,490 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:24,490 INFO [regionserver60020] util.RetryCounter: Sleeping
> 2000ms before retry #1...
> 2014-09-23 14:57:26,491 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:26,491 INFO [regionserver60020] util.RetryCounter: Sleeping
> 4000ms before retry #2...
> 2014-09-23 14:57:30,492 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:30,492 INFO [regionserver60020] util.RetryCounter: Sleeping
> 8000ms before retry #3...
> 2014-09-23 14:57:38,492 WARN [regionserver60020]
> zookeeper.RecoverableZooKeeper: Possibly transient ZooKeeper,
> quorum=logs02.yz.bj.uar.nsn:2181,logs01.yz.bj.uar.nsn:2181,logs03.yz.bj.uar.nsn:2181,
> exception=org.apache.zookeeper.KeeperException$SessionExpiredException:
> KeeperErrorCode = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> 2014-09-23 14:57:38,492 ERROR [regionserver60020]
> zookeeper.RecoverableZooKeeper: ZooKeeper delete failed after 4 attempts
> 2014-09-23 14:57:38,492 WARN [regionserver60020] regionserver.HRegionServer:
> Failed deleting my ephemeral node
> org.apache.zookeeper.KeeperException$SessionExpiredException: KeeperErrorCode
> = Session expired for
> /hbase-unsecure/rs/logs05.yz.bj.uar.nsn,60020,1411454591351
> at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:127)
> at
> org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
> at org.apache.zookeeper.ZooKeeper.delete(ZooKeeper.java:873)
> at
> org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper.delete(RecoverableZooKeeper.java:156)
> at
> org.apache.hadoop.hbase.zookeeper.ZKUtil.deleteNode(ZKUtil.java:1270)
> at
> org.apache.hadoop.hbase.zookeeper.ZKUtil.deleteNode(ZKUtil.java:1259)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.deleteMyEphemeralNode(HRegionServer.java:1286)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.run(HRegionServer.java:1016)
> at java.lang.Thread.run(Thread.java:662)
> 2014-09-23 14:57:38,502 INFO [regionserver60020] regionserver.HRegionServer:
> stopping server logs05.yz.bj.uar.nsn,60020,1411454591351; zookeeper
> connection closed.
> 2014-09-23 14:57:38,502 INFO [regionserver60020] regionserver.HRegionServer:
> regionserver60020 exiting
> 2014-09-23 14:57:38,503 ERROR [main] regionserver.HRegionServerCommandLine:
> Region server exiting
> java.lang.RuntimeException: HRegionServer Aborted
> at
> org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine.start(HRegionServerCommandLine.java:66)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine.run(HRegionServerCommandLine.java:85)
> at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
> at
> org.apache.hadoop.hbase.util.ServerCommandLine.doMain(ServerCommandLine.java:126)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.main(HRegionServer.java:2403)
> 2014-09-23 14:57:38,506 INFO [Thread-11] regionserver.ShutdownHook: Shutdown
> hook starting; hbase.shutdown.hook=true;
> fsShutdownHook=org.apache.hadoop.fs.FileSystem$Cache$ClientFinalizer@6e75d758
> 2014-09-23 14:57:38,506 INFO [Thread-11] regionserver.HRegionServer:
> STOPPED: Shutdown hook
> 2014-09-23 14:57:38,507 INFO [Thread-11] regionserver.ShutdownHook: Starting
> fs shutdown hook thread.
> 2014-09-23 14:57:38,508 INFO [Thread-11] regionserver.ShutdownHook: Shutdown
> hook finished.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)