Derek, We use HBase in semi-production mode. We've had issues, but they mainly come from the machines themselves. Have you tried the following? http://wiki.apache.org/hadoop/Hbase/Troubleshooting#6
J-D On Fri, Jan 16, 2009 at 9:01 PM, Derek Pappas <[email protected]> wrote: > Hi, > > Are any companies using hbase in a production system that can talk about > hbase stability issues? > We are a three-person startup and need to choose the right storage system > the first time. > We are testing hbase 0.18 on a 7-machine cluster. We have seen all sorts of > errors > such as the following: > > > 2009-01-16 16:31:49,710 WARN org.apache.hadoop.dfs.DFSClient: Error > Recovery for block nul > l bad datanode[0] > [...@xxx~]$ tail -f hbase-0.18.1/logs/hbase-xxx-regionserver-xxxx0.log > at java.lang.reflect.Method.invoke(Unknown Source) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationH > andler.java:82) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler > .java:59) > at org.apache.hadoop.dfs.$Proxy1.addBlock(Unknown Source) > at > org.apache.hadoop.dfs.DFSClient$DFSOutputStream.locateFollowingBlock(DFSClient. > java:2440) > at > org.apache.hadoop.dfs.DFSClient$DFSOutputStream.nextBlockOutputStream(DFSClient > .java:2323) > at > org.apache.hadoop.dfs.DFSClient$DFSOutputStream.access$1800(DFSClient.java:1735 > ) > at > org.apache.hadoop.dfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java > :1912) > > 2009-01-16 16:31:49,710 WARN org.apache.hadoop.dfs.DFSClient: Error > Recovery for block nul > l bad datanode[0] > 5:30 PM > > on an error like this, one of the servers (and the data inserts) just > hangs > 5:30 PM > > then you wait an hour or so to figure out whether it comes out of it > 5:30 PM > > the other servers don't recognize that the one is gone > 5:33 PM > > 2009-01-16 16:31:46,507 WARN org.apache.hadoop.dfs.DFSClient: > NotReplicatedYetException sleeping > /hbase/yotest1/689876272/size/mapfiles/8253971210487871616/index retries > left 1 > 2009-01-16 16:31:49,710 WARN org.apache.hadoop.dfs.DFSClient: DataStreamer > Exception: org.apache.hadoop.ipc.RemoteException: > 
org.apache.hadoop.dfs.LeaseExpiredException: No lease on > /hbase/yotest1/689876272/size/mapfiles/8253971210487871616/index File does > not exist. Holder DFSClient_464109999 does not have any open files. > at > org.apache.hadoop.dfs.FSNamesystem.checkLease(FSNamesystem.java:1169) > at > org.apache.hadoop.dfs.FSNamesystem.getAdditionalBlock(FSNamesystem.java:1100) > at org.apache.hadoop.dfs.NameNode.addBlock(NameNode.java:330) > at sun.reflect.GeneratedMethodAccessor9.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) > at java.lang.reflect.Method.invoke(Method.java:597) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:452) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:888) > > at org.apache.hadoop.ipc.Client.call(Client.java:715) > at org.apache.hadoop.ipc.RPC$Invoker.invoke(RPC.java:216) > at org.apache.hadoop.dfs.$Proxy1.addBlock(Unknown Source) > > hadoop (4) (1) > 2009-01-16 08:26:12,017 WARN org.apache.hadoop.dfs.DataNode: > DatanodeRegistration(10.7.0.104:50010, > storageID=DS-603767860-10.7.0.104-50010-1230215140509, infoPort=50075, > ipcPort=50020):Failed to transfer blk_-8100972070675150101_1897857 to > 10.7.0.100:50010 got java.net.SocketException: Connection reset > at > java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:96) > at java.net.SocketOutputStream.write(SocketOutputStream.java:136) > at java.io.BufferedOutputStream.write(BufferedOutputStream.java:105) > at java.io.DataOutputStream.write(DataOutputStream.java:90) > at > org.apache.hadoop.dfs.DataNode$BlockSender.sendChunks(DataNode.java:1923) > at > org.apache.hadoop.dfs.DataNode$BlockSender.sendBlock(DataNode.java:2011) > at > org.apache.hadoop.dfs.DataNode$DataTransfer.run(DataNode.java:2899) > at java.lang.Thread.run(Thread.java:595) > > 2009-01-16 08:39:18,952 ERROR org.apache.hadoop.dfs.DataNode: > DatanodeRegistration(10.7.0.101:50010, > storageID=DS-1644697266-10.7.0.101-50010-1230180097338, 
infoPort=50075, > ipcPort=50020):DataXceiver: java.net.SocketTimeoutException: Read timed out > at java.net.SocketInputStream.socketRead0(Native Method) > at java.net.SocketInputStream.read(SocketInputStream.java:129) > at java.net.SocketInputStream.read(SocketInputStream.java:182) > at java.io.DataInputStream.readByte(DataInputStream.java:248) > at > org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:324) > at > org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:345) > at org.apache.hadoop.io.Text.readString(Text.java:410) > at > org.apache.hadoop.dfs.DataNode$DataXceiver.writeBlock(DataNode.java:1270) > at org.apache.hadoop.dfs.DataNode$DataXceiver.run(DataNode.java:1076) > at java.lang.Thread.run(Thread.java:619) > 5:59 PM > > 2009-01-16 08:44:20,551 WARN org.apache.hadoop.dfs.DFSClient: DataStreamer > Exception: java.net.SocketTimeoutException: 15000 millis timeout while > waiting for channel to be ready for write. ch : > java.nio.channels.SocketChannel[connected local=/10.7.0.106:44905 remote=/ > 10.7.0.106:50010] > at > org.apache.hadoop.net.SocketIOWithTimeout.doIO(SocketIOWithTimeout.java:162) > at > org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:146) > at > org.apache.hadoop.net.SocketOutputStream.write(SocketOutputStream.java:107) > at java.io.BufferedOutputStream.write(BufferedOutputStream.java:105) > at java.io.DataOutputStream.write(DataOutputStream.java:90) > at > org.apache.hadoop.dfs.DFSClient$DFSOutputStream$DataStreamer.run(DFSClient.java:1938) > 5:59 PM > > ading from blk_6762060810858066967_1788520 of > /hbase/yotest1/1831862944/resp/mapfiles/6379496651348145490/data from > 10.7.0.104:50010: java.io.IOException: Premeture EOF from inputStream > at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:102) > at > org.apache.hadoop.dfs.DFSClient$BlockReader.readChunk(DFSClient.java:996) > at > org.apache.hadoop.fs.FSInputChecker.readChecksumChunk(FSInputChecker.java:236) > at 
org.apache.hadoop.fs.FSInputChecker.read1(FSInputChecker.java:191) > at org.apache.hadoop.fs.FSInputChecker.read(FSInputChecker.java:159) > at > org.apache.hadoop.dfs.DFSClient$BlockReader.read(DFSClient.java:858) > at > org.apache.hadoop.dfs.DFSClient$DFSInputStream.readBuffer(DFSClient.java:1384) > at > org.apache.hadoop.dfs.DFSClient$DFSInputStream.read(DFSClient.java:1420) > at java.io.DataInputStream.readFully(DataInputStream.java:176) > at > org.apache.hadoop.io.DataOutputBuffer$Buffer.write(DataOutputBuffer.java:64) > at > org.apache.hadoop.io.DataOutputBuffer.write(DataOutputBuffer.java:102) > at > org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1933) > at > org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1833) > at > org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1879) > at org.apache.hadoop.io.MapFile$Reader.next(MapFile.java:516) > at > org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1003) > at > org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:893) > at > org.apache.hadoop.hbase.regionserver.HRegion.compactStores(HRegion.java:902) > at > org.apache.hadoop.hbase.regionserver.HRegion.compactStores(HRegion.java:860) > at > org.apache.hadoop.hbase.regionserver.CompactSplitThread.run(CompactSplitThread.java:83) > Best Regards, > > Derek Pappas > depappas at yahoo d0t com > > > > >
