Are you running with a high enough xceiver count (dfs.datanode.max.xcievers) on your datanodes? Are there any failures in your datanode logs?
On Wed, Sep 1, 2010 at 10:51 PM, Stack <[email protected]> wrote: > Vidhya: > > Could you use the hadoop 0.20-append branch on your cluster as per > Todd's suggestion? > > St.Ack > > On Wed, Sep 1, 2010 at 12:22 PM, Vidhyashankar Venkataraman > <[email protected]> wrote: >> The RS logs is filled with exceptions like the one I have specified below.. >> >> Vidhya >> >> RS log: >> >> 2010-09-01 18:23:55,883 ERROR >> org.apache.hadoop.hbase.regionserver.HRegionServer: Failed openScanner >> java.io.IOException: Could not seek StoreFileScanner[HFileScanner for reader >> reader=hdfs://b3130080.yst.yahoo.net:4600/hbase/DocDB/912a67889c51bee4a6252f289b0ad95e/bigColumn/149303798533194752, >> compression=none, inMemory=false, >> firstKey=0000003568669995/bigColumn:CONTENT/1282901111530/Put, >> lastKey=0000003568803993/bigColumn:CONTENT/1282902034907/Put, avgKeyLen=44, >> avgValueLen=32000, entries=133999, length=4295200267, cur=null] >> at >> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:104) >> at >> org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:73) >> at >> org.apache.hadoop.hbase.regionserver.Store.getScanner(Store.java:1201) >> at >> org.apache.hadoop.hbase.regionserver.HRegion$RegionScanner.<init>(HRegion.java:2333) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.instantiateInternalScanner(HRegion.java:1205) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1197) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1178) >> at >> org.apache.hadoop.hbase.regionserver.HRegionServer.openScanner(HRegionServer.java:1814) >> at sun.reflect.GeneratedMethodAccessor14.invoke(Unknown Source) >> at >> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) >> at java.lang.reflect.Method.invoke(Method.java:597) >> at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:576) >> at >> 
org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:919) >> Caused by: java.io.IOException: Could not obtain block: >> blk_4305541000700762499_1100131 >> file=/hbase/DocDB/912a67889c51bee4a6252f289b0ad95e/bigColumn/149303798533194752 >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.chooseDataNode(DFSClient.java:1787) >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.blockSeekTo(DFSClient.java:1615) >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.read(DFSClient.java:1742) >> at java.io.DataInputStream.read(DataInputStream.java:132) >> at >> org.apache.hadoop.hbase.io.hfile.BoundedRangeFileInputStream.read(BoundedRangeFileInputStream.java:105) >> at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:100) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader.decompress(HFile.java:1060) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader.readBlock(HFile.java:1007) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader$Scanner.seekTo(HFile.java:1373) >> at >> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:125) >> at >> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:96) >> ... 
12 more >> 2010-09-01 18:23:56,926 WARN org.apache.hadoop.hdfs.DFSClient: DFS Read: >> java.io.IOException: Could not obtain block: blk_4305541000700762499_1100131 >> file=/hbase/DocDB/912a67889c51bee4a6252f289b0ad95e/bigColumn/149303798533194752 >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.chooseDataNode(DFSClient.java:1787) >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.blockSeekTo(DFSClient.java:1615) >> at >> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.read(DFSClient.java:1742) >> at java.io.DataInputStream.read(DataInputStream.java:132) >> at >> org.apache.hadoop.hbase.io.hfile.BoundedRangeFileInputStream.read(BoundedRangeFileInputStream.java:105) >> at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:100) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader.decompress(HFile.java:1060) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader.readBlock(HFile.java:1007) >> at >> org.apache.hadoop.hbase.io.hfile.HFile$Reader$Scanner.seekTo(HFile.java:1373) >> at >> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:125) >> at >> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:96) >> at >> org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:73) >> at >> org.apache.hadoop.hbase.regionserver.Store.getScanner(Store.java:1201) >> at >> org.apache.hadoop.hbase.regionserver.HRegion$RegionScanner.<init>(HRegion.java:2333) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.instantiateInternalScanner(HRegion.java:1205) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1197) >> at >> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1178) >> at >> org.apache.hadoop.hbase.regionserver.HRegionServer.openScanner(HRegionServer.java:1814) >> at sun.reflect.GeneratedMethodAccessor14.invoke(Unknown Source) >> at >> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) >> at 
java.lang.reflect.Method.invoke(Method.java:597) >> at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:576) >> at >> org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:919) >> >> >> >> On 9/1/10 12:06 PM, "Todd Lipcon" <[email protected]> wrote: >> >> Hi Vidhya, >> >> Problems like this used to be more frequent, but then we did a bunch >> of DFS bug fixes in the hadoop-0.20-append branch that resolved a lot >> of them. I imagine you're using YDH which doesn't have all the fixes, >> but I couldn't say exactly what issue this is. >> >> Could you grep both the NN and RS logs for blk_4731742382812534502 and >> pastebin what you find? >> >> Thanks >> -Todd >> >> >> On Wed, Sep 1, 2010 at 11:58 AM, Vidhyashankar Venkataraman >> <[email protected]> wrote: >>> I have been trying to run my scanner jobs and sometimes they fail due to >>> DFS errors in one of the storefiles: >>> >>> I looked at the namenode logs and the file that caused the problem was in >>> the process of getting fixed by the namenode but by then the scanner >>> failed.. (I tried copying the file after the failure and it did without any >>> problems).. >>> >>> I just wanted to confirm with you guys if this is a common problem.. >>> >>> Vidhya >>> >>> The MR exceptions: >>> org.apache.hadoop.hbase.client.RetriesExhaustedException: Trying to contact >>> region server b3130504.yst.yahoo.net:60020 for region >>> DocDB,0000000190009985,1282947505610.532732ba4420906a085e22489a897bea., row >>> '0000000190009985', but failed after 10 attempts. 
>>> Exceptions: >>> java.io.IOException: java.io.IOException: Could not seek >>> StoreFileScanner[HFileScanner for reader >>> reader=hdfs://b3130080.yst.yahoo.net:4600/hbase/DocDB/532732ba4420906a085e22489a897bea/bigColumn/4660977350540113930, >>> compression=none, inMemory=false, >>> firstKey=0000000190009985/bigColumn:CONTENT/1282909598083/Put, >>> lastKey=0000000190143983/bigColumn:CONTENT/1282910525475/Put, avgKeyLen=44, >>> avgValueLen=32000, entries=133999, length=4295200267, cur=null] >>> at >>> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:104) >>> at >>> org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:73) >>> at org.apache.hadoop.hbase.regionserver.Store.getScanner(Store.java:1201) >>> at >>> org.apache.hadoop.hbase.regionserver.HRegion$RegionScanner.<init>(HRegion.java:2333) >>> at >>> org.apache.hadoop.hbase.regionserver.HRegion.instantiateInternalScanner(HRegion.java:1205) >>> at >>> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1197) >>> at >>> org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:1178) >>> at >>> org.apache.hadoop.hbase.regionserver.HRegionServer.openScanner(HRegionServer.java:1814) >>> at sun.reflect.GeneratedMethodAccessor14.invoke(Unknown Source) >>> at >>> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) >>> at java.lang.reflect.Method.invoke(Method.java:597) >>> at org.apache.hadoop.hbase.ipc.HBaseRPC$Server.call(HBaseRPC.java:576) >>> at >>> org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:919) >>> Caused by: java.io.IOException: Could not obtain block: >>> blk_4731742382812534502_1129302 >>> file=/hbase/DocDB/532732ba4420906a085e22489a897bea/bigColumn/4660977350540113930 >>> at >>> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.chooseDataNode(DFSClient.java:1787) >>> at >>> org.apache.hadoop.hdfs.DFSClient$DFSInputStream.blockSeekTo(DFSClient.java:1615) >>> at >>> 
org.apache.hadoop.hdfs.DFSClient$DFSInputStream.read(DFSClient.java:1742) >>> at java.io.DataInputStream.read(DataInputStream.java:132) >>> at >>> org.apache.hadoop.hbase.io.hfile.BoundedRangeFileInputStream.read(BoundedRangeFileInputStream.java:105) >>> at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:100) >>> at >>> org.apache.hadoop.hbase.io.hfile.HFile$Reader.decompress(HFile.java:1060) >>> at >>> org.apache.hadoop.hbase.io.hfile.HFile$Reader.readBlock(HFile.java:1007) >>> at >>> org.apache.hadoop.hbase.io.hfile.HFile$Reader$Scanner.seekTo(HFile.java:1373) >>> at >>> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:125) >>> at >>> org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:96) >>> ... 12 more >>> >>> >> >> >> >> -- >> Todd Lipcon >> Software Engineer, Cloudera >> >> >
