[ 
https://issues.apache.org/jira/browse/HBASE-28065?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17762371#comment-17762371
 ] 

Nick Dimiduk commented on HBASE-28065:
--------------------------------------

{noformat}
2023-08-25T01:13:34,716 [regionserver/hostname:60020-shortCompactions-0] ERROR 
org.apache.hadoop.hbase.regionserver.CompactSplit: Compaction failed 
region=region,1661687622515.f84ed7f32752a5e4599a3d7a5ac37f58., 
storeName=f84ed7f32752a5e4599a3d7a5ac37f58/0, priority=12, 
startTime=1692926011339
java.io.IOException: Could not iterate StoreFileScanner[HFileScanner for reader 
reader=hdfs://hostname:8020/hbase/data/default/table/f84ed7f32752a5e4599a3d7a5ac37f58/0/44af10b2c98841689fa70ed1656524d0,
 compression=gz, cacheConf=cacheDataOnRead=true, cacheDataOnWrite=false, 
cacheIndexesOnWr
ite=false, cacheBloomsOnWrite=false, cacheEvictOnClose=false, 
cacheDataCompressed=false, prefetchOnOpen=false, 
firstKey=Optional[rowkey/Put/seqid=0
], lastKey=Optional[rowkey/Put/seqid=0], avgKeyLen=60, avgValueLen=15, 
entries=183556, length=2542985, 
cur=org.apache.hadoop.hbase.io.encoding.BufferedDataBlockEncode
r$OffheapDecodedExtendedCell@56a3d234]
        at 
org.apache.hadoop.hbase.regionserver.StoreFileScanner.next(StoreFileScanner.java:204)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:118) 
~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:692) 
~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.compactions.Compactor.performCompaction(Compactor.java:440)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Compactor.java:363)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(DefaultCompactor.java:64)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(DefaultStoreEngine.java:122)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1145) 
~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2287) 
~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:667)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:716)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) 
~[?:?]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) 
~[?:?]
        at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: java.io.IOException: incorrect header check
        at 
org.apache.hadoop.io.compress.zlib.ZlibDecompressor.inflateBytesDirect(Native 
Method) ~[hadoop-common-3.3.1.jar:?]
        at 
org.apache.hadoop.io.compress.zlib.ZlibDecompressor.decompress(ZlibDecompressor.java:225)
 ~[hadoop-common-3.3.1.jar:?]
        at 
org.apache.hadoop.io.compress.DecompressorStream.decompress(DecompressorStream.java:111)
 ~[hadoop-common-3.3.1.jar:?]
        at 
org.apache.hadoop.io.compress.DecompressorStream.read(DecompressorStream.java:105)
 ~[hadoop-common-3.3.1.jar:?]
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:290) 
~[?:?]
        at java.io.BufferedInputStream.read(BufferedInputStream.java:351) ~[?:?]
        at 
org.apache.hadoop.hbase.io.util.BlockIOUtils.readFullyWithHeapBuffer(BlockIOUtils.java:151)
 ~[hbase-common-2.5-hubspot-20230418.173647-18.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext.prepareDecoding(HFileBlockDefaultDecodingContext.java:104)
 ~[hbase-common-2.5-hubspot-20230418.173647-18.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.hfile.HFileBlock.unpack(HFileBlock.java:644) 
~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1353)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1252)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.readNextDataBlock(HFileReaderImpl.java:754)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$EncodedScanner.next(HFileReaderImpl.java:1520)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        at 
org.apache.hadoop.hbase.regionserver.StoreFileScanner.next(StoreFileScanner.java:195)
 ~[hbase-server-2.5-hubspot-20230418.183923-40.jar:2.5-hubspot-SNAPSHOT]
        ... 13 more
{noformat}

> Corrupt HFile data is mishandled in several cases
> -------------------------------------------------
>
>                 Key: HBASE-28065
>                 URL: https://issues.apache.org/jira/browse/HBASE-28065
>             Project: HBase
>          Issue Type: Bug
>          Components: HFile
>    Affects Versions: 2.5.2
>            Reporter: Nick Dimiduk
>            Priority: Major
>
> While riding over a spate of HDFS data corruption issues, we've observed 
> several places in the read path that do not fall back to HDFS checksum 
> appropriately. These failures manifest during client reads and during 
> compactions. Sometimes failure is detected by the fallback 
> {{verifyOnDiskSizeMatchesHeader}}, sometimes we attempt to allocate a buffer 
> with a negative size, and sometimes we read through to a failure from block 
> decompression.
> After code study, I think that all three cases arise from using a block 
> header that was read without checksum validation.
> Will post up the stack traces in the comments. Not sure if we'll want a 
> single patch or multiple.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to