[
https://issues.apache.org/jira/browse/HDDS-11193?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17868549#comment-17868549
]
Pratyush Bhatt commented on HDDS-11193:
---------------------------------------
Hi [~duongnguyen] , I was using this fix, but I am still hitting the above
java.lang.IllegalArgumentException issue, and all the RS went down with below
issue:
{code:java}
2024-07-23 05:32:26,367 ERROR
org.apache.hadoop.hbase.regionserver.CompactSplit: Compaction failed
region=11loadtesttabtyui22aaqw23,a22221f2,1721725742844.9bcd68da6a7f7c0150854b20b2340a41.,
storeName=9bcd68da6a7f7c0150854b20b2340a41/ch8, priority=13,
startTime=1721737880870
java.io.IOException: java.lang.IllegalArgumentException
at
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.ensureIOException(AbstractFSWAL.java:858)
at
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.blockOnSync(AbstractFSWAL.java:853)
at
org.apache.hadoop.hbase.regionserver.wal.FSHLog.publishSyncThenBlockOnCompletion(FSHLog.java:809)
at org.apache.hadoop.hbase.regionserver.wal.FSHLog.sync(FSHLog.java:858)
at org.apache.hadoop.hbase.regionserver.wal.FSHLog.sync(FSHLog.java:847)
at
org.apache.hadoop.hbase.regionserver.wal.WALUtil.doFullMarkerAppendTransaction(WALUtil.java:163)
at
org.apache.hadoop.hbase.regionserver.wal.WALUtil.writeMarker(WALUtil.java:140)
at
org.apache.hadoop.hbase.regionserver.wal.WALUtil.writeCompactionMarker(WALUtil.java:74)
at
org.apache.hadoop.hbase.regionserver.HStore.writeCompactionWalRecord(HStore.java:1262)
at
org.apache.hadoop.hbase.regionserver.HStore.lambda$replaceStoreFiles$3(HStore.java:1272)
at
org.apache.hadoop.hbase.regionserver.StoreEngine.replaceStoreFiles(StoreEngine.java:483)
at
org.apache.hadoop.hbase.regionserver.HStore.replaceStoreFiles(HStore.java:1270)
at
org.apache.hadoop.hbase.regionserver.HStore.doCompaction(HStore.java:1206)
at org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1188)
at
org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261)
at
org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625)
at
org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException
at
org.apache.hadoop.ozone.shaded.com.google.common.base.Preconditions.checkArgument(Preconditions.java:129)
at
org.apache.hadoop.ozone.client.io.KeyOutputStream.handleException(KeyOutputStream.java:318)
at
org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:499)
at
org.apache.hadoop.ozone.client.io.KeyOutputStream.hsync(KeyOutputStream.java:463)
at
org.apache.hadoop.ozone.client.io.OzoneOutputStream.hsync(OzoneOutputStream.java:118)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
at
org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:149)
at
org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hsync(OzoneFSOutputStream.java:80)
at
org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hflush(OzoneFSOutputStream.java:75)
at
org.apache.hadoop.fs.FSDataOutputStream.hflush(FSDataOutputStream.java:136)
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.sync(ProtobufLogWriter.java:84)
at
org.apache.hadoop.hbase.regionserver.wal.FSHLog$SyncRunner.run(FSHLog.java:669)
2024-07-23 05:32:26,371 ERROR
org.apache.hadoop.hbase.regionserver.HRegionServer: ***** ABORTING region
server vc0134.xyz,22101,1721724988754: Failed log close in log roller
*****{code}
cc: [~ashishkr] [~weichiu]
> KeyOutputStream flakiness when running write and hsync concurrently
> -------------------------------------------------------------------
>
> Key: HDDS-11193
> URL: https://issues.apache.org/jira/browse/HDDS-11193
> Project: Apache Ozone
> Issue Type: Sub-task
> Reporter: Duong
> Assignee: Duong
> Priority: Major
> Labels: pull-request-available
> Fix For: HDDS-7593
>
>
> Testing KeyOutputStream with TestHSync#testConcurrentWriteHSync sometimes
> fails. Bellow are the known error stacktraces.
> Repeated test run CI:
> [https://github.com/apache/ozone/actions/runs/9961420355]
> #1
> {code:java}
> java.io.IOException: java.lang.IndexOutOfBoundsException
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleWrite(KeyOutputStream.java:243)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.write(KeyOutputStream.java:212)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.write(OzoneOutputStream.java:94)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.lambda$write$1(OzoneFSOutputStream.java:58)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:149)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.write(OzoneFSOutputStream.java:54)
> at
> org.apache.hadoop.fs.FSDataOutputStream$PositionCache.write(FSDataOutputStream.java:62)
> at java.base/java.io.DataOutputStream.write(DataOutputStream.java:112)
> at java.base/java.io.FilterOutputStream.write(FilterOutputStream.java:108)
> at
> org.apache.hadoop.fs.ozone.TestHSync.lambda$runConcurrentWriteHSync$8(TestHSync.java:683)
> at java.base/java.lang.Thread.run(Thread.java:840)
> Caused by: java.lang.IndexOutOfBoundsException
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.write(BlockOutputStream.java:328)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.write(BlockOutputStreamEntry.java:138)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.writeToOutputStream(KeyOutputStream.java:255)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleWrite(KeyOutputStream.java:233)
> {code}
> #2
> {code:java}
> java.io.IOException: java.lang.IllegalArgumentException
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleWrite(KeyOutputStream.java:243)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.write(KeyOutputStream.java:212)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.write(OzoneOutputStream.java:94)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.lambda$write$1(OzoneFSOutputStream.java:58)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:184)
> at
> org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:149)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.write(OzoneFSOutputStream.java:54)
> at
> org.apache.hadoop.fs.FSDataOutputStream$PositionCache.write(FSDataOutputStream.java:62)
> at java.io.DataOutputStream.write(DataOutputStream.java:107)
> at java.io.FilterOutputStream.write(FilterOutputStream.java:97)
> at
> org.apache.hadoop.fs.ozone.TestHSync.lambda$runConcurrentWriteHSync$8(TestHSync.java:683)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: java.lang.IllegalArgumentException
> at
> com.google.common.base.Preconditions.checkArgument(Preconditions.java:129)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntryPool.allocateBlockIfNeeded(BlockOutputStreamEntryPool.java:398)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleWrite(KeyOutputStream.java:222)
> {code}
> #3
> {code:java}
> Error: org.apache.hadoop.fs.ozone.TestHSync.testConcurrentWriteHSync(int,
> int, boolean)[3] Time elapsed: 12.476 s <<< ERROR!
> java.lang.IllegalArgumentException: Expected offset: 6134992 expected len:
> 6143720
> at
> com.google.common.base.Preconditions.checkArgument(Preconditions.java:143)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntryPool.commitKey(BlockOutputStreamEntryPool.java:321)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.close(KeyOutputStream.java:563)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.close(OzoneOutputStream.java:105)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.close(OzoneFSOutputStream.java:70)
> at
> org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:77)
> at
> org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
> at
> org.apache.hadoop.fs.ozone.TestHSync.testConcurrentWriteHSync(TestHSync.java:757)
> at java.lang.reflect.Method.invoke(Method.java:498)
> {code}
>
> #4
> {code:java}
> java.io.IOException: Inconsistent read for blockID=conID: 4 locID:
> 113750153625600158 bcsId: 3233 replicaIndex: null length=196608
> position=181568 numBytesToRead=8 numBytesRead=-1
> at
> org.apache.hadoop.ozone.client.io.KeyInputStream.checkPartBytesRead(KeyInputStream.java:195)
> at
> org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:97)
> at
> org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:56)
> at
> org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:81)
> at java.io.DataInputStream.read(DataInputStream.java:100)
> at
> org.apache.hadoop.fs.ozone.TestHSync.validateWrittenFile(TestHSync.java:767)
> at
> org.apache.hadoop.fs.ozone.TestHSync.testConcurrentWriteHSync(TestHSync.java:758)
> at java.lang.reflect.Method.invoke(Method.java:498)
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]