[
https://issues.apache.org/jira/browse/HDDS-8460?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Pratyush Bhatt resolved HDDS-8460.
----------------------------------
Resolution: Fixed
> [Hbase-Ozone] NullPointerException while flushing data to HBase
> ---------------------------------------------------------------
>
> Key: HDDS-8460
> URL: https://issues.apache.org/jira/browse/HDDS-8460
> Project: Apache Ozone
> Issue Type: Bug
> Components: Ozone Filesystem
> Affects Versions: 1.4.0
> Reporter: Pratyush Bhatt
> Priority: Major
>
> HBase RegionServer went down with:
> {noformat}
> 2023-04-19 08:59:21,591 ERROR
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream: Failed to flush. error:
> null
> java.lang.NullPointerException{noformat}
> On checking further, I can also see an offset mismatch error.
> Detailed RegionServer (RS) logs from around the same time:
> {noformat}
> 2023-04-19 08:59:21,588 WARN org.apache.hadoop.hbase.regionserver.wal.FSHLog:
> UNEXPECTED
> java.lang.IllegalArgumentException: Expected offset: 18930 expected len: 19830
> at
> org.apache.hadoop.ozone.shaded.com.google.common.base.Preconditions.checkArgument(Preconditions.java:145)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntryPool.hsyncKey(BlockOutputStreamEntryPool.java:352)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.hsync(KeyOutputStream.java:472)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.hsync(OzoneOutputStream.java:76)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hsync(OzoneFSOutputStream.java:70)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hflush(OzoneFSOutputStream.java:65)
> at
> org.apache.hadoop.fs.FSDataOutputStream.hflush(FSDataOutputStream.java:136)
> at
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.sync(ProtobufLogWriter.java:89)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$SyncRunner.run(FSHLog.java:659)
> 2023-04-19 08:59:21,591 ERROR
> org.apache.hadoop.hdds.scm.storage.CommitWatcher: Couldn't find required
> future for 19830
> 2023-04-19 08:59:21,591 ERROR
> org.apache.hadoop.hdds.scm.storage.CommitWatcher: Existing acknowledged data:
> 18930
> 2023-04-19 08:59:21,591 ERROR
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream: Failed to flush. error:
> null
> java.lang.NullPointerException
> at
> org.apache.hadoop.ozone.shaded.com.google.common.base.Preconditions.checkNotNull(Preconditions.java:889)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.releaseBuffers(CommitWatcher.java:108)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.adjustBuffers(CommitWatcher.java:176)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchForCommit(CommitWatcher.java:205)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchOnLastIndex(CommitWatcher.java:166)
> at
> org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream.sendWatchForCommit(RatisBlockOutputStream.java:106)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.watchForCommit(BlockOutputStream.java:417)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlushInternal(BlockOutputStream.java:583)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlush(BlockOutputStream.java:545)
> at
> org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream.hsync(RatisBlockOutputStream.java:141)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.hsync(BlockOutputStreamEntry.java:151)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleStreamAction(KeyOutputStream.java:536)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:499)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.hsync(KeyOutputStream.java:469)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.hsync(OzoneOutputStream.java:76)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hsync(OzoneFSOutputStream.java:70)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hflush(OzoneFSOutputStream.java:65)
> at
> org.apache.hadoop.fs.FSDataOutputStream.hflush(FSDataOutputStream.java:136)
> at
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.sync(ProtobufLogWriter.java:89)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$SyncRunner.run(FSHLog.java:659)
> 2023-04-19 08:59:21,592 WARN org.apache.hadoop.hbase.regionserver.wal.FSHLog:
> UNEXPECTED
> java.lang.NullPointerException
> at
> org.apache.hadoop.ozone.shaded.com.google.common.base.Preconditions.checkNotNull(Preconditions.java:889)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.releaseBuffers(CommitWatcher.java:108)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.adjustBuffers(CommitWatcher.java:176)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchForCommit(CommitWatcher.java:205)
> at
> org.apache.hadoop.hdds.scm.storage.CommitWatcher.watchOnLastIndex(CommitWatcher.java:166)
> at
> org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream.sendWatchForCommit(RatisBlockOutputStream.java:106)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.watchForCommit(BlockOutputStream.java:417)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlushInternal(BlockOutputStream.java:583)
> at
> org.apache.hadoop.hdds.scm.storage.BlockOutputStream.handleFlush(BlockOutputStream.java:545)
> at
> org.apache.hadoop.hdds.scm.storage.RatisBlockOutputStream.hsync(RatisBlockOutputStream.java:141)
> at
> org.apache.hadoop.ozone.client.io.BlockOutputStreamEntry.hsync(BlockOutputStreamEntry.java:151)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleStreamAction(KeyOutputStream.java:536)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.handleFlushOrClose(KeyOutputStream.java:499)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.hsync(KeyOutputStream.java:469)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.hsync(OzoneOutputStream.java:76)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hsync(OzoneFSOutputStream.java:70)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.hflush(OzoneFSOutputStream.java:65)
> at
> org.apache.hadoop.fs.FSDataOutputStream.hflush(FSDataOutputStream.java:136)
> at
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.sync(ProtobufLogWriter.java:89)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$SyncRunner.run(FSHLog.java:659)
> 2023-04-19 08:59:22,046 WARN org.apache.hadoop.hbase.regionserver.wal.FSHLog:
> Append sequenceId=7, requesting roll of WAL
> java.io.IOException: : Stream is closed! Key:
> hbase/WALs/hbase-ha2-8.hbase-ha2.root.hwx.site,22101,1681894449051/hbase-ha2-8.hbase-ha2.root.hwx.site%2C22101%2C1681894449051.hbase-ha2-8.hbase-ha2.root.hwx.site%2C22101%2C1681894449051.regiongroup-0.1681894740393
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.checkNotClosed(KeyOutputStream.java:726)
> at
> org.apache.hadoop.ozone.client.io.KeyOutputStream.write(KeyOutputStream.java:209)
> at
> org.apache.hadoop.ozone.client.io.OzoneOutputStream.write(OzoneOutputStream.java:57)
> at
> org.apache.hadoop.fs.ozone.OzoneFSOutputStream.write(OzoneFSOutputStream.java:50)
> at
> org.apache.hadoop.fs.FSDataOutputStream$PositionCache.write(FSDataOutputStream.java:62)
> at java.io.DataOutputStream.write(DataOutputStream.java:107)
> at
> org.apache.hbase.thirdparty.com.google.protobuf.CodedOutputStream$OutputStreamEncoder.doFlush(CodedOutputStream.java:3041)
> at
> org.apache.hbase.thirdparty.com.google.protobuf.CodedOutputStream$OutputStreamEncoder.flushIfNotAvailable(CodedOutputStream.java:3036)
> at
> org.apache.hbase.thirdparty.com.google.protobuf.CodedOutputStream$OutputStreamEncoder.writeUInt64(CodedOutputStream.java:2726)
> at
> org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos$WALKey.writeTo(WALProtos.java:2021)
> at
> org.apache.hbase.thirdparty.com.google.protobuf.AbstractMessageLite.writeDelimitedTo(AbstractMessageLite.java:95)
> at
> org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.append(ProtobufLogWriter.java:55)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog.doAppend(FSHLog.java:334)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog.doAppend(FSHLog.java:73)
> at
> org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.appendEntry(AbstractFSWAL.java:1022)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.append(FSHLog.java:1200)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.onEvent(FSHLog.java:1079)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.onEvent(FSHLog.java:978)
> at
> com.lmax.disruptor.BatchEventProcessor.processEvents(BatchEventProcessor.java:168)
> at
> com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:125)
> at java.lang.Thread.run(Thread.java:748)
> 2023-04-19 08:59:22,091 ERROR
> org.apache.hadoop.hbase.regionserver.wal.FSHLog: Failed close of WAL writer
> ofs://ozone1/vol1/bucket1/hbase/WALs/hbase-ha2-8.hbase-ha2.root.hwx.site,22101,1681894449051/hbase-ha2-8.hbase-ha2.root.hwx.site%2C22101%2C1681894449051.hbase-ha2-8.hbase-ha2.root.hwx.site%2C22101%2C1681894449051.regiongroup-0.1681894740393,
> unflushedEntries=11
> org.apache.hadoop.hbase.regionserver.wal.FailedSyncBeforeLogCloseException:
> org.apache.hadoop.hbase.regionserver.wal.DamagedWALException: Append
> sequenceId=7, requesting roll of WAL
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$SafePointZigZagLatch.checkIfSyncFailed(FSHLog.java:900)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$SafePointZigZagLatch.waitSafePoint(FSHLog.java:916)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog.doReplaceWriter(FSHLog.java:372)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog.doReplaceWriter(FSHLog.java:73)
> at
> org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.replaceWriter(AbstractFSWAL.java:786)
> at
> org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.rollWriter(AbstractFSWAL.java:845)
> at
> org.apache.hadoop.hbase.wal.AbstractWALRoller$RollController.rollWal(AbstractWALRoller.java:306)
> at
> org.apache.hadoop.hbase.wal.AbstractWALRoller.run(AbstractWALRoller.java:211)
> Caused by: org.apache.hadoop.hbase.regionserver.wal.DamagedWALException:
> Append sequenceId=7, requesting roll of WAL
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.append(FSHLog.java:1206)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.onEvent(FSHLog.java:1079)
> at
> org.apache.hadoop.hbase.regionserver.wal.FSHLog$RingBufferEventHandler.onEvent(FSHLog.java:978)
> at
> com.lmax.disruptor.BatchEventProcessor.processEvents(BatchEventProcessor.java:168)
> at
> com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:125){noformat}
> Shortly afterwards, the RS went down:
> {noformat}
> 2023-04-19 08:59:23,544 ERROR
> org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine: Region server
> exiting
> java.lang.RuntimeException: HRegionServer Aborted
> at
> org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine.start(HRegionServerCommandLine.java:62)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServerCommandLine.run(HRegionServerCommandLine.java:82)
> at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:81)
> at
> org.apache.hadoop.hbase.util.ServerCommandLine.doMain(ServerCommandLine.java:144)
> at
> org.apache.hadoop.hbase.regionserver.HRegionServer.main(HRegionServer.java:3192){noformat}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]