Pratyush Bhatt created HDDS-11220:
-------------------------------------
Summary: [HBase Replication] RS down due to "ManagedChannelOrphanWrapper: Previous channel was not shutdown properly"
Key: HDDS-11220
URL: https://issues.apache.org/jira/browse/HDDS-11220
Project: Apache Ozone
Issue Type: Bug
Reporter: Pratyush Bhatt
*Scenario:* Bidirectional HBase replication, with HBase running on Ozone on both clusters.

The affected RegionServer went down with {_}org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: *~*~*~ Previous channel ManagedChannelImpl\{logId=167987, target=10.140.176.6:9859} was not shutdown properly!!!{_}:
{code:java}
2024-07-19 19:08:26,365 ERROR org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: *~*~*~ Previous channel ManagedChannelImpl{logId=167987, target=10.140.176.6:9859} was not shutdown properly!!! ~*~*~*
    Make sure to call shutdown()/shutdownNow() and wait until awaitTermination() returns true.
java.lang.RuntimeException: ManagedChannel allocation site
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631)
    at org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.connect(XceiverClientGrpc.java:161)
    at org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:256)
    at org.apache.hadoop.hdds.scm.XceiverClientManager$2.call(XceiverClientManager.java:237)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4938)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3576)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2318)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2191)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2081)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache.get(LocalCache.java:4019)
    at org.apache.hadoop.ozone.shaded.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4933)
    at org.apache.hadoop.hdds.scm.XceiverClientManager.getClient(XceiverClientManager.java:237)
    at org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClient(XceiverClientManager.java:184)
    at org.apache.hadoop.hdds.scm.XceiverClientManager.acquireClientForReadData(XceiverClientManager.java:161)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.acquireClient(BlockInputStream.java:342)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:258)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370)
    at org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
    at org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56)
    at org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96)
    at org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125)
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152)
    at org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528)
    at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1322)
    at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1242)
    at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.readAndUpdateNewBlock(HFileReaderImpl.java:946)
    at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:935)
    at org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:311)
    at org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:214)
    at org.apache.hadoop.hbase.regionserver.StoreScanner.seekScanners(StoreScanner.java:412)
    at org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:323)
    at org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:289)
    at org.apache.hadoop.hbase.regionserver.compactions.Compactor.createScanner(Compactor.java:530)
    at org.apache.hadoop.hbase.regionserver.compactions.Compactor$1.createScanner(Compactor.java:259)
    at org.apache.hadoop.hbase.regionserver.compactions.Compactor.compact(Compactor.java:346)
    at org.apache.hadoop.hbase.regionserver.compactions.DefaultCompactor.compact(DefaultCompactor.java:66)
    at org.apache.hadoop.hbase.regionserver.DefaultStoreEngine$DefaultCompactionContext.compact(DefaultStoreEngine.java:122)
    at org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1189)
    at org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261)
    at org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625)
    at org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2024-07-19 19:08:26,365 ERROR org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper: *~*~*~ Previous channel ManagedChannelImpl{logId=167995, target=10.140.139.7:9859} was not shutdown properly!!! ~*~*~*
    Make sure to call shutdown()/shutdownNow() and wait until awaitTermination() returns true.
java.lang.RuntimeException: ManagedChannel allocation site
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper$ManagedChannelReference.<init>(ManagedChannelOrphanWrapper.java:102)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:60)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelOrphanWrapper.<init>(ManagedChannelOrphanWrapper.java:51)
    at org.apache.ratis.thirdparty.io.grpc.internal.ManagedChannelImplBuilder.build(ManagedChannelImplBuilder.java:631)
    at org.apache.ratis.thirdparty.io.grpc.internal.AbstractManagedChannelImplBuilder.build(AbstractManagedChannelImplBuilder.java:297)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.connectToDatanode(XceiverClientGrpc.java:182)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.reconnect(XceiverClientGrpc.java:618)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.checkOpen(XceiverClientGrpc.java:609)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandAsync(XceiverClientGrpc.java:539)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithRetry(XceiverClientGrpc.java:415)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.lambda$sendCommandWithTraceIDAndRetry$0(XceiverClientGrpc.java:352)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithTraceIDAndRetry(XceiverClientGrpc.java:344)
    at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommand(XceiverClientGrpc.java:325)
    at org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:235)
    at org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.lambda$getBlock$0(ContainerProtocolCalls.java:209)
    at org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.tryEachDatanode(ContainerProtocolCalls.java:158)
    at org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:208)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockDataUsingClient(BlockInputStream.java:288)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.getBlockData(BlockInputStream.java:260)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.initialize(BlockInputStream.java:164)
    at org.apache.hadoop.hdds.scm.storage.BlockInputStream.readWithStrategy(BlockInputStream.java:370)
    at org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
    at org.apache.hadoop.hdds.scm.storage.ByteBufferReader.readFromBlock(ByteBufferReader.java:56)
    at org.apache.hadoop.hdds.scm.storage.MultipartInputStream.readWithStrategy(MultipartInputStream.java:96)
    at org.apache.hadoop.hdds.scm.storage.ExtendedInputStream.read(ExtendedInputStream.java:66)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.readInTrace(OzoneFSInputStream.java:136)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.lambda$read$0(OzoneFSInputStream.java:126)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInSpan(TracingUtil.java:169)
    at org.apache.hadoop.hdds.tracing.TracingUtil.executeInNewSpan(TracingUtil.java:159)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.read(OzoneFSInputStream.java:125)
    at org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:152)
    at org.apache.hadoop.hbase.io.util.BlockIOUtils.readFully(BlockIOUtils.java:78)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1481)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1699)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1528)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlock(HFileBlock.java:1423)
    at org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl$1.nextBlockWithBlockType(HFileBlock.java:1436)
    at org.apache.hadoop.hbase.io.hfile.HFileInfo.initMetaAndIndex(HFileInfo.java:368)
    at org.apache.hadoop.hbase.regionserver.HStoreFile.open(HStoreFile.java:368)
    at org.apache.hadoop.hbase.regionserver.HStoreFile.initReader(HStoreFile.java:485)
    at org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:224)
    at org.apache.hadoop.hbase.regionserver.StoreEngine.createStoreFileAndReader(StoreEngine.java:217)
    at org.apache.hadoop.hbase.regionserver.StoreEngine.validateStoreFile(StoreEngine.java:236)
    at org.apache.hadoop.hbase.regionserver.StoreEngine.commitStoreFiles(StoreEngine.java:422)
    at org.apache.hadoop.hbase.regionserver.HStore.doCompaction(HStore.java:1200)
    at org.apache.hadoop.hbase.regionserver.HStore.compact(HStore.java:1188)
    at org.apache.hadoop.hbase.regionserver.HRegion.compact(HRegion.java:2261)
    at org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.doCompaction(CompactSplit.java:625)
    at org.apache.hadoop.hbase.regionserver.CompactSplit$CompactionRunner.run(CompactSplit.java:673)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2024-07-19 19:08:30,744 WARN org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: Failed to read stream of replication entries{code}
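For context, the orphan-wrapper ERROR is gRPC's channel-leak detector: it fires when a ManagedChannel becomes garbage-collectable without having been shut down. The sketch below shows the shutdown contract the message refers to, using plain io.grpc names for illustration only (Ozone's XceiverClientGrpc builds its channels through the org.apache.ratis.thirdparty shaded copy of the same API, and the datanode address is taken from the log above):
{code:java}
import java.util.concurrent.TimeUnit;

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;

public class ChannelShutdownSketch {
  public static void main(String[] args) throws InterruptedException {
    // Build a channel to a datanode endpoint (address taken from the log above).
    ManagedChannel channel = ManagedChannelBuilder
        .forTarget("10.140.176.6:9859")
        .usePlaintext()
        .build();
    try {
      // ... issue RPCs on the channel ...
    } finally {
      // This is what the "was not shutdown properly" warning asks for:
      // initiate shutdown and wait for termination before the last
      // reference to the channel is dropped.
      channel.shutdown();
      if (!channel.awaitTermination(5, TimeUnit.SECONDS)) {
        channel.shutdownNow();
      }
    }
  }
}
{code}
Since both allocation sites in the traces are XceiverClientGrpc.connectToDatanode() (once via the client cache load, once via reconnect() during checkOpen()), the warning suggests those client channels are being replaced or dropped without the corresponding close, though this issue does not pin down where.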
This was followed by ERROR logs with java.io.EOFException:
{code:java}
java.io.EOFException: EOF encountered at pos: 3439495 for key: hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.root.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990
    at org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99)
    at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70)
    at org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505)
    at org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427)
    at org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95)
    at org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
    at org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212)
    at org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148)
2024-07-19 19:08:32,912 WARN org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader: Failed to read stream of replication entries
java.io.EOFException: EOF encountered at pos: 3439495 for key: hbase/WALs/ccycloud-5.ozn-hbaserepl2.xyz,22101,1721293282050/ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.ccycloud-5.ozn-hbaserepl2.xyz%2C22101%2C1721293282050.regiongroup-0.1721415920990
    at org.apache.hadoop.hdds.scm.storage.MultipartInputStream.seek(MultipartInputStream.java:139)
    at org.apache.hadoop.fs.ozone.OzoneFSInputStream.seek(OzoneFSInputStream.java:99)
    at org.apache.hadoop.fs.FSDataInputStream.seek(FSDataInputStream.java:70)
    at org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.seekOnFs(ProtobufLogReader.java:505)
    at org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:427)
    at org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:95)
    at org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:83)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:258)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:172)
    at org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
    at org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:212)
    at org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:148)
2024-07-19 19:08:35,180 WARN org.apache.hadoop.hbase.client.AsyncRequestFutureImpl: id=2, table=newtableloadtest2, attempt=6/5, failureCount=192ops, last exception=org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: org.apache.hadoop.hbase.regionserver.RegionServerAbortedException: Server ccycloud-1.ozn-hbaserepl2.xyz,22101,1721293283469 aborting
    at org.apache.hadoop.hbase.regionserver.RSRpcServices.checkOpen(RSRpcServices.java:1524)
    at org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:2691)
    at org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:45961)
    at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:387)
    at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:139)
{code}
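For reference, the EOFException is thrown by Ozone's MultipartInputStream.seek() when the requested position lies beyond the key's length. The snippet below is only a hedged sketch of that failure mode (hypothetical ofs:// path and offset), mirroring what the replication WAL reader does when it seeks back to its recorded position in the WAL key:
{code:java}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WalSeekSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical WAL key on an Ozone bucket, for illustration only.
    Path wal = new Path("ofs://ozone1/vol/bucket/hbase/WALs/some-rs/some-wal");
    FileSystem fs = wal.getFileSystem(conf);
    long len = fs.getFileStatus(wal).getLen();
    try (FSDataInputStream in = fs.open(wal)) {
      // The replication reader records the last-read WAL position and seeks
      // back to it; if that position is past the key's current length,
      // MultipartInputStream.seek() throws java.io.EOFException
      // ("EOF encountered at pos: ... for key: ..."), as seen above.
      in.seek(len + 1);
    }
  }
}
{code}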
cc: [~sammichen] [~weichiu] [~ashishk]