[
https://issues.apache.org/jira/browse/HBASE-24120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17076939#comment-17076939
]
Huaxiang Sun commented on HBASE-24120:
--------------------------------------
Adding a new type of exception, which caused the region server to abort; noting
it here as a reminder to handle this exception.
{code:java}
regionserver.ReplicationSourceWALReader(291): Filtered entry for replication:
test/7ccb7437b6e48dd03f6347b5a2e06964/27=[#edits: 0 = <>]
2020-04-07 03:30:03,676 DEBUG
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
regionserver.ReplicationSourceWALReader(171): Edit null or empty for entry
test/7ccb7437b6e48dd03f6347b5a2e06964/27=[#edits: 0 = <>]
2020-04-07 03:30:03,677 WARN
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
impl.BlockReaderFactory(768): I/O error constructing remote block reader.
java.nio.channels.ClosedByInterruptException
at
java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202)
at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:659)
at
org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
at
org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:2881)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:825)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:750)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:387)
at
org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:717)
at
org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:665)
at
org.apache.hadoop.hdfs.DFSInputStream.seekToBlockSource(DFSInputStream.java:1697)
at
org.apache.hadoop.hdfs.DFSInputStream.readBuffer(DFSInputStream.java:915)
at
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:950)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:996)
at java.io.DataInputStream.read(DataInputStream.java:149)
at java.io.FilterInputStream.read(FilterInputStream.java:133)
at java.io.PushbackInputStream.read(PushbackInputStream.java:186)
at org.apache.hadoop.io.IOUtils.readFully(IOUtils.java:209)
at
org.apache.hadoop.hbase.KeyValueUtil.createKeyValueFromInputStream(KeyValueUtil.java:716)
at
org.apache.hadoop.hbase.codec.KeyValueCodecWithTags$KeyValueDecoder.parseCell(KeyValueCodecWithTags.java:81)
at
org.apache.hadoop.hbase.codec.BaseDecoder.advance(BaseDecoder.java:68)
at org.apache.hadoop.hbase.wal.WALEdit.readFromCells(WALEdit.java:276)
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:382)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:98)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:86)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:263)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:176)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:221)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:138)
2020-04-07 03:30:03,678 ERROR
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
regionserver.ReplicationSource(397): Unexpected exception in
RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2
currentPath=hdfs://localhost:37359/user/jenkins/test-data/260e1f0f-a3fd-6192-b1d7-6568614aef58/WALs/asf905.gq1.ygridcore.net,41391,1586230117579/asf905.gq1.ygridcore.net%2C41391%2C1586230117579.1586230122806
java.lang.NullPointerException
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.extractHiddenEof(ProtobufLogReader.java:449)
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:396)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:98)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:86)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:263)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:176)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:221)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:138)
2020-04-07 03:30:03,683 ERROR
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
helpers.MarkerIgnoringBase(159): ***** ABORTING region server
asf905.gq1.ygridcore.net,41391,1586230117579: Unexpected exception in
RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2
*****
java.lang.NullPointerException
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.extractHiddenEof(ProtobufLogReader.java:449)
at
org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader.readNext(ProtobufLogReader.java:396)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:98)
at
org.apache.hadoop.hbase.regionserver.wal.ReaderBase.next(ReaderBase.java:86)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.readNextEntryAndRecordReaderPosition(WALEntryStream.java:263)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.tryAdvanceEntry(WALEntryStream.java:176)
at
org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.hasNext(WALEntryStream.java:101)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.readWALEntries(ReplicationSourceWALReader.java:221)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReader.run(ReplicationSourceWALReader.java:138)
2020-04-07 03:30:03,683 ERROR
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
helpers.MarkerIgnoringBase(143): RegionServer abort: loaded coprocessors are:
[org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint]
2020-04-07 03:30:03,690 INFO
[RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.wal-reader.asf905.gq1.ygridcore.net%2C41391%2C1586230117579,2]
regionserver.HRegionServer(2472): Dump of metrics as JSON on abort: { {code}
> Flakey Test: TestReplicationAdminWithClusters timeout
> ------------------------------------------------------
>
> Key: HBASE-24120
> URL: https://issues.apache.org/jira/browse/HBASE-24120
> Project: HBase
> Issue Type: Test
> Components: Replication
> Affects Versions: 2.3.0, master, 2.4.0
> Reporter: Huaxiang Sun
> Assignee: Hua Xiang
> Priority: Major
>
> {code:java}
> 2020-04-05 23:36:53,092 ERROR
> [RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.shipperasf905.gq1.ygridcore.net%2C42849%2C1586129728118,2]
> regionserver.ReplicationSource(397): Unexpected exception in
> RS_REFRESH_PEER-regionserver/asf905:0-0.replicationSource,2.replicationSource.shipperasf905.gq1.ygridcore.net%2C42849%2C1586129728118,2
>
> currentPath=hdfs://localhost:34203/user/jenkins/test-data/03854f9d-2780-eeaa-9645-c341240b62bf/WALs/asf905.gq1.ygridcore.net,42849,1586129728118/asf905.gq1.ygridcore.net%2C42849%2C1586129728118.1586129730509
> java.lang.RuntimeException: Thread is interrupted, the replication source may
> be terminated
> at
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager.interruptOrAbortWhenFail(ReplicationSourceManager.java:477)
> at
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager.logPositionAndCleanOldLogs(ReplicationSourceManager.java:519)
> at
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceShipper.updateLogPosition(ReplicationSourceShipper.java:264)
> at
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceShipper.shipEdits(ReplicationSourceShipper.java:160)
> at
> org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceShipper.run(ReplicationSourceShipper.java:118)
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)