Xiaolin Ha created HBASE-27926:
----------------------------------
Summary: DBB release too early for replication
Key: HBASE-27926
URL: https://issues.apache.org/jira/browse/HBASE-27926
Project: HBase
Issue Type: Bug
Components: Replication
Affects Versions: 2.5.5, 2.4.17, 3.0.0-alpha-4
Reporter: Xiaolin Ha
Assignee: Xiaolin Ha
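When the RegionServer (RS) of the destination cluster acts as a client to
forward the replicated entries and encounters an exception, the direct
ByteBuffer (DBB) backing those entries is released too early by the call to
RpcResponse#done() in NettyRpcServerResponseEncoder. As the stack below shows,
a client retry thread (AsyncRequestFutureImpl) then still reads a cell from
that buffer, and the JVM crashes in Unsafe.copyMemory.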
The coredump and log details are as follows:
{code:java}
Stack: [0x00007f92d9e6d000,0x00007f92d9f6e000], sp=0x00007f92d9f6be18, free
space=1019kNative frames: (J=compiled Java code, j=interpreted, Vv=VM code,
C=native code)C [libc.so.6+0x89db4] _wordcopy_fwd_dest_aligned+0xd4
Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)J 3297
sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V (0 bytes)
@ 0x00007fad7d9aa267 [0x00007fad7d9aa200+0x67]j
org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V+36j
org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V+69j
org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V+39j
org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I+31J
15658 C1
org.apache.hadoop.hbase.CellUtil.cloneQualifier(Lorg/apache/hadoop/hbase/Cell;)[B
(18 bytes) @ 0x00007fad7e9a6c2c [0x00007fad7e9a6aa0+0x18c]j
org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B+1j
org.apache.hadoop.hbase.client.Mutation.cellToStringMap(Lorg/apache/hadoop/hbase/Cell;)Ljava/util/Map;+12j
org.apache.hadoop.hbase.client.Mutation.toMap(I)Ljava/util/Map;+189j
org.apache.hadoop.hbase.client.Operation.toJSON(I)Ljava/lang/String;+2j
org.apache.hadoop.hbase.client.Operation.toString(I)Ljava/lang/String;+2j
org.apache.hadoop.hbase.client.Operation.toString()Ljava/lang/String;+2J 8353
C2 java.lang.StringBuilder.append(Ljava/lang/Object;)Ljava/lang/StringBuilder;
(9 bytes) @ 0x00007fad7ea0a1bc [0x00007fad7ea0a180+0x3c]j
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.manageLocationError(Lorg/apache/hadoop/hbase/client/Action;Ljava/lang/Exception;)V+28j
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.groupAndSendMultiAction(Ljava/util/List;I)V+163J
23463 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.resubmit(Lorg/apache/hadoop/hbase/ServerName;Ljava/util/List;IILjava/lang/Throwable;)V
(214 bytes) @ 0x00007fad80effb54 [0x00007fad80eff7a0+0x3b4]J 19097 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.receiveGlobalFailure(Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V
(312 bytes) @ 0x00007fad7ff53370 [0x00007fad7ff52fa0+0x3d0]J 20201 C1
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.access$1600(Lorg/apache/hadoop/hbase/client/AsyncRequestFutureImpl;Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V
(12 bytes) @ 0x00007fad803f31dc [0x00007fad803f3180+0x5c]J 18619 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl$SingleServerRequestRunnable.run()V
(677 bytes) @ 0x00007fad7f40a8b4 [0x00007fad7f409160+0x1754]J 13220 C2
java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V
(225 bytes) @ 0x00007fad7f3b3a28 [0x00007fad7f3b38a0+0x188]J 10884 C1
java.util.concurrent.ThreadPoolExecutor$Worker.run()V (9 bytes) @
0x00007fad7db53c44 [0x00007fad7db53b40+0x104]J 7961 C1 java.lang.Thread.run()V
(17 bytes) @ 0x00007fad7d61bbfc [0x00007fad7d61bac0+0x13c]v
~StubRoutines::call_stub
Stack: [0x00007f92d9e6d000,0x00007f92d9f6e000],
sp=0x00007f92d9f6be18, free space=1019kNative frames: (J=compiled Java code,
j=interpreted, Vv=VM code, C=native code)C [libc.so.6+0x89db4]
_wordcopy_fwd_dest_aligned+0xd4
Java frames: (J=compiled Java code, j=interpreted, Vv=VM code)J 3297
sun.misc.Unsafe.copyMemory(Ljava/lang/Object;JLjava/lang/Object;JJ)V (0 bytes)
@ 0x00007fad7d9aa267 [0x00007fad7d9aa200+0x67]j
org.apache.hadoop.hbase.util.UnsafeAccess.unsafeCopy(Ljava/lang/Object;JLjava/lang/Object;JJ)V+36j
org.apache.hadoop.hbase.util.UnsafeAccess.copy(Ljava/nio/ByteBuffer;I[BII)V+69j
org.apache.hadoop.hbase.util.ByteBufferUtils.copyFromBufferToArray([BLjava/nio/ByteBuffer;III)V+39j
org.apache.hadoop.hbase.CellUtil.copyQualifierTo(Lorg/apache/hadoop/hbase/Cell;[BI)I+31J
15658 C1
org.apache.hadoop.hbase.CellUtil.cloneQualifier(Lorg/apache/hadoop/hbase/Cell;)[B
(18 bytes) @ 0x00007fad7e9a6c2c [0x00007fad7e9a6aa0+0x18c]j
org.apache.hadoop.hbase.ByteBufferKeyValue.getQualifierArray()[B+1j
org.apache.hadoop.hbase.client.Mutation.cellToStringMap(Lorg/apache/hadoop/hbase/Cell;)Ljava/util/Map;+12j
org.apache.hadoop.hbase.client.Mutation.toMap(I)Ljava/util/Map;+189j
org.apache.hadoop.hbase.client.Operation.toJSON(I)Ljava/lang/String;+2j
org.apache.hadoop.hbase.client.Operation.toString(I)Ljava/lang/String;+2j
org.apache.hadoop.hbase.client.Operation.toString()Ljava/lang/String;+2J 8353
C2 java.lang.StringBuilder.append(Ljava/lang/Object;)Ljava/lang/StringBuilder;
(9 bytes) @ 0x00007fad7ea0a1bc [0x00007fad7ea0a180+0x3c]j
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.manageLocationError(Lorg/apache/hadoop/hbase/client/Action;Ljava/lang/Exception;)V+28j
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.groupAndSendMultiAction(Ljava/util/List;I)V+163J
23463 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.resubmit(Lorg/apache/hadoop/hbase/ServerName;Ljava/util/List;IILjava/lang/Throwable;)V
(214 bytes) @ 0x00007fad80effb54 [0x00007fad80eff7a0+0x3b4]J 19097 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.receiveGlobalFailure(Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V
(312 bytes) @ 0x00007fad7ff53370 [0x00007fad7ff52fa0+0x3d0]J 20201 C1
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.access$1600(Lorg/apache/hadoop/hbase/client/AsyncRequestFutureImpl;Lorg/apache/hadoop/hbase/client/MultiAction;Lorg/apache/hadoop/hbase/ServerName;ILjava/lang/Throwable;Z)V
(12 bytes) @ 0x00007fad803f31dc [0x00007fad803f3180+0x5c]J 18619 C2
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl$SingleServerRequestRunnable.run()V
(677 bytes) @ 0x00007fad7f40a8b4 [0x00007fad7f409160+0x1754]J 13220 C2
java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V
(225 bytes) @ 0x00007fad7f3b3a28 [0x00007fad7f3b38a0+0x188]J 10884 C1
java.util.concurrent.ThreadPoolExecutor$Worker.run()V (9 bytes) @
0x00007fad7db53c44 [0x00007fad7db53b40+0x104]J 7961 C1 java.lang.Thread.run()V
(17 bytes) @ 0x00007fad7d61bbfc [0x00007fad7d61bac0+0x13c]v
~StubRoutines::call_stub {code}
{code:java}
2023-06-13 09:04:08,106 ERROR
[RpcServer.replication.FPBQ.Fifo.handler=9,queue=0,port=16020]
regionserver.ReplicationSink: Unable to accept edit because:
org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException: Failed 816
actions: NotServingRegionException: 816 times, servers with issues:
XXXXXX,XXXX,1686558427722,
at
org.apache.hadoop.hbase.client.BatchErrors.makeException(BatchErrors.java:53)
at
org.apache.hadoop.hbase.client.AsyncRequestFutureImpl.getErrors(AsyncRequestFutureImpl.java:1309)
at org.apache.hadoop.hbase.client.HTable.batch(HTable.java:936)
at org.apache.hadoop.hbase.client.HTable.batch(HTable.java:927)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSink.batch(ReplicationSink.java:434)
at
org.apache.hadoop.hbase.replication.regionserver.ReplicationSink.replicateEntries(ReplicationSink.java:261)
at
org.apache.hadoop.hbase.replication.regionserver.Replication.replicateLogEntries(Replication.java:187)
at
org.apache.hadoop.hbase.regionserver.RSRpcServices.replicateWALEntry(RSRpcServices.java:2094)
at
org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos$AdminService$2.callBlockingMethod(AdminProtos.java:32335)
at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:396)
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:135)
at org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:103)
at org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:83) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)