This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 0d1ffbdf431 HBASE-28248 Race between RegionRemoteProcedureBase and rollback operation could lead to ROLLEDBACK state be persisent to procedure store (#5567) 0d1ffbdf431 is described below commit 0d1ffbdf43133326a0ff2fa1719173fecdca0b8a Author: Duo Zhang <zhang...@apache.org> AuthorDate: Sat Dec 9 21:55:11 2023 +0800 HBASE-28248 Race between RegionRemoteProcedureBase and rollback operation could lead to ROLLEDBACK state be persisent to procedure store (#5567) Signed-off-by: GeorryHuang <huangzhuo...@apache.org> Signed-off-by: Yi Mei <myime...@gmail.com> (cherry picked from commit 82a2ce10f24a828b2c4960ba85b714a0203c8441) --- .../master/assignment/RegionRemoteProcedureBase.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index 6b6da9e3396..edfed07e588 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionRemoteProcedureBaseState; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionRemoteProcedureBaseStateData; import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; /** @@ -179,7 +180,20 @@ public abstract class RegionRemoteProcedureBase extends Procedure<MasterProcedur // A bit strange but the procedure store will throw RuntimeException if we can not persist the // state, so upper layer should take care of this... private void persistAndWake(MasterProcedureEnv env, RegionStateNode regionNode) { - env.getMasterServices().getMasterProcedureExecutor().getStore().update(this); + // The synchronization here is to guard with ProcedureExecutor.executeRollback, as here we will + // not hold the procedure execution lock, but we should not persist a procedure in ROLLEDBACK + // state to the procedure store. + // The ProcedureStore.update must be inside the lock, so here the check for procedure state and + // update could be atomic. In ProcedureExecutor.cleanupAfterRollbackOneStep, we will set the + // state to ROLLEDBACK, which will hold the same lock too as the Procedure.setState method is + // synchronized. This is the key to keep us safe. + synchronized (this) { + if (getState() == ProcedureState.ROLLEDBACK) { + LOG.warn("Procedure {} has already been rolled back, skip persistent", this); + return; + } + env.getMasterServices().getMasterProcedureExecutor().getStore().update(this); + } regionNode.getProcedureEvent().wake(env.getProcedureScheduler()); }