This is an automated email from the ASF dual-hosted git repository. zhangduo pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push: new dc73c028ea8 HBASE-28140 AbstractWALProvider may miss the WAL which is under creation in getWALs method (#5455) dc73c028ea8 is described below commit dc73c028ea87b27819524a2b8a856a280e0fea9d Author: Duo Zhang <zhang...@apache.org> AuthorDate: Fri Oct 13 22:19:18 2023 +0800 HBASE-28140 AbstractWALProvider may miss the WAL which is under creation in getWALs method (#5455) Signed-off-by: GeorryHuang <huangzhuo...@apache.org> Signed-off-by: Xiaolin Ha <haxiao...@apache.org> Signed-off-by: Wellington Chevreuil <wchevre...@apache.org> (cherry picked from commit 391dfda6adcbe42b5dcb68a4bb98f1fce49ae88c) --- .../hadoop/hbase/wal/AbstractWALProvider.java | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractWALProvider.java index e9c63fb5217..31ef3cebc2d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractWALProvider.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractWALProvider.java @@ -24,7 +24,9 @@ import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import java.util.function.BiPredicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -87,6 +89,15 @@ public abstract class AbstractWALProvider implements WALProvider, PeerActionList private final KeyLocker<String> createLock = new KeyLocker<>(); + // in getWALs we can not throw any exceptions out, so we use lock and condition here as it + // supports awaitUninterruptibly which will not throw an InterruptedException + private final Lock numRemoteWALUnderCreationLock = new 
ReentrantLock(); + private final Condition noRemoteWALUnderCreationCond = + numRemoteWALUnderCreationLock.newCondition(); + // record the number of remote WALs which are under creation. This is very important to avoid + // missing a WAL instance in getWALs method. See HBASE-28140 and related issues for more details. + private int numRemoteWALUnderCreation; + // we need to have this because when getting meta wal, there is no peer info provider yet. private SyncReplicationPeerInfoProvider peerInfoProvider = new SyncReplicationPeerInfoProvider() { @@ -150,11 +161,26 @@ public abstract class AbstractWALProvider implements WALProvider, PeerActionList WAL wal = createRemoteWAL(region, ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir), ReplicationUtils.getPeerRemoteWALDir(remoteWALDir, peerId), getRemoteWALPrefix(peerId), ReplicationUtils.SYNC_WAL_SUFFIX); + numRemoteWALUnderCreationLock.lock(); + try { + numRemoteWALUnderCreation++; + } finally { + numRemoteWALUnderCreationLock.unlock(); + } initWAL(wal); peerId2WAL.put(peerId, Optional.of(wal)); return wal; } finally { lock.unlock(); + numRemoteWALUnderCreationLock.lock(); + try { + numRemoteWALUnderCreation--; + if (numRemoteWALUnderCreation == 0) { + noRemoteWALUnderCreationCond.signalAll(); + } + } finally { + numRemoteWALUnderCreationLock.unlock(); + } } } @@ -179,6 +205,17 @@ public abstract class AbstractWALProvider implements WALProvider, PeerActionList @Override public final List<WAL> getWALs() { + List<WAL> wals = new ArrayList<WAL>(); + numRemoteWALUnderCreationLock.lock(); + try { + while (numRemoteWALUnderCreation > 0) { + noRemoteWALUnderCreationCond.awaitUninterruptibly(); + } + peerId2WAL.values().stream().filter(Optional::isPresent).map(Optional::get) + .forEach(wals::add); + } finally { + numRemoteWALUnderCreationLock.unlock(); + } return Streams .concat(peerId2WAL.values().stream().filter(Optional::isPresent).map(Optional::get), getWALs0().stream())