This is an automated email from the ASF dual-hosted git repository. zhouxj pushed a commit to branch feature/GEODE-9060 in repository https://gitbox.apache.org/repos/asf/geode.git
commit faf056d657463cdfee50e402a3861ed86e232f09 Author: Xiaojian Zhou <[email protected]> AuthorDate: Thu Apr 1 15:37:21 2021 -0700 GEODE-9060: Remove the member from a copy of replicates as GII candid… (#6246) * GEODE-9060: Remove the member from a copy of replicates as GII candidate if it's not part of the same distributed system, but leave original replicates unchanged. (cherry picked from commit 76a5afddc9eaecea9f0b4528a910eb6761e1a3a1) --- .../PersistentRecoveryOrderDUnitTest.java | 34 +++++++++++++++++++++- .../cache/persistence/PersistenceAdvisorImpl.java | 18 ++++++++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java b/geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java index 1270e89..4f1f436 100644 --- a/geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java +++ b/geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java @@ -876,11 +876,43 @@ public class PersistentRecoveryOrderDUnitTest extends CacheTestCase { Throwable thrown = catchThrowable(() -> { createReplicateRegion(regionName, getDiskDirs(getVMId())); }); - assertThat(thrown).isInstanceOf(ConflictingPersistentDataException.class); + assertThat(thrown) + .isInstanceOf(ConflictingPersistentDataException.class) + .hasMessageContaining("was not part of the same distributed system as the local data"); } }); } + @Test + public void testRecoverableSplitBrain() { + vm2.invoke(() -> { + createReplicateRegion(regionName, getDiskDirs(getVMId())); + }); + vm0.invoke(() -> { + createReplicateRegion(regionName, getDiskDirs(getVMId())); + putEntry("A", "B"); + getCache().getRegion(regionName).close(); + }); + + vm1.invoke(() -> { + createReplicateRegion(regionName, getDiskDirs(getVMId())); + validateEntry("A", "B"); + updateEntry("A", "C"); + getCache().getRegion(regionName).close(); + }); + + // VM0 doesn't know that VM1 ever existed so it will start up. + vm0.invoke(() -> { + createReplicateRegion(regionName, getDiskDirs(getVMId())); + validateEntry("A", "C"); + }); + + vm1.invoke(() -> { + createReplicateRegion(regionName, getDiskDirs(getVMId())); + validateEntry("A", "C"); + }); + } + /** * Test to make sure that if if a member crashes while a GII is in progress, we wait for the * member to come back for starting. diff --git a/geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java b/geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java index 11f2563..073a632 100644 --- a/geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java +++ b/geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java @@ -510,18 +510,18 @@ public class PersistenceAdvisorImpl implements InternalPersistenceAdvisor { public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates) throws ReplyException { PersistentStateQueryResults remoteStates = getMyStateOnMembers(replicates); + Set<InternalDistributedMember> copyOfReplicates = null; persistenceAdvisorObserver.observe(regionPath); boolean equal = false; + PersistentMemberID myId = getPersistentID(); for (Map.Entry<InternalDistributedMember, PersistentMemberState> entry : remoteStates .getStateOnPeers().entrySet()) { InternalDistributedMember member = entry.getKey(); PersistentMemberID remoteId = remoteStates.getPersistentIds().get(member); - PersistentMemberID myId = getPersistentID(); PersistentMemberState stateOnPeer = entry.getValue(); - if (PersistentMemberState.REVOKED.equals(stateOnPeer)) { throw new RevokedPersistentDataException( String.format( @@ -533,7 +533,19 @@ public class PersistenceAdvisorImpl implements InternalPersistenceAdvisor { String message = String.format( "Region %s remote member %s with persistent data %s was not part of the same distributed system as the local data from %s", regionPath, member, remoteId, myId); - throw new ConflictingPersistentDataException(message); + // Conceptually the removed member due to not knowing current member, should be equal to + // existing replicates. + // It can still be used as GII provider candidate. Use a copyOfReplicates to avoid modifying + // the replicates. + if (copyOfReplicates == null) { + copyOfReplicates = new HashSet<>(replicates); + } + copyOfReplicates.remove(member); + if (copyOfReplicates.isEmpty()) { + throw new ConflictingPersistentDataException(message); + } else { + logger.info(message); + } } if (myId != null && stateOnPeer == PersistentMemberState.EQUAL) {
