Repository: geode Updated Branches: refs/heads/feature/GEODE-2497 8d45ca227 -> 69e3523ec
GEODE-2497 surprise members are never timed out during startup Moved the creation of the timer to GMSMembershipManager.started() Removed write-lock in timer-creation method since it's only called from one place now Altered the way that the timer-creation method finds the InternalDistributedSystem. The old way of using getAnyInstance() was the primary source of the problem since it returns null until startup is completed. Altered the surprise-member unit test to ensure that it's using the timer and not relying on installation of a new membership view to clean things up. Altered the surprise-member unit test to run faster. It now completes in under 10 seconds. Project: http://git-wip-us.apache.org/repos/asf/geode/repo Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/a6dfa4ca Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/a6dfa4ca Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/a6dfa4ca Branch: refs/heads/feature/GEODE-2497 Commit: a6dfa4ca630a82fcf92942a834f8255e86d2bfcb Parents: dbea592 Author: Bruce Schuchardt <bschucha...@pivotal.io> Authored: Fri Feb 17 10:17:21 2017 -0800 Committer: Bruce Schuchardt <bschucha...@pivotal.io> Committed: Tue Feb 21 15:07:00 2017 -0800 ---------------------------------------------------------------------- .../gms/mgr/GMSMembershipManager.java | 76 ++++++++++---------- .../internal/DistributionManagerDUnitTest.java | 66 +++++++---------- 2 files changed, 62 insertions(+), 80 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/geode/blob/a6dfa4ca/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java ---------------------------------------------------------------------- diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java index cf17025..9652ad5 100644 --- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java +++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java @@ -608,7 +608,6 @@ public class GMSMembershipManager implements MembershipManager, Manager { } try { listener.viewInstalled(latestView); - startCleanupTimer(); } catch (DistributedSystemDisconnectedException se) { } } finally { @@ -616,6 +615,10 @@ public class GMSMembershipManager implements MembershipManager, Manager { } } + public boolean isCleanupTimerStarted() { + return this.cleanupTimer != null; + } + /** * the timer used to perform periodic tasks * @@ -767,7 +770,9 @@ public class GMSMembershipManager implements MembershipManager, Manager { } @Override - public void started() {} + public void started() { + startCleanupTimer(); + } /** this is invoked by JoinLeave when there is a loss of quorum in the membership system */ @@ -942,12 +947,6 @@ public class GMSMembershipManager implements MembershipManager, Manager { surpriseMembers.remove(member); } else { - // Now that we're sure the member is new, add them. - // make sure the surprise-member cleanup task is running - if (this.cleanupTimer == null) { - startCleanupTimer(); - } // cleanupTimer == null - // Ensure that the member is accounted for in the view // Conjure up a new view including the new member. This is necessary // because we are about to tell the listener about a new member, so @@ -978,40 +977,39 @@ public class GMSMembershipManager implements MembershipManager, Manager { /** starts periodic task to perform cleanup chores such as expire surprise members */ private void startCleanupTimer() { + if (this.dcReceiver == null) { + // junit tests don't provide a direct-channel receiver + return; + } + DistributedSystem ds = this.dcReceiver.getDM().getSystem(); + this.cleanupTimer = new SystemTimer(ds, true); + SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() { + @Override + public void run2() { + cleanUpSurpriseMembers(); + } + }; + this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, surpriseMemberTimeout / 3); + } + + // invoked from the cleanupTimer task + private void cleanUpSurpriseMembers() { latestViewWriteLock.lock(); try { - if (this.cleanupTimer != null) { - return; + long oldestAllowed = System.currentTimeMillis() - surpriseMemberTimeout; + for (Iterator it = surpriseMembers.entrySet().iterator(); it.hasNext();) { + Map.Entry entry = (Map.Entry) it.next(); + Long birthtime = (Long) entry.getValue(); + if (birthtime.longValue() < oldestAllowed) { + it.remove(); + InternalDistributedMember m = (InternalDistributedMember) entry.getKey(); + logger.info(LocalizedMessage.create( + LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0, + m)); + removeWithViewLock(m, true, + "not seen in membership view in " + surpriseMemberTimeout + "ms"); + } } - DistributedSystem ds = InternalDistributedSystem.getAnyInstance(); - if (ds != null && ds.isConnected()) { - this.cleanupTimer = new SystemTimer(ds, true); - SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() { - @Override - public void run2() { - latestViewWriteLock.lock(); - try { - long oldestAllowed = System.currentTimeMillis() - surpriseMemberTimeout; - for (Iterator it = surpriseMembers.entrySet().iterator(); it.hasNext();) { - Map.Entry entry = (Map.Entry) it.next(); - Long birthtime = (Long) entry.getValue(); - if (birthtime.longValue() < oldestAllowed) { - it.remove(); - InternalDistributedMember m = (InternalDistributedMember) entry.getKey(); - logger.info(LocalizedMessage.create( - LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0, - m)); - removeWithViewLock(m, true, - "not seen in membership view in " + surpriseMemberTimeout + "ms"); - } - } - } finally { - latestViewWriteLock.unlock(); - } - } - }; - this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, surpriseMemberTimeout / 3); - } // ds != null && ds.isConnected() } finally { latestViewWriteLock.unlock(); } http://git-wip-us.apache.org/repos/asf/geode/blob/a6dfa4ca/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java ---------------------------------------------------------------------- diff --git a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java index ccce013..c795261 100644 --- a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java +++ b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java @@ -17,6 +17,7 @@ package org.apache.geode.distributed.internal; import static org.apache.geode.distributed.ConfigurationProperties.*; import static org.apache.geode.test.dunit.Assert.*; +import org.apache.geode.test.dunit.rules.DistributedRestoreSystemProperties; import org.awaitility.Awaitility; import java.net.InetAddress; @@ -28,6 +29,7 @@ import org.apache.geode.test.junit.categories.MembershipTest; import org.apache.logging.log4j.Logger; import org.junit.Assert; import org.junit.Ignore; +import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -74,6 +76,10 @@ public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase { public static DistributedSystem ds; + @Rule + public DistributedRestoreSystemProperties restoreSystemProperties = + new DistributedRestoreSystemProperties(); + /** * Clears the exceptionInThread flag in the given distribution manager. */ @@ -137,18 +143,14 @@ public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase { InternalDistributedMember idm = mgr.getLocalMember(); // TODO GMS needs to have a system property allowing the bind-port to be set System.setProperty(DistributionConfig.GEMFIRE_PREFIX + "jg-bind-port", "" + idm.getPort()); - try { - sys.disconnect(); - sys = getSystem(); - mgr = MembershipManagerHelper.getMembershipManager(sys); - sys.disconnect(); - InternalDistributedMember idm2 = mgr.getLocalMember(); - org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() - .info("original ID=" + idm + " and after connecting=" + idm2); - assertTrue("should not have used a different udp port", idm.getPort() == idm2.getPort()); - } finally { - System.getProperties().remove(DistributionConfig.GEMFIRE_PREFIX + "jg-bind-port"); - } + sys.disconnect(); + sys = getSystem(); + mgr = MembershipManagerHelper.getMembershipManager(sys); + sys.disconnect(); + InternalDistributedMember idm2 = mgr.getLocalMember(); + org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() + .info("original ID=" + idm + " and after connecting=" + idm2); + assertTrue("should not have used a different udp port", idm.getPort() == idm2.getPort()); } /** @@ -158,11 +160,12 @@ public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase { * should be gone and force more view processing to have it scrubbed from the set. **/ @Test - public void testSurpriseMemberHandling() { - VM vm0 = Host.getHost(0).getVM(0); + public void testSurpriseMemberHandling() throws Exception { + System.setProperty(DistributionConfig.GEMFIRE_PREFIX + "surprise-member-timeout", "3000"); InternalDistributedSystem sys = getSystem(); MembershipManager mgr = MembershipManagerHelper.getMembershipManager(sys); + assertTrue(((GMSMembershipManager) mgr).isCleanupTimerStarted()); try { InternalDistributedMember mbr = @@ -180,19 +183,13 @@ public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase { .info("current membership view is " + mgr.getView()); org.apache.geode.test.dunit.LogWriterUtils.getLogWriter() .info("created ID " + mbr + " with view ID " + mbr.getVmViewId()); - sys.getLogWriter() - .info("<ExpectedException action=add>attempt to add old member</ExpectedException>"); - sys.getLogWriter() - .info("<ExpectedException action=add>Removing shunned GemFire node</ExpectedException>"); - try { - boolean accepted = mgr.addSurpriseMember(mbr); - Assert.assertTrue("member with old ID was not rejected (bug #44566)", !accepted); - } finally { - sys.getLogWriter() - .info("<ExpectedException action=remove>attempt to add old member</ExpectedException>"); - sys.getLogWriter().info( - "<ExpectedException action=remove>Removing shunned GemFire node</ExpectedException>"); - } + + IgnoredException.addIgnoredException("attempt to add old member"); + IgnoredException.addIgnoredException("Removing shunned GemFire node"); + + boolean accepted = mgr.addSurpriseMember(mbr); + Assert.assertTrue("member with old ID was not rejected (bug #44566)", !accepted); + mbr.setVmViewId(oldViewId); // now forcibly add it as a surprise member and show that it is reaped @@ -203,27 +200,14 @@ public class DistributionManagerDUnitTest extends JUnit4DistributedTestCase { MembershipManagerHelper.addSurpriseMember(sys, mbr, birthTime); assertTrue("Member was not a surprise member", mgr.isSurpriseMember(mbr)); - // force a real view change - SerializableRunnable connectDisconnect = new SerializableRunnable() { - public void run() { - getSystem().disconnect(); - } - }; - vm0.invoke(connectDisconnect); - if (birthTime < (System.currentTimeMillis() - timeout)) { return; // machine is too busy and we didn't get enough CPU to perform more assertions } assertTrue("Member was incorrectly removed from surprise member set", mgr.isSurpriseMember(mbr)); - try { - Thread.sleep(gracePeriod); - } catch (InterruptedException e) { - fail("test was interrupted", e); - } + Thread.sleep((timeout / 3) + gracePeriod); - vm0.invoke(connectDisconnect); assertTrue("Member was not removed from surprise member set", !mgr.isSurpriseMember(mbr)); } finally {