Repository: geode
Updated Branches:
  refs/heads/feature/GEODE-2497 8d45ca227 -> 69e3523ec


GEODE-2497 surprise members are never timed out during startup

Moved the creation of the timer to GMSMembershipManager.started()

Removed write-lock in timer-creation method since it's only called from
one place now

Altered the way that the timer-creation method finds the
InternalDistributedSystem.  The old way of using getAnyInstance() was
the primary source of the problem since it returns null until startup
is completed.

Altered the surprise-member unit test to ensure that it's using the
timer and not relying on installation of a new membership view to clean
things up.

Altered the surprise-member unit test to run faster.  It now completes in
under 10 seconds.


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/a6dfa4ca
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/a6dfa4ca
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/a6dfa4ca

Branch: refs/heads/feature/GEODE-2497
Commit: a6dfa4ca630a82fcf92942a834f8255e86d2bfcb
Parents: dbea592
Author: Bruce Schuchardt <bschucha...@pivotal.io>
Authored: Fri Feb 17 10:17:21 2017 -0800
Committer: Bruce Schuchardt <bschucha...@pivotal.io>
Committed: Tue Feb 21 15:07:00 2017 -0800

----------------------------------------------------------------------
 .../gms/mgr/GMSMembershipManager.java           | 76 ++++++++++----------
 .../internal/DistributionManagerDUnitTest.java  | 66 +++++++----------
 2 files changed, 62 insertions(+), 80 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/a6dfa4ca/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
----------------------------------------------------------------------
diff --git 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index cf17025..9652ad5 100644
--- 
a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ 
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -608,7 +608,6 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
       }
       try {
         listener.viewInstalled(latestView);
-        startCleanupTimer();
       } catch (DistributedSystemDisconnectedException se) {
       }
     } finally {
@@ -616,6 +615,10 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
     }
   }
 
+  public boolean isCleanupTimerStarted() {
+    return this.cleanupTimer != null;
+  }
+
   /**
    * the timer used to perform periodic tasks
    * 
@@ -767,7 +770,9 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
   }
 
   @Override
-  public void started() {}
+  public void started() {
+    startCleanupTimer();
+  }
 
 
   /** this is invoked by JoinLeave when there is a loss of quorum in the 
membership system */
@@ -942,12 +947,6 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
         surpriseMembers.remove(member);
       } else {
 
-        // Now that we're sure the member is new, add them.
-        // make sure the surprise-member cleanup task is running
-        if (this.cleanupTimer == null) {
-          startCleanupTimer();
-        } // cleanupTimer == null
-
         // Ensure that the member is accounted for in the view
         // Conjure up a new view including the new member. This is necessary
         // because we are about to tell the listener about a new member, so
@@ -978,40 +977,39 @@ public class GMSMembershipManager implements 
MembershipManager, Manager {
 
   /** starts periodic task to perform cleanup chores such as expire surprise 
members */
   private void startCleanupTimer() {
+    if (this.dcReceiver == null) {
+      // junit tests don't provide a direct-channel receiver
+      return;
+    }
+    DistributedSystem ds = this.dcReceiver.getDM().getSystem();
+    this.cleanupTimer = new SystemTimer(ds, true);
+    SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() {
+      @Override
+      public void run2() {
+        cleanUpSurpriseMembers();
+      }
+    };
+    this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, 
surpriseMemberTimeout / 3);
+  }
+
+  // invoked from the cleanupTimer task
+  private void cleanUpSurpriseMembers() {
     latestViewWriteLock.lock();
     try {
-      if (this.cleanupTimer != null) {
-        return;
+      long oldestAllowed = System.currentTimeMillis() - surpriseMemberTimeout;
+      for (Iterator it = surpriseMembers.entrySet().iterator(); it.hasNext();) 
{
+        Map.Entry entry = (Map.Entry) it.next();
+        Long birthtime = (Long) entry.getValue();
+        if (birthtime.longValue() < oldestAllowed) {
+          it.remove();
+          InternalDistributedMember m = (InternalDistributedMember) 
entry.getKey();
+          logger.info(LocalizedMessage.create(
+              
LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0,
+              m));
+          removeWithViewLock(m, true,
+              "not seen in membership view in " + surpriseMemberTimeout + 
"ms");
+        }
       }
-      DistributedSystem ds = InternalDistributedSystem.getAnyInstance();
-      if (ds != null && ds.isConnected()) {
-        this.cleanupTimer = new SystemTimer(ds, true);
-        SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() {
-          @Override
-          public void run2() {
-            latestViewWriteLock.lock();
-            try {
-              long oldestAllowed = System.currentTimeMillis() - 
surpriseMemberTimeout;
-              for (Iterator it = surpriseMembers.entrySet().iterator(); 
it.hasNext();) {
-                Map.Entry entry = (Map.Entry) it.next();
-                Long birthtime = (Long) entry.getValue();
-                if (birthtime.longValue() < oldestAllowed) {
-                  it.remove();
-                  InternalDistributedMember m = (InternalDistributedMember) 
entry.getKey();
-                  logger.info(LocalizedMessage.create(
-                      
LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0,
-                      m));
-                  removeWithViewLock(m, true,
-                      "not seen in membership view in " + 
surpriseMemberTimeout + "ms");
-                }
-              }
-            } finally {
-              latestViewWriteLock.unlock();
-            }
-          }
-        };
-        this.cleanupTimer.scheduleAtFixedRate(st, surpriseMemberTimeout, 
surpriseMemberTimeout / 3);
-      } // ds != null && ds.isConnected()
     } finally {
       latestViewWriteLock.unlock();
     }

http://git-wip-us.apache.org/repos/asf/geode/blob/a6dfa4ca/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
----------------------------------------------------------------------
diff --git 
a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
 
b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
index ccce013..c795261 100644
--- 
a/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
+++ 
b/geode-core/src/test/java/org/apache/geode/distributed/internal/DistributionManagerDUnitTest.java
@@ -17,6 +17,7 @@ package org.apache.geode.distributed.internal;
 import static org.apache.geode.distributed.ConfigurationProperties.*;
 import static org.apache.geode.test.dunit.Assert.*;
 
+import org.apache.geode.test.dunit.rules.DistributedRestoreSystemProperties;
 import org.awaitility.Awaitility;
 
 import java.net.InetAddress;
@@ -28,6 +29,7 @@ import org.apache.geode.test.junit.categories.MembershipTest;
 import org.apache.logging.log4j.Logger;
 import org.junit.Assert;
 import org.junit.Ignore;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -74,6 +76,10 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
 
   public static DistributedSystem ds;
 
+  @Rule
+  public DistributedRestoreSystemProperties restoreSystemProperties =
+      new DistributedRestoreSystemProperties();
+
   /**
    * Clears the exceptionInThread flag in the given distribution manager.
    */
@@ -137,18 +143,14 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
     InternalDistributedMember idm = mgr.getLocalMember();
     // TODO GMS needs to have a system property allowing the bind-port to be 
set
     System.setProperty(DistributionConfig.GEMFIRE_PREFIX + "jg-bind-port", "" 
+ idm.getPort());
-    try {
-      sys.disconnect();
-      sys = getSystem();
-      mgr = MembershipManagerHelper.getMembershipManager(sys);
-      sys.disconnect();
-      InternalDistributedMember idm2 = mgr.getLocalMember();
-      org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
-          .info("original ID=" + idm + " and after connecting=" + idm2);
-      assertTrue("should not have used a different udp port", idm.getPort() == 
idm2.getPort());
-    } finally {
-      System.getProperties().remove(DistributionConfig.GEMFIRE_PREFIX + 
"jg-bind-port");
-    }
+    sys.disconnect();
+    sys = getSystem();
+    mgr = MembershipManagerHelper.getMembershipManager(sys);
+    sys.disconnect();
+    InternalDistributedMember idm2 = mgr.getLocalMember();
+    org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
+        .info("original ID=" + idm + " and after connecting=" + idm2);
+    assertTrue("should not have used a different udp port", idm.getPort() == 
idm2.getPort());
   }
 
   /**
@@ -158,11 +160,12 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
    * should be gone and force more view processing to have it scrubbed from 
the set.
    **/
   @Test
-  public void testSurpriseMemberHandling() {
-    VM vm0 = Host.getHost(0).getVM(0);
+  public void testSurpriseMemberHandling() throws Exception {
 
+    System.setProperty(DistributionConfig.GEMFIRE_PREFIX + 
"surprise-member-timeout", "3000");
     InternalDistributedSystem sys = getSystem();
     MembershipManager mgr = MembershipManagerHelper.getMembershipManager(sys);
+    assertTrue(((GMSMembershipManager) mgr).isCleanupTimerStarted());
 
     try {
       InternalDistributedMember mbr =
@@ -180,19 +183,13 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
           .info("current membership view is " + mgr.getView());
       org.apache.geode.test.dunit.LogWriterUtils.getLogWriter()
           .info("created ID " + mbr + " with view ID " + mbr.getVmViewId());
-      sys.getLogWriter()
-          .info("<ExpectedException action=add>attempt to add old 
member</ExpectedException>");
-      sys.getLogWriter()
-          .info("<ExpectedException action=add>Removing shunned GemFire 
node</ExpectedException>");
-      try {
-        boolean accepted = mgr.addSurpriseMember(mbr);
-        Assert.assertTrue("member with old ID was not rejected (bug #44566)", 
!accepted);
-      } finally {
-        sys.getLogWriter()
-            .info("<ExpectedException action=remove>attempt to add old 
member</ExpectedException>");
-        sys.getLogWriter().info(
-            "<ExpectedException action=remove>Removing shunned GemFire 
node</ExpectedException>");
-      }
+
+      IgnoredException.addIgnoredException("attempt to add old member");
+      IgnoredException.addIgnoredException("Removing shunned GemFire node");
+
+      boolean accepted = mgr.addSurpriseMember(mbr);
+      Assert.assertTrue("member with old ID was not rejected (bug #44566)", 
!accepted);
+
       mbr.setVmViewId(oldViewId);
 
       // now forcibly add it as a surprise member and show that it is reaped
@@ -203,27 +200,14 @@ public class DistributionManagerDUnitTest extends 
JUnit4DistributedTestCase {
       MembershipManagerHelper.addSurpriseMember(sys, mbr, birthTime);
       assertTrue("Member was not a surprise member", 
mgr.isSurpriseMember(mbr));
 
-      // force a real view change
-      SerializableRunnable connectDisconnect = new SerializableRunnable() {
-        public void run() {
-          getSystem().disconnect();
-        }
-      };
-      vm0.invoke(connectDisconnect);
-
       if (birthTime < (System.currentTimeMillis() - timeout)) {
         return; // machine is too busy and we didn't get enough CPU to perform 
more assertions
       }
       assertTrue("Member was incorrectly removed from surprise member set",
           mgr.isSurpriseMember(mbr));
 
-      try {
-        Thread.sleep(gracePeriod);
-      } catch (InterruptedException e) {
-        fail("test was interrupted", e);
-      }
+      Thread.sleep((timeout / 3) + gracePeriod);
 
-      vm0.invoke(connectDisconnect);
       assertTrue("Member was not removed from surprise member set", 
!mgr.isSurpriseMember(mbr));
 
     } finally {

Reply via email to