This is an automated email from the ASF dual-hosted git repository.

echobravo pushed a commit to branch revert/1.13
in repository https://gitbox.apache.org/repos/asf/geode.git

commit 2249dd163948be472b72dd0d0204b6a5952fc181
Author: Ernest Burghardt <eburgha...@pivotal.io>
AuthorDate: Tue Jun 22 13:50:05 2021 -0500

    Revert "GEODE-7861: Improve error reporting in GMSJoinLeave.join() (#5839)"
    
    This reverts commit 53d32c325c808d6e965a45cdf36aca1a71db2183.
---
 .../apache/geode/distributed/LocatorDUnitTest.java |   3 +-
 .../gms/membership/GMSJoinLeaveJUnitTest.java      | 100 +++++----------------
 .../internal/membership/gms/GMSMembership.java     |   7 +-
 .../membership/gms/interfaces/JoinLeave.java       |  12 +--
 .../membership/gms/membership/GMSJoinLeave.java    |  76 ++++++----------
 5 files changed, 55 insertions(+), 143 deletions(-)

diff --git 
a/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java
 
b/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java
index e0f8966..d3c1733 100644
--- 
a/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java
+++ 
b/geode-core/src/distributedTest/java/org/apache/geode/distributed/LocatorDUnitTest.java
@@ -1002,8 +1002,7 @@ public class LocatorDUnitTest implements Serializable {
 
     } catch (GemFireConfigException ex) {
       String s = ex.getMessage();
-      assertThat(s.contains("Could not contact any of the locators"))
-          .isTrue();
+      assertThat(s.contains("Locator does not exist")).isTrue();
     }
   }
 
diff --git 
a/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
 
b/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
index fd15db6..3a86a1e 100644
--- 
a/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
+++ 
b/geode-membership/src/integrationTest/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
@@ -22,14 +22,12 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
-import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.anyBoolean;
 import static org.mockito.ArgumentMatchers.anyInt;
 import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Matchers.isA;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.timeout;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
@@ -117,18 +115,11 @@ public class GMSJoinLeaveJUnitTest {
 
   public void initMocks(boolean enableNetworkPartition, boolean 
useTestGMSJoinLeave)
       throws Exception {
-    String locator = "localhost[12345]";
-    initMocks(enableNetworkPartition, useTestGMSJoinLeave, locator, locator);
-  }
-
-  public void initMocks(boolean enableNetworkPartition, boolean 
useTestGMSJoinLeave,
-      String locators, String startLocator)
-      throws Exception {
     mockConfig = mock(MembershipConfig.class);
     
when(mockConfig.isNetworkPartitionDetectionEnabled()).thenReturn(enableNetworkPartition);
     when(mockConfig.getSecurityUDPDHAlgo()).thenReturn("");
-    when(mockConfig.getStartLocator()).thenReturn(startLocator);
-    when(mockConfig.getLocators()).thenReturn(locators);
+    when(mockConfig.getStartLocator()).thenReturn("localhost[12345]");
+    when(mockConfig.getLocators()).thenReturn("localhost[12345]");
     when(mockConfig.getMcastPort()).thenReturn(0);
     when(mockConfig.getMemberTimeout()).thenReturn(2000L);
 
@@ -1432,7 +1423,14 @@ public class GMSJoinLeaveJUnitTest {
   @Test
   public void testCoordinatorFindRequestSuccess() throws Exception {
     initMocks(false);
-    mockRequestToServer(isA(HostAndPort.class));
+    HashSet<MemberIdentifier> registrants = new HashSet<>();
+    registrants.add(mockMembers[0]);
+    FindCoordinatorResponse fcr = new FindCoordinatorResponse(mockMembers[0], 
mockMembers[0], false,
+        null, registrants, false, true, null);
+
+    when(locatorClient.requestToServer(isA(HostAndPort.class),
+        isA(FindCoordinatorRequest.class), anyInt(), anyBoolean()))
+            .thenReturn(fcr);
 
     boolean foundCoordinator = gmsJoinLeave.findCoordinator();
     assertTrue(gmsJoinLeave.searchState.toString(), foundCoordinator);
@@ -1443,80 +1441,22 @@ public class GMSJoinLeaveJUnitTest {
   public void testCoordinatorFindRequestFailure() throws Exception {
     try {
       initMocks(false);
-      mockRequestToServer(eq(new HostAndPort("localhost", 12346)));
+      HashSet<MemberIdentifier> registrants = new HashSet<>();
+      registrants.add(mockMembers[0]);
+      FindCoordinatorResponse fcr = new 
FindCoordinatorResponse(mockMembers[0], mockMembers[0],
+          false, null, registrants, false, true, null);
       GMSMembershipView view = createView();
       JoinResponseMessage jrm = new JoinResponseMessage(mockMembers[0], view, 
0);
       gmsJoinLeave.setJoinResponseMessage(jrm);
 
-      assertThatThrownBy(gmsJoinLeave::join)
-          .isInstanceOf(MembershipConfigurationException.class);
-    } finally {
-    }
-  }
-
-  @Test
-  public void testJoinFailureWhenSleepInterrupted() throws Exception {
-    initMocks(false);
-    mockRequestToServer(isA(HostAndPort.class));
-
-    when(mockConfig.getMemberTimeout()).thenReturn(100L);
-    when(mockConfig.getJoinTimeout()).thenReturn(1000L);
-
-    GMSJoinLeave spyGmsJoinLeave = spy(gmsJoinLeave);
-    when(spyGmsJoinLeave.pauseIfThereIsNoCoordinator(-1, 
GMSJoinLeave.JOIN_RETRY_SLEEP))
-        .thenThrow(new InterruptedException());
-
-    assertThatThrownBy(spyGmsJoinLeave::join)
-        .isInstanceOf(MembershipConfigurationException.class)
-        .hasMessageContaining("Retry sleep interrupted");
-  }
-
-  @Test
-  public void testJoinFailureWhenTimeout() throws Exception {
-    initMocks(false);
-    mockRequestToServer(isA(HostAndPort.class));
-
-    assertThatThrownBy(() -> gmsJoinLeave.join())
-        .isInstanceOf(MembershipConfigurationException.class)
-        .hasMessageContaining("Operation timed out");
-  }
-
-  @Test
-  public void testPauseIfThereIsNoCoordinator() throws InterruptedException {
-    locatorClient = mock(TcpClient.class);
-    gmsJoinLeave = new GMSJoinLeave(locatorClient);
-    assertThat(gmsJoinLeave.pauseIfThereIsNoCoordinator(-1, 
GMSJoinLeave.JOIN_RETRY_SLEEP))
-        .isFalse();
-    assertThat(gmsJoinLeave.pauseIfThereIsNoCoordinator(1, 
GMSJoinLeave.JOIN_RETRY_SLEEP)).isTrue();
-  }
-
-  @Test
-  public void testJoinFailureWhenNoLocator() throws Exception {
-    final String locator1 = "locator1[12345]";
-    final String locator2 = "locator2[54321]";
-    locatorClient = mock(TcpClient.class);
+      when(locatorClient.requestToServer(eq(new HostAndPort("localhost", 
12346)),
+          isA(FindCoordinatorRequest.class), anyInt(), anyBoolean()))
+              .thenReturn(fcr);
 
-    initMocks(false, false, locator1 + ',' + locator2, locator1);
-    when(locatorClient.requestToServer(any(), any(), anyInt(), anyBoolean()))
-        .thenThrow(IOException.class);
-
-    assertThatThrownBy(gmsJoinLeave::join)
-        .isInstanceOf(MembershipConfigurationException.class)
-        .hasMessageContaining(
-            "Could not contact any of the locators: 
[HostAndPort[locator1:12345], HostAndPort[locator2:54321]]")
-        .hasCauseInstanceOf(IOException.class);
-  }
-
-  private void mockRequestToServer(HostAndPort hostAndPort)
-      throws IOException, ClassNotFoundException {
-    HashSet<MemberIdentifier> registrants = new HashSet<>();
-    registrants.add(mockMembers[0]);
+      assertFalse("Should not be able to join ", gmsJoinLeave.join());
+    } finally {
 
-    FindCoordinatorResponse fcr = new FindCoordinatorResponse(mockMembers[0], 
mockMembers[0], false,
-        null, registrants, false, true, null);
-    when(locatorClient.requestToServer(hostAndPort,
-        isA(FindCoordinatorRequest.class), anyInt(), anyBoolean()))
-            .thenReturn(fcr);
+    }
   }
 
   private void waitForViewAndFinalCheckInProgress(int viewId) throws 
InterruptedException {
diff --git 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java
 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java
index 2d88312..e1d1b55 100644
--- 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java
+++ 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/GMSMembership.java
@@ -571,7 +571,12 @@ public class GMSMembership<ID extends MemberIdentifier> 
implements Membership<ID
         this.isJoining = true; // added for bug #44373
 
         // connect
-        services.getJoinLeave().join();
+        boolean ok = services.getJoinLeave().join();
+
+        if (!ok) {
+          throw new MembershipConfigurationException("Unable to join the 
distributed system.  "
+              + "Operation either timed out, was stopped or Locator does not 
exist.");
+        }
 
         MembershipView<ID> initialView = 
createGeodeView(services.getJoinLeave().getView());
         latestView = new MembershipView<>(initialView, 
initialView.getViewId());
diff --git 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java
 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java
index 1880a26..7228162 100755
--- 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java
+++ 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/interfaces/JoinLeave.java
@@ -16,7 +16,6 @@ package 
org.apache.geode.distributed.internal.membership.gms.interfaces;
 
 import org.apache.geode.distributed.internal.membership.api.MemberIdentifier;
 import 
org.apache.geode.distributed.internal.membership.api.MemberStartupException;
-import 
org.apache.geode.distributed.internal.membership.api.MembershipConfigurationException;
 import org.apache.geode.distributed.internal.membership.gms.GMSMembershipView;
 
 /**
@@ -27,15 +26,10 @@ import 
org.apache.geode.distributed.internal.membership.gms.GMSMembershipView;
 public interface JoinLeave<ID extends MemberIdentifier> extends Service<ID> {
 
   /**
-   * joins the distributed system.
-   *
-   * @throws MemberStartupException if there was a problem joining the cluster 
after membership
-   *         configuration has
-   *         completed.
-   * @throws MembershipConfigurationException if operation either timed out, 
was stopped or locator
-   *         does not exist.
+   * joins the distributed system and returns true if successful, false if 
not. Throws
+   * MemberStartupException and MemberConfigurationException
    */
-  void join() throws MemberStartupException;
+  boolean join() throws MemberStartupException;
 
   /**
    * leaves the distributed system. Should be invoked before stop()
diff --git 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 7328ae3..ff5c619 100644
--- 
a/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ 
b/geode-membership/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -273,13 +273,11 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
     int lastFindCoordinatorInViewId = -1000;
     final Set<FindCoordinatorResponse<ID>> responses = new HashSet<>();
     public int responsesExpected;
-    Exception lastLocatorException;
 
     void cleanup() {
       alreadyTried.clear();
       possibleCoordinator = null;
       view = null;
-      lastLocatorException = null;
       synchronized (responses) {
         responses.clear();
       }
@@ -317,14 +315,14 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
    * @return true if successful, false if not
    */
   @Override
-  public void join() throws MemberStartupException {
+  public boolean join() throws MemberStartupException {
 
     try {
       if (Boolean.getBoolean(BYPASS_DISCOVERY_PROPERTY)) {
         synchronized (viewInstallationLock) {
           becomeCoordinator();
         }
-        return;
+        return true;
       }
 
       SearchState<ID> state = searchState;
@@ -357,11 +355,11 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
               synchronized (viewInstallationLock) {
                 becomeCoordinator();
               }
-              return;
+              return true;
             }
           } else {
             if (attemptToJoin()) {
-              return;
+              return true;
             }
             if (this.isStopping) {
               break;
@@ -385,45 +383,40 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
             break;
           }
         }
-        if (found && !state.hasContactedAJoinedLocator) {
-          try {
-            if 
(pauseIfThereIsNoCoordinator(state.possibleCoordinator.getVmViewId(), 
retrySleep)) {
+        try {
+          if (found && !state.hasContactedAJoinedLocator) {
+            // if locators are restarting they may be handing out IDs from a 
stale view that
+            // we should go through quickly. Otherwise we should sleep a bit 
to let failure
+            // detection select a new coordinator
+            if (state.possibleCoordinator.getVmViewId() < 0) {
+              logger.debug("sleeping for {} before making another attempt to 
find the coordinator",
+                  retrySleep);
+              Thread.sleep(retrySleep);
+            } else {
               // since we were given a coordinator that couldn't be used we 
should keep trying
               tries = 0;
               giveupTime = System.currentTimeMillis() + timeout;
             }
-          } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            throw new MembershipConfigurationException(
-                "Retry sleep interrupted. Giving up on joining the distributed 
system.");
           }
+        } catch (InterruptedException e) {
+          logger.debug("retry sleep interrupted - giving up on joining the 
distributed system");
+          return false;
         }
       } // for
 
       if (!this.isJoined) {
         logger.debug("giving up attempting to join the distributed system 
after "
             + (System.currentTimeMillis() - startTime) + "ms");
+      }
 
-        // to preserve old behavior we need to throw a MemberStartupException 
if
-        // unable to contact any of the locators
-        if (state.hasContactedAJoinedLocator) {
-          throw new MemberStartupException("Unable to join the distributed 
system in "
-              + (System.currentTimeMillis() - startTime) + "ms");
-        }
-
-        if (state.locatorsContacted == 0) {
-          throw new MembershipConfigurationException(
-              "Unable to join the distributed system. Could not contact any of 
the locators: "
-                  + locators,
-              state.lastLocatorException);
-        }
-
-        if (System.currentTimeMillis() > giveupTime) {
-          throw new MembershipConfigurationException(
-              "Unable to join the distributed system. Operation timed out");
-        }
+      // to preserve old behavior we need to throw a MemberStartupException if
+      // unable to contact any of the locators
+      if (!this.isJoined && state.hasContactedAJoinedLocator) {
+        throw new MemberStartupException("Unable to join the distributed 
system in "
+            + (System.currentTimeMillis() - startTime) + "ms");
       }
-      return;
+
+      return this.isJoined;
     } finally {
       // notify anyone waiting on the address to be completed
       if (this.isJoined) {
@@ -435,24 +428,6 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
     }
   }
 
-  boolean pauseIfThereIsNoCoordinator(int viewId, long retrySleep)
-      throws InterruptedException {
-    // if locators are restarting they may be handing out IDs from a stale 
view that
-    // we should go through quickly. Otherwise we should sleep a bit to let 
failure
-    // detection select a new coordinator
-    if (viewId < 0) {
-      // the process hasn't finished joining the cluster.
-      logger.debug("sleeping for {} before making another attempt to find the 
coordinator",
-          retrySleep);
-      Thread.sleep(retrySleep);
-    } else {
-      // the member has joined the cluster.
-      return true;
-    }
-
-    return false;
-  }
-
   /**
    * send a join request and wait for a reply. Process the reply. This may 
throw a
    * MemberStartupException or an exception from the authenticator, if present.
@@ -1224,7 +1199,6 @@ public class GMSJoinLeave<ID extends MemberIdentifier> 
implements JoinLeave<ID>
         } catch (IOException | ClassNotFoundException problem) {
           logger.info("Unable to contact locator " + laddr + ": " + problem);
           logger.debug("Exception thrown when contacting a locator", problem);
-          state.lastLocatorException = problem;
           if (state.locatorsContacted == 0 && System.currentTimeMillis() < 
giveUpTime) {
             try {
               Thread.sleep(FIND_LOCATOR_RETRY_SLEEP);

Reply via email to