This is an automated email from the ASF dual-hosted git repository. bschuchardt pushed a commit to branch feature/GEODE-7031 in repository https://gitbox.apache.org/repos/asf/geode.git
commit 82ae6edf41f7c9c8736db523cf8709d1df6cd467 Author: Bruce Schuchardt <bschucha...@pivotal.io> AuthorDate: Tue Jul 30 14:50:22 2019 -0700 GEODE-7031 Attempts to send messages to alert listeners delays network partition detection Decrease the socket-formation timeout for Alert listeners. Generally we'll already have a connection to an alert listener so the decreased timeout won't be used. In times where there are network problems, though, we often have to create a new tcp/ip connection to send an alert and we don't want these to stall for too long. --- .../membership/gms/fd/GMSHealthMonitor.java | 2 ++ .../org/apache/geode/internal/tcp/Connection.java | 11 +++++++---- .../geode/internal/tcp/ConnectionJUnitTest.java | 22 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java index 9481e9b..80c40f6 100644 --- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java +++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/fd/GMSHealthMonitor.java @@ -282,6 +282,8 @@ public class GMSHealthMonitor implements HealthMonitor { } if (nextNeighborTS == null) { + logger.debug("timestamp for {} was found null - setting current time as timestamp", + neighbour); TimeStamp customTS = new TimeStamp(currentTime); memberTimeStamps.put(neighbour, customTS); return; diff --git a/geode-core/src/main/java/org/apache/geode/internal/tcp/Connection.java b/geode-core/src/main/java/org/apache/geode/internal/tcp/Connection.java index 9bffbc9..454ed3d 100644 --- a/geode-core/src/main/java/org/apache/geode/internal/tcp/Connection.java +++ b/geode-core/src/main/java/org/apache/geode/internal/tcp/Connection.java @@ -168,14 +168,17 @@ public class Connection implements Runnable { return isReaderThread.get(); } - private int getP2PConnectTimeout() { + int getP2PConnectTimeout(DistributionConfig config) { + if (AlertingAction.isThreadAlerting()) { + return config.getMemberTimeout(); + } if (IS_P2P_CONNECT_TIMEOUT_INITIALIZED) return P2P_CONNECT_TIMEOUT; String connectTimeoutStr = System.getProperty("p2p.connectTimeout"); if (connectTimeoutStr != null) { P2P_CONNECT_TIMEOUT = Integer.parseInt(connectTimeoutStr); } else { - P2P_CONNECT_TIMEOUT = 6 * this.conduit.getDM().getConfig().getMemberTimeout(); + P2P_CONNECT_TIMEOUT = 6 * config.getMemberTimeout(); } IS_P2P_CONNECT_TIMEOUT_INITIALIZED = true; return P2P_CONNECT_TIMEOUT; @@ -513,7 +516,7 @@ public class Connection implements Runnable { this.isReceiver = true; this.owner = t; this.socket = socket; - this.conduitIdStr = owner.getConduit().getSocketId().toString(); + this.conduitIdStr = conduit.getSocketId().toString(); this.handshakeRead = false; this.handshakeCancelled = false; this.connected = true; @@ -1138,7 +1141,7 @@ public class Connection implements Runnable { setSendBufferSize(channel.socket()); channel.configureBlocking(true); - int connectTime = getP2PConnectTimeout(); + int connectTime = getP2PConnectTimeout(conduit.getDM().getConfig()); try { diff --git a/geode-core/src/test/java/org/apache/geode/internal/tcp/ConnectionJUnitTest.java b/geode-core/src/test/java/org/apache/geode/internal/tcp/ConnectionJUnitTest.java index 854685f..aba03f0 100755 --- a/geode-core/src/test/java/org/apache/geode/internal/tcp/ConnectionJUnitTest.java +++ b/geode-core/src/test/java/org/apache/geode/internal/tcp/ConnectionJUnitTest.java @@ -14,6 +14,7 @@ */ package org.apache.geode.internal.tcp; +import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.any; import static org.mockito.Mockito.isNull; import static org.mockito.Mockito.mock; @@ -21,6 +22,8 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.net.InetSocketAddress; +import java.net.Socket; +import java.net.UnknownHostException; import java.nio.channels.SocketChannel; import org.junit.Test; @@ -28,9 +31,11 @@ import org.junit.experimental.categories.Category; import org.apache.geode.CancelCriterion; import org.apache.geode.distributed.internal.DMStats; +import org.apache.geode.distributed.internal.DistributionConfig; import org.apache.geode.distributed.internal.DistributionManager; import org.apache.geode.distributed.internal.membership.InternalDistributedMember; import org.apache.geode.distributed.internal.membership.MembershipManager; +import org.apache.geode.internal.alerting.AlertingAction; import org.apache.geode.internal.net.BufferPool; import org.apache.geode.internal.net.SocketCloser; import org.apache.geode.internal.net.SocketCreator; @@ -82,4 +87,21 @@ public class ConnectionJUnitTest { conn.run(); verify(membership).suspectMember(isNull(InternalDistributedMember.class), any(String.class)); } + + @Test + public void connectTimeoutIsShortWhenAlerting() throws UnknownHostException { + ConnectionTable table = mock(ConnectionTable.class); + TCPConduit conduit = mock(TCPConduit.class); + when(table.getConduit()).thenReturn(conduit); + when(conduit.getSocketId()) + .thenReturn(new InetSocketAddress(SocketCreator.getLocalHost(), 12345)); + DistributionConfig config = mock(DistributionConfig.class); + when(config.getMemberTimeout()).thenReturn(100); + Connection connection = new Connection(table, mock(Socket.class)); + int normalTimeout = connection.getP2PConnectTimeout(config); + AlertingAction.execute(() -> { + assertThat(normalTimeout).isEqualTo(6 * connection.getP2PConnectTimeout(config)); + }); + + } }