This is an automated email from the ASF dual-hosted git repository.
mpetrov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new 0b0915fdd67 IGNITE-21830 Fixed logging of failed attempts to connect
to the previous node in the ring. (#11327)
0b0915fdd67 is described below
commit 0b0915fdd6754cb1f05ba57852475e1214d1cd57
Author: Maksim Davydov <[email protected]>
AuthorDate: Tue May 21 10:49:16 2024 +0300
IGNITE-21830 Fixed logging of failed attempts to connect to the previous
node in the ring. (#11327)
---
.../ignite/spi/discovery/tcp/ServerImpl.java | 28 ++++++++++------
.../tcp/TcpDiscoveryNetworkIssuesTest.java | 39 ++++++++++++++++++++++
2 files changed, 56 insertions(+), 11 deletions(-)
diff --git
a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
index db09f914872..0ba98ae7097 100644
---
a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
+++
b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
@@ -6770,7 +6770,6 @@ class ServerImpl extends TcpDiscoveryImpl {
if (previous != null &&
!previous.id().equals(nodeId) &&
(req.checkPreviousNodeId() == null ||
previous.id().equals(req.checkPreviousNodeId()))) {
- Collection<InetSocketAddress> nodeAddrs =
spi.getEffectiveNodeAddresses(previous);
// The connection recovery connection to one
node is connCheckTick.
// We need to suppose network delays. So we
use half of this time.
@@ -6781,13 +6780,7 @@ class ServerImpl extends TcpDiscoveryImpl {
"previous [" + previous + "] with
timeout " + backwardCheckTimeout);
}
- liveAddr = checkConnection(new
ArrayList<>(nodeAddrs), backwardCheckTimeout);
-
- if (log.isInfoEnabled()) {
- log.info("Connection check to previous
node done: [liveAddr=" + liveAddr
- + ", previousNode=" +
U.toShortString(previous) + ", addressesToCheck=" +
- nodeAddrs + ", connectingNodeId=" +
nodeId + ']');
- }
+ liveAddr = checkConnection(previous,
backwardCheckTimeout);
}
ok = liveAddr != null;
@@ -7254,9 +7247,11 @@ class ServerImpl extends TcpDiscoveryImpl {
}
/** @return Alive address if was able to connected to. {@code Null}
otherwise. */
- private InetSocketAddress checkConnection(List<InetSocketAddress>
addrs, int timeout) {
+ private InetSocketAddress checkConnection(TcpDiscoveryNode node, int
timeout) {
AtomicReference<InetSocketAddress> liveAddrHolder = new
AtomicReference<>();
+ List<InetSocketAddress> addrs = new
ArrayList<>(spi.getEffectiveNodeAddresses(node));
+
CountDownLatch latch = new CountDownLatch(addrs.size());
int addrLeft = addrs.size();
@@ -7289,8 +7284,9 @@ class ServerImpl extends TcpDiscoveryImpl {
liveAddrHolder.compareAndSet(null, addr);
}
}
- catch (Exception ignored) {
- // No-op.
+ catch (Exception e) {
+ U.warn(log, "Failed to check connection to
previous node [nodeId=" + node.id() + ", order="
+ + node.order() + ", address=" + addr +
']', e);
}
finally {
latch.countDown();
@@ -7307,6 +7303,16 @@ class ServerImpl extends TcpDiscoveryImpl {
// No-op.
}
+ if (liveAddrHolder.get() == null) {
+ U.warn(log, "Failed to check connection to previous node
[connectingNodeId=" + nodeId
+ + ", previousNode=" + U.toShortString(node) + ",
previousNodeKnownAddresses=" + addrs + ']');
+ }
+ else if (log.isInfoEnabled()) {
+ log.info("Connection check to previous node done
[connectingNodeId=" + nodeId + ", previousNode="
+ + U.toShortString(node) + ", firstRespondedAddress=" +
liveAddrHolder.get() +
+ ", previousNodeKnownAddresses=" + addrs + ']');
+ }
+
return liveAddrHolder.get();
}
diff --git
a/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
b/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
index e8b97f4ca62..12b655b3ec2 100644
---
a/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
+++
b/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
@@ -22,6 +22,7 @@ import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketTimeoutException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@@ -56,6 +57,8 @@ import
org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAbstractMessage;
import
org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeRequest;
import
org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeResponse;
import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Test;
@@ -383,6 +386,42 @@ public class TcpDiscoveryNetworkIssuesTest extends
GridCommonAbstractTest {
}
}
+ /**
+ * This test simulates a node failure by stopping service threads, which makes
the node unresponsive and causes
+ * connections to the server to fail. The failure is simulated on the 1st
node in the ring. In this case,
+ * the 2nd node in the ring triggers a 'Backward Connection Check', which
should end in a failed connection attempt.
+ * That outcome is reported by the corresponding log messages describing the
failures. The test verifies these log messages.
+ *
+ * @throws Exception If failed.
+ */
+ @Test
+ public void testBackwardConnectionCheckFailedLogMessage() throws Exception
{
+ ListeningTestLogger testLog = new ListeningTestLogger(log);
+
+ LogListener lsnr0 = LogListener.matches("Failed to check connection to
previous node").times(2).build();
+
+ testLog.registerListener(lsnr0);
+
+ startGrid(0);
+
+ IgniteConfiguration cfg =
getConfiguration(getTestIgniteInstanceName(1));
+ cfg.setGridLogger(testLog);
+
+ startGrid(cfg);
+
+ startGrid(2);
+
+ spi(grid(0)).simulateNodeFailure();
+
+ assertTrue(lsnr0.check(getTestTimeout()));
+
+ for (Ignite ig : Arrays.asList(grid(1), grid(2))) {
+ waitForCondition(() -> ig.cluster().nodes().size() == 2,
getTestTimeout());
+
+ assertTrue(ig.cluster().nodes().stream().noneMatch(node ->
node.order() == 1));
+ }
+ }
+
/**
* @param ig Ignite instance to get failedNodes collection from.
*/