This is an automated email from the ASF dual-hosted git repository.

mpetrov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new 0b0915fdd67 IGNITE-21830 Fixed logging of failed attempts to connect to the previous node in the ring. (#11327)
0b0915fdd67 is described below

commit 0b0915fdd6754cb1f05ba57852475e1214d1cd57
Author: Maksim Davydov <[email protected]>
AuthorDate: Tue May 21 10:49:16 2024 +0300

    IGNITE-21830 Fixed logging of failed attempts to connect to the previous node in the ring. (#11327)
---
 .../ignite/spi/discovery/tcp/ServerImpl.java       | 28 ++++++++++------
 .../tcp/TcpDiscoveryNetworkIssuesTest.java         | 39 ++++++++++++++++++++++
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
index db09f914872..0ba98ae7097 100644
--- a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
+++ b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java
@@ -6770,7 +6770,6 @@ class ServerImpl extends TcpDiscoveryImpl {
 
                             if (previous != null && !previous.id().equals(nodeId) &&
                                 (req.checkPreviousNodeId() == null || previous.id().equals(req.checkPreviousNodeId()))) {
-                                Collection<InetSocketAddress> nodeAddrs = spi.getEffectiveNodeAddresses(previous);
 
                                 // The connection recovery connection to one node is connCheckTick.
                                 // We need to suppose network delays. So we use half of this time.
@@ -6781,13 +6780,7 @@ class ServerImpl extends TcpDiscoveryImpl {
                                         "previous [" + previous + "] with timeout " + backwardCheckTimeout);
                                 }
 
-                                liveAddr = checkConnection(new ArrayList<>(nodeAddrs), backwardCheckTimeout);
-
-                                if (log.isInfoEnabled()) {
-                                    log.info("Connection check to previous node done: [liveAddr=" + liveAddr
-                                        + ", previousNode=" + U.toShortString(previous) + ", addressesToCheck=" +
-                                        nodeAddrs + ", connectingNodeId=" + nodeId + ']');
-                                }
+                                liveAddr = checkConnection(previous, backwardCheckTimeout);
                             }
 
                             ok = liveAddr != null;
@@ -7254,9 +7247,11 @@ class ServerImpl extends TcpDiscoveryImpl {
         }
 
         /** @return Alive address if was able to connected to. {@code Null} otherwise. */
-        private InetSocketAddress checkConnection(List<InetSocketAddress> addrs, int timeout) {
+        private InetSocketAddress checkConnection(TcpDiscoveryNode node, int timeout) {
             AtomicReference<InetSocketAddress> liveAddrHolder = new AtomicReference<>();
 
+            List<InetSocketAddress> addrs = new ArrayList<>(spi.getEffectiveNodeAddresses(node));
+
             CountDownLatch latch = new CountDownLatch(addrs.size());
 
             int addrLeft = addrs.size();
@@ -7289,8 +7284,9 @@ class ServerImpl extends TcpDiscoveryImpl {
                                     liveAddrHolder.compareAndSet(null, addr);
                                 }
                             }
-                            catch (Exception ignored) {
-                                // No-op.
+                            catch (Exception e) {
+                                U.warn(log, "Failed to check connection to previous node [nodeId=" + node.id() + ", order="
+                                    + node.order() + ", address=" + addr + ']', e);
                             }
                             finally {
                                 latch.countDown();
@@ -7307,6 +7303,16 @@ class ServerImpl extends TcpDiscoveryImpl {
                 // No-op.
             }
 
+            if (liveAddrHolder.get() == null) {
+                U.warn(log, "Failed to check connection to previous node [connectingNodeId=" + nodeId
+                    + ", previousNode=" + U.toShortString(node) + ", previousNodeKnownAddresses=" + addrs + ']');
+            }
+            else if (log.isInfoEnabled()) {
+                log.info("Connection check to previous node done [connectingNodeId=" + nodeId + ", previousNode="
+                    + U.toShortString(node) + ", firstRespondedAddress=" + liveAddrHolder.get() +
+                    ", previousNodeKnownAddresses=" + addrs + ']');
+            }
+
             return liveAddrHolder.get();
         }
 
diff --git a/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java b/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
index e8b97f4ca62..12b655b3ec2 100644
--- a/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
+++ b/modules/core/src/test/java/org/apache/ignite/spi/discovery/tcp/TcpDiscoveryNetworkIssuesTest.java
@@ -22,6 +22,7 @@ import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.SocketTimeoutException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -56,6 +57,8 @@ import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAbstractMessage;
 import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeRequest;
 import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeResponse;
 import org.apache.ignite.testframework.GridTestUtils;
+import org.apache.ignite.testframework.ListeningTestLogger;
+import org.apache.ignite.testframework.LogListener;
 import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
 import org.junit.Test;
 
@@ -383,6 +386,42 @@ public class TcpDiscoveryNetworkIssuesTest extends GridCommonAbstractTest {
         }
     }
 
+    /**
+     * This test uses node failure by stopping service threads, which makes the node unresponsive and results in
+     * failing connection to the server. Failures are simulated on the 1st node in the ring. In this case,
+     * the 2nd node in the ring will trigger 'Backward Connection Check', which should result in failing attempt of connection.
+     * This result is followed by the corresponding logs, indicating described failures. The test verifies the logs.
+     *
+     * @throws Exception If failed.
+     */
+    @Test
+    public void testBackwardConnectionCheckFailedLogMessage() throws Exception {
+        ListeningTestLogger testLog = new ListeningTestLogger(log);
+
+        LogListener lsnr0 = LogListener.matches("Failed to check connection to previous node").times(2).build();
+
+        testLog.registerListener(lsnr0);
+
+        startGrid(0);
+
+        IgniteConfiguration cfg = getConfiguration(getTestIgniteInstanceName(1));
+        cfg.setGridLogger(testLog);
+
+        startGrid(cfg);
+
+        startGrid(2);
+
+        spi(grid(0)).simulateNodeFailure();
+
+        assertTrue(lsnr0.check(getTestTimeout()));
+
+        for (Ignite ig : Arrays.asList(grid(1), grid(2))) {
+            waitForCondition(() -> ig.cluster().nodes().size() == 2, getTestTimeout());
+
+            assertTrue(ig.cluster().nodes().stream().noneMatch(node -> node.order() == 1));
+        }
+    }
+
     /**
      * @param ig Ignite instance to get failedNodes collection from.
      */

Reply via email to