This is an automated email from the ASF dual-hosted git repository.

alexpl pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git


The following commit(s) were added to refs/heads/master by this push:
     new f32480505f0 IGNITE-21630 Fix cluster failure on topology change when 
DNS service is unavailable - Fixes #11260.
f32480505f0 is described below

commit f32480505f0b5a827b760789bc724008e68a6f3e
Author: Aleksey Plekhanov <[email protected]>
AuthorDate: Fri Mar 15 09:40:29 2024 +0300

    IGNITE-21630 Fix cluster failure on topology change when DNS service is 
unavailable - Fixes #11260.
    
    Signed-off-by: Aleksey Plekhanov <[email protected]>
---
 .../apache/ignite/internal/util/IgniteUtils.java   |  97 +++------------
 .../java/java/net/BlockingDnsInet4AddressImpl.java |  34 ++++++
 .../java/java/net/BlockingDnsInet6AddressImpl.java |  34 ++++++
 .../src/main/java/java/net/DnsBlocker.java         |  99 +++++++++++++++
 .../ignitetest/services/utils/ignite_aware.py      |  13 +-
 .../tests/ignitetest/tests/dns_failure_test.py     | 136 +++++++++++++++++++++
 6 files changed, 331 insertions(+), 82 deletions(-)

diff --git 
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java 
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
index aabce7ae87d..7ea6899e696 100755
--- 
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
+++ 
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
@@ -9692,67 +9692,7 @@ public abstract class IgniteUtils {
     }
 
     /**
-     * Returns tha list of resolved inet addresses. First addresses are 
resolved by host names,
-     * if this attempt fails then the addresses are resolved by ip addresses.
-     *
-     * @param node Grid node.
-     * @return Inet addresses for given addresses and host names.
-     * @throws IgniteCheckedException If non of addresses can be resolved.
-     */
-    public static Collection<InetAddress> toInetAddresses(ClusterNode node) 
throws IgniteCheckedException {
-        return toInetAddresses(node.addresses(), node.hostNames());
-    }
-
-    /**
-     * Returns tha list of resolved inet addresses. First addresses are 
resolved by host names,
-     * if this attempt fails then the addresses are resolved by ip addresses.
-     *
-     * @param addrs Addresses.
-     * @param hostNames Host names.
-     * @return Inet addresses for given addresses and host names.
-     * @throws IgniteCheckedException If non of addresses can be resolved.
-     */
-    public static Collection<InetAddress> toInetAddresses(Collection<String> 
addrs,
-        Collection<String> hostNames) throws IgniteCheckedException {
-        Set<InetAddress> res = new HashSet<>(addrs.size());
-
-        Iterator<String> hostNamesIt = hostNames.iterator();
-
-        for (String addr : addrs) {
-            String hostName = hostNamesIt.hasNext() ? hostNamesIt.next() : 
null;
-
-            InetAddress inetAddr = null;
-
-            if (!F.isEmpty(hostName)) {
-                try {
-                    inetAddr = InetAddress.getByName(hostName);
-                }
-                catch (UnknownHostException ignored) {
-                }
-            }
-
-            if (inetAddr == null || inetAddr.isLoopbackAddress()) {
-                try {
-                    inetAddr = InetAddress.getByName(addr);
-                }
-                catch (UnknownHostException ignored) {
-                }
-            }
-
-            if (inetAddr != null)
-                res.add(inetAddr);
-        }
-
-        if (res.isEmpty())
-            throw new IgniteCheckedException("Addresses can not be resolved 
[addr=" + addrs +
-                ", hostNames=" + hostNames + ']');
-
-        return res;
-    }
-
-    /**
-     * Returns tha list of resolved socket addresses. First addresses are 
resolved by host names,
-     * if this attempt fails then the addresses are resolved by ip addresses.
+     * Returns the list of resolved socket addresses.
      *
      * @param node Grid node.
      * @param port Port.
@@ -9763,37 +9703,38 @@ public abstract class IgniteUtils {
     }
 
     /**
-     * Returns tha list of resolved socket addresses. First addresses are 
resolved by host names,
-     * if this attempt fails then the addresses are resolved by ip addresses.
+     * Returns the list of resolved socket addresses.
      *
      * @param addrs Addresses.
      * @param hostNames Host names.
      * @param port Port.
      * @return Socket addresses for given addresses and host names.
      */
-    public static Collection<InetSocketAddress> 
toSocketAddresses(Collection<String> addrs,
-        Collection<String> hostNames, int port) {
+    public static Collection<InetSocketAddress> toSocketAddresses(
+        Collection<String> addrs,
+        Collection<String> hostNames,
+        int port
+    ) {
         Set<InetSocketAddress> res = new HashSet<>(addrs.size());
 
-        Iterator<String> hostNamesIt = hostNames.iterator();
+        boolean hasAddr = false;
 
         for (String addr : addrs) {
-            String hostName = hostNamesIt.hasNext() ? hostNamesIt.next() : 
null;
+            InetSocketAddress inetSockAddr = createResolved(addr, port);
+            res.add(inetSockAddr);
 
-            if (!F.isEmpty(hostName)) {
-                InetSocketAddress inetSockAddr = createResolved(hostName, 
port);
+            if (!inetSockAddr.isUnresolved() && 
!inetSockAddr.getAddress().isLoopbackAddress())
+                hasAddr = true;
+        }
 
-                if (inetSockAddr.isUnresolved() ||
-                    (!inetSockAddr.isUnresolved() && 
inetSockAddr.getAddress().isLoopbackAddress())
-                )
-                    inetSockAddr = createResolved(addr, port);
+        // Try to resolve addresses from host names if no external addresses 
found.
+        if (!hasAddr) {
+            for (String host : hostNames) {
+                InetSocketAddress inetSockAddr = createResolved(host, port);
 
-                res.add(inetSockAddr);
+                if (!inetSockAddr.isUnresolved())
+                    res.add(inetSockAddr);
             }
-
-            // Always append address because local and remote nodes may have 
the same hostname
-            // therefore remote hostname will always be resolved to local 
address.
-            res.add(createResolved(addr, port));
         }
 
         return res;
diff --git 
a/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java 
b/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java
new file mode 100644
index 00000000000..9e98aa37c8a
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+/** */
+public class BlockingDnsInet4AddressImpl extends Inet4AddressImpl {
+    /** {@inheritDoc} */
+    @Override public InetAddress[] lookupAllHostAddr(String hostname) throws 
UnknownHostException {
+        DnsBlocker.INSTANCE.onHostResolve(this, hostname);
+
+        return super.lookupAllHostAddr(hostname);
+    }
+
+    /** {@inheritDoc} */
+    @Override public String getHostByAddr(byte[] addr) throws 
UnknownHostException {
+        DnsBlocker.INSTANCE.onAddrResolve(this, addr);
+
+        return super.getHostByAddr(addr);
+    }
+}
diff --git 
a/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java 
b/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java
new file mode 100644
index 00000000000..47a0a025241
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+/** */
+public class BlockingDnsInet6AddressImpl extends Inet6AddressImpl {
+    /** {@inheritDoc} */
+    @Override public InetAddress[] lookupAllHostAddr(String hostname) throws 
UnknownHostException {
+        DnsBlocker.INSTANCE.onHostResolve(this, hostname);
+
+        return super.lookupAllHostAddr(hostname);
+    }
+
+    /** {@inheritDoc} */
+    @Override public String getHostByAddr(byte[] addr) throws 
UnknownHostException {
+        DnsBlocker.INSTANCE.onAddrResolve(this, addr);
+
+        return super.getHostByAddr(addr);
+    }
+}
diff --git a/modules/ducktests/src/main/java/java/net/DnsBlocker.java 
b/modules/ducktests/src/main/java/java/net/DnsBlocker.java
new file mode 100644
index 00000000000..8f939281220
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/DnsBlocker.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.Scanner;
+
+/** */
+public class DnsBlocker {
+    /** */
+    private static final String BLOCK_DNS_FILE = "/tmp/block_dns";
+
+    /** */
+    public static final DnsBlocker INSTANCE = new DnsBlocker();
+
+    /** */
+    private DnsBlocker() {
+        // No-op.
+    }
+
+    /**
+     * Check and block hostname resolve request if needed.
+     * @param impl Implementation.
+     * @param hostname Hostname.
+     */
+    public void onHostResolve(InetAddressImpl impl, String hostname) throws 
UnknownHostException {
+        if (!impl.loopbackAddress().getHostAddress().equals(hostname))
+            check(hostname);
+    }
+
+    /**
+     * Check and block address resolve request if needed.
+     * @param impl Implementation.
+     * @param addr Address.
+     */
+    public void onAddrResolve(InetAddressImpl impl, byte[] addr) throws 
UnknownHostException {
+        if (!Arrays.equals(impl.loopbackAddress().getAddress(), addr))
+            check(InetAddress.getByAddress(addr).toString());
+    }
+
+    /** */
+    private void check(String req) throws UnknownHostException {
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
+        File file = new File(BLOCK_DNS_FILE);
+
+        if (file.exists()) {
+            try {
+                Scanner scanner = new Scanner(file);
+                if (!scanner.hasNextLong())
+                    throw new RuntimeException("Wrong " + BLOCK_DNS_FILE + " 
file format");
+
+                long timeout = scanner.nextLong();
+
+                if (!scanner.hasNextBoolean())
+                    throw new RuntimeException("Wrong " + BLOCK_DNS_FILE + " 
file format");
+
+                boolean fail = scanner.nextBoolean();
+
+                // Can't use a logger here, because this class needs to be in the 
bootstrap classloader.
+                System.out.println(sdf.format(new Date()) + " [" + 
Thread.currentThread().getName() +
+                    "] DNS request " + req + " blocked for " + timeout + " 
ms");
+
+                Thread.dumpStack();
+
+                Thread.sleep(timeout);
+
+                if (fail)
+                    throw new UnknownHostException();
+            }
+            catch (InterruptedException | FileNotFoundException e) {
+                throw new RuntimeException(e);
+            }
+        }
+        else {
+            System.out.println(sdf.format(new Date()) + " [" + 
Thread.currentThread().getName() +
+                "] Passed DNS request " + req);
+
+            Thread.dumpStack();
+        }
+    }
+}
diff --git a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py 
b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
index 98b7692d83c..0a0b581f301 100644
--- a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
+++ b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
@@ -97,7 +97,7 @@ class IgniteAwareService(BackgroundThreadService, 
IgnitePathAware, JvmProcessMix
         self.start_async(**kwargs)
         self.await_started()
 
-    def await_started(self):
+    def await_started(self, nodes=None):
         """
         Awaits start finished.
         """
@@ -106,7 +106,7 @@ class IgniteAwareService(BackgroundThreadService, 
IgnitePathAware, JvmProcessMix
 
         self.logger.info("Waiting for IgniteAware(s) to start ...")
 
-        self.await_event("Topology snapshot", self.startup_timeout_sec, 
from_the_beginning=True)
+        self.await_event("Topology snapshot", self.startup_timeout_sec, 
nodes=nodes, from_the_beginning=True)
 
     def start_node(self, node, **kwargs):
         self.init_shared(node)
@@ -254,17 +254,22 @@ class IgniteAwareService(BackgroundThreadService, 
IgnitePathAware, JvmProcessMix
                                err_msg="Event [%s] was not triggered on '%s' 
in %d seconds" % (evt_message, node.name,
                                                                                
                timeout_sec))
 
-    def await_event(self, evt_message, timeout_sec, from_the_beginning=False, 
backoff_sec=.1, log_file=None):
+    def await_event(self, evt_message, timeout_sec, nodes=None, 
from_the_beginning=False, backoff_sec=.1,
+                    log_file=None):
         """
         Await for specific event messages on all nodes.
         :param evt_message: Event message.
         :param timeout_sec: Number of seconds to check the condition for 
before failing.
+        :param nodes: Nodes to await the event on, or None for all nodes.
         :param from_the_beginning: If True, search for message from the 
beggining of log file.
         :param backoff_sec: Number of seconds to back off between each failure 
to meet the condition
                 before checking again.
         :param log_file: Explicit log file.
         """
-        for node in self.nodes:
+        if nodes is None:
+            nodes = self.nodes
+
+        for node in nodes:
             self.await_event_on_node(evt_message, node, timeout_sec, 
from_the_beginning=from_the_beginning,
                                      backoff_sec=backoff_sec, 
log_file=log_file)
 
diff --git a/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py 
b/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py
new file mode 100644
index 00000000000..0bfce662d8a
--- /dev/null
+++ b/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Module contains DNS service failure test.
+"""
+import os
+import socket
+
+from ducktape.mark import defaults
+from ignitetest.services.ignite import IgniteService
+from ignitetest.services.ignite_app import IgniteApplicationService
+from ignitetest.services.utils.control_utility import ControlUtility
+from ignitetest.services.utils.ignite_aware import IgniteAwareService
+from ignitetest.services.utils.ignite_configuration import 
IgniteConfiguration, DataStorageConfiguration
+from ignitetest.services.utils.ignite_configuration.data_storage import 
DataRegionConfiguration
+from ignitetest.services.utils.ignite_configuration.discovery import 
from_ignite_cluster
+from ignitetest.utils import ignite_versions
+from ignitetest.utils.ignite_test import IgniteTest
+from ignitetest.utils.version import IgniteVersion, DEV_BRANCH
+from ignitetest.utils import cluster
+
+
+class DnsFailureTest(IgniteTest):
+    """
+    Test DNS service failure.
+    """
+
+    @cluster(num_nodes=4)
+    @ignite_versions(str(DEV_BRANCH))
+    @defaults(fail=[True, False])
+    def dns_failure_test(self, ignite_version, fail):
+        """
+        DNS failure test.
+        """
+        # Replace hosts with IP addresses.
+        for node in self.test_context.cluster.nodes:
+            node.account.externally_routable_ip = 
socket.gethostbyname(node.account.externally_routable_ip)
+
+        version = IgniteVersion(ignite_version)
+
+        ignite_config = IgniteConfiguration(
+            version=version,
+            data_storage=DataStorageConfiguration(
+                default=DataRegionConfiguration(
+                    persistence_enabled=True
+                )
+            )
+        )
+
+        ignites = self.__prepare_service(ignite_config, 3)
+
+        self.__unblock_dns(ignites)
+
+        # Start nodes one-by-one to reproduce the problem.
+        ignites.start_node(ignites.nodes[0])
+        ignites.await_started([ignites.nodes[0]])
+        ignites.start_node(ignites.nodes[1])
+        ignites.await_started([ignites.nodes[0], ignites.nodes[1]])
+        ignites.start_node(ignites.nodes[2])
+        ignites.await_started()
+
+        control_utility = ControlUtility(ignites)
+        control_utility.activate()
+
+        self.__block_dns(ignites, 20000, fail)
+
+        ignites.stop_node(ignites.nodes[1])
+
+        ignites.await_event("Node left topology", 60, from_the_beginning=True,
+                            nodes=[ignites.nodes[0], ignites.nodes[2]])
+
+        assert ignites.alive(ignites.nodes[0]), 'Node 0 should be alive'
+        assert ignites.alive(ignites.nodes[2]), 'Node 2 should be alive'
+
+        self.__unblock_dns(ignites, [ignites.nodes[1]])
+
+        ignites.start_node(ignites.nodes[1])
+        ignites.await_started()
+
+        assert len(ignites.alive_nodes) == 3, 'All nodes should be alive'
+
+        # Smoke test on full topology.
+        app = IgniteApplicationService(
+            self.test_context,
+            ignite_config._replace(client_mode=True, 
discovery_spi=from_ignite_cluster(ignites)),
+            startup_timeout_sec=120,
+            
java_class_name="org.apache.ignite.internal.ducktest.tests.smoke_test.SimpleApplication")
+
+        app.start()
+        app.stop()
+
+        ignites.stop()
+
+    def __prepare_service(self, ignite_config, num_nodes=1):
+        ignite = IgniteService(
+            self.test_context,
+            ignite_config,
+            startup_timeout_sec=120,
+            num_nodes=num_nodes)
+
+        bootclasspath = list(map(lambda lib: os.path.join(lib, "classes"), 
ignite.spec._module_libs("ducktests")))
+
+        # Note: Support for the impl.prefix property was removed in Java 18.
+        ignite.spec.jvm_opts.append("-Dimpl.prefix=BlockingDns")
+        ignite.spec.jvm_opts.append("-Xbootclasspath/a:" + 
":".join(bootclasspath))
+
+        return ignite
+
+    @staticmethod
+    def __block_dns(ignite, timeout, fail, nodes=None):
+        if nodes is None:
+            nodes = ignite.nodes
+
+        for node in nodes:
+            _, err = IgniteAwareService.exec_command_ex(node, f"echo {timeout} 
{fail} > /tmp/block_dns")
+
+    @staticmethod
+    def __unblock_dns(ignite, nodes=None):
+        if nodes is None:
+            nodes = ignite.nodes
+
+        for node in nodes:
+            _, err = IgniteAwareService.exec_command_ex(node, "rm 
/tmp/block_dns")

Reply via email to