This is an automated email from the ASF dual-hosted git repository.
alexpl pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ignite.git
The following commit(s) were added to refs/heads/master by this push:
new f32480505f0 IGNITE-21630 Fix cluster failure on topology change when
DNS service is unavailable - Fixes #11260.
f32480505f0 is described below
commit f32480505f0b5a827b760789bc724008e68a6f3e
Author: Aleksey Plekhanov <[email protected]>
AuthorDate: Fri Mar 15 09:40:29 2024 +0300
IGNITE-21630 Fix cluster failure on topology change when DNS service is
unavailable - Fixes #11260.
Signed-off-by: Aleksey Plekhanov <[email protected]>
---
.../apache/ignite/internal/util/IgniteUtils.java | 97 +++------------
.../java/java/net/BlockingDnsInet4AddressImpl.java | 34 ++++++
.../java/java/net/BlockingDnsInet6AddressImpl.java | 34 ++++++
.../src/main/java/java/net/DnsBlocker.java | 99 +++++++++++++++
.../ignitetest/services/utils/ignite_aware.py | 13 +-
.../tests/ignitetest/tests/dns_failure_test.py | 136 +++++++++++++++++++++
6 files changed, 331 insertions(+), 82 deletions(-)
diff --git
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
index aabce7ae87d..7ea6899e696 100755
---
a/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
+++
b/modules/core/src/main/java/org/apache/ignite/internal/util/IgniteUtils.java
@@ -9692,67 +9692,7 @@ public abstract class IgniteUtils {
}
/**
- * Returns tha list of resolved inet addresses. First addresses are
resolved by host names,
- * if this attempt fails then the addresses are resolved by ip addresses.
- *
- * @param node Grid node.
- * @return Inet addresses for given addresses and host names.
- * @throws IgniteCheckedException If non of addresses can be resolved.
- */
- public static Collection<InetAddress> toInetAddresses(ClusterNode node)
throws IgniteCheckedException {
- return toInetAddresses(node.addresses(), node.hostNames());
- }
-
- /**
- * Returns tha list of resolved inet addresses. First addresses are
resolved by host names,
- * if this attempt fails then the addresses are resolved by ip addresses.
- *
- * @param addrs Addresses.
- * @param hostNames Host names.
- * @return Inet addresses for given addresses and host names.
- * @throws IgniteCheckedException If non of addresses can be resolved.
- */
- public static Collection<InetAddress> toInetAddresses(Collection<String>
addrs,
- Collection<String> hostNames) throws IgniteCheckedException {
- Set<InetAddress> res = new HashSet<>(addrs.size());
-
- Iterator<String> hostNamesIt = hostNames.iterator();
-
- for (String addr : addrs) {
- String hostName = hostNamesIt.hasNext() ? hostNamesIt.next() :
null;
-
- InetAddress inetAddr = null;
-
- if (!F.isEmpty(hostName)) {
- try {
- inetAddr = InetAddress.getByName(hostName);
- }
- catch (UnknownHostException ignored) {
- }
- }
-
- if (inetAddr == null || inetAddr.isLoopbackAddress()) {
- try {
- inetAddr = InetAddress.getByName(addr);
- }
- catch (UnknownHostException ignored) {
- }
- }
-
- if (inetAddr != null)
- res.add(inetAddr);
- }
-
- if (res.isEmpty())
- throw new IgniteCheckedException("Addresses can not be resolved
[addr=" + addrs +
- ", hostNames=" + hostNames + ']');
-
- return res;
- }
-
- /**
- * Returns tha list of resolved socket addresses. First addresses are
resolved by host names,
- * if this attempt fails then the addresses are resolved by ip addresses.
+ * Returns the list of resolved socket addresses.
*
* @param node Grid node.
* @param port Port.
@@ -9763,37 +9703,38 @@ public abstract class IgniteUtils {
}
/**
- * Returns tha list of resolved socket addresses. First addresses are
resolved by host names,
- * if this attempt fails then the addresses are resolved by ip addresses.
+ * Returns the list of resolved socket addresses.
*
* @param addrs Addresses.
* @param hostNames Host names.
* @param port Port.
* @return Socket addresses for given addresses and host names.
*/
- public static Collection<InetSocketAddress>
toSocketAddresses(Collection<String> addrs,
- Collection<String> hostNames, int port) {
+ public static Collection<InetSocketAddress> toSocketAddresses(
+ Collection<String> addrs,
+ Collection<String> hostNames,
+ int port
+ ) {
Set<InetSocketAddress> res = new HashSet<>(addrs.size());
- Iterator<String> hostNamesIt = hostNames.iterator();
+ boolean hasAddr = false;
for (String addr : addrs) {
- String hostName = hostNamesIt.hasNext() ? hostNamesIt.next() :
null;
+ InetSocketAddress inetSockAddr = createResolved(addr, port);
+ res.add(inetSockAddr);
- if (!F.isEmpty(hostName)) {
- InetSocketAddress inetSockAddr = createResolved(hostName,
port);
+ if (!inetSockAddr.isUnresolved() &&
!inetSockAddr.getAddress().isLoopbackAddress())
+ hasAddr = true;
+ }
- if (inetSockAddr.isUnresolved() ||
- (!inetSockAddr.isUnresolved() &&
inetSockAddr.getAddress().isLoopbackAddress())
- )
- inetSockAddr = createResolved(addr, port);
+ // Try to resolve addresses from host names if no external addresses
found.
+ if (!hasAddr) {
+ for (String host : hostNames) {
+ InetSocketAddress inetSockAddr = createResolved(host, port);
- res.add(inetSockAddr);
+ if (!inetSockAddr.isUnresolved())
+ res.add(inetSockAddr);
}
-
- // Always append address because local and remote nodes may have
the same hostname
- // therefore remote hostname will always be resolved to local
address.
- res.add(createResolved(addr, port));
}
return res;
diff --git
a/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java
b/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java
new file mode 100644
index 00000000000..9e98aa37c8a
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/BlockingDnsInet4AddressImpl.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+/**
+ * IPv4 address resolver used by the ducktests: every lookup is first reported to {@link DnsBlocker},
+ * which may delay or fail it, and is then delegated to {@link Inet4AddressImpl}.
+ */
+public class BlockingDnsInet4AddressImpl extends Inet4AddressImpl {
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Calls {@link DnsBlocker#onHostResolve} before performing the real lookup.
+ */
+ @Override public InetAddress[] lookupAllHostAddr(String hostname) throws
UnknownHostException {
+ DnsBlocker.INSTANCE.onHostResolve(this, hostname);
+
+ return super.lookupAllHostAddr(hostname);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Calls {@link DnsBlocker#onAddrResolve} before performing the real reverse lookup.
+ */
+ @Override public String getHostByAddr(byte[] addr) throws
UnknownHostException {
+ DnsBlocker.INSTANCE.onAddrResolve(this, addr);
+
+ return super.getHostByAddr(addr);
+ }
+}
diff --git
a/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java
b/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java
new file mode 100644
index 00000000000..47a0a025241
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/BlockingDnsInet6AddressImpl.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+/**
+ * IPv6 address resolver used by the ducktests: every lookup is first reported to {@link DnsBlocker},
+ * which may delay or fail it, and is then delegated to {@link Inet6AddressImpl}.
+ */
+public class BlockingDnsInet6AddressImpl extends Inet6AddressImpl {
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Calls {@link DnsBlocker#onHostResolve} before performing the real lookup.
+ */
+ @Override public InetAddress[] lookupAllHostAddr(String hostname) throws
UnknownHostException {
+ DnsBlocker.INSTANCE.onHostResolve(this, hostname);
+
+ return super.lookupAllHostAddr(hostname);
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Calls {@link DnsBlocker#onAddrResolve} before performing the real reverse lookup.
+ */
+ @Override public String getHostByAddr(byte[] addr) throws
UnknownHostException {
+ DnsBlocker.INSTANCE.onAddrResolve(this, addr);
+
+ return super.getHostByAddr(addr);
+ }
+}
diff --git a/modules/ducktests/src/main/java/java/net/DnsBlocker.java
b/modules/ducktests/src/main/java/java/net/DnsBlocker.java
new file mode 100644
index 00000000000..8f939281220
--- /dev/null
+++ b/modules/ducktests/src/main/java/java/net/DnsBlocker.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package java.net;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.Scanner;
+
+/**
+ * Test helper that logs DNS requests and, while the marker file {@code /tmp/block_dns} exists,
+ * delays them and optionally makes them fail with {@link UnknownHostException}.
+ */
+public class DnsBlocker {
+ /**
+ * Path of the marker file. When it exists it is expected to contain a timeout in milliseconds
+ * followed by a boolean "fail" flag.
+ */
+ private static final String BLOCK_DNS_FILE = "/tmp/block_dns";
+
+ /** The only instance of this class (the constructor is private). */
+ public static final DnsBlocker INSTANCE = new DnsBlocker();
+
+ /** Private constructor, instances are available only through {@link #INSTANCE}. */
+ private DnsBlocker() {
+ // No-op.
+ }
+
+ /**
+ * Checks a hostname resolve request and blocks it if needed. Requests whose hostname equals
+ * the textual loopback address of {@code impl} are never checked.
+ * @param impl Address implementation that handles the request, used to obtain the loopback address.
+ * @param hostname Hostname being resolved.
+ * @throws UnknownHostException If the marker file requests resolve failures.
+ */
+ public void onHostResolve(InetAddressImpl impl, String hostname) throws
UnknownHostException {
+ if (!impl.loopbackAddress().getHostAddress().equals(hostname))
+ check(hostname);
+ }
+
+ /**
+ * Checks an address (reverse) resolve request and blocks it if needed. Requests for the
+ * loopback address of {@code impl} are never checked.
+ * @param impl Address implementation that handles the request, used to obtain the loopback address.
+ * @param addr Raw bytes of the address being resolved.
+ * @throws UnknownHostException If the marker file requests resolve failures.
+ */
+ public void onAddrResolve(InetAddressImpl impl, byte[] addr) throws
UnknownHostException {
+ if (!Arrays.equals(impl.loopbackAddress().getAddress(), addr))
+ check(InetAddress.getByAddress(addr).toString());
+ }
+
+ /**
+ * Prints the request to standard output and dumps the current stack trace. If the marker file
+ * exists, reads the timeout and the fail flag from it, sleeps for the timeout and then throws
+ * {@link UnknownHostException} when the flag is {@code true}; otherwise the request passes.
+ * A marker file that does not start with a long followed by a boolean, a missing file at read
+ * time, or an interruption of the sleep are reported as {@link RuntimeException}.
+ * <p>
+ * NOTE(review): the {@link Scanner} opened on the marker file is never closed, and the thread
+ * interrupt flag is not restored when {@link InterruptedException} is wrapped.
+ *
+ * @param req Textual form of the request, used only for the log message.
+ * @throws UnknownHostException If the marker file exists and its fail flag is {@code true}.
+ */
+ private void check(String req) throws UnknownHostException {
+ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
+ File file = new File(BLOCK_DNS_FILE);
+
+ if (file.exists()) {
+ try {
+ Scanner scanner = new Scanner(file);
+ if (!scanner.hasNextLong())
+ throw new RuntimeException("Wrong " + BLOCK_DNS_FILE + "
file format");
+
+ long timeout = scanner.nextLong();
+
+ if (!scanner.hasNextBoolean())
+ throw new RuntimeException("Wrong " + BLOCK_DNS_FILE + "
file format");
+
+ boolean fail = scanner.nextBoolean();
+
+ // Can't use logger here, because class need to be in
bootstrap classloader.
+ System.out.println(sdf.format(new Date()) + " [" +
Thread.currentThread().getName() +
+ "] DNS request " + req + " blocked for " + timeout + "
ms");
+
+ Thread.dumpStack();
+
+ Thread.sleep(timeout);
+
+ if (fail)
+ throw new UnknownHostException();
+ }
+ catch (InterruptedException | FileNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ else {
+ System.out.println(sdf.format(new Date()) + " [" +
Thread.currentThread().getName() +
+ "] Passed DNS request " + req);
+
+ Thread.dumpStack();
+ }
+ }
+}
diff --git a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
index 98b7692d83c..0a0b581f301 100644
--- a/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
+++ b/modules/ducktests/tests/ignitetest/services/utils/ignite_aware.py
@@ -97,7 +97,7 @@ class IgniteAwareService(BackgroundThreadService,
IgnitePathAware, JvmProcessMix
self.start_async(**kwargs)
self.await_started()
- def await_started(self):
+ def await_started(self, nodes=None):
"""
Awaits start finished.
"""
@@ -106,7 +106,7 @@ class IgniteAwareService(BackgroundThreadService,
IgnitePathAware, JvmProcessMix
self.logger.info("Waiting for IgniteAware(s) to start ...")
- self.await_event("Topology snapshot", self.startup_timeout_sec,
from_the_beginning=True)
+ self.await_event("Topology snapshot", self.startup_timeout_sec,
nodes=nodes, from_the_beginning=True)
def start_node(self, node, **kwargs):
self.init_shared(node)
@@ -254,17 +254,22 @@ class IgniteAwareService(BackgroundThreadService,
IgnitePathAware, JvmProcessMix
err_msg="Event [%s] was not triggered on '%s'
in %d seconds" % (evt_message, node.name,
timeout_sec))
- def await_event(self, evt_message, timeout_sec, from_the_beginning=False,
backoff_sec=.1, log_file=None):
+ def await_event(self, evt_message, timeout_sec, nodes=None,
from_the_beginning=False, backoff_sec=.1,
+ log_file=None):
"""
Await for specific event messages on all nodes.
:param evt_message: Event message.
:param timeout_sec: Number of seconds to check the condition for
before failing.
+ :param nodes: Nodes to await event or None, for all nodes.
:param from_the_beginning: If True, search for message from the
beggining of log file.
:param backoff_sec: Number of seconds to back off between each failure
to meet the condition
before checking again.
:param log_file: Explicit log file.
"""
- for node in self.nodes:
+ if nodes is None:
+ nodes = self.nodes
+
+ for node in nodes:
self.await_event_on_node(evt_message, node, timeout_sec,
from_the_beginning=from_the_beginning,
backoff_sec=backoff_sec,
log_file=log_file)
diff --git a/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py
b/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py
new file mode 100644
index 00000000000..0bfce662d8a
--- /dev/null
+++ b/modules/ducktests/tests/ignitetest/tests/dns_failure_test.py
@@ -0,0 +1,136 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Module contains DNS service failure test.
+"""
+import os
+import socket
+
+from ducktape.mark import defaults
+from ignitetest.services.ignite import IgniteService
+from ignitetest.services.ignite_app import IgniteApplicationService
+from ignitetest.services.utils.control_utility import ControlUtility
+from ignitetest.services.utils.ignite_aware import IgniteAwareService
+from ignitetest.services.utils.ignite_configuration import
IgniteConfiguration, DataStorageConfiguration
+from ignitetest.services.utils.ignite_configuration.data_storage import
DataRegionConfiguration
+from ignitetest.services.utils.ignite_configuration.discovery import
from_ignite_cluster
+from ignitetest.utils import ignite_versions
+from ignitetest.utils.ignite_test import IgniteTest
+from ignitetest.utils.version import IgniteVersion, DEV_BRANCH
+from ignitetest.utils import cluster
+
+
+class DnsFailureTest(IgniteTest):
+ """
+ Checks that the remaining server nodes stay alive when a node leaves the topology
+ while DNS requests on the nodes are blocked (delayed and optionally failed).
+ """
+
+ @cluster(num_nodes=4)
+ @ignite_versions(str(DEV_BRANCH))
+ @defaults(fail=[True, False])
+ def dns_failure_test(self, ignite_version, fail):
+ """
+ DNS failure test.
+
+ Starts three persistent server nodes one by one, activates the cluster, blocks DNS
+ for 20000 ms on all nodes and stops node 1. Nodes 0 and 2 must log "Node left topology"
+ within 60 seconds and stay alive. DNS is then unblocked on node 1 only, node 1 is
+ restarted and a client application is run as a smoke test on the full topology.
+
+ :param ignite_version: Ignite version to run the test on.
+ :param fail: Flag written to the block file after the timeout; it is read by the
+ Java-side DnsBlocker to decide whether a delayed request fails.
+ """
+ # Replace hosts with IP addresses, so the nodes are addressed by IP rather than by host name.
+ for node in self.test_context.cluster.nodes:
+ node.account.externally_routable_ip =
socket.gethostbyname(node.account.externally_routable_ip)
+
+ version = IgniteVersion(ignite_version)
+
+ ignite_config = IgniteConfiguration(
+ version=version,
+ data_storage=DataStorageConfiguration(
+ default=DataRegionConfiguration(
+ persistence_enabled=True
+ )
+ )
+ )
+
+ ignites = self.__prepare_service(ignite_config, 3)
+
+ self.__unblock_dns(ignites)
+
+ # Start nodes one-by-one to reproduce the problem.
+ ignites.start_node(ignites.nodes[0])
+ ignites.await_started([ignites.nodes[0]])
+ ignites.start_node(ignites.nodes[1])
+ ignites.await_started([ignites.nodes[0], ignites.nodes[1]])
+ ignites.start_node(ignites.nodes[2])
+ ignites.await_started()
+
+ control_utility = ControlUtility(ignites)
+ control_utility.activate()
+
+ self.__block_dns(ignites, 20000, fail)
+
+ ignites.stop_node(ignites.nodes[1])
+
+ ignites.await_event("Node left topology", 60, from_the_beginning=True,
+ nodes=[ignites.nodes[0], ignites.nodes[2]])
+
+ assert ignites.alive(ignites.nodes[0]), 'Node 0 should be alive'
+ assert ignites.alive(ignites.nodes[2]), 'Node 2 should be alive'
+
+ self.__unblock_dns(ignites, [ignites.nodes[1]])
+
+ ignites.start_node(ignites.nodes[1])
+ ignites.await_started()
+
+ assert len(ignites.alive_nodes) == 3, 'All nodes should be alive'
+
+ # Smoke test on full topology: start and stop a client application connected to the cluster.
+ app = IgniteApplicationService(
+ self.test_context,
+ ignite_config._replace(client_mode=True,
discovery_spi=from_ignite_cluster(ignites)),
+ startup_timeout_sec=120,
+
java_class_name="org.apache.ignite.internal.ducktest.tests.smoke_test.SimpleApplication")
+
+ app.start()
+ app.stop()
+
+ ignites.stop()
+
+ def __prepare_service(self, ignite_config, num_nodes=1):
+ """
+ Creates (without starting) an Ignite service whose JVMs get the "BlockingDns" impl.prefix
+ and the "classes" directories of the ducktests module libs appended to the boot class path.
+
+ :param ignite_config: Ignite configuration.
+ :param num_nodes: Number of nodes in the service.
+ :return: Prepared Ignite service.
+ """
+ ignite = IgniteService(
+ self.test_context,
+ ignite_config,
+ startup_timeout_sec=120,
+ num_nodes=num_nodes)
+
+ bootclasspath = list(map(lambda lib: os.path.join(lib, "classes"),
ignite.spec._module_libs("ducktests")))
+
+ # Note: Support of impl.prefix property was removed since java 18.
+ ignite.spec.jvm_opts.append("-Dimpl.prefix=BlockingDns")
+ ignite.spec.jvm_opts.append("-Xbootclasspath/a:" +
":".join(bootclasspath))
+
+ return ignite
+
+ @staticmethod
+ def __block_dns(ignite, timeout, fail, nodes=None):
+ """
+ Writes "<timeout> <fail>" to /tmp/block_dns on the given nodes (all service nodes if None).
+ NOTE(review): the error output returned by the command is assigned but never checked.
+ """
+ if nodes is None:
+ nodes = ignite.nodes
+
+ for node in nodes:
+ _, err = IgniteAwareService.exec_command_ex(node, f"echo {timeout}
{fail} > /tmp/block_dns")
+
+ @staticmethod
+ def __unblock_dns(ignite, nodes=None):
+ """
+ Removes /tmp/block_dns on the given nodes (all service nodes if None).
+ NOTE(review): the error output returned by the command is assigned but never checked.
+ """
+ if nodes is None:
+ nodes = ignite.nodes
+
+ for node in nodes:
+ _, err = IgniteAwareService.exec_command_ex(node, "rm
/tmp/block_dns")