[
https://issues.apache.org/jira/browse/IGNITE-19662?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Aleksandr Polovtcev updated IGNITE-19662:
-----------------------------------------
Ignite Flags: (was: Docs Required,Release Notes Required)
> Node from CMG restart failed
> ----------------------------
>
> Key: IGNITE-19662
> URL: https://issues.apache.org/jira/browse/IGNITE-19662
> Project: Ignite
> Issue Type: Bug
> Reporter: Mikhail Pochatkin
> Assignee: Aleksandr Polovtcev
> Priority: Major
> Labels: ignite-3
>
> Test case:
> # Start a cluster of 8 nodes in which the CMG consists of nodes [0, 1, 2]
> # Restart one node from the CMG (it does not matter whether it is the leader)
> Expectation:
> The node restarts successfully and joins the cluster
> Actual:
> The node failed to start after a 20-second timeout. Logs:
> {code:java}
> [WARNING][CompletableFutureDelayScheduler][RaftGroupServiceImpl] Recoverable
> error during the request type=ActionRequestImpl occurred (will be retried on
> the randomly selected node):
> java.util.concurrent.CompletionException:
> java.util.concurrent.TimeoutException
> at
> java.base/java.util.concurrent.CompletableFuture.encodeRelay(CompletableFuture.java:367)
> at
> java.base/java.util.concurrent.CompletableFuture.completeRelay(CompletableFuture.java:376)
> at
> java.base/java.util.concurrent.CompletableFuture$UniRelay.tryFire(CompletableFuture.java:1019)
> at
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
> at
> java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)
> at
> java.base/java.util.concurrent.CompletableFuture$Timeout.run(CompletableFuture.java:2792)
> at
> java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
> at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
> at
> java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.util.concurrent.TimeoutException
> ... 7 more
> {code}
>
> {code:java}
> 2023-06-06 11:05:15:972 +0300
> [WARNING][%iduft_tdwns_2%JRaft-Request-Processor-15][RaftGroupServiceImpl]
> Recoverable error during the request type=ActionRequestImpl occurred (will be
> retried on the randomly selected node):
> java.util.concurrent.CompletionException: java.net.ConnectException: Peer
> iduft_tdwns_0 is unavailable
> at
> java.base/java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:331)
> at
> java.base/java.util.concurrent.CompletableFuture.uniComposeStage(CompletableFuture.java:1099)
> at
> java.base/java.util.concurrent.CompletableFuture.thenCompose(CompletableFuture.java:2235)
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:520)
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:487)
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.run(RaftGroupServiceImpl.java:454)
> at
> org.apache.ignite.internal.cluster.management.raft.CmgRaftService.validatedNodes(CmgRaftService.java:213)
> at
> java.base/java.util.concurrent.CompletableFuture.uniComposeStage(CompletableFuture.java:1106)
> at
> java.base/java.util.concurrent.CompletableFuture.thenCompose(CompletableFuture.java:2235)
> at
> org.apache.ignite.internal.cluster.management.ClusterManagementGroupManager.validatedNodes(ClusterManagementGroupManager.java:827)
> at
> org.apache.ignite.internal.cluster.management.topology.LogicalTopologyServiceImpl.validatedNodesOnLeader(LogicalTopologyServiceImpl.java:58)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$removeLearner$10(MetaStorageRaftGroupEventsListener.java:194)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.updateConfigUnderLock(MetaStorageRaftGroupEventsListener.java:237)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.removeLearner(MetaStorageRaftGroupEventsListener.java:194)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener$1.lambda$onNodeInvalidated$1(MetaStorageRaftGroupEventsListener.java:117)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$executeIfLeaderImpl$2(MetaStorageRaftGroupEventsListener.java:170)
> at
> org.apache.ignite.internal.metastorage.impl.MetaStorageRaftGroupEventsListener.lambda$executeWithStatus$3(MetaStorageRaftGroupEventsListener.java:179)
> at
> java.base/java.util.concurrent.CompletableFuture$UniCompose.tryFire(CompletableFuture.java:1072)
> at
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
> at
> java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.lambda$sendWithRetry$39(RaftGroupServiceImpl.java:539)
> at
> java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)
> at
> java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)
> at
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)
> at
> java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2073)
> at
> org.apache.ignite.network.DefaultMessagingService.onInvokeResponse(DefaultMessagingService.java:371)
> at
> org.apache.ignite.network.DefaultMessagingService.send0(DefaultMessagingService.java:194)
> at
> org.apache.ignite.network.DefaultMessagingService.respond(DefaultMessagingService.java:137)
> at
> org.apache.ignite.network.MessagingService.respond(MessagingService.java:89)
> at
> org.apache.ignite.raft.jraft.rpc.impl.IgniteRpcServer$NetworkRpcContext.sendResponse(IgniteRpcServer.java:233)
> at
> org.apache.ignite.raft.jraft.rpc.RpcRequestProcessor.handleRequest(RpcRequestProcessor.java:52)
> at
> org.apache.ignite.raft.jraft.rpc.RpcRequestProcessor.handleRequest(RpcRequestProcessor.java:29)
> at
> org.apache.ignite.raft.jraft.rpc.impl.IgniteRpcServer$RpcMessageHandler.lambda$onReceived$0(IgniteRpcServer.java:192)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
> at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
> at java.base/java.lang.Thread.run(Thread.java:829)
> Caused by: java.net.ConnectException: Peer iduft_tdwns_0 is unavailable
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.resolvePeer(RaftGroupServiceImpl.java:752)
> at
> org.apache.ignite.internal.raft.RaftGroupServiceImpl.sendWithRetry(RaftGroupServiceImpl.java:519)
> ... 32 more {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)