[ https://issues.apache.org/jira/browse/IGNITE-28089?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Denis Chudov updated IGNITE-28089:
----------------------------------
Labels: MakeTeamcityGreenAgain ignite-3 (was: ignite-3)
> Critical system error detected - Unable to start rebalance
> ----------------------------------------------------------
>
> Key: IGNITE-28089
> URL: https://issues.apache.org/jira/browse/IGNITE-28089
> Project: Ignite
> Issue Type: Bug
> Reporter: Denis Chudov
> Priority: Major
> Labels: MakeTeamcityGreenAgain, ignite-3
>
> {code:java}
> 03:58:20
> [2026-03-06T03:58:20,758][ERROR][%icict_nwdcicc_3345%rebalance-scheduler-0][FailureManager]
> Critical system error detected. Will be handled accordingly to configured
> handler [hnd=StopNodeFailureHandler [nodeName=icict_nwdcicc_3345,
> super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet
> [SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]],
> failureCtx=CRITICAL_ERROR, failureCtxId=e42b3853-1d65-426f-a47b-56540ba2cbfd]
> 03:58:20 org.apache.ignite.internal.failure.StackTraceCapturingException:
> Unable to start rebalance [zonePartitionId=20_part_7, term=1]
> 03:58:20 at
> org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:191)
> 03:58:20 at
> org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:168)
> 03:58:20 at
> org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.maybeRunFailHandler(ZoneRebalanceRaftGroupEventsListener.java:278)
> 03:58:20 at
> org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.lambda$onLeaderElected$1(ZoneRebalanceRaftGroupEventsListener.java:259)
> 03:58:20 at
> java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
> 03:58:20 at
> java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
> 03:58:20 at
> java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
> 03:58:20 at
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
> 03:58:20 at
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
> 03:58:20 at java.base/java.lang.Thread.run(Thread.java:833)
> 03:58:20 Caused by: java.util.concurrent.ExecutionException:
> org.apache.ignite.internal.network.handshake.HandshakeException:
> icict_nwdcicc_3344:9108c883-2450-40c7-aa99-493730c9506a belongs to cluster
> a93b03ae-29e3-4444-908b-bee38613deda which is different from this one
> b252b6ce-18c2-435c-ae3f-4a2e5770e1d0, connection rejected. Either another
> cluster is reachable for this one on the network (in this case make sure they
> can't connect), or CMG/MG repair was made and then some node that did not
> participate one is started (in this case, migrate the started node to the
> repaired cluster using CMG/MG repair tools)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2073)
> 03:58:20 at
> org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.lambda$onLeaderElected$1(ZoneRebalanceRaftGroupEventsListener.java:206)
> 03:58:20 ... 6 more
> 03:58:20 Caused by:
> org.apache.ignite.internal.network.handshake.HandshakeException:
> icict_nwdcicc_3344:9108c883-2450-40c7-aa99-493730c9506a belongs to cluster
> a93b03ae-29e3-4444-908b-bee38613deda which is different from this one
> b252b6ce-18c2-435c-ae3f-4a2e5770e1d0, connection rejected. Either another
> cluster is reachable for this one on the network (in this case make sure they
> can't connect), or CMG/MG repair was made and then some node that did not
> participate one is started (in this case, migrate the started node to the
> repaired cluster using CMG/MG repair tools)
> 03:58:20 at
> org.apache.ignite.internal.network.recovery.HandshakeManagerUtils.lambda$sendRejectionMessageAndFailHandshake$0(HandshakeManagerUtils.java:99)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:863)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:841)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:510)
> 03:58:20 at
> java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2147)
> 03:58:20 at
> org.apache.ignite.internal.network.netty.NettyUtils.lambda$toCompletableFuture$0(NettyUtils.java:62)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)
> 03:58:20 at
> io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:84)
> 03:58:20 at
> io.netty.handler.stream.ChunkedWriteHandler$PendingWrite.success(ChunkedWriteHandler.java:384)
> 03:58:20 at
> io.netty.handler.stream.ChunkedWriteHandler.handleEndOfInputFuture(ChunkedWriteHandler.java:340)
> 03:58:20 at
> io.netty.handler.stream.ChunkedWriteHandler.lambda$doFlush$0(ChunkedWriteHandler.java:301)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)
> 03:58:20 at
> io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)
> 03:58:20 at
> io.netty.util.internal.PromiseNotificationUtil.trySuccess(PromiseNotificationUtil.java:48)
> 03:58:20 at
> io.netty.channel.ChannelOutboundBuffer.safeSuccess(ChannelOutboundBuffer.java:747)
> 03:58:20 at
> io.netty.channel.ChannelOutboundBuffer.remove(ChannelOutboundBuffer.java:302)
> 03:58:20 at
> io.netty.channel.ChannelOutboundBuffer.removeBytes(ChannelOutboundBuffer.java:382)
> 03:58:20 at
> io.netty.channel.socket.nio.NioSocketChannel.doWrite(NioSocketChannel.java:414)
> 03:58:20 at
> io.netty.channel.AbstractChannel$AbstractUnsafe.flush0(AbstractChannel.java:794)
> 03:58:20 at
> io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.flush0(AbstractNioChannel.java:404)
> 03:58:20 at
> io.netty.channel.AbstractChannel$AbstractUnsafe.flush(AbstractChannel.java:758)
> 03:58:20 at
> io.netty.channel.DefaultChannelPipeline$HeadContext.flush(DefaultChannelPipeline.java:1391)
> 03:58:20 at
> io.netty.channel.AbstractChannelHandlerContext.flush(AbstractChannelHandlerContext.java:754)
> 03:58:20 at
> io.netty.handler.flush.FlushConsolidationHandler.flushNow(FlushConsolidationHandler.java:204)
> 03:58:20 at
> io.netty.handler.flush.FlushConsolidationHandler.flushIfNeeded(FlushConsolidationHandler.java:197)
> 03:58:20 at
> io.netty.handler.flush.FlushConsolidationHandler.resetReadAndFlushIfNeeded(FlushConsolidationHandler.java:192)
> 03:58:20 at
> io.netty.handler.flush.FlushConsolidationHandler.channelReadComplete(FlushConsolidationHandler.java:145)
> 03:58:20 at
> io.netty.channel.AbstractChannelHandlerContext.fireChannelReadComplete(AbstractChannelHandlerContext.java:385)
> 03:58:20 at
> io.netty.channel.DefaultChannelPipeline$HeadContext.channelReadComplete(DefaultChannelPipeline.java:1434)
> 03:58:20 at
> io.netty.channel.DefaultChannelPipeline.fireChannelReadComplete(DefaultChannelPipeline.java:932)
> 03:58:20 at
> io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:181)
> 03:58:20 at
> io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.handle(AbstractNioChannel.java:445)
> 03:58:20 at
> io.netty.channel.nio.NioIoHandler$DefaultNioRegistration.handle(NioIoHandler.java:388)
> 03:58:20 at
> io.netty.channel.nio.NioIoHandler.processSelectedKey(NioIoHandler.java:596)
> 03:58:20 at
> io.netty.channel.nio.NioIoHandler.processSelectedKeysOptimized(NioIoHandler.java:571)
> 03:58:20 at
> io.netty.channel.nio.NioIoHandler.processSelectedKeys(NioIoHandler.java:512)
> 03:58:20 at io.netty.channel.nio.NioIoHandler.run(NioIoHandler.java:484)
> 03:58:20 at
> io.netty.channel.SingleThreadIoEventLoop.runIo(SingleThreadIoEventLoop.java:225)
> 03:58:20 at
> io.netty.channel.SingleThreadIoEventLoop.run(SingleThreadIoEventLoop.java:196)
> 03:58:20 at
> io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:1195)
> 03:58:20 at
> io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
> 03:58:20 at
> io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
> 03:58:20 ... 1 more{code}
>
--
This message was sent by Atlassian Jira (v8.20.10#820010)