Denis Chudov created IGNITE-28089:
-------------------------------------
Summary: Critical system error detected - Unable to start rebalance
Key: IGNITE-28089
URL: https://issues.apache.org/jira/browse/IGNITE-28089
Project: Ignite
Issue Type: Bug
Reporter: Denis Chudov
{code:java}
03:58:20
[2026-03-06T03:58:20,758][ERROR][%icict_nwdcicc_3345%rebalance-scheduler-0][FailureManager]
Critical system error detected. Will be handled accordingly to configured
handler [hnd=StopNodeFailureHandler [nodeName=icict_nwdcicc_3345,
super=AbstractFailureHandler [ignoredFailureTypes=UnmodifiableSet
[SYSTEM_WORKER_BLOCKED, SYSTEM_CRITICAL_OPERATION_TIMEOUT]]],
failureCtx=CRITICAL_ERROR, failureCtxId=e42b3853-1d65-426f-a47b-56540ba2cbfd]
03:58:20 org.apache.ignite.internal.failure.StackTraceCapturingException:
Unable to start rebalance [zonePartitionId=20_part_7, term=1]
03:58:20 at
org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:191)
03:58:20 at
org.apache.ignite.internal.failure.FailureManager.process(FailureManager.java:168)
03:58:20 at
org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.maybeRunFailHandler(ZoneRebalanceRaftGroupEventsListener.java:278)
03:58:20 at
org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.lambda$onLeaderElected$1(ZoneRebalanceRaftGroupEventsListener.java:259)
03:58:20 at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:539)
03:58:20 at
java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
03:58:20 at
java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
03:58:20 at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
03:58:20 at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
03:58:20 at java.base/java.lang.Thread.run(Thread.java:833)
03:58:20 Caused by: java.util.concurrent.ExecutionException:
org.apache.ignite.internal.network.handshake.HandshakeException:
icict_nwdcicc_3344:9108c883-2450-40c7-aa99-493730c9506a belongs to cluster
a93b03ae-29e3-4444-908b-bee38613deda which is different from this one
b252b6ce-18c2-435c-ae3f-4a2e5770e1d0, connection rejected. Either another
cluster is reachable for this one on the network (in this case make sure they
can't connect), or CMG/MG repair was made and then some node that did not
participate one is started (in this case, migrate the started node to the
repaired cluster using CMG/MG repair tools)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:396)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture.get(CompletableFuture.java:2073)
03:58:20 at
org.apache.ignite.internal.distributionzones.rebalance.ZoneRebalanceRaftGroupEventsListener.lambda$onLeaderElected$1(ZoneRebalanceRaftGroupEventsListener.java:206)
03:58:20 ... 6 more
03:58:20 Caused by:
org.apache.ignite.internal.network.handshake.HandshakeException:
icict_nwdcicc_3344:9108c883-2450-40c7-aa99-493730c9506a belongs to cluster
a93b03ae-29e3-4444-908b-bee38613deda which is different from this one
b252b6ce-18c2-435c-ae3f-4a2e5770e1d0, connection rejected. Either another
cluster is reachable for this one on the network (in this case make sure they
can't connect), or CMG/MG repair was made and then some node that did not
participate one is started (in this case, migrate the started node to the
repaired cluster using CMG/MG repair tools)
03:58:20 at
org.apache.ignite.internal.network.recovery.HandshakeManagerUtils.lambda$sendRejectionMessageAndFailHandshake$0(HandshakeManagerUtils.java:99)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:863)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:841)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:510)
03:58:20 at
java.base/java.util.concurrent.CompletableFuture.complete(CompletableFuture.java:2147)
03:58:20 at
org.apache.ignite.internal.network.netty.NettyUtils.lambda$toCompletableFuture$0(NettyUtils.java:62)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)
03:58:20 at
io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:84)
03:58:20 at
io.netty.handler.stream.ChunkedWriteHandler$PendingWrite.success(ChunkedWriteHandler.java:384)
03:58:20 at
io.netty.handler.stream.ChunkedWriteHandler.handleEndOfInputFuture(ChunkedWriteHandler.java:340)
03:58:20 at
io.netty.handler.stream.ChunkedWriteHandler.lambda$doFlush$0(ChunkedWriteHandler.java:301)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:604)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:571)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:506)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:650)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:639)
03:58:20 at
io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:119)
03:58:20 at
io.netty.util.internal.PromiseNotificationUtil.trySuccess(PromiseNotificationUtil.java:48)
03:58:20 at
io.netty.channel.ChannelOutboundBuffer.safeSuccess(ChannelOutboundBuffer.java:747)
03:58:20 at
io.netty.channel.ChannelOutboundBuffer.remove(ChannelOutboundBuffer.java:302)
03:58:20 at
io.netty.channel.ChannelOutboundBuffer.removeBytes(ChannelOutboundBuffer.java:382)
03:58:20 at
io.netty.channel.socket.nio.NioSocketChannel.doWrite(NioSocketChannel.java:414)
03:58:20 at
io.netty.channel.AbstractChannel$AbstractUnsafe.flush0(AbstractChannel.java:794)
03:58:20 at
io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.flush0(AbstractNioChannel.java:404)
03:58:20 at
io.netty.channel.AbstractChannel$AbstractUnsafe.flush(AbstractChannel.java:758)
03:58:20 at
io.netty.channel.DefaultChannelPipeline$HeadContext.flush(DefaultChannelPipeline.java:1391)
03:58:20 at
io.netty.channel.AbstractChannelHandlerContext.flush(AbstractChannelHandlerContext.java:754)
03:58:20 at
io.netty.handler.flush.FlushConsolidationHandler.flushNow(FlushConsolidationHandler.java:204)
03:58:20 at
io.netty.handler.flush.FlushConsolidationHandler.flushIfNeeded(FlushConsolidationHandler.java:197)
03:58:20 at
io.netty.handler.flush.FlushConsolidationHandler.resetReadAndFlushIfNeeded(FlushConsolidationHandler.java:192)
03:58:20 at
io.netty.handler.flush.FlushConsolidationHandler.channelReadComplete(FlushConsolidationHandler.java:145)
03:58:20 at
io.netty.channel.AbstractChannelHandlerContext.fireChannelReadComplete(AbstractChannelHandlerContext.java:385)
03:58:20 at
io.netty.channel.DefaultChannelPipeline$HeadContext.channelReadComplete(DefaultChannelPipeline.java:1434)
03:58:20 at
io.netty.channel.DefaultChannelPipeline.fireChannelReadComplete(DefaultChannelPipeline.java:932)
03:58:20 at
io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:181)
03:58:20 at
io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe.handle(AbstractNioChannel.java:445)
03:58:20 at
io.netty.channel.nio.NioIoHandler$DefaultNioRegistration.handle(NioIoHandler.java:388)
03:58:20 at
io.netty.channel.nio.NioIoHandler.processSelectedKey(NioIoHandler.java:596)
03:58:20 at
io.netty.channel.nio.NioIoHandler.processSelectedKeysOptimized(NioIoHandler.java:571)
03:58:20 at
io.netty.channel.nio.NioIoHandler.processSelectedKeys(NioIoHandler.java:512)
03:58:20 at io.netty.channel.nio.NioIoHandler.run(NioIoHandler.java:484)
03:58:20 at
io.netty.channel.SingleThreadIoEventLoop.runIo(SingleThreadIoEventLoop.java:225)
03:58:20 at
io.netty.channel.SingleThreadIoEventLoop.run(SingleThreadIoEventLoop.java:196)
03:58:20 at
io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:1195)
03:58:20 at
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
03:58:20 at
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
03:58:20 ... 1 more{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)