[
https://issues.apache.org/jira/browse/IGNITE-26053?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Igor updated IGNITE-26053:
--------------------------
Description:
*Steps to reproduce:*
# Start cluster of 3 nodes (each on separate host)
# Create 10 tables
{code:java}
create zone if not exists "cluster_failover_3" (replicas 3, auto scale up 10,
auto scale down 10) storage profiles ['default_aipersist'];
create TABLE failoverTest00(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest01(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest02(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest03(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest04(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest05(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest06(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest07(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest08(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest09(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";{code}
# Fill tables with 100 rows each.
# Wait until the local state of all partitions of all tables is "HEALTHY".
# Wait until the global state of all partitions of all tables is "AVAILABLE".
# Assert that the tables have been filled, with the expected row count of '100' and no
errors in the logs
# Stop node 1 forcibly
# Clean work directory of the node
# Start node 1
# Wait until the node is ready after the restart
*Expected:*
Node with index 1 is started successfully.
*Actual:*
The node started, but was not initialized and got stuck on the log message:
{code:java}
2025-07-29 04:47:51:639 +0000 [INFO][main][IgniteImpl] Joining the cluster
{code}
Server logs are in the attachment.
was:
*Steps to reproduce:*
# Start cluster of 3 nodes (each on separate host)
# Create 10 tables
{code:java}
create zone if not exists "cluster_failover_3" (replicas 3, auto scale up 10,
auto scale down 10) storage profiles ['default_aipersist'];
create TABLE failoverTest00(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest01(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest02(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest03(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest04(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest05(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest06(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest07(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest08(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";
create TABLE failoverTest09(k1 INTEGER not null, k2 INTEGER not null, v1
VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
ZONE "cluster_failover_3";{code}
# Fill tables with 100 rows each.
# Await all partitions of all tables local state is "HEALTHY".
# Await all partitions of all tables global state is "AVAILABLE".
# Assert the tables has been filled, with expected row count of '100' and no
errors in logs
# Stop node 1 forcibly
# Clean work directory of the node
# Start node 1
# Wait node is ready after restart
Expected:
Node with index 1 is started successfully.
Actual:
Node started, but not initialized and stuck on log message:
{code:java}
2025-07-29 04:47:51:639 +0000 [INFO][main][IgniteImpl] Joining the cluster
{code}
Servers logs are in the attachment.
> Node stuck after restarted with removed work directory
> ------------------------------------------------------
>
> Key: IGNITE-26053
> URL: https://issues.apache.org/jira/browse/IGNITE-26053
> Project: Ignite
> Issue Type: Bug
> Components: storage engines ai3
> Affects Versions: 3.1
> Environment: 3 nodes each on separate host
> Reporter: Igor
> Priority: Critical
> Labels: ignite-3
> Attachments: servers_log.zip
>
>
> *Steps to reproduce:*
> # Start cluster of 3 nodes (each on separate host)
> # Create 10 tables
> {code:java}
> create zone if not exists "cluster_failover_3" (replicas 3, auto scale up 10,
> auto scale down 10) storage profiles ['default_aipersist'];
> create TABLE failoverTest00(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest01(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest02(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest03(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest04(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest05(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest06(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest07(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest08(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";
> create TABLE failoverTest09(k1 INTEGER not null, k2 INTEGER not null, v1
> VARCHAR(100), v2 VARCHAR(255), v3 TIMESTAMP not null, primary key (k1, k2))
> ZONE "cluster_failover_3";{code}
> # Fill tables with 100 rows each.
> # Wait until the local state of all partitions of all tables is "HEALTHY".
> # Wait until the global state of all partitions of all tables is "AVAILABLE".
> # Assert that the tables have been filled, with the expected row count of '100' and no
> errors in the logs
> # Stop node 1 forcibly
> # Clean work directory of the node
> # Start node 1
> # Wait until the node is ready after the restart
> *Expected:*
> Node with index 1 is started successfully.
> *Actual:*
> The node started, but was not initialized and got stuck on the log message:
> {code:java}
> 2025-07-29 04:47:51:639 +0000 [INFO][main][IgniteImpl] Joining the cluster
> {code}
> Server logs are in the attachment.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)