This is an automated email from the ASF dual-hosted git repository. CRZbulabula pushed a commit to branch fix_migrate_region_leader_switch_fake_complete in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 904787534d893cb09c5339a9c47ac36dd49c62ea Author: Yongzao <[email protected]> AuthorDate: Thu Jun 11 12:07:46 2026 +0800 Fix MIGRATE REGION falsely reported complete on ConfigNode leader switch When the ConfigNode leader is gracefully stopped (or loses leadership) while AddRegionPeerProcedure is waiting for the coordinator DataNode's AddRegionPeer task to finish, RegionMaintainHandler.waitTaskFinish() is interrupted and returns PROCESSING. The DO_ADD_REGION_PEER state previously treated PROCESSING as a no-op terminal state (return Flow.NO_MORE_STATE), silently ending the AddRegionPeerProcedure without success or rollback. The parent RegionMigrateProcedure had already persisted at CHECK_ADD_REGION_PEER, so the new leader resumed there directly. Its isDataNodeContainsRegion() check only inspects the partition table's location list, which is written at CREATE_NEW_REGION_PEER (long before the snapshot finishes transferring). It therefore passed, the source replica was removed, and the migration was declared a success while the destination replica was still in Adding state and had not received the snapshot. Queries against the region returned incorrect results during the gap (observed: ~17 min until the destination became active). Fix: in the PROCESSING branch, stay at DO_ADD_REGION_PEER and persist it (HAS_MORE_STATE) instead of ending. After recovery the new leader re-enters DO_ADD_REGION_PEER and re-polls the coordinator task until it truly reaches SUCCESS or FAIL. The re-poll is idempotent: the isStateDeserialized() guard skips re-submitting after a restart, and the coordinator DataNode dedups by taskId (putIfAbsent) even on a same-process re-execute, so the AddRegionPeer task is never submitted twice. If the coordinator crashed and lost its task table, the poll returns TASK_NOT_EXIST and falls through to the existing FAIL/rollback path. Add cnLeaderSwitchDuringDoAddPeerTest for each consensus protocol (IoTConsensus, IoTConsensusV2 batch/stream, Ratis). Existing daily ConfigNode-crash ITs all use stopForcibly() (SIGKILL), which kills the process before it can run the PROCESSING branch; the new test uses a graceful stop() (SIGTERM) of the leader among 3 ConfigNodes so the interrupted PROCESSING path is actually exercised across a real leader switch. --- ...IoTDBRegionOperationReliabilityITFramework.java | 43 ++++++++++++++++++++++ .../IoTDBRegionMigrateConfigNodeCrashIoTV1IT.java | 24 ++++++++++++ ...DBRegionMigrateConfigNodeCrashIoTV2BatchIT.java | 24 ++++++++++++ ...BRegionMigrateConfigNodeCrashIoTV2StreamIT.java | 24 ++++++++++++ ...oTDBRegionMigrateConfigNodeCrashForRatisIT.java | 24 ++++++++++++ .../iotdb/confignode/i18n/ProcedureMessages.java | 2 +- .../iotdb/confignode/i18n/ProcedureMessages.java | 2 +- .../impl/region/AddRegionPeerProcedure.java | 14 ++++++- 8 files changed, 154 insertions(+), 3 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java index 9b9c4ad41a1..b2da179dfb7 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java @@ -119,6 +119,27 @@ public class IoTDBRegionOperationReliabilityITFramework { LOGGER.info("Cluster has been restarted"); }; + /** + * Gracefully stop (SIGTERM, not a forcible kill) the ConfigNode that hit the kill point, then + * restart it. A graceful stop lets the ConfigNode run its shutdown hooks, which interrupts the + * in-flight region-operation procedure worker. This reproduces a leader switch / graceful + * shutdown during AddRegionPeer: the interrupted {@code waitTaskFinish()} returns PROCESSING + * while the AddRegionPeerTask is still running on the coordinator DataNode. The procedure must + * NOT silently end here, otherwise the parent RegionMigrateProcedure would falsely treat AddPeer + * as complete and remove the source replica before the destination replica is actually Running. + * See AddRegionPeerProcedure#executeFromState DO_ADD_REGION_PEER PROCESSING branch. + */ + public static Consumer<KillPointContext> actionOfGracefullyRestartConfigNode = + context -> { + Assert.assertTrue(context.getNodeWrapper() instanceof ConfigNodeWrapper); + context.getNodeWrapper().stop(); + LOGGER.info("ConfigNode {} gracefully stopped.", context.getNodeWrapper().getId()); + Assert.assertFalse(context.getNodeWrapper().isAlive()); + context.getNodeWrapper().start(); + LOGGER.info("ConfigNode {} restarted.", context.getNodeWrapper().getId()); + Assert.assertTrue(context.getNodeWrapper().isAlive()); + }; + @Before public void setUp() throws Exception { EnvFactory.getEnv() @@ -155,6 +176,28 @@ public class IoTDBRegionOperationReliabilityITFramework { killNode); } + public void successTestWithAction( + final int dataReplicateFactor, + final int schemaReplicationFactor, + final int configNodeNum, + final int dataNodeNum, + KeySetView<String, Boolean> killConfigNodeKeywords, + KeySetView<String, Boolean> killDataNodeKeywords, + Consumer<KillPointContext> actionWhenDetectKeyWords, + KillNode killNode) + throws Exception { + generalTestWithAllOptions( + dataReplicateFactor, + schemaReplicationFactor, + configNodeNum, + dataNodeNum, + killConfigNodeKeywords, + killDataNodeKeywords, + actionWhenDetectKeyWords, + true, + killNode); + } + public void failTest( final int dataReplicateFactor, final int schemaReplicationFactor, diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv1/IoTDBRegionMigrateConfigNodeCrashIoTV1IT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv1/IoTDBRegionMigrateConfigNodeCrashIoTV1IT.java index ebda0c36ee3..23eb6380ad3 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv1/IoTDBRegionMigrateConfigNodeCrashIoTV1IT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv1/IoTDBRegionMigrateConfigNodeCrashIoTV1IT.java @@ -28,6 +28,7 @@ import org.apache.iotdb.confignode.procedure.state.RemoveRegionPeerState; import org.apache.iotdb.consensus.ConsensusFactory; import org.apache.iotdb.it.env.EnvFactory; import org.apache.iotdb.it.framework.IoTDBTestRunner; +import org.apache.iotdb.itbase.category.ClusterIT; import org.apache.iotdb.itbase.category.DailyIT; import org.junit.Before; @@ -92,6 +93,29 @@ public class IoTDBRegionMigrateConfigNodeCrashIoTV1IT KillNode.CONFIG_NODE); } + /** + * Gracefully restart (not forcibly kill) the ConfigNode leader while AddRegionPeer is waiting for + * the coordinator's task to finish (DO_ADD_REGION_PEER). The graceful shutdown interrupts the + * procedure worker, so waitTaskFinish() returns PROCESSING. The migration must still finish + * correctly after a leader switch: previously the AddRegionPeerProcedure silently ended on + * PROCESSING, letting the parent procedure remove the source replica before the destination + * replica was actually Running. Uses 3 ConfigNodes so a real leader switch happens. + */ + @Test + // TODO temporarily also run on per-PR Cluster IT (1C3D) to validate; revert to DailyIT-only later + @Category({DailyIT.class, ClusterIT.class}) + public void cnLeaderSwitchDuringDoAddPeerTest() throws Exception { + successTestWithAction( + 1, + 1, + 3, + 2, + buildSet(AddRegionPeerState.DO_ADD_REGION_PEER), + noKillPoints(), + actionOfGracefullyRestartConfigNode, + KillNode.CONFIG_NODE); + } + @Test public void cnCrashDuringUpdateCacheTest() throws Exception { successTest( diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/batch/IoTDBRegionMigrateConfigNodeCrashIoTV2BatchIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/batch/IoTDBRegionMigrateConfigNodeCrashIoTV2BatchIT.java index bc4f477b6bd..70cbe44de74 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/batch/IoTDBRegionMigrateConfigNodeCrashIoTV2BatchIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/batch/IoTDBRegionMigrateConfigNodeCrashIoTV2BatchIT.java @@ -26,6 +26,7 @@ import org.apache.iotdb.confignode.procedure.state.AddRegionPeerState; import org.apache.iotdb.confignode.procedure.state.RegionTransitionState; import org.apache.iotdb.confignode.procedure.state.RemoveRegionPeerState; import org.apache.iotdb.it.framework.IoTDBTestRunner; +import org.apache.iotdb.itbase.category.ClusterIT; import org.apache.iotdb.itbase.category.DailyIT; import org.junit.Ignore; @@ -79,6 +80,29 @@ public class IoTDBRegionMigrateConfigNodeCrashIoTV2BatchIT KillNode.CONFIG_NODE); } + /** + * Gracefully restart (not forcibly kill) the ConfigNode leader while AddRegionPeer is waiting for + * the coordinator's task to finish (DO_ADD_REGION_PEER). The graceful shutdown interrupts the + * procedure worker, so waitTaskFinish() returns PROCESSING. The migration must still finish + * correctly after a leader switch: previously the AddRegionPeerProcedure silently ended on + * PROCESSING, letting the parent procedure remove the source replica before the destination + * replica was actually Running. Uses 3 ConfigNodes so a real leader switch happens. + */ + @Test + // TODO temporarily also run on per-PR Cluster IT (1C3D) to validate; revert to DailyIT-only later + @Category({DailyIT.class, ClusterIT.class}) + public void cnLeaderSwitchDuringDoAddPeerTest() throws Exception { + successTestWithAction( + 1, + 1, + 3, + 2, + buildSet(AddRegionPeerState.DO_ADD_REGION_PEER), + noKillPoints(), + actionOfGracefullyRestartConfigNode, + KillNode.CONFIG_NODE); + } + @Test public void cnCrashDuringUpdateCacheTest() throws Exception { successTest( diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/stream/IoTDBRegionMigrateConfigNodeCrashIoTV2StreamIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/stream/IoTDBRegionMigrateConfigNodeCrashIoTV2StreamIT.java index 39b5953de4a..704ab5e5886 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/stream/IoTDBRegionMigrateConfigNodeCrashIoTV2StreamIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/iotv2/stream/IoTDBRegionMigrateConfigNodeCrashIoTV2StreamIT.java @@ -28,6 +28,7 @@ import org.apache.iotdb.confignode.procedure.state.RemoveRegionPeerState; import org.apache.iotdb.consensus.ConsensusFactory; import org.apache.iotdb.it.env.EnvFactory; import org.apache.iotdb.it.framework.IoTDBTestRunner; +import org.apache.iotdb.itbase.category.ClusterIT; import org.apache.iotdb.itbase.category.DailyIT; import org.junit.Before; @@ -93,6 +94,29 @@ public class IoTDBRegionMigrateConfigNodeCrashIoTV2StreamIT KillNode.CONFIG_NODE); } + /** + * Gracefully restart (not forcibly kill) the ConfigNode leader while AddRegionPeer is waiting for + * the coordinator's task to finish (DO_ADD_REGION_PEER). The graceful shutdown interrupts the + * procedure worker, so waitTaskFinish() returns PROCESSING. The migration must still finish + * correctly after a leader switch: previously the AddRegionPeerProcedure silently ended on + * PROCESSING, letting the parent procedure remove the source replica before the destination + * replica was actually Running. Uses 3 ConfigNodes so a real leader switch happens. + */ + @Test + // TODO temporarily also run on per-PR Cluster IT (1C3D) to validate; revert to DailyIT-only later + @Category({DailyIT.class, ClusterIT.class}) + public void cnLeaderSwitchDuringDoAddPeerTest() throws Exception { + successTestWithAction( + 1, + 1, + 3, + 2, + buildSet(AddRegionPeerState.DO_ADD_REGION_PEER), + noKillPoints(), + actionOfGracefullyRestartConfigNode, + KillNode.CONFIG_NODE); + } + @Test public void cnCrashDuringUpdateCacheTest() throws Exception { successTest( diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/ratis/IoTDBRegionMigrateConfigNodeCrashForRatisIT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/ratis/IoTDBRegionMigrateConfigNodeCrashForRatisIT.java index 67b318691a9..cfcbd6f7802 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/ratis/IoTDBRegionMigrateConfigNodeCrashForRatisIT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/daily/ratis/IoTDBRegionMigrateConfigNodeCrashForRatisIT.java @@ -25,6 +25,7 @@ import org.apache.iotdb.confignode.it.regionmigration.IoTDBRegionMigrateITFramew import org.apache.iotdb.confignode.procedure.state.AddRegionPeerState; import org.apache.iotdb.confignode.procedure.state.RemoveRegionPeerState; import org.apache.iotdb.it.framework.IoTDBTestRunner; +import org.apache.iotdb.itbase.category.ClusterIT; import org.apache.iotdb.itbase.category.DailyIT; import org.junit.Test; @@ -76,6 +77,29 @@ public class IoTDBRegionMigrateConfigNodeCrashForRatisIT KillNode.CONFIG_NODE); } + /** + * Gracefully restart (not forcibly kill) the ConfigNode leader while AddRegionPeer is waiting for + * the coordinator's task to finish (DO_ADD_REGION_PEER). The graceful shutdown interrupts the + * procedure worker, so waitTaskFinish() returns PROCESSING. The migration must still finish + * correctly after a leader switch: previously the AddRegionPeerProcedure silently ended on + * PROCESSING, letting the parent procedure remove the source replica before the destination + * replica was actually Running. Uses 3 ConfigNodes so a real leader switch happens. + */ + @Test + // TODO temporarily also run on per-PR Cluster IT (1C3D) to validate; revert to DailyIT-only later + @Category({DailyIT.class, ClusterIT.class}) + public void cnLeaderSwitchDuringDoAddPeerTest() throws Exception { + successTestWithAction( + 1, + 1, + 3, + 2, + buildSet(AddRegionPeerState.DO_ADD_REGION_PEER), + noKillPoints(), + actionOfGracefullyRestartConfigNode, + KillNode.CONFIG_NODE); + } + @Test public void cnCrashDuringUpdateCacheTest() throws Exception { successTest( diff --git a/iotdb-core/confignode/src/main/i18n/en/org/apache/iotdb/confignode/i18n/ProcedureMessages.java b/iotdb-core/confignode/src/main/i18n/en/org/apache/iotdb/confignode/i18n/ProcedureMessages.java index a43e5151d40..f4ae087b159 100644 --- a/iotdb-core/confignode/src/main/i18n/en/org/apache/iotdb/confignode/i18n/ProcedureMessages.java +++ b/iotdb-core/confignode/src/main/i18n/en/org/apache/iotdb/confignode/i18n/ProcedureMessages.java @@ -1001,7 +1001,7 @@ public final class ProcedureMessages { public static final String VALIDATE_TABLE_FOR_TABLE_WHEN_SETTING_PROPERTIES = "Validate table for table {}.{} when setting properties"; public static final String WAITTASKFINISH_RETURNS_PROCESSING_WHICH_MEANS_THE_WAITING_HAS_BEEN_INTERRUPTED = - "waitTaskFinish() returns PROCESSING, which means the waiting has been interrupted, this procedure will end without rollback"; + "waitTaskFinish() returns PROCESSING, which means the waiting has been interrupted (ConfigNode shutdown or leader change); the AddRegionPeer task is still running on the coordinator, this procedure will stay at DO_ADD_REGION_PEER and resume polling after recovery"; public static final String FAILED_TO_CREATE_DATABASE_THE_TTL_SHOULD_BE_NON_NEGATIVE = "Failed to create database. The TTL should be non-negative."; public static final String FAILED_TO_CREATE_DATABASE_THE_DATAREGIONGROUPNUM_SHOULD_BE_POSITIVE = "Failed to create database. The dataRegionGroupNum should be positive."; diff --git a/iotdb-core/confignode/src/main/i18n/zh/org/apache/iotdb/confignode/i18n/ProcedureMessages.java b/iotdb-core/confignode/src/main/i18n/zh/org/apache/iotdb/confignode/i18n/ProcedureMessages.java index dbad526cce7..126055a7c57 100644 --- a/iotdb-core/confignode/src/main/i18n/zh/org/apache/iotdb/confignode/i18n/ProcedureMessages.java +++ b/iotdb-core/confignode/src/main/i18n/zh/org/apache/iotdb/confignode/i18n/ProcedureMessages.java @@ -999,7 +999,7 @@ public final class ProcedureMessages { public static final String VALIDATE_TABLE_FOR_TABLE_WHEN_SETTING_PROPERTIES = "Validate table for table {}.{} when setting properties"; public static final String WAITTASKFINISH_RETURNS_PROCESSING_WHICH_MEANS_THE_WAITING_HAS_BEEN_INTERRUPTED = - "waitTaskFinish() returns PROCESSING, which means the waiting has been interrupted, this procedure will end without rollback"; + "waitTaskFinish() 返回 PROCESSING,表示等待被中断(ConfigNode 关闭或主节点切换);AddRegionPeer 任务仍在协调者上运行,该流程将停留在 DO_ADD_REGION_PEER 状态,恢复后继续轮询"; public static final String FAILED_TO_CREATE_DATABASE_THE_TTL_SHOULD_BE_NON_NEGATIVE = "创建数据库失败。TTL 不能为负数。"; public static final String FAILED_TO_CREATE_DATABASE_THE_DATAREGIONGROUPNUM_SHOULD_BE_POSITIVE = "创建数据库失败。dataRegionGroupNum 应为正数。"; diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java index d09647a332f..dac9a6bae04 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/impl/region/AddRegionPeerProcedure.java @@ -124,10 +124,22 @@ public class AddRegionPeerProcedure extends RegionOperationProcedure<AddRegionPe return warnAndRollBackAndNoMoreState( env, handler, String.format("%s result is %s", state, result.getTaskStatus())); case PROCESSING: + // waitTaskFinish() only returns PROCESSING when its polling loop was interrupted by + // an InterruptedException, i.e. this ConfigNode is shutting down / losing leadership + // (a user CANCEL or a coordinator disconnection both go through the FAIL branch + // above). The AddRegionPeerTask is still running on the coordinator DataNode, so we + // must NOT silently end here: doing so would let the parent RegionMigrateProcedure + // proceed to CHECK_ADD_REGION_PEER / REMOVE_REGION_PEER and remove the source replica + // before the destination replica has actually finished receiving the snapshot. + // Instead, stay in DO_ADD_REGION_PEER and persist it; after recovery the new leader + // re-enters this state and re-polls the still-running coordinator task (the + // isStateDeserialized() guard above prevents re-submitting the task) until it really + // reaches SUCCESS or FAIL. LOGGER.info( ProcedureMessages .WAITTASKFINISH_RETURNS_PROCESSING_WHICH_MEANS_THE_WAITING_HAS_BEEN_INTERRUPTED); - return Flow.NO_MORE_STATE; + setNextState(AddRegionPeerState.DO_ADD_REGION_PEER); + break outerSwitch; case SUCCESS: setNextState(UPDATE_REGION_LOCATION_CACHE); break outerSwitch;
