This is an automated email from the ASF dual-hosted git repository. CRZbulabula pushed a commit to branch improve-region-reconstruct-iotv1-test in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 3fff9dd4030a3a49875b355ccf4f18538753d72b Author: Yongzao <[email protected]> AuthorDate: Mon May 18 21:17:12 2026 +0800 f --- ...IoTDBRegionOperationReliabilityITFramework.java | 19 ++++++++++++++++++ .../commit/IoTDBRegionReconstructForIoTV1IT.java | 23 +++++++++++++++------- .../procedure/env/RegionMaintainHandler.java | 9 +++++++-- 3 files changed, 42 insertions(+), 9 deletions(-) diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java index fb31828a794..58393ecc61b 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/IoTDBRegionOperationReliabilityITFramework.java @@ -736,4 +736,23 @@ public class IoTDBRegionOperationReliabilityITFramework { } return result; } + + /** Returns regionId -> dataNodeId -> status as reported by {@code show regions}. */ + protected static Map<Integer, Map<Integer, String>> getRegionStatusMap(Session session) + throws IoTDBConnectionException, StatementExecutionException { + SessionDataSet dataSet = session.executeQueryStatement("show regions"); + final int regionIdIndex = dataSet.getColumnNames().indexOf("RegionId"); + final int dataNodeIdIndex = dataSet.getColumnNames().indexOf("DataNodeId"); + final int regionStatusIndex = dataSet.getColumnNames().indexOf("Status"); + dataSet.setFetchSize(1024); + Map<Integer, Map<Integer, String>> result = new TreeMap<>(); + while (dataSet.hasNext()) { + List<Field> fields = dataSet.next().getFields(); + final int regionId = fields.get(regionIdIndex).getIntV(); + final int dataNodeId = fields.get(dataNodeIdIndex).getIntV(); + final String regionStatus = fields.get(regionStatusIndex).toString(); + result.computeIfAbsent(regionId, k -> new TreeMap<>()).put(dataNodeId, regionStatus); + } + return result; + } } diff --git a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/commit/IoTDBRegionReconstructForIoTV1IT.java b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/commit/IoTDBRegionReconstructForIoTV1IT.java index fbecdabcc60..ccde946ada5 100644 --- a/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/commit/IoTDBRegionReconstructForIoTV1IT.java +++ b/integration-test/src/test/java/org/apache/iotdb/confignode/it/regionmigration/pass/commit/IoTDBRegionReconstructForIoTV1IT.java @@ -46,6 +46,7 @@ import org.slf4j.LoggerFactory; import java.io.File; import java.sql.Connection; import java.sql.Statement; +import java.util.Collections; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -108,7 +109,9 @@ public class IoTDBRegionReconstructForIoTV1IT extends IoTDBRegionOperationReliab Assert.assertTrue(dataRegionMap.containsKey(selectedRegion)); Pair<Integer, Set<Integer>> leaderAndNodeIds = dataRegionMap.get(selectedRegion); Assert.assertEquals(2, leaderAndNodeIds.right.size()); - // reconstruct from the leader to ensure no data is lost + // Stop the current leader; reconstruct will later target the surviving follower (whose + // tsfiles get deleted below). When the original leader is restarted, the follower's + // missing data is replicated back from it, so no committed data is lost. final int dataNodeToBeClosed = leaderAndNodeIds.left; final int dataNodeToBeReconstructed = leaderAndNodeIds.right.stream().filter(x -> x != dataNodeToBeClosed).findAny().get(); @@ -172,19 +175,25 @@ public class IoTDBRegionReconstructForIoTV1IT extends IoTDBRegionOperationReliab EnvFactory.getAbstractEnv().checkNodeInStatus(dataNodeToBeClosed, NodeStatus.Running); session.executeNonQueryStatement( String.format(RECONSTRUCT_FORMAT, selectedRegion, dataNodeToBeReconstructed)); + // Confirm reconstruct succeeded: the selected region must contain both the reconstructed + // peer and the (formerly closed) peer, with both rows reporting Running. try { Awaitility.await() .pollInterval(1, TimeUnit.SECONDS) .atMost(10, TimeUnit.MINUTES) .until( - () -> - getRegionStatusWithoutRunning(session).isEmpty() - && dataDirToBeReconstructed.getAbsoluteFile().exists()); + () -> { + Map<Integer, String> peerStatus = + getRegionStatusMap(session) + .getOrDefault(selectedRegion, Collections.emptyMap()); + return "Running".equals(peerStatus.get(dataNodeToBeReconstructed)) + && "Running".equals(peerStatus.get(dataNodeToBeClosed)); + }); } catch (Exception e) { LOGGER.error( - "Two factor: {} && {}", - getRegionStatusWithoutRunning(session), - dataDirToBeReconstructed.getAbsoluteFile().exists()); + "Reconstruct did not finish in time. region {} status map: {}", + selectedRegion, + getRegionStatusMap(session).get(selectedRegion)); fail(); } EnvFactory.getEnv().dataNodeIdToWrapper(dataNodeToBeClosed).get().stopForcibly(); diff --git a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java index c106c1a1efd..8637e6c8fe7 100644 --- a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java +++ b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/procedure/env/RegionMaintainHandler.java @@ -175,9 +175,14 @@ public class RegionMaintainHandler { if (TConsensusGroupType.DataRegion.equals(regionId.getType()) && (IOT_CONSENSUS.equals(CONF.getDataRegionConsensusProtocolClass()) || IOT_CONSENSUS_V2.equals(CONF.getDataRegionConsensusProtocolClass()))) { - // parameter of createPeer for MultiLeader should be all peers + // parameter of createPeer for MultiLeader should be all peers; callers (e.g. + // AddRegionPeerProcedure) may have already inserted destDataNode into the partition + // table before reaching here, so append only when not already present to avoid + // sending a peer list with duplicates. currentPeerNodes = new ArrayList<>(regionReplicaNodes); - currentPeerNodes.add(destDataNode); + if (!currentPeerNodes.contains(destDataNode)) { + currentPeerNodes.add(destDataNode); + } } else { // parameter of createPeer for Ratis can be empty currentPeerNodes = Collections.emptyList();
