adoroszlai commented on code in PR #10024:
URL: https://github.com/apache/ozone/pull/10024#discussion_r3026309629
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java:
##########
@@ -300,6 +301,145 @@ public void testSnapshotChainManagerRestore() throws
Exception {
assertFalse(metadataManager.getSnapshotChainManager().isSnapshotChainCorrupted());
}
+ /**
+ * Tests that SnapshotDeletingService (SDS) correctly handles an OM leader
+ * failover. The old leader's SDS is suspended (simulating SDS being blocked
+ * or mid-cleanup) when a snapshot is queued for deletion. After the leader
+ * failover, the new leader's SDS picks up the pending work and correctly
+ * purges the snapshot. (HDDS-8703)
+ */
+ @Test
+ public void testSnapshotDeletingServiceDuringOMFailover()
+ throws Exception {
+ OzoneManager oldLeader = cluster.getOMLeader();
+ String oldLeaderId = oldLeader.getOMNodeId();
+
+ // Create keys and a snapshot so there is data to clean up.
+ int numKeys = 5;
+ for (int i = 0; i < numKeys; i++) {
+ createFileKey(ozoneBucket, "key-" +
RandomStringUtils.secure().nextNumeric(10));
+ }
+
+ String snapshotName = "snap-" + RandomStringUtils.secure().nextNumeric(10);
+ createSnapshot(volumeName, bucketName, snapshotName);
+
+ // Suspend SDS on the current leader before the snapshot is deleted,
+ // simulating SDS being blocked while a cleanup is pending.
+ oldLeader.getKeyManager().getSnapshotDeletingService().suspend();
+
+ // Delete the snapshot — marks it as SNAPSHOT_DELETED in the DB.
+ store.deleteSnapshot(volumeName, bucketName, snapshotName);
+ String tableKey = SnapshotInfo.getTableKey(volumeName, bucketName,
snapshotName);
+
+ // Wait for the snapshot entry to reach SNAPSHOT_DELETED state on old
leader.
+ GenericTestUtils.waitFor(() -> {
+ try {
+ SnapshotInfo info = oldLeader.getMetadataManager()
+ .getSnapshotInfoTable().get(tableKey);
+ return info != null
+ && info.getSnapshotStatus() ==
SnapshotInfo.SnapshotStatus.SNAPSHOT_DELETED;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }, 1000, 30000);
+
+ try {
+ // Trigger OM leader failover: with 3 OMs and quorum=2, the remaining
+ // two nodes elect a new leader.
+ cluster.shutdownOzoneManager(oldLeader);
+ cluster.waitForLeaderOM();
+
+ OzoneManager newLeader = cluster.getOMLeader();
+ assertNotNull(newLeader);
+ // Confirm that a genuinely different OM node became leader.
+ assertNotEquals(oldLeaderId, newLeader.getOMNodeId());
+
+ // The new leader's SDS (not suspended) must process the pending deleted
+ // snapshot and purge it from the DB, even though the old leader's SDS
+ // never ran the cleanup.
+ checkSnapshotIsPurgedFromDB(newLeader, tableKey);
+
+ // Verify the snapshot chain is not corrupted after the cleanup.
+ OmMetadataManagerImpl metadataManager =
+ (OmMetadataManagerImpl) newLeader.getMetadataManager();
+
assertFalse(metadataManager.getSnapshotChainManager().isSnapshotChainCorrupted());
+ } finally {
+ // Restore the 3-node cluster for subsequent tests.
+ cluster.restartOzoneManager(oldLeader, true);
+ }
+ }
+
+ /**
+ * Tests that SDS on the new leader correctly handles multiple snapshots
+ * queued for deletion after an OM leader failover. After the failover, all
+ * pending deletions should be completed and the snapshot chain should remain
+ * consistent. (HDDS-8703)
+ */
+ @Test
+ public void testSnapshotDeletingServiceWithMultipleSnapshotsDuringFailover()
Review Comment:
Please parameterize this test with `numSnapshots` values 1 and 3. Then
`testSnapshotDeletingServiceDuringOMFailover` can be removed, and this one
can be renamed to `testSnapshotDeletingServiceDuringOMFailover`:
```java
@ParameterizedTest
@ValueSource(ints = { 1, 3 })
void testSnapshotDeletingServiceDuringOMFailover(int numSnapshots)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]