aswinshakil commented on code in PR #7112:
URL: https://github.com/apache/ozone/pull/7112#discussion_r1735168998
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java:
##########
@@ -265,4 +277,104 @@ private void createFileKey(OzoneBucket bucket, String
keyName)
fileKey.write(value);
}
}
+
+
Review Comment:
We remove the whitespaces here.
##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java:
##########
@@ -674,19 +675,36 @@ private ReferenceCounted<OmSnapshot> getSnapshot(String
snapshotTableKey, boolea
}
/**
- * Returns true if the snapshot is in given status.
- * @param key DB snapshot table key
- * @param status SnapshotStatus
- * @return true if the snapshot is in given status, false otherwise
+ * Returns OmSnapshot object and skips active check.
+ * This should only be used for API calls initiated by background service
e.g. purgeKeys, purgeSnapshot,
+ * snapshotMoveDeletedKeys, and SetSnapshotProperty.
*/
- public boolean isSnapshotStatus(String key,
- SnapshotInfo.SnapshotStatus status)
- throws IOException {
- return getSnapshotInfo(key).getSnapshotStatus().equals(status);
+ public ReferenceCounted<OmSnapshot> getSnapshot(UUID snapshotId) throws
IOException {
+ return snapshotCache.get(snapshotId);
}
- public SnapshotInfo getSnapshotInfo(String key) throws IOException {
- return SnapshotUtils.getSnapshotInfo(ozoneManager, key);
+ /**
+ * Returns snapshotInfo from cache if it is present in cache, otherwise it
checks RocksDB and return value from there.
+ * This should be used by SnapshotCache only.
+ * Sometimes, the follower OM node may be lagging that it gets purgeKeys or
snapshotMoveDeletedKeys from a Snapshot,
+ * and purgeSnapshot for the same Snapshot one after another. And
purgeSnapshot's validateAndUpdateCache gets
+ * executed before doubleBuffer flushes purgeKeys or snapshotMoveDeletedKeys
from that Snapshot.
+ * This should not be a case on the leader node because
SnapshotDeletingService checks that deletedTable and
+ * deletedDirectoryTable in DB don't have entries for the bucket before it
sends a purgeSnapshot on a snapshot.
+ * If that happens, and we just look into the cache, the addToBatch
operation will fail when it tries to open
+ * the DB and purgeKeys from the Snapshot because snapshot is already purged
from the SnapshotInfoTable cache.
+ * Hence, it is needed to look into the table to make sure that snapshot
exists somewhere either in cache or in DB.
+ */
+ private SnapshotInfo getSnapshotInfo(String snapshotKey) throws IOException {
+ SnapshotInfo snapshotInfo =
ozoneManager.getMetadataManager().getSnapshotInfoTable().get(snapshotKey);
+
+ if (snapshotInfo == null) {
+ snapshotInfo =
ozoneManager.getMetadataManager().getSnapshotInfoTable().getSkipCache(snapshotKey);
+ }
+ if (snapshotInfo == null) {
+ throw new OMException("Snapshot '" + snapshotKey + "' is not found.",
KEY_NOT_FOUND);
Review Comment:
`INVALID_SNAPSHOT_ERROR` sounds appropriate for a snapshot error.
```suggestion
throw new OMException("Snapshot '" + snapshotKey + "' is not found.",
INVALID_SNAPSHOT_ERROR);
```
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerHASnapshot.java:
##########
@@ -265,4 +277,104 @@ private void createFileKey(OzoneBucket bucket, String
keyName)
fileKey.write(value);
}
}
+
+
+
+ /**
+ * This is to simulate HDDS-11152 scenario. In which a follower's
doubleBuffer is lagging and accumulates purgeKey
+ * and purgeSnapshot in same batch.
+ */
+ @Test
+ public void testKeyAndSnapshotDeletionService() throws IOException,
InterruptedException, TimeoutException {
+ OzoneManager omLeader = cluster.getOMLeader();
+ OzoneManager omFollower;
+
+ if (omLeader != cluster.getOzoneManager(0)) {
+ omFollower = cluster.getOzoneManager(0);
+ } else {
+ omFollower = cluster.getOzoneManager(1);
+ }
+
+ int numKeys = 5;
+ List<String> keys = new ArrayList<>();
+ for (int i = 0; i < numKeys; i++) {
+ String keyName = "key-" + RandomStringUtils.randomNumeric(10);
+ createFileKey(ozoneBucket, keyName);
+ keys.add(keyName);
+ }
+
+ // Stop the key deletion service so that deleted keys get trapped in the
snapshots.
+ omLeader.getKeyManager().getDeletingService().suspend();
+ // Stop the snapshot deletion service so that deleted keys get trapped in
the snapshots.
+ omLeader.getKeyManager().getSnapshotDeletingService().suspend();
+
+ // Delete half of the keys
+ for (int i = 0; i < numKeys / 2; i++) {
+ ozoneBucket.deleteKey(keys.get(i));
+ }
+
+ String snapshotName = "snap-" + RandomStringUtils.randomNumeric(10);
+ createSnapshot(volumeName, bucketName, snapshotName);
+
+ store.deleteSnapshot(volumeName, bucketName, snapshotName);
+
+ // Pause double buffer on follower node to accumulate all the key purge,
snapshot delete and purge transactions.
+
omFollower.getOmRatisServer().getOmStateMachine().getOzoneManagerDoubleBuffer().stopDaemon();
+
+ long keyDeleteServiceCount =
omLeader.getKeyManager().getDeletingService().getRunCount().get();
+ omLeader.getKeyManager().getDeletingService().resume();
+
+ GenericTestUtils.waitFor(
+ () ->
omLeader.getKeyManager().getDeletingService().getRunCount().get() >
keyDeleteServiceCount,
+ 1000, 60000);
+
+ long snapshotDeleteServiceCount =
omLeader.getKeyManager().getSnapshotDeletingService().getRunCount().get();
+ omLeader.getKeyManager().getSnapshotDeletingService().resume();
+
+ GenericTestUtils.waitFor(
+ () ->
omLeader.getKeyManager().getSnapshotDeletingService().getRunCount().get() >
snapshotDeleteServiceCount,
+ 1000, 60000);
+
+ String tableKey = SnapshotInfo.getTableKey(volumeName, bucketName,
snapshotName);
+ checkSnapshotIsPurgedFromDB(omLeader, tableKey);
+
+ // Resume the DoubleBuffer and flush the pending transactions.
+ OzoneManagerDoubleBuffer omDoubleBuffer =
+
omFollower.getOmRatisServer().getOmStateMachine().getOzoneManagerDoubleBuffer();
+ omDoubleBuffer.resume();
+ CompletableFuture.supplyAsync(() -> {
+ omDoubleBuffer.flushTransactions();
+ return null;
+ });
+ omDoubleBuffer.awaitFlush();
+ checkSnapshotIsPurgedFromDB(omFollower, tableKey);
+ }
+
+ private void createSnapshot(String volName, String buckName, String
snapName) throws IOException {
+ store.createSnapshot(volName, buckName, snapName);
+
+ String tableKey = SnapshotInfo.getTableKey(volName, buckName, snapName);
+ SnapshotInfo snapshotInfo = cluster.getOMLeader().getMetadataManager()
Review Comment:
Can we use `SnapshotUtils.getSnapshotInfo` here as well?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]