This is an automated email from the ASF dual-hosted git repository.
swamirishi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 4d6f3a5a769 HDDS-13772. Snapshot Paths to be re read from om
checkpoint db inside lock again. (#9131)
4d6f3a5a769 is described below
commit 4d6f3a5a769cca01b98b69a89f80f1da6e6eced0
Author: Sadanand Shenoy <[email protected]>
AuthorDate: Sun Nov 2 19:49:07 2025 +0530
HDDS-13772. Snapshot Paths to be re read from om checkpoint db inside lock
again. (#9131)
Co-authored-by: Sadanand Shenoy <[email protected]>
---
.../TestOMDbCheckpointServletInodeBasedXfer.java | 103 ++++++++++++++++++++-
.../om/OMDBCheckpointServletInodeBasedXfer.java | 32 ++++---
2 files changed, 123 insertions(+), 12 deletions(-)
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
index 0f5c8bae4b4..f2b94182c80 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
@@ -21,6 +21,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
+import static
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL;
import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR;
import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
@@ -35,6 +36,8 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.anySet;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyBoolean;
import static org.mockito.Mockito.doCallRealMethod;
@@ -146,6 +149,7 @@ void init() throws Exception {
// ensure cache entries are not evicted thereby snapshot db's are not
closed
conf.setTimeDuration(OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL,
100, TimeUnit.MINUTES);
+ conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 100,
TimeUnit.MILLISECONDS);
}
@AfterEach
@@ -228,12 +232,15 @@ public void write(int b) throws IOException {
.thenReturn(lock);
doCallRealMethod().when(omDbCheckpointServletMock).getCheckpoint(any(),
anyBoolean());
assertNull(doCallRealMethod().when(omDbCheckpointServletMock).getBootstrapTempData());
- doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirs(any());
doCallRealMethod().when(omDbCheckpointServletMock).
processMetadataSnapshotRequest(any(), any(), anyBoolean(),
anyBoolean());
doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(),
any(), any(), any());
doCallRealMethod().when(omDbCheckpointServletMock).getCompactionLogDir();
doCallRealMethod().when(omDbCheckpointServletMock).getSstBackupDir();
+ doCallRealMethod().when(omDbCheckpointServletMock)
+ .transferSnapshotData(anySet(), any(), anySet(), any(), any(),
anyMap());
+
doCallRealMethod().when(omDbCheckpointServletMock).createAndPrepareCheckpoint(anyBoolean());
+
doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any());
}
@ParameterizedTest
@@ -586,6 +593,99 @@ public void testBootstrapLockBlocksMultipleServices()
throws Exception {
assertTrue(servicesSucceeded.get() > 0, "Services should have succeeded
after lock release");
}
+ /**
+ * Tests the full checkpoint servlet flow to ensure snapshot paths are read
+ * from checkpoint metadata (frozen state) rather than live OM metadata
(current state).
+ * Scenario:
+ * 1. Create snapshot S1.
+ * 2. Create snapshot S2 later, just before the checkpoint is taken.
+ * 3. Servlet processes the checkpoint - it should include both S1 and S2 data,
+ * because the checkpoint's snapshotInfoTable contains S1 and S2.
+ */
+ @Test
+ public void testCheckpointIncludesSnapshotsFromFrozenState() throws
Exception {
+ String volumeName = "vol" + RandomStringUtils.secure().nextNumeric(5);
+ String bucketName = "buck" + RandomStringUtils.secure().nextNumeric(5);
+
+ setupCluster();
+ om.getKeyManager().getSnapshotSstFilteringService().pause();
+
+ // Create test data and snapshots
+ OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client,
volumeName, bucketName);
+
+ // Create key before first snapshot
+ TestDataUtil.createKey(bucket, "key1",
+ ReplicationConfig.fromTypeAndFactor(ReplicationType.RATIS,
ReplicationFactor.ONE),
+ "data1".getBytes(StandardCharsets.UTF_8));
+ client.getObjectStore().createSnapshot(volumeName, bucketName,
"snapshot1");
+ // At this point the live OM has exactly one snapshot: S1
+ List<OzoneSnapshot> snapshots = new ArrayList<>();
+ client.getObjectStore().listSnapshot(volumeName, bucketName, "", null)
+ .forEachRemaining(snapshots::add);
+ assertEquals(1, snapshots.size(), "Should have 1 snapshot initially");
+ OzoneSnapshot snapshot1 = snapshots.stream()
+ .filter(snap -> snap.getName().equals("snapshot1"))
+ .findFirst()
+ .orElseThrow(() -> new RuntimeException("snapshot1 not found"));
+
+ // Setup servlet mocks for checkpoint processing
+ setupMocks();
+
when(requestMock.getParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA)).thenReturn("true");
+
+ // Spy the DB store so that snapshot2 is created right before the checkpoint is taken
+ DBStore dbStore = om.getMetadataManager().getStore();
+ DBStore spyDbStore = spy(dbStore);
+ AtomicReference<DBCheckpoint> capturedCheckpoint = new AtomicReference<>();
+
+ when(spyDbStore.getCheckpoint(true)).thenAnswer(invocation -> {
+ // Create snapshot2 just before the real checkpoint is taken, so that the
+ // checkpoint's snapshotInfoTable contains both snapshot1 and snapshot2.
+ client.getObjectStore().createSnapshot(volumeName, bucketName,
"snapshot2");
+ // Also wait for double buffer to flush to ensure all transactions are
committed
+ om.awaitDoubleBufferFlush();
+ DBCheckpoint checkpoint = spy(dbStore.getCheckpoint(true));
+ doNothing().when(checkpoint).cleanupCheckpoint(); // Don't cleanup for
verification
+ capturedCheckpoint.set(checkpoint);
+ return checkpoint;
+ });
+
+ // Initialize servlet
+ doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(),
+ eq(false), any(), any(), eq(false));
+ omDbCheckpointServletMock.initialize(spyDbStore,
om.getMetrics().getDBCheckpointMetrics(),
+ false, om.getOmAdminUsernames(), om.getOmAdminGroups(), false);
+ when(responseMock.getOutputStream()).thenReturn(servletOutputStream);
+ // Process checkpoint servlet
+ omDbCheckpointServletMock.doGet(requestMock, responseMock);
+ snapshots.clear();
+ client.getObjectStore().listSnapshot(volumeName, bucketName, "", null)
+ .forEachRemaining(snapshots::add);
+ assertEquals(2, snapshots.size(), "Should have 2 snapshots");
+ OzoneSnapshot snapshot2 = snapshots.stream()
+ .filter(snap -> snap.getName().equals("snapshot2"))
+ .findFirst()
+ .orElseThrow(() -> new RuntimeException("snapshot2 not found"));
+ // Extract tarball and verify contents
+ String testDirName = folder.resolve("testDir").toString();
+ String newDbDirName = testDirName + OM_KEY_PREFIX + OM_DB_NAME;
+ File newDbDir = new File(newDbDirName);
+ assertTrue(newDbDir.mkdirs());
+ FileUtil.unTar(tempFile, newDbDir);
+ OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true);
+ Path snapshot1DbDir = Paths.get(newDbDir.toPath().toString(),
OM_SNAPSHOT_CHECKPOINT_DIR,
+ OM_DB_NAME + "-" + snapshot1.getSnapshotId());
+ Path snapshot2DbDir = Paths.get(newDbDir.toPath().toString(),
OM_SNAPSHOT_CHECKPOINT_DIR,
+ OM_DB_NAME + "-" + snapshot2.getSnapshotId());
+ boolean snapshot1IncludedInCheckpoint = Files.exists(snapshot1DbDir);
+ boolean snapshot2IncludedInCheckpoint = Files.exists(snapshot2DbDir);
+ assertTrue(snapshot1IncludedInCheckpoint && snapshot2IncludedInCheckpoint,
+ "Checkpoint should include both snapshot1 and snapshot2 data");
+ // Cleanup
+ if (capturedCheckpoint.get() != null) {
+ capturedCheckpoint.get().cleanupCheckpoint();
+ }
+ }
+
private static void deleteWalFiles(Path snapshotDbDir) throws IOException {
try (Stream<Path> filesInTarball = Files.list(snapshotDbDir)) {
List<Path> files = filesInTarball.filter(p ->
p.toString().contains(".log"))
@@ -648,6 +748,7 @@ private void setupClusterAndMocks(String volumeName, String
bucketName,
// Init the mock with the spyDbstore
doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(),
eq(false), any(), any(), eq(false));
+
doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any());
omDbCheckpointServletMock.initialize(spyDbStore,
om.getMetrics().getDBCheckpointMetrics(),
false,
om.getOmAdminUsernames(), om.getOmAdminGroups(), false);
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
index 27e7f1c2d6d..0c120ba080d 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
@@ -67,6 +67,7 @@
import org.apache.hadoop.hdds.utils.DBCheckpointServlet;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.lock.BootstrapStateHandler;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
@@ -219,7 +220,7 @@ public void writeDbDataToStream(HttpServletRequest request,
OutputStream destina
if (!includeSnapshotData) {
maxTotalSstSize.set(Long.MAX_VALUE);
} else {
- snapshotPaths = getSnapshotDirs(omMetadataManager);
+ snapshotPaths = getSnapshotDirsFromDB(omMetadataManager);
}
if (sstFilesToExclude.isEmpty()) {
@@ -263,6 +264,11 @@ public void writeDbDataToStream(HttpServletRequest
request, OutputStream destina
writeDBToArchive(sstFilesToExclude, checkpointDir,
maxTotalSstSize, archiveOutputStream, tmpdir, hardLinkFileMap,
false);
if (includeSnapshotData) {
+ // get the list of snapshots from the checkpoint
+ try (OmMetadataManagerImpl checkpointMetadataManager =
OmMetadataManagerImpl
+ .createCheckpointMetadataManager(om.getConfiguration(),
checkpoint)) {
+ snapshotPaths = getSnapshotDirsFromDB(checkpointMetadataManager);
+ }
writeDBToArchive(sstFilesToExclude, getCompactionLogDir(),
maxTotalSstSize, archiveOutputStream, tmpdir,
hardLinkFileMap, false);
writeDBToArchive(sstFilesToExclude, sstBackupFiles.stream(),
@@ -295,7 +301,7 @@ public void writeDbDataToStream(HttpServletRequest request,
OutputStream destina
* @param hardLinkFileMap Map of hardlink file paths to their unique
identifiers for deduplication.
* @throws IOException if an I/O error occurs during processing.
*/
- private void transferSnapshotData(Set<String> sstFilesToExclude, Path
tmpdir, Set<Path> snapshotPaths,
+ void transferSnapshotData(Set<String> sstFilesToExclude, Path tmpdir,
Set<Path> snapshotPaths,
AtomicLong maxTotalSstSize, ArchiveOutputStream<TarArchiveEntry>
archiveOutputStream,
Map<String, String> hardLinkFileMap) throws IOException {
OzoneManager om = (OzoneManager)
getServletContext().getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE);
@@ -380,20 +386,24 @@ private OzoneConfiguration getConf() {
}
/**
- * Collects paths to all snapshot databases.
+ * Collects paths to all snapshot databases from the OM DB.
*
* @param omMetadataManager OMMetadataManager instance
* @return Set of paths to snapshot databases
* @throws IOException if an I/O error occurs
*/
- Set<Path> getSnapshotDirs(OMMetadataManager omMetadataManager) throws
IOException {
+ Set<Path> getSnapshotDirsFromDB(OMMetadataManager omMetadataManager) throws
IOException {
Set<Path> snapshotPaths = new HashSet<>();
- SnapshotChainManager snapshotChainManager = new
SnapshotChainManager(omMetadataManager);
- for (SnapshotChainInfo snapInfo :
snapshotChainManager.getGlobalSnapshotChain().values()) {
- String snapshotDir =
- OmSnapshotManager.getSnapshotPath(getConf(),
SnapshotInfo.getCheckpointDirName(snapInfo.getSnapshotId()));
- Path path = Paths.get(snapshotDir);
- snapshotPaths.add(path);
+ try (TableIterator<String, ? extends Table.KeyValue<String, SnapshotInfo>>
iter =
+ omMetadataManager.getSnapshotInfoTable().iterator()) {
+ while (iter.hasNext()) {
+ Table.KeyValue<String, SnapshotInfo> kv = iter.next();
+ SnapshotInfo snapshotInfo = kv.getValue();
+ String snapshotDir = OmSnapshotManager.getSnapshotPath(getConf(),
+ snapshotInfo.getCheckpointDirName());
+ Path path = Paths.get(snapshotDir);
+ snapshotPaths.add(path);
+ }
}
return snapshotPaths;
}
@@ -482,7 +492,7 @@ private boolean writeDBToArchive(Set<String>
sstFilesToExclude, Stream<Path> fil
* @param flush If true, flushes in-memory data to disk before
checkpointing.
* @throws IOException If an error occurs during checkpoint creation or file
copying.
*/
- private DBCheckpoint createAndPrepareCheckpoint(boolean flush) throws
IOException {
+ DBCheckpoint createAndPrepareCheckpoint(boolean flush) throws IOException {
// Create & return the checkpoint.
return getDbStore().getCheckpoint(flush);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]