mladjan-gadzic commented on code in PR #5083:
URL: https://github.com/apache/ozone/pull/5083#discussion_r1268530196
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java:
##########
@@ -1036,6 +1070,304 @@ public void testInstallCorruptedCheckpointFailure()
throws Exception {
assertLogCapture(logCapture, msg);
}
+ @Test
+ @DisplayName("testSnapshotBackgroundServices")
+ @SuppressWarnings("methodlength")
+ public void testSnapshotBackgroundServices()
+ throws Exception {
+ // Get the leader OM
+ String leaderOMNodeId = OmFailoverProxyUtil
+ .getFailoverProxyProvider(objectStore.getClientProxy())
+ .getCurrentProxyOMNodeId();
+ OzoneManager leaderOM = cluster.getOzoneManager(leaderOMNodeId);
+
+ // Find the inactive OM
+ String followerNodeId = leaderOM.getPeerNodes().get(0).getNodeId();
+ if (cluster.isOMActive(followerNodeId)) {
+ followerNodeId = leaderOM.getPeerNodes().get(1).getNodeId();
+ }
+ OzoneManager followerOM = cluster.getOzoneManager(followerNodeId);
+
+ // Create some snapshots, each with new keys
+ int keyIncrement = 10;
+ String snapshotNamePrefix = "snapshot";
+ String snapshotName = "";
+ List<String> keys = new ArrayList<>();
+ SnapshotInfo snapshotInfo = null;
+ for (int snapshotCount = 0; snapshotCount < 10;
+ snapshotCount++) {
+ snapshotName = snapshotNamePrefix + snapshotCount;
+ keys = writeKeys(keyIncrement);
+ snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName);
+ }
+
+ // Get the latest db checkpoint from the leader OM.
+ TransactionInfo transactionInfo =
+ TransactionInfo.readTransactionInfo(leaderOM.getMetadataManager());
+ TermIndex leaderOMTermIndex =
+ TermIndex.valueOf(transactionInfo.getTerm(),
+ transactionInfo.getTransactionIndex());
+ long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex();
+
+ // Start the inactive OM. Checkpoint installation will happen spontaneously.
+ cluster.startInactiveOM(followerNodeId);
+
+ // The recently started OM should be lagging behind the leader OM.
+ // Wait & for follower to update transactions to leader snapshot index.
+ // Timeout error if follower does not load update within 10s
+ GenericTestUtils.waitFor(() ->
+ followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex()
+ >= leaderOMSnapshotIndex - 1, 100, 10000);
+
+
+ // Verify RPC server is running
+ GenericTestUtils.waitFor(followerOM::isOmRpcServerRunning, 100, 5000);
+
+ // Read & Write after snapshot installed.
+ List<String> newKeys = writeKeys(1);
+ readKeys(newKeys);
+
+ checkSnapshot(leaderOM, followerOM, snapshotName, keys, snapshotInfo);
+
+ // verify that the bootstrap Follower OM can become leader again
Review Comment:
You are right. I fixed the comment so it does not introduce confusion.
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java:
##########
@@ -1036,6 +1070,304 @@ public void testInstallCorruptedCheckpointFailure()
throws Exception {
assertLogCapture(logCapture, msg);
}
+ @Test
+ @DisplayName("testSnapshotBackgroundServices")
+ @SuppressWarnings("methodlength")
+ public void testSnapshotBackgroundServices()
+ throws Exception {
+ // Get the leader OM
+ String leaderOMNodeId = OmFailoverProxyUtil
+ .getFailoverProxyProvider(objectStore.getClientProxy())
+ .getCurrentProxyOMNodeId();
+ OzoneManager leaderOM = cluster.getOzoneManager(leaderOMNodeId);
+
+ // Find the inactive OM
+ String followerNodeId = leaderOM.getPeerNodes().get(0).getNodeId();
+ if (cluster.isOMActive(followerNodeId)) {
+ followerNodeId = leaderOM.getPeerNodes().get(1).getNodeId();
+ }
+ OzoneManager followerOM = cluster.getOzoneManager(followerNodeId);
+
+ // Create some snapshots, each with new keys
+ int keyIncrement = 10;
+ String snapshotNamePrefix = "snapshot";
+ String snapshotName = "";
+ List<String> keys = new ArrayList<>();
+ SnapshotInfo snapshotInfo = null;
+ for (int snapshotCount = 0; snapshotCount < 10;
+ snapshotCount++) {
+ snapshotName = snapshotNamePrefix + snapshotCount;
+ keys = writeKeys(keyIncrement);
+ snapshotInfo = createOzoneSnapshot(leaderOM, snapshotName);
+ }
+
+ // Get the latest db checkpoint from the leader OM.
+ TransactionInfo transactionInfo =
+ TransactionInfo.readTransactionInfo(leaderOM.getMetadataManager());
+ TermIndex leaderOMTermIndex =
+ TermIndex.valueOf(transactionInfo.getTerm(),
+ transactionInfo.getTransactionIndex());
+ long leaderOMSnapshotIndex = leaderOMTermIndex.getIndex();
+
+ // Start the inactive OM. Checkpoint installation will happen spontaneously.
+ cluster.startInactiveOM(followerNodeId);
+
+ // The recently started OM should be lagging behind the leader OM.
+ // Wait & for follower to update transactions to leader snapshot index.
+ // Timeout error if follower does not load update within 10s
+ GenericTestUtils.waitFor(() ->
+ followerOM.getOmRatisServer().getLastAppliedTermIndex().getIndex()
+ >= leaderOMSnapshotIndex - 1, 100, 10000);
+
+
+ // Verify RPC server is running
+ GenericTestUtils.waitFor(followerOM::isOmRpcServerRunning, 100, 5000);
+
+ // Read & Write after snapshot installed.
+ List<String> newKeys = writeKeys(1);
+ readKeys(newKeys);
+
+ checkSnapshot(leaderOM, followerOM, snapshotName, keys, snapshotInfo);
+
+ // verify that the bootstrap Follower OM can become leader again
Review Comment:
You are right. I fixed the comment so it does not introduce confusion.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]