This is an automated email from the ASF dual-hosted git repository.

swamirishi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 5feb9eacd8 HDDS-11453. OmSnapshotPurge should be in a different ozone 
manager double buffer batch (#7188)
5feb9eacd8 is described below

commit 5feb9eacd8318d8cd0106dc78a9fef7ddc4e8303
Author: Swaminathan Balachandran <[email protected]>
AuthorDate: Fri Sep 13 05:44:03 2024 -0700

    HDDS-11453. OmSnapshotPurge should be in a different ozone manager double 
buffer batch (#7188)
---
 .../ozone/om/ratis/OzoneManagerDoubleBuffer.java   | 32 +++++++----
 .../om/ratis/TestOzoneManagerDoubleBuffer.java     | 63 +++++++++++++++++-----
 2 files changed, 72 insertions(+), 23 deletions(-)

diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
index a6fcc40dda..8e4cc9fbf4 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerDoubleBuffer.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.ozone.om.S3SecretManager;
 import org.apache.hadoop.ozone.om.codec.OMDBDefinition;
 import org.apache.hadoop.ozone.om.response.CleanupTableInfo;
 import org.apache.hadoop.ozone.om.response.OMClientResponse;
+import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
 import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.Time;
@@ -426,8 +427,12 @@ public final class OzoneManagerDoubleBuffer {
    * in RocksDB callback flush. If multiple operations are flushed in one
    * specific batch, we are not sure at the flush of which specific operation
    * the callback is coming.
-   * There could be a possibility of race condition that is exposed to rocksDB
-   * behaviour for the batch.
+   * PurgeSnapshot is also considered a barrier, since purgeSnapshot 
transaction on a standalone basis is an
+   * idempotent operation. Once the directory gets deleted the previous 
transactions that have been performed on the
+   * snapshotted rocksdb would start failing on replay since those 
transactions have not been committed but the
+   * directory could have been partially deleted/ fully deleted. This could 
also lead to inconsistencies in the DB
+   * reads from the purged rocksdb if operations are not performed consciously.
+   * There could be a possibility of race condition that is exposed to rocksDB 
behaviour for the batch.
    * Hence, we treat createSnapshot as separate batch flush.
    * <p>
    * e.g. requestBuffer = [request1, request2, snapshotRequest1,
@@ -435,19 +440,17 @@ public final class OzoneManagerDoubleBuffer {
    * response = [[request1, request2], [snapshotRequest1], [request3],
    * [snapshotRequest2], [request4]]
    */
-  private List<Queue<Entry>> splitReadyBufferAtCreateSnapshot() {
+  private synchronized List<Queue<Entry>> splitReadyBufferAtCreateSnapshot() {
     final List<Queue<Entry>> response = new ArrayList<>();
-
     OMResponse previousOmResponse = null;
     for (final Entry entry : readyBuffer) {
       OMResponse omResponse = entry.getResponse().getOMResponse();
       // New queue gets created in three conditions:
       // 1. It is first element in the response,
-      // 2. Current request is createSnapshot request.
-      // 3. Previous request was createSnapshot request.
-      if (response.isEmpty() || omResponse.hasCreateSnapshotResponse()
-          || (previousOmResponse != null &&
-          previousOmResponse.hasCreateSnapshotResponse())) {
+      // 2. Current request is createSnapshot/purgeSnapshot request.
+      // 3. Previous request was createSnapshot/purgeSnapshot request.
+      if (response.isEmpty() || isStandaloneBatchCmdTypes(omResponse)
+          || isStandaloneBatchCmdTypes(previousOmResponse)) {
         response.add(new LinkedList<>());
       }
 
@@ -458,6 +461,15 @@ public final class OzoneManagerDoubleBuffer {
     return response;
   }
 
+  private static boolean isStandaloneBatchCmdTypes(OMResponse response) {
+    if (response == null) {
+      return false;
+    }
+    final OzoneManagerProtocolProtos.Type type = response.getCmdType();
+    return type == OzoneManagerProtocolProtos.Type.SnapshotPurge
+        || type == OzoneManagerProtocolProtos.Type.CreateSnapshot;
+  }
+
   private void addCleanupEntry(Entry entry, Map<String, List<Long>> 
cleanupEpochs) {
     Class<? extends OMClientResponse> responseClass =
         entry.getResponse().getClass();
@@ -612,7 +624,7 @@ public final class OzoneManagerDoubleBuffer {
     return currentBuffer.size();
   }
 
-  int getReadyBufferSize() {
+  synchronized int getReadyBufferSize() {
     return readyBuffer.size();
   }
 
diff --git 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java
 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java
index 125c9efcaf..6e24c9ff93 100644
--- 
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java
+++ 
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBuffer.java
@@ -44,9 +44,9 @@ import org.apache.hadoop.ozone.om.response.OMClientResponse;
 import org.apache.hadoop.ozone.om.response.bucket.OMBucketCreateResponse;
 import org.apache.hadoop.ozone.om.response.key.OMKeyCreateResponse;
 import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotCreateResponse;
+import org.apache.hadoop.ozone.om.response.snapshot.OMSnapshotPurgeResponse;
 import org.apache.hadoop.ozone.om.s3.S3SecretCacheProvider;
 import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
-import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.CreateSnapshotResponse;
 import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.OMResponse;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -81,12 +81,12 @@ class TestOzoneManagerDoubleBuffer {
   private OzoneManagerDoubleBuffer doubleBuffer;
   private OzoneManager ozoneManager;
   private S3SecretLockedManager secretManager;
-  private final CreateSnapshotResponse snapshotResponse1 = 
mock(CreateSnapshotResponse.class);
-  private final CreateSnapshotResponse snapshotResponse2 = 
mock(CreateSnapshotResponse.class);
   private final OMResponse omKeyResponse = mock(OMResponse.class);
   private final OMResponse omBucketResponse = mock(OMResponse.class);
   private final OMResponse omSnapshotResponse1 = mock(OMResponse.class);
   private final OMResponse omSnapshotResponse2 = mock(OMResponse.class);
+  private final OMResponse omSnapshotPurgeResponseProto1 = 
mock(OMResponse.class);
+  private final OMResponse omSnapshotPurgeResponseProto2 = 
mock(OMResponse.class);
   private static OMClientResponse omKeyCreateResponse =
       mock(OMKeyCreateResponse.class);
   private static OMClientResponse omBucketCreateResponse =
@@ -95,6 +95,9 @@ class TestOzoneManagerDoubleBuffer {
       mock(OMSnapshotCreateResponse.class);
   private static OMClientResponse omSnapshotCreateResponse2 =
       mock(OMSnapshotCreateResponse.class);
+  private static OMClientResponse omSnapshotPurgeResponse1 = 
mock(OMSnapshotPurgeResponse.class);
+  private static OMClientResponse omSnapshotPurgeResponse2 = 
mock(OMSnapshotPurgeResponse.class);
+
   @TempDir
   private File tempDir;
   private OzoneManagerDoubleBuffer.FlushNotifier flushNotifier;
@@ -143,19 +146,22 @@ class TestOzoneManagerDoubleBuffer {
     doNothing().when(omBucketCreateResponse).checkAndUpdateDB(any(), any());
     doNothing().when(omSnapshotCreateResponse1).checkAndUpdateDB(any(), any());
     doNothing().when(omSnapshotCreateResponse2).checkAndUpdateDB(any(), any());
+    doNothing().when(omSnapshotPurgeResponse1).checkAndUpdateDB(any(), any());
+    doNothing().when(omSnapshotPurgeResponse2).checkAndUpdateDB(any(), any());
 
     when(omKeyResponse.getTraceID()).thenReturn("keyTraceId");
     when(omBucketResponse.getTraceID()).thenReturn("bucketTraceId");
     when(omSnapshotResponse1.getTraceID()).thenReturn("snapshotTraceId-1");
     when(omSnapshotResponse2.getTraceID()).thenReturn("snapshotTraceId-2");
-    when(omSnapshotResponse1.hasCreateSnapshotResponse())
-        .thenReturn(true);
-    when(omSnapshotResponse2.hasCreateSnapshotResponse())
-        .thenReturn(true);
-    when(omSnapshotResponse1.getCreateSnapshotResponse())
-        .thenReturn(snapshotResponse1);
-    when(omSnapshotResponse2.getCreateSnapshotResponse())
-        .thenReturn(snapshotResponse2);
+    
when(omSnapshotPurgeResponseProto1.getTraceID()).thenReturn("snapshotPurgeTraceId-1");
+    
when(omSnapshotPurgeResponseProto2.getTraceID()).thenReturn("snapshotPurgeTraceId-2");
+
+    
when(omKeyResponse.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.CreateKey);
+    
when(omBucketResponse.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.CreateBucket);
+    
when(omSnapshotPurgeResponseProto1.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge);
+    
when(omSnapshotPurgeResponseProto2.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge);
+    
when(omSnapshotResponse1.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge);
+    
when(omSnapshotResponse2.getCmdType()).thenReturn(OzoneManagerProtocolProtos.Type.SnapshotPurge);
 
     when(omKeyCreateResponse.getOMResponse()).thenReturn(omKeyResponse);
     when(omBucketCreateResponse.getOMResponse()).thenReturn(omBucketResponse);
@@ -163,6 +169,10 @@ class TestOzoneManagerDoubleBuffer {
         .thenReturn(omSnapshotResponse1);
     when(omSnapshotCreateResponse2.getOMResponse())
         .thenReturn(omSnapshotResponse2);
+    when(omSnapshotPurgeResponse1.getOMResponse())
+        .thenReturn(omSnapshotPurgeResponseProto1);
+    when(omSnapshotPurgeResponse2.getOMResponse())
+        .thenReturn(omSnapshotPurgeResponseProto2);
   }
 
   @AfterEach
@@ -194,8 +204,35 @@ class TestOzoneManagerDoubleBuffer {
                 omSnapshotCreateResponse1,
                 omSnapshotCreateResponse2,
                 omBucketCreateResponse),
-            4L, 4L, 14L, 16L, 1L, 1.142F)
-    );
+            4L, 4L, 14L, 16L, 1L, 1.142F),
+        Arguments.of(Arrays.asList(omSnapshotPurgeResponse1,
+                omSnapshotPurgeResponse2),
+            2L, 2L, 16L, 18L, 1L, 1.125F),
+        Arguments.of(Arrays.asList(omKeyCreateResponse,
+                omBucketCreateResponse,
+                omSnapshotPurgeResponse1,
+                omSnapshotPurgeResponse2),
+            3L, 4L, 19L, 22L, 2L, 1.157F),
+        Arguments.of(Arrays.asList(omKeyCreateResponse,
+                omSnapshotPurgeResponse1,
+                omBucketCreateResponse,
+                omSnapshotPurgeResponse2),
+            4L, 4L, 23L, 26L, 1L, 1.1300F),
+        Arguments.of(Arrays.asList(omKeyCreateResponse,
+                omSnapshotPurgeResponse1,
+                omSnapshotPurgeResponse2,
+                omBucketCreateResponse),
+            4L, 4L, 27L, 30L, 1L, 1.111F),
+        Arguments.of(Arrays.asList(omKeyCreateResponse,
+                omBucketCreateResponse,
+                omSnapshotPurgeResponse1,
+                omSnapshotCreateResponse1,
+                omSnapshotPurgeResponse2,
+                omBucketCreateResponse,
+                omSnapshotCreateResponse2),
+            6L, 7L, 33L, 37L, 2L, 1.121F)
+
+        );
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to