jojochuang commented on code in PR #9150:
URL: https://github.com/apache/ozone/pull/9150#discussion_r2492067326


##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java:
##########
@@ -730,47 +841,76 @@ public synchronized void commit() throws IOException {
       // Need to update the disk state if and only if the dirty bit is set.
       if (isDirty()) {
         String filePath = getSnapshotLocalPropertyYamlPath(super.snapshotId);
-        String tmpFilePath = filePath + ".tmp";
-        File tmpFile = new File(tmpFilePath);
-        boolean tmpFileExists = tmpFile.exists();
-        if (tmpFileExists) {
-          tmpFileExists = !tmpFile.delete();
-        }
-        if (tmpFileExists) {
-          throw new IOException("Unable to delete tmp file " + tmpFilePath);
+        File snapshotLocalDataFile = new File(filePath);
+        if (!localDataVersionNodes.getSnapshotVersions().isEmpty()) {
+          String tmpFilePath = filePath + ".tmp";
+          File tmpFile = new File(tmpFilePath);
+          boolean tmpFileExists = tmpFile.exists();
+          if (tmpFileExists) {
+            tmpFileExists = !tmpFile.delete();
+          }
+          if (tmpFileExists) {
+            throw new IOException("Unable to delete tmp file " + tmpFilePath);
+          }
+          snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData);
+          Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE,
+              StandardCopyOption.REPLACE_EXISTING);
+        } else if (snapshotLocalDataFile.exists()) {
+          LOG.info("Deleting YAML file corresponding to snapshotId: {} in path : {}",
+              super.snapshotId, snapshotLocalDataFile.getAbsolutePath());
+          if (!snapshotLocalDataFile.delete()) {
+            throw new IOException("Unable to delete file " + snapshotLocalDataFile.getAbsolutePath());
+          }
         }
-        snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData);
-        Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE,
-            StandardCopyOption.REPLACE_EXISTING);
-        upsertNode(super.snapshotId, localDataVersionNodes);
+        upsertNode(super.snapshotId, localDataVersionNodes, getSnapshotLocalData().getTransactionInfo() != null);
         // Reset dirty bit
         resetDirty();
       }
     }
 
-    private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions) throws IOException {
+    private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions,
+        boolean transactionInfoSet) throws IOException {

Review Comment:
   The variable name 'transactionInfoSet' does not convey what it is for; I wasn't able to tell until I read further down in the comments.
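   
   As an illustration of the kind of rename (or javadoc) that would help, here is a small sketch. The name and javadoc are only guesses at the semantics of `getTransactionInfo() != null` and would need to be confirmed by the author:
   
   ```java
   // Hypothetical rename only; the name and javadoc are illustrative, not the
   // confirmed meaning of the flag.
   /**
    * @param hasTransactionInfo true if the snapshot local data still carries a
    *        TransactionInfo at commit time.
    */
   private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions,
       boolean hasTransactionInfo) throws IOException {
     // ...
   }
   ```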



##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java:
##########
@@ -730,47 +841,76 @@ public synchronized void commit() throws IOException {
       // Need to update the disk state if and only if the dirty bit is set.
       if (isDirty()) {
         String filePath = getSnapshotLocalPropertyYamlPath(super.snapshotId);
-        String tmpFilePath = filePath + ".tmp";
-        File tmpFile = new File(tmpFilePath);
-        boolean tmpFileExists = tmpFile.exists();
-        if (tmpFileExists) {
-          tmpFileExists = !tmpFile.delete();
-        }
-        if (tmpFileExists) {
-          throw new IOException("Unable to delete tmp file " + tmpFilePath);
+        File snapshotLocalDataFile = new File(filePath);
+        if (!localDataVersionNodes.getSnapshotVersions().isEmpty()) {
+          String tmpFilePath = filePath + ".tmp";
+          File tmpFile = new File(tmpFilePath);
+          boolean tmpFileExists = tmpFile.exists();
+          if (tmpFileExists) {
+            tmpFileExists = !tmpFile.delete();
+          }
+          if (tmpFileExists) {
+            throw new IOException("Unable to delete tmp file " + tmpFilePath);
+          }
+          snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData);
+          Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE,
+              StandardCopyOption.REPLACE_EXISTING);
+        } else if (snapshotLocalDataFile.exists()) {
+          LOG.info("Deleting YAML file corresponding to snapshotId: {} in path : {}",
+              super.snapshotId, snapshotLocalDataFile.getAbsolutePath());
+          if (!snapshotLocalDataFile.delete()) {
+            throw new IOException("Unable to delete file " + snapshotLocalDataFile.getAbsolutePath());
+          }
         }
-        snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData);
-        Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE,
-            StandardCopyOption.REPLACE_EXISTING);
-        upsertNode(super.snapshotId, localDataVersionNodes);
+        upsertNode(super.snapshotId, localDataVersionNodes, getSnapshotLocalData().getTransactionInfo() != null);
         // Reset dirty bit
         resetDirty();
       }
     }
 
-    private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions) throws IOException {
+    private void upsertNode(UUID snapshotId, SnapshotVersionsMeta snapshotVersions,
+        boolean transactionInfoSet) throws IOException {
       internalLock.writeLock().lock();
       try {
         SnapshotVersionsMeta existingSnapVersions = getVersionNodeMap().remove(snapshotId);
         Map<Integer, LocalDataVersionNode> existingVersions = existingSnapVersions == null ? Collections.emptyMap() :
             existingSnapVersions.getSnapshotVersions();
+        Map<Integer, LocalDataVersionNode> newVersions = snapshotVersions.getSnapshotVersions();
         Map<Integer, List<LocalDataVersionNode>> predecessors = new HashMap<>();
+        boolean versionsRemoved = false;
         // Track all predecessors of the existing versions and remove the node from the graph.
         for (Map.Entry<Integer, LocalDataVersionNode> existingVersion : existingVersions.entrySet()) {
           LocalDataVersionNode existingVersionNode = existingVersion.getValue();
           // Create a copy of predecessors since the list of nodes returned would be a mutable set and it changes as the
           // nodes in the graph would change.
           predecessors.put(existingVersion.getKey(), new ArrayList<>(localDataGraph.predecessors(existingVersionNode)));
+          versionsRemoved = versionsRemoved || !newVersions.containsKey(existingVersion.getKey());
           localDataGraph.removeNode(existingVersionNode);
         }
+
         // Add the nodes to be added in the graph and map.
         addSnapshotVersionMeta(snapshotId, snapshotVersions);
         // Reconnect all the predecessors for existing nodes.
-        for (Map.Entry<Integer, LocalDataVersionNode> entry : snapshotVersions.getSnapshotVersions().entrySet()) {
+        for (Map.Entry<Integer, LocalDataVersionNode> entry : newVersions.entrySet()) {
           for (LocalDataVersionNode predecessor : predecessors.getOrDefault(entry.getKey(), Collections.emptyList())) {
             localDataGraph.putEdge(predecessor, entry.getValue());
           }
         }
+        if (existingSnapVersions != null) {

Review Comment:
   I don't like that this method does two very different tasks; it is hard to tell what exactly it does by looking at the method name. Please consider refactoring this block of code out into another method, named something like 'scheduleOrphanMetadataCheck'.
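   
   A rough sketch of that extraction is below. The body of the `if (existingSnapVersions != null)` block is cut off in this hunk, so the helper body is only a placeholder, and the parameters are assumptions about what the block actually needs:
   
   ```java
   // Call site inside upsertNode(), replacing the trailing if-block:
   if (existingSnapVersions != null) {
     scheduleOrphanMetadataCheck(snapshotId, versionsRemoved, transactionInfoSet);
   }
   
   // Hypothetical extracted helper; the body stands in for whatever the original
   // if-block does (presumably queueing the snapshot for the periodic orphan
   // check performed by checkOrphanSnapshotVersions()).
   private void scheduleOrphanMetadataCheck(UUID snapshotId, boolean versionsRemoved,
       boolean transactionInfoSet) {
     // ... original block contents ...
   }
   ```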



##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java:
##########
@@ -275,6 +311,65 @@ private void init() throws IOException {
       }
       addVersionNodeWithDependents(snapshotLocalData);
     }
+    for (UUID snapshotId : versionNodeMap.keySet()) {
+      incrementOrphanCheckCount(snapshotId);
+    }
+    long snapshotLocalDataManagerServiceInterval = configuration.getTimeDuration(
+        OZONE_OM_SNAPSHOT_LOCAL_DATA_MANAGER_SERVICE_INTERVAL, OZONE_OM_SNAPSHOT_LOCAL_DATA_MANAGER_SERVICE_INTERVAL_DEFAULT,
+        TimeUnit.MILLISECONDS);
+    if (snapshotLocalDataManagerServiceInterval > 0) {
+      this.scheduler = new Scheduler(LOCAL_DATA_MANAGER_SERVICE_NAME, true, 1);
+      this.scheduler.scheduleWithFixedDelay(
+          () -> {
+            try {
+              checkOrphanSnapshotVersions(omMetadataManager, chainManager);
+            } catch (IOException e) {
+              LOG.error("Exception while checking orphan snapshot versions", e);
+            }
+          }, snapshotLocalDataManagerServiceInterval, snapshotLocalDataManagerServiceInterval, TimeUnit.MILLISECONDS);
+    }
+
+  }
+
+  private void checkOrphanSnapshotVersions(OMMetadataManager metadataManager, SnapshotChainManager chainManager)
+      throws IOException {
+    for (Map.Entry<UUID, Integer> entry : snapshotToBeCheckedForOrphans.entrySet()) {
+      UUID snapshotId = entry.getKey();
+      int countBeforeCheck = entry.getValue();
+      checkOrphanSnapshotVersions(metadataManager, chainManager, snapshotId);
+      decrementOrphanCheckCount(snapshotId, countBeforeCheck);
+    }
+  }
+
+  @VisibleForTesting
+  void checkOrphanSnapshotVersions(OMMetadataManager metadataManager, SnapshotChainManager chainManager,
+      UUID snapshotId) throws IOException {
+    try (WritableOmSnapshotLocalDataProvider snapshotLocalDataProvider = new WritableOmSnapshotLocalDataProvider(
+        snapshotId)) {
+      OmSnapshotLocalData snapshotLocalData = snapshotLocalDataProvider.getSnapshotLocalData();
+      boolean isSnapshotPurged = OmSnapshotManager.isSnapshotPurged(chainManager, metadataManager, snapshotId,
+          snapshotLocalData.getTransactionInfo());
+      for (Map.Entry<Integer, LocalDataVersionNode> integerLocalDataVersionNodeEntry : getVersionNodeMap()
+          .get(snapshotId).getSnapshotVersions().entrySet()) {

Review Comment:
   Are you sure about this? It looks like the write lock is held later, inside commit() --> upsert(), but right here there is no lock.
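   
   If the iteration really can race with upsertNode(), one possible shape of a fix is sketched below. It assumes `internalLock` is the same lock that guards the version node map and graph, and that a read lock around the lookup is enough for this path; both assumptions would need to be verified against the rest of the class:
   
   ```java
   // Sketch: read the shared version map under the read lock so this iteration
   // cannot interleave with upsertNode(), which mutates the map and graph under
   // the write lock. Whether a read lock here is sufficient, or whether the
   // whole check needs the write lock, depends on what the loop body mutates.
   SnapshotVersionsMeta versionsMeta;
   internalLock.readLock().lock();
   try {
     versionsMeta = getVersionNodeMap().get(snapshotId);
   } finally {
     internalLock.readLock().unlock();
   }
   if (versionsMeta != null) {
     for (Map.Entry<Integer, LocalDataVersionNode> entry
         : versionsMeta.getSnapshotVersions().entrySet()) {
       // ... existing orphan-version handling ...
     }
   }
   ```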



##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotLocalDataManager.java:
##########
@@ -730,47 +841,76 @@ public synchronized void commit() throws IOException {
       // Need to update the disk state if and only if the dirty bit is set.
       if (isDirty()) {
         String filePath = getSnapshotLocalPropertyYamlPath(super.snapshotId);
-        String tmpFilePath = filePath + ".tmp";
-        File tmpFile = new File(tmpFilePath);
-        boolean tmpFileExists = tmpFile.exists();
-        if (tmpFileExists) {
-          tmpFileExists = !tmpFile.delete();
-        }
-        if (tmpFileExists) {
-          throw new IOException("Unable to delete tmp file " + tmpFilePath);
+        File snapshotLocalDataFile = new File(filePath);
+        if (!localDataVersionNodes.getSnapshotVersions().isEmpty()) {
+          String tmpFilePath = filePath + ".tmp";
+          File tmpFile = new File(tmpFilePath);
+          boolean tmpFileExists = tmpFile.exists();
+          if (tmpFileExists) {
+            tmpFileExists = !tmpFile.delete();
+          }
+          if (tmpFileExists) {
+            throw new IOException("Unable to delete tmp file " + tmpFilePath);
+          }
+          snapshotLocalDataSerializer.save(new File(tmpFilePath), super.snapshotLocalData);
+          Files.move(tmpFile.toPath(), Paths.get(filePath), StandardCopyOption.ATOMIC_MOVE,
+              StandardCopyOption.REPLACE_EXISTING);
+        } else if (snapshotLocalDataFile.exists()) {
+          LOG.info("Deleting YAML file corresponding to snapshotId: {} in path : {}",
+              super.snapshotId, snapshotLocalDataFile.getAbsolutePath());
+          if (!snapshotLocalDataFile.delete()) {
+            throw new IOException("Unable to delete file " + snapshotLocalDataFile.getAbsolutePath());
+          }

Review Comment:
   Else (the clean case): no prior snapshot versions and no local meta file, so there is nothing to do here.
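   
   If spelling that out helps readability, a small illustrative sketch of an explicit else branch (the debug log is just an example, not required):
   
   ```java
   } else if (snapshotLocalDataFile.exists()) {
     // ... delete the stale YAML file as above ...
   } else {
     // Clean case: no snapshot versions to persist and no local metadata file on
     // disk, so there is nothing to write or delete for this snapshot.
     LOG.debug("No snapshot local data file to update for snapshotId: {}", super.snapshotId);
   }
   ```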



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

