hemantk-12 commented on code in PR #5968:
URL: https://github.com/apache/ozone/pull/5968#discussion_r1449447433


##########
hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java:
##########
@@ -296,105 +262,34 @@ private synchronized void cleanup() {
    * TODO: [SNAPSHOT] Add new ozone debug CLI command to trigger this directly.
    */
   private void cleanupInternal() {
-    long numEntriesToEvict = (long) dbMap.size() - cacheSizeLimit;
-    while (numEntriesToEvict > 0L && pendingEvictionList.size() > 0) {
-      // Get the first instance in the clean up list
-      ReferenceCounted<IOmMetadataReader, SnapshotCache> rcOmSnapshot =
-          pendingEvictionList.iterator().next();
+    Iterator<Map.Entry<String, ReferenceCounted<IOmMetadataReader, 
SnapshotCache>>> iterator =
+        dbMap.entrySet().iterator();
+    while (iterator.hasNext()) {
+      Map.Entry<String, ReferenceCounted<IOmMetadataReader, SnapshotCache>> 
entry = iterator.next();
+      // Get the first instance in the clean-up list
+      ReferenceCounted<IOmMetadataReader, SnapshotCache> rcOmSnapshot = 
entry.getValue();
       OmSnapshot omSnapshot = (OmSnapshot) rcOmSnapshot.get();
-      LOG.debug("Evicting OmSnapshot instance {} with table key {}",
-          rcOmSnapshot, omSnapshot.getSnapshotTableKey());
-      // Sanity check
-      Preconditions.checkState(rcOmSnapshot.getTotalRefCount() == 0L,
-          "Illegal state: OmSnapshot reference count non-zero ("
-              + rcOmSnapshot.getTotalRefCount() + ") but shows up in the "
-              + "clean up list");
-
       final String key = omSnapshot.getSnapshotTableKey();
-      final ReferenceCounted<IOmMetadataReader, SnapshotCache> result =
-          dbMap.remove(key);
-      // Sanity check
-      Preconditions.checkState(rcOmSnapshot == result,
-          "Cache map entry removal failure. The cache is in an inconsistent "
-              + "state. Expected OmSnapshot instance: " + rcOmSnapshot
-              + ", actual: " + result + " for key: " + key);
-
-      pendingEvictionList.remove(result);
-
-      // Close the instance, which also closes its DB handle.
+      Lock lock = getLock(key);
+      lock.lock();
       try {
-        ((OmSnapshot) rcOmSnapshot.get()).close();
-      } catch (IOException ex) {
-        throw new IllegalStateException("Error while closing snapshot DB", ex);
-      }
-
-      --numEntriesToEvict;
-    }
-
-    // Print warning message if actual cache size is exceeding the soft limit
-    // even after the cleanup procedure above.
-    if ((long) dbMap.size() > cacheSizeLimit) {
-      LOG.warn("Current snapshot cache size ({}) is exceeding configured "
-          + "soft-limit ({}) after possible evictions.",
-          dbMap.size(), cacheSizeLimit);
-
-      Preconditions.checkState(pendingEvictionList.size() == 0);
-    }
-  }
-
-  /**
-   * Check cache consistency.
-   * @return true if the cache internal structure is consistent to the best of
-   * its knowledge, false if found to be inconsistent and details logged.
-   */
-  @VisibleForTesting
-  public boolean isConsistent() {
-    // Uses dbMap as the source of truth for this check, whether dbMap entries
-    // are in OM DB's snapshotInfoTable is out of the scope of this check.
-
-    LOG.info("dbMap has {} entries", dbMap.size());
-    LOG.info("pendingEvictionList has {} entries",
-        pendingEvictionList.size());
-
-    // pendingEvictionList must be empty if cache size exceeds limit
-    if (dbMap.size() > cacheSizeLimit) {
-      if (pendingEvictionList.size() != 0) {
-        // cleanup() is not functioning correctly
-        LOG.error("pendingEvictionList is not empty even when cache size"
-            + "exceeds limit");
-      }
-    }
-
-    dbMap.forEach((k, v) -> {
-      if (v.getTotalRefCount() == 0L) {
-        long threadRefCount = v.getCurrentThreadRefCount();
-        if (threadRefCount != 0L) {
-          LOG.error("snapshotTableKey='{}' instance has inconsistent "
-              + "ref count. Total ref count is 0 but thread "
-              + "ref count is {}", k, threadRefCount);
+        if (rcOmSnapshot.getTotalRefCount() > 0) {
+          LOG.debug("Snapshot {} is still being referenced ({}), skipping its 
clean up",
+              key, rcOmSnapshot.getTotalRefCount());
+          continue;
         }
-        // Zero ref count values in dbMap must be in pendingEvictionList
-        if (!pendingEvictionList.contains(v)) {
-          LOG.error("snapshotTableKey='{}' instance has zero ref count but "
-              + "not in pendingEvictionList", k);
+
+        // Close the instance, which also closes its DB handle.
+        try {
+          ((OmSnapshot) rcOmSnapshot.get()).close();
+        } catch (IOException ex) {
+          throw new IllegalStateException("Error while closing snapshot DB", 
ex);
         }
-      }
-    });
 
-    pendingEvictionList.forEach(v -> {
-      // Objects in pendingEvictionList should still be in dbMap
-      if (!dbMap.contains(v)) {
-        LOG.error("Instance '{}' is in pendingEvictionList but not in "
-            + "dbMap", v);
-      }
-      // Instances in pendingEvictionList must have ref count equals 0
-      if (v.getTotalRefCount() != 0L) {
-        LOG.error("Instance '{}' is in pendingEvictionList but ref count "
-            + "is not zero", v);
+        iterator.remove();

Review Comment:
   You are right in the sense that concurrent access to a map that is not a 
ConcurrentHashMap can lead to undesired states, as Siyao also pointed out in 
https://github.com/apache/ozone/pull/5968#discussion_r1449039737.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to