keith-turner commented on code in PR #5651:
URL: https://github.com/apache/accumulo/pull/5651#discussion_r2155359488


##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws 
IOException {
         return currentlyUnreserved;
       });
 
-      if (log.isDebugEnabled() && System.nanoTime() - lastLogTime > 
TimeUnit.SECONDS.toNanos(60)) {
-        for (ScanDataSource activeScan : runningScans) {
-          log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
-        }
-
-        lastLogTime = System.nanoTime();
-      }
+      long lastLogTime = System.nanoTime();
 
-      try {
-        log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
-            runningScans.size());
-        this.wait(50);
-      } catch (InterruptedException e) {
-        log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
-      }
-    }
+      // wait for reads and writes to complete
+      while (writesInProgress > 0 || !runningScans.isEmpty()) {
+        runningScans.removeIf(scanDataSource -> {
+          boolean currentlyUnreserved = 
disallowNewReservations(scanDataSource.getScanParameters());
+          if (currentlyUnreserved) {
+            log.debug("Disabled scan session in tablet close {} {}", extent, 
scanDataSource);
+          }
+          return currentlyUnreserved;
+        });
 
-    // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
-    // check that assumption. At this point activeScans should be empty or 
everything in it should
-    // be disabled.
-    Preconditions.checkState(activeScans.stream()
-        .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+        if (log.isDebugEnabled()
+            && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) 
{
+          for (ScanDataSource activeScan : runningScans) {
+            log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
+          }
 
-    getTabletMemory().waitForMinC();
+          lastLogTime = System.nanoTime();
+        }
 
-    if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
-      try {
-        prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
-      } catch (NoNodeException e) {
-        throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
+        try {
+          log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
+              runningScans.size());
+          this.wait(50);
+        } catch (InterruptedException e) {
+          log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
+        }
       }
-    }
 
-    if (saveState) {
-      // at this point all tablet data is flushed, so do a consistency check
-      RuntimeException err = null;
-      for (int i = 0; i < 5; i++) {
+      // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
+      // check that assumption. At this point activeScans should be empty or 
everything in it should
+      // be disabled.
+      Preconditions.checkState(activeScans.stream()
+          .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+
+      getTabletMemory().waitForMinC();
+
+      shouldPrepMinC = saveState && 
getTabletMemory().getMemTable().getNumEntries() > 0;
+      if (shouldPrepMinC) {
         try {
-          closeConsistencyCheck();
-          err = null;
-        } catch (RuntimeException t) {
-          err = t;
-          log.error("Consistency check fails, retrying", t);
-          sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+          mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+        } catch (NoNodeException e) {
+          throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
         }
       }
-      if (err != null) {
-        log.error("Tablet closed consistency check has failed for {} giving up 
and closing",
-            this.extent);
-      }
     }
 
-    try {
-      getTabletMemory().getMemTable().delete(0);
-    } catch (Exception t) {
-      log.error("Failed to delete mem table : " + t.getMessage() + " for 
tablet " + extent, t);
+    if (shouldPrepMinC) {
+      // must not run while tablet is locked, as this may result in deadlocks
+      mct.run();

Review Comment:
   Normally minor compactoins run concurrently w/ reads and writes. At this 
point in the code no more reads and writes should be happening so I think this 
code did the minor compaction in the synch block because it did not matter if 
reads and writes were blocked.  
   
   It should be ok to run this outside of the sync block because the close 
state change at the top of this method should prevent any new reads or writes 
from starting.  Added a suggestion to check some things after the lock  is 
reacquired.



##########
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java:
##########
@@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws 
IOException {
         return currentlyUnreserved;
       });
 
-      if (log.isDebugEnabled() && System.nanoTime() - lastLogTime > 
TimeUnit.SECONDS.toNanos(60)) {
-        for (ScanDataSource activeScan : runningScans) {
-          log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
-        }
-
-        lastLogTime = System.nanoTime();
-      }
+      long lastLogTime = System.nanoTime();
 
-      try {
-        log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
-            runningScans.size());
-        this.wait(50);
-      } catch (InterruptedException e) {
-        log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
-      }
-    }
+      // wait for reads and writes to complete
+      while (writesInProgress > 0 || !runningScans.isEmpty()) {
+        runningScans.removeIf(scanDataSource -> {
+          boolean currentlyUnreserved = 
disallowNewReservations(scanDataSource.getScanParameters());
+          if (currentlyUnreserved) {
+            log.debug("Disabled scan session in tablet close {} {}", extent, 
scanDataSource);
+          }
+          return currentlyUnreserved;
+        });
 
-    // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
-    // check that assumption. At this point activeScans should be empty or 
everything in it should
-    // be disabled.
-    Preconditions.checkState(activeScans.stream()
-        .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+        if (log.isDebugEnabled()
+            && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) 
{
+          for (ScanDataSource activeScan : runningScans) {
+            log.debug("Waiting on scan in completeClose {} {}", extent, 
activeScan);
+          }
 
-    getTabletMemory().waitForMinC();
+          lastLogTime = System.nanoTime();
+        }
 
-    if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) {
-      try {
-        prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run();
-      } catch (NoNodeException e) {
-        throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
+        try {
+          log.debug("Waiting to completeClose for {}. {} writes {} scans", 
extent, writesInProgress,
+              runningScans.size());
+          this.wait(50);
+        } catch (InterruptedException e) {
+          log.error("Interrupted waiting to completeClose for extent {}", 
extent, e);
+        }
       }
-    }
 
-    if (saveState) {
-      // at this point all tablet data is flushed, so do a consistency check
-      RuntimeException err = null;
-      for (int i = 0; i < 5; i++) {
+      // It is assumed that nothing new would have been added to activeScans 
since it was copied, so
+      // check that assumption. At this point activeScans should be empty or 
everything in it should
+      // be disabled.
+      Preconditions.checkState(activeScans.stream()
+          .allMatch(scanDataSource -> 
disallowNewReservations(scanDataSource.getScanParameters())));
+
+      getTabletMemory().waitForMinC();
+
+      shouldPrepMinC = saveState && 
getTabletMemory().getMemTable().getNumEntries() > 0;
+      if (shouldPrepMinC) {
         try {
-          closeConsistencyCheck();
-          err = null;
-        } catch (RuntimeException t) {
-          err = t;
-          log.error("Consistency check fails, retrying", t);
-          sleepUninterruptibly(500, TimeUnit.MILLISECONDS);
+          mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE);
+        } catch (NoNodeException e) {
+          throw new RuntimeException("Exception on " + extent + " during prep 
for MinC", e);
         }
       }
-      if (err != null) {
-        log.error("Tablet closed consistency check has failed for {} giving up 
and closing",
-            this.extent);
-      }
     }
 
-    try {
-      getTabletMemory().getMemTable().delete(0);
-    } catch (Exception t) {
-      log.error("Failed to delete mem table : " + t.getMessage() + " for 
tablet " + extent, t);
+    if (shouldPrepMinC) {
+      // must not run while tablet is locked, as this may result in deadlocks
+      mct.run();
     }
 
-    getTabletMemory().close();
+    synchronized (this) {

Review Comment:
   ```suggestion
       synchronized (this) {
         // gave up the lock to allow a minor compaction to run, now that the 
lock is reacquired validate that nothing unexpectedly changed
         Preconditions.checkState(closeState = CloseState.CLOSED, 
"closeState:%s extent:%s", closeState, extent)
         Preconditions.checkState(writesInProgress == 0, "writesInProgress:%s 
extent:%s", writesInProgress, extent);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to