keith-turner commented on code in PR #5651: URL: https://github.com/apache/accumulo/pull/5651#discussion_r2155359488
########## server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java: ########## @@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws IOException { return currentlyUnreserved; }); - if (log.isDebugEnabled() && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) { - for (ScanDataSource activeScan : runningScans) { - log.debug("Waiting on scan in completeClose {} {}", extent, activeScan); - } - - lastLogTime = System.nanoTime(); - } + long lastLogTime = System.nanoTime(); - try { - log.debug("Waiting to completeClose for {}. {} writes {} scans", extent, writesInProgress, - runningScans.size()); - this.wait(50); - } catch (InterruptedException e) { - log.error("Interrupted waiting to completeClose for extent {}", extent, e); - } - } + // wait for reads and writes to complete + while (writesInProgress > 0 || !runningScans.isEmpty()) { + runningScans.removeIf(scanDataSource -> { + boolean currentlyUnreserved = disallowNewReservations(scanDataSource.getScanParameters()); + if (currentlyUnreserved) { + log.debug("Disabled scan session in tablet close {} {}", extent, scanDataSource); + } + return currentlyUnreserved; + }); - // It is assumed that nothing new would have been added to activeScans since it was copied, so - // check that assumption. At this point activeScans should be empty or everything in it should - // be disabled. - Preconditions.checkState(activeScans.stream() - .allMatch(scanDataSource -> disallowNewReservations(scanDataSource.getScanParameters()))); + if (log.isDebugEnabled() + && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) { + for (ScanDataSource activeScan : runningScans) { + log.debug("Waiting on scan in completeClose {} {}", extent, activeScan); + } - getTabletMemory().waitForMinC(); + lastLogTime = System.nanoTime(); + } - if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) { - try { - prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run(); - } catch (NoNodeException e) { - throw new RuntimeException("Exception on " + extent + " during prep for MinC", e); + try { + log.debug("Waiting to completeClose for {}. {} writes {} scans", extent, writesInProgress, + runningScans.size()); + this.wait(50); + } catch (InterruptedException e) { + log.error("Interrupted waiting to completeClose for extent {}", extent, e); + } } - } - if (saveState) { - // at this point all tablet data is flushed, so do a consistency check - RuntimeException err = null; - for (int i = 0; i < 5; i++) { + // It is assumed that nothing new would have been added to activeScans since it was copied, so + // check that assumption. At this point activeScans should be empty or everything in it should + // be disabled. + Preconditions.checkState(activeScans.stream() + .allMatch(scanDataSource -> disallowNewReservations(scanDataSource.getScanParameters()))); + + getTabletMemory().waitForMinC(); + + shouldPrepMinC = saveState && getTabletMemory().getMemTable().getNumEntries() > 0; + if (shouldPrepMinC) { try { - closeConsistencyCheck(); - err = null; - } catch (RuntimeException t) { - err = t; - log.error("Consistency check fails, retrying", t); - sleepUninterruptibly(500, TimeUnit.MILLISECONDS); + mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE); + } catch (NoNodeException e) { + throw new RuntimeException("Exception on " + extent + " during prep for MinC", e); } } - if (err != null) { - log.error("Tablet closed consistency check has failed for {} giving up and closing", - this.extent); - } } - try { - getTabletMemory().getMemTable().delete(0); - } catch (Exception t) { - log.error("Failed to delete mem table : " + t.getMessage() + " for tablet " + extent, t); + if (shouldPrepMinC) { + // must not run while tablet is locked, as this may result in deadlocks + mct.run(); Review Comment: Normally minor compactoins run concurrently w/ reads and writes. At this point in the code no more reads and writes should be happening so I think this code did the minor compaction in the synch block because it did not matter if reads and writes were blocked. It should be ok to run this outside of the sync block because the close state change at the top of this method should prevent any new reads or writes from starting. Added a suggestion to check some things after the lock is reacquired. ########## server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java: ########## @@ -937,70 +928,92 @@ synchronized void completeClose(boolean saveState) throws IOException { return currentlyUnreserved; }); - if (log.isDebugEnabled() && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) { - for (ScanDataSource activeScan : runningScans) { - log.debug("Waiting on scan in completeClose {} {}", extent, activeScan); - } - - lastLogTime = System.nanoTime(); - } + long lastLogTime = System.nanoTime(); - try { - log.debug("Waiting to completeClose for {}. {} writes {} scans", extent, writesInProgress, - runningScans.size()); - this.wait(50); - } catch (InterruptedException e) { - log.error("Interrupted waiting to completeClose for extent {}", extent, e); - } - } + // wait for reads and writes to complete + while (writesInProgress > 0 || !runningScans.isEmpty()) { + runningScans.removeIf(scanDataSource -> { + boolean currentlyUnreserved = disallowNewReservations(scanDataSource.getScanParameters()); + if (currentlyUnreserved) { + log.debug("Disabled scan session in tablet close {} {}", extent, scanDataSource); + } + return currentlyUnreserved; + }); - // It is assumed that nothing new would have been added to activeScans since it was copied, so - // check that assumption. At this point activeScans should be empty or everything in it should - // be disabled. - Preconditions.checkState(activeScans.stream() - .allMatch(scanDataSource -> disallowNewReservations(scanDataSource.getScanParameters()))); + if (log.isDebugEnabled() + && System.nanoTime() - lastLogTime > TimeUnit.SECONDS.toNanos(60)) { + for (ScanDataSource activeScan : runningScans) { + log.debug("Waiting on scan in completeClose {} {}", extent, activeScan); + } - getTabletMemory().waitForMinC(); + lastLogTime = System.nanoTime(); + } - if (saveState && getTabletMemory().getMemTable().getNumEntries() > 0) { - try { - prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE).run(); - } catch (NoNodeException e) { - throw new RuntimeException("Exception on " + extent + " during prep for MinC", e); + try { + log.debug("Waiting to completeClose for {}. {} writes {} scans", extent, writesInProgress, + runningScans.size()); + this.wait(50); + } catch (InterruptedException e) { + log.error("Interrupted waiting to completeClose for extent {}", extent, e); + } } - } - if (saveState) { - // at this point all tablet data is flushed, so do a consistency check - RuntimeException err = null; - for (int i = 0; i < 5; i++) { + // It is assumed that nothing new would have been added to activeScans since it was copied, so + // check that assumption. At this point activeScans should be empty or everything in it should + // be disabled. + Preconditions.checkState(activeScans.stream() + .allMatch(scanDataSource -> disallowNewReservations(scanDataSource.getScanParameters()))); + + getTabletMemory().waitForMinC(); + + shouldPrepMinC = saveState && getTabletMemory().getMemTable().getNumEntries() > 0; + if (shouldPrepMinC) { try { - closeConsistencyCheck(); - err = null; - } catch (RuntimeException t) { - err = t; - log.error("Consistency check fails, retrying", t); - sleepUninterruptibly(500, TimeUnit.MILLISECONDS); + mct = prepareForMinC(getFlushID(), MinorCompactionReason.CLOSE); + } catch (NoNodeException e) { + throw new RuntimeException("Exception on " + extent + " during prep for MinC", e); } } - if (err != null) { - log.error("Tablet closed consistency check has failed for {} giving up and closing", - this.extent); - } } - try { - getTabletMemory().getMemTable().delete(0); - } catch (Exception t) { - log.error("Failed to delete mem table : " + t.getMessage() + " for tablet " + extent, t); + if (shouldPrepMinC) { + // must not run while tablet is locked, as this may result in deadlocks + mct.run(); } - getTabletMemory().close(); + synchronized (this) { Review Comment: ```suggestion synchronized (this) { // gave up the lock to allow a minor compaction to run, now that the lock is reacquired validate that nothing unexpectedly changed Preconditions.checkState(closeState = CloseState.CLOSED, "closeState:%s extent:%s", closeState, extent) Preconditions.checkState(writesInProgress == 0, "writesInProgress:%s extent:%s", writesInProgress, extent); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: notifications-unsubscr...@accumulo.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org