This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/elasticity by this push:
     new cca3683e5b Added check for tablets with errors, when looking for volume replacements. (#4287)
cca3683e5b is described below

commit cca3683e5b51f66787569e9b968b4599848e136e
Author: Arbaaz Khan <bazzy...@yahoo.com>
AuthorDate: Tue Feb 27 17:18:36 2024 -0500

    Added check for tablets with errors, when looking for volume replacements. (#4287)

    Added check for tablets with errors, when looking for volume replacements.

    fixes #4234
---
 .../org/apache/accumulo/manager/TabletGroupWatcher.java | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index e8bdd794a4..e7ea20413a 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -353,6 +353,7 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread {
     int[] counts = new int[TabletState.values().length];
     private int totalUnloaded;
     private long totalVolumeReplacements;
+    private int tabletsWithErrors;
   }
 
   private TableMgmtStats manageTablets(Iterator<TabletManagement> iter,
@@ -398,6 +399,7 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread {
             "Error on TabletServer trying to get Tablet management information for extent: {}. Error message: {}",
             tm.getExtent(), mtiError);
         this.metrics.incrementTabletGroupWatcherError(this.store.getLevel());
+        tableMgmtStats.tabletsWithErrors++;
         continue;
       }
 
@@ -670,7 +672,17 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread {
         iter = store.iterator(tableMgmtParams);
         var tabletMgmtStats = manageTablets(iter, tableMgmtParams, currentTServers, true);
-        lookForTabletsNeedingVolReplacement = tabletMgmtStats.totalVolumeReplacements != 0;
+
+        // If currently looking for volume replacements, determine if the next round needs to look.
+        if (lookForTabletsNeedingVolReplacement) {
+          // Continue to look for tablets needing volume replacement if there was an error
+          // processing tablets in the call to manageTablets() or if we are still performing volume
+          // replacement. We only want to stop looking for tablets that need volume replacement when
+          // we have successfully processed all tablet metadata and no more volume replacements are
+          // being performed.
+          lookForTabletsNeedingVolReplacement = tabletMgmtStats.totalVolumeReplacements != 0
+              || tabletMgmtStats.tabletsWithErrors != 0;
+        }
 
         // provide stats after flushing changes to avoid race conditions w/ delete table
         stats.end(managerState);