Repository: hbase
Updated Branches:
  refs/heads/0.94 9498adde2 -> a228e925e
HBASE-10871 Indefinite OPEN/CLOSE wait on busy RegionServers (Esteban)

Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a228e925
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a228e925
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a228e925

Branch: refs/heads/0.94
Commit: a228e925e7a80ae479700aa40b43ab2e002d4e7c
Parents: 9498add
Author: Jimmy Xiang <[email protected]>
Authored: Fri Jun 13 09:13:37 2014 -0700
Committer: Jimmy Xiang <[email protected]>
Committed: Fri Jun 13 09:13:37 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/master/AssignmentManager.java | 23 ++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hbase/blob/a228e925/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 312a211..8755317 100644
--- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -1681,6 +1681,8 @@ public class AssignmentManager extends ZooKeeperListener {
       boolean hijack) {
     boolean regionAlreadyInTransitionException = false;
     boolean serverNotRunningYet = false;
+    boolean socketTimeoutException = false;
+
     long maxRegionServerStartupWaitTime = -1;
     for (int i = 0; i < this.maximumAssignmentAttempts; i++) {
       int versionOfOfflineNode = -1;
@@ -1776,6 +1778,8 @@ public class AssignmentManager extends ZooKeeperListener {
         }
         regionAlreadyInTransitionException = false;
         serverNotRunningYet = false;
+        socketTimeoutException = false;
+
         if (t instanceof RegionAlreadyInTransitionException) {
           regionAlreadyInTransitionException = true;
           if (LOG.isDebugEnabled()) {
@@ -1812,16 +1816,26 @@ public class AssignmentManager extends ZooKeeperListener {
               + region.getRegionNameAsString()
               + ", but the region might already be opened on "
               + plan.getDestination() + ".", t);
-          return;
+          socketTimeoutException = true;
+          try {
+            Thread.sleep(100);
+            i--; // reset the try count
+          } catch (InterruptedException ie) {
+            LOG.warn("Failed to assign " + state.getRegion().getRegionNameAsString()
+              + " since interrupted", ie);
+            Thread.currentThread().interrupt();
+            return;
+          }
         }
         LOG.warn("Failed assignment of "
           + state.getRegion().getRegionNameAsString()
           + " to "
           + plan.getDestination()
           + ", trying to assign "
-          + (regionAlreadyInTransitionException || serverNotRunningYet
+          + (regionAlreadyInTransitionException || serverNotRunningYet || socketTimeoutException
             ? "to the same region server because of "
-            + "RegionAlreadyInTransitionException/ServerNotRunningYetException;"
+            + "RegionAlreadyInTransitionException/ServerNotRunningYetException/"
+            + "SocketTimeoutException;"
             : "elsewhere instead; ")
           + "retry=" + i, t);
         // Clean out plan we failed execute and one that doesn't look like it'll
@@ -1832,7 +1846,8 @@ public class AssignmentManager extends ZooKeeperListener {
         // RS may cause double assignments. In case of RegionAlreadyInTransitionException
         // reassigning to same RS.
         RegionPlan newPlan = plan;
-        if (!regionAlreadyInTransitionException && !serverNotRunningYet) {
+        if (!regionAlreadyInTransitionException
+            && !serverNotRunningYet && !socketTimeoutException) {
           // Force a new plan and reassign. Will return null if no servers.
           // The new plan could be the same as the existing plan since we don't
           // exclude the server of the original plan, which should not be
