ACCUMULO-378 Try to reduce the spam in the logs when the peer is unavailable.
Increase the sleep time linearly with each failed attempt to talk to the replication coordinator. Eventually, bail out completely and let the replication process take over again. Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/27905426 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/27905426 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/27905426 Branch: refs/heads/ACCUMULO-378 Commit: 27905426f5a0a3f5c140d1ae1b0c00497e6ad0bb Parents: 5e8d6d2 Author: Josh Elser <[email protected]> Authored: Wed May 28 11:45:57 2014 -0400 Committer: Josh Elser <[email protected]> Committed: Wed May 28 11:45:57 2014 -0400 ---------------------------------------------------------------------- .../apache/accumulo/core/client/impl/ReplicationClient.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/27905426/core/src/main/java/org/apache/accumulo/core/client/impl/ReplicationClient.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/impl/ReplicationClient.java b/core/src/main/java/org/apache/accumulo/core/client/impl/ReplicationClient.java index 02ae3d0..d7b12c7 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/impl/ReplicationClient.java +++ b/core/src/main/java/org/apache/accumulo/core/client/impl/ReplicationClient.java @@ -51,17 +51,18 @@ public class ReplicationClient { * Instance for the peer replicant * @return Client to the ReplicationCoordinator service */ - public static ReplicationCoordinator.Client getCoordinatorConnectionWithRetry(Instance instance) { + public static ReplicationCoordinator.Client getCoordinatorConnectionWithRetry(Instance instance) throws AccumuloException { checkArgument(instance != null, "instance is null"); - while (true) { + 
for (int attempts = 1; attempts <= 10; attempts++) { ReplicationCoordinator.Client result = getCoordinatorConnection(instance); if (result != null) return result; - UtilWaitThread.sleep(250); + UtilWaitThread.sleep(attempts * 250); } + throw new AccumuloException("Timed out trying to communicate with master from " + instance.getInstanceName()); } public static ReplicationCoordinator.Client getCoordinatorConnection(Instance instance) {
