Repository: giraph Updated Branches: refs/heads/trunk 7cacb1830 -> 608d50697
GIRAPH-1064: Reconnect JobProgressTracker Summary: When workers/master don't talk to JobProgressTracker, it can disconnect and throw RejectedExecutionException - we should catch and retry on that exception too. Test Plan: Ran a job where, without this change, master would fail to talk to JobProgressTracker after a while; with the change it worked. Differential Revision: https://reviews.facebook.net/D58323 Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/608d5069 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/608d5069 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/608d5069 Branch: refs/heads/trunk Commit: 608d50697c5e19a8249dd9290cd15b652f5f01a1 Parents: 7cacb18 Author: Maja Kabiljo <[email protected]> Authored: Tue May 17 12:22:19 2016 -0700 Committer: Maja Kabiljo <[email protected]> Committed: Wed May 18 02:21:20 2016 -0700 ---------------------------------------------------------------------- .../giraph/graph/RetryableJobProgressTrackerClient.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/608d5069/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java b/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java index 60cb586..21204bd 100644 --- a/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java +++ b/giraph-core/src/main/java/org/apache/giraph/graph/RetryableJobProgressTrackerClient.java @@ -38,6 +38,7 @@ import com.google.common.io.Closeables; import java.io.IOException; import java.net.InetSocketAddress; import java.util.concurrent.ExecutionException; +import 
java.util.concurrent.RejectedExecutionException; /** * Wrapper around JobProgressTracker which retires to connect and swallows @@ -159,9 +160,9 @@ public class RetryableJobProgressTrackerClient private void executeWithRetry(Runnable runnable) { try { runnable.run(); - } catch (RuntimeTTransportException te) { + } catch (RuntimeTTransportException | RejectedExecutionException te) { if (LOG.isDebugEnabled()) { - LOG.debug("RuntimeTTransportException occurred while talking to " + + LOG.debug(te.getClass() + " occurred while talking to " + "JobProgressTracker server, trying to reconnect", te); } try { @@ -171,7 +172,8 @@ public class RetryableJobProgressTrackerClient } catch (Exception e) { // CHECKSTYLE: resume IllegalCatch if (LOG.isDebugEnabled()) { - LOG.debug(""); + LOG.debug( + "Exception occurred while trying to close client manager", e); } } resetConnection();
