This is an automated email from the ASF dual-hosted git repository. abudnik pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 63ca5c159ef0272fc1d2add6ff73a88c8e16ea0c Author: Andrei Budnik <[email protected]> AuthorDate: Wed May 6 13:13:51 2020 +0200 Logged connection error message before shutting down the executor. Previously, if an executor failed to connect to the agent, it would silently shut itself down without writing an error message to the log. After we added support for domain sockets, the set of potential failures during `connect` increased. This patch logs the connection failures to help with debugging. Review: https://reviews.apache.org/r/72475 --- src/executor/executor.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/executor/executor.cpp b/src/executor/executor.cpp index 213f38e..974049c 100644 --- a/src/executor/executor.cpp +++ b/src/executor/executor.cpp @@ -561,11 +561,13 @@ protected: recoveryTimer = delay( recoveryTimeout.get(), self(), - &Self::_recoveryTimeout); + &Self::_recoveryTimeout, + failure); // Backoff and reconnect only if framework checkpointing is enabled. backoff(); } else { + LOG(INFO) << "Disconnected from agent: " << failure << "; Shutting down"; shutdown(); } } @@ -599,7 +601,7 @@ protected: return future; } - void _recoveryTimeout() + void _recoveryTimeout(const string& failure) { // It's possible that a new connection was established since the timeout // fired and we were unable to cancel this timeout. If this occurs, don't @@ -612,7 +614,8 @@ protected: CHECK_SOME(recoveryTimeout); LOG(INFO) << "Recovery timeout of " << recoveryTimeout.get() - << " exceeded; Shutting down"; + << " exceeded following the first connection failure: " << failure + << "; Shutting down"; shutdown(); }
