This is an automated email from the ASF dual-hosted git repository.
asekretenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
The following commit(s) were added to refs/heads/master by this push:
new 098315f Exposed hierarchical allocator recovery parameters as master flags.
098315f is described below
commit 098315f6b92ca076ef7cd197b039efcbcaf80e77
Author: Thomas Langé <[email protected]>
AuthorDate: Wed Sep 9 15:59:34 2020 +0200
Exposed hierarchical allocator recovery parameters as master flags.
This closes #367
---
include/mesos/allocator/allocator.hpp | 8 ++++++++
src/master/allocator/mesos/hierarchical.cpp | 10 +++-------
src/master/flags.cpp | 14 ++++++++++++++
src/master/flags.hpp | 2 ++
src/master/master.cpp | 2 ++
5 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/include/mesos/allocator/allocator.hpp b/include/mesos/allocator/allocator.hpp
index 6d67d5d..b0a5d6a 100644
--- a/include/mesos/allocator/allocator.hpp
+++ b/include/mesos/allocator/allocator.hpp
@@ -43,6 +43,10 @@
namespace mesos {
namespace allocator {
+constexpr Duration DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT = Minutes(10);
+constexpr double DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR = 0.80;
+
+
/**
* Pass in configuration to the allocator.
*/
@@ -71,6 +75,10 @@ struct Options
// Mesos master's authorizer.
Option<::mesos::Authorizer*> authorizer;
+
+ // Recovery options
+ Duration recoveryTimeout = DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT;
+ double agentRecoveryFactor = DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR;
};
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index d4374c3..35264b9 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -702,13 +702,9 @@ void HierarchicalAllocatorProcess::recover(
updateQuota(role, quota);
}
- // TODO(alexr): Consider exposing these constants.
- const Duration ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT = Minutes(10);
- const double AGENT_RECOVERY_FACTOR = 0.8;
-
// Record the number of expected agents.
expectedAgentCount =
- static_cast<int>(_expectedAgentCount * AGENT_RECOVERY_FACTOR);
+ static_cast<int>(_expectedAgentCount * options.agentRecoveryFactor);
// Skip recovery if there are no expected agents. This is not strictly
// necessary for the allocator to function correctly, but maps better
@@ -726,11 +722,11 @@ void HierarchicalAllocatorProcess::recover(
pause();
// Setup recovery timer.
- delay(ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT, self(), &Self::resume);
+ delay(options.recoveryTimeout, self(), &Self::resume);
LOG(INFO) << "Triggered allocator recovery: waiting for "
<< expectedAgentCount.get() << " agents to reconnect or "
- << ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT << " to pass";
+ << options.recoveryTimeout << " to pass";
}
diff --git a/src/master/flags.cpp b/src/master/flags.cpp
index 31a8da1..be4f010 100644
--- a/src/master/flags.cpp
+++ b/src/master/flags.cpp
@@ -24,6 +24,8 @@
#include "master/constants.hpp"
#include "master/flags.hpp"
+#include <mesos/allocator/allocator.hpp>
+
using std::string;
mesos::internal::master::Flags::Flags()
@@ -460,6 +462,18 @@ mesos::internal::master::Flags::Flags()
"load an alternate allocator module using `--modules`.",
DEFAULT_ALLOCATOR);
+ add(&Flags::allocator_agent_recovery_factor,
+ "allocator_agent_recovery_factor",
+ "Minimum fraction of known agents re-registered after leader election\n"
+ "for the allocator to start generating offers.",
+ mesos::allocator::DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR);
+
+ add(&Flags::allocator_recovery_timeout,
+ "allocator_recovery_timeout",
+ "Maximum time to wait before sending offers after a leader\n"
+ "re-election.",
+ mesos::allocator::DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT);
+
add(&Flags::fair_sharing_excluded_resource_names,
"fair_sharing_excluded_resource_names",
"A comma-separated list of the resource names (e.g. 'gpus')\n"
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 9500a0a..862ae59 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -81,6 +81,8 @@ public:
Option<std::string> modulesDir;
std::string authenticators;
std::string allocator;
+ double allocator_agent_recovery_factor;
+ Duration allocator_recovery_timeout;
Option<std::set<std::string>> fair_sharing_excluded_resource_names;
bool filter_gpu_resources;
std::string min_allocatable_resources;
diff --git a/src/master/master.cpp b/src/master/master.cpp
index fefa72d..576ae10 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -796,6 +796,8 @@ void Master::initialize()
options.publishPerFrameworkMetrics = flags.publish_per_framework_metrics;
options.readonlyHttpAuthenticationRealm = READONLY_HTTP_AUTHENTICATION_REALM;
options.authorizer = authorizer;
+ options.recoveryTimeout = flags.allocator_recovery_timeout;
+ options.agentRecoveryFactor = flags.allocator_agent_recovery_factor;
// Initialize the allocator.
allocator->initialize(