This is an automated email from the ASF dual-hosted git repository.

asekretenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git


The following commit(s) were added to refs/heads/master by this push:
     new 098315f  Exposed hierarchical allocator recovery parameters as master flags.
098315f is described below

commit 098315f6b92ca076ef7cd197b039efcbcaf80e77
Author: Thomas Langé <[email protected]>
AuthorDate: Wed Sep 9 15:59:34 2020 +0200

    Exposed hierarchical allocator recovery parameters as master flags.
    
    This closes #367
---
 include/mesos/allocator/allocator.hpp       |  8 ++++++++
 src/master/allocator/mesos/hierarchical.cpp | 10 +++-------
 src/master/flags.cpp                        | 14 ++++++++++++++
 src/master/flags.hpp                        |  2 ++
 src/master/master.cpp                       |  2 ++
 5 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/include/mesos/allocator/allocator.hpp b/include/mesos/allocator/allocator.hpp
index 6d67d5d..b0a5d6a 100644
--- a/include/mesos/allocator/allocator.hpp
+++ b/include/mesos/allocator/allocator.hpp
@@ -43,6 +43,10 @@
 namespace mesos {
 namespace allocator {
 
+constexpr Duration DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT = Minutes(10);
+constexpr double DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR = 0.80;
+
+
 /**
  *  Pass in configuration to the allocator.
  */
@@ -71,6 +75,10 @@ struct Options
 
   // Mesos master's authorizer.
   Option<::mesos::Authorizer*> authorizer;
+
+  // Recovery options
+  Duration recoveryTimeout = DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT;
+  double agentRecoveryFactor = DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR;
 };
 
 
diff --git a/src/master/allocator/mesos/hierarchical.cpp b/src/master/allocator/mesos/hierarchical.cpp
index d4374c3..35264b9 100644
--- a/src/master/allocator/mesos/hierarchical.cpp
+++ b/src/master/allocator/mesos/hierarchical.cpp
@@ -702,13 +702,9 @@ void HierarchicalAllocatorProcess::recover(
     updateQuota(role, quota);
   }
 
-  // TODO(alexr): Consider exposing these constants.
-  const Duration ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT = Minutes(10);
-  const double AGENT_RECOVERY_FACTOR = 0.8;
-
   // Record the number of expected agents.
   expectedAgentCount =
-    static_cast<int>(_expectedAgentCount * AGENT_RECOVERY_FACTOR);
+    static_cast<int>(_expectedAgentCount * options.agentRecoveryFactor);
 
   // Skip recovery if there are no expected agents. This is not strictly
   // necessary for the allocator to function correctly, but maps better
@@ -726,11 +722,11 @@ void HierarchicalAllocatorProcess::recover(
   pause();
 
   // Setup recovery timer.
-  delay(ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT, self(), &Self::resume);
+  delay(options.recoveryTimeout, self(), &Self::resume);
 
   LOG(INFO) << "Triggered allocator recovery: waiting for "
             << expectedAgentCount.get() << " agents to reconnect or "
-            << ALLOCATION_HOLD_OFF_RECOVERY_TIMEOUT << " to pass";
+            << options.recoveryTimeout << " to pass";
 }
 
 
diff --git a/src/master/flags.cpp b/src/master/flags.cpp
index 31a8da1..be4f010 100644
--- a/src/master/flags.cpp
+++ b/src/master/flags.cpp
@@ -24,6 +24,8 @@
 #include "master/constants.hpp"
 #include "master/flags.hpp"
 
+#include <mesos/allocator/allocator.hpp>
+
 using std::string;
 
 mesos::internal::master::Flags::Flags()
@@ -460,6 +462,18 @@ mesos::internal::master::Flags::Flags()
       "load an alternate allocator module using `--modules`.",
       DEFAULT_ALLOCATOR);
 
+  add(&Flags::allocator_agent_recovery_factor,
+      "allocator_agent_recovery_factor",
+      "Minimum fraction of known agents re-registered after leader election\n"
+      "for the allocator to start generating offers.",
+      mesos::allocator::DEFAULT_ALLOCATOR_AGENT_RECOVERY_FACTOR);
+
+  add(&Flags::allocator_recovery_timeout,
+      "allocator_recovery_timeout",
+      "Maximum time to wait before sending offers after a leader\n"
+      "re-election.",
+      mesos::allocator::DEFAULT_ALLOCATOR_RECOVERY_TIMEOUT);
+
   add(&Flags::fair_sharing_excluded_resource_names,
       "fair_sharing_excluded_resource_names",
       "A comma-separated list of the resource names (e.g. 'gpus')\n"
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 9500a0a..862ae59 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -81,6 +81,8 @@ public:
   Option<std::string> modulesDir;
   std::string authenticators;
   std::string allocator;
+  double allocator_agent_recovery_factor;
+  Duration allocator_recovery_timeout;
   Option<std::set<std::string>> fair_sharing_excluded_resource_names;
   bool filter_gpu_resources;
   std::string min_allocatable_resources;
diff --git a/src/master/master.cpp b/src/master/master.cpp
index fefa72d..576ae10 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -796,6 +796,8 @@ void Master::initialize()
   options.publishPerFrameworkMetrics = flags.publish_per_framework_metrics;
   options.readonlyHttpAuthenticationRealm = READONLY_HTTP_AUTHENTICATION_REALM;
   options.authorizer = authorizer;
+  options.recoveryTimeout = flags.allocator_recovery_timeout;
+  options.agentRecoveryFactor = flags.allocator_agent_recovery_factor;
 
   // Initialize the allocator.
   allocator->initialize(

Reply via email to