Repository: mesos
Updated Branches:
  refs/heads/master 6a8738f89 -> 39acb0a6b


Made `shutdown_grace_period` configurable in `ExecutorInfo`.

If `ExecutorInfo.shutdown_grace_period` is set, the executor
driver uses it, otherwise it falls back to the environment
variable `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD`.

Review: https://reviews.apache.org/r/44655/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/92266d05
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/92266d05
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/92266d05

Branch: refs/heads/master
Commit: 92266d05a883c2898a41f29a47766a1679fa827c
Parents: 6a8738f
Author: Alexander Rukletsov <ruklet...@gmail.com>
Authored: Thu Mar 24 17:29:10 2016 +0100
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Thu Mar 24 18:20:59 2016 +0100

----------------------------------------------------------------------
 CHANGELOG                                 |  9 +++++++++
 docs/configuration.md                     |  9 +++++++--
 include/mesos/executor/executor.proto     | 21 +++++++++++++++------
 include/mesos/mesos.proto                 |  8 ++++++++
 include/mesos/v1/executor/executor.proto  | 21 +++++++++++++++------
 include/mesos/v1/mesos.proto              |  8 ++++++++
 src/slave/containerizer/containerizer.cpp | 11 ++++++++++-
 src/slave/flags.cpp                       |  8 ++++++--
 src/slave/slave.cpp                       | 10 +++++++++-
 9 files changed, 87 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 1fadaa1..5aeb479 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,5 +1,14 @@
 Release Notes - Mesos - Version 0.29.0 (WIP)
 --------------------------------------------
+This release contains the following new features:
+  * [MESOS-4949] - The executor shutdown grace period can now be configured in
+    `ExecutorInfo`, which overrides the agent flag. When shutting down an
+    executor the agent will wait in a best-effort manner for the grace period
+    specified here before forcibly destroying the container. The executor must
+    not assume that it will always be allotted the full grace period, as the
+    agent may decide to allot a shorter period and failures / forcible
+    terminations may occur. Together with kill policies this gives frameworks
+    flexibility around how to clean up tasks and executors.
 
 Deprecations:
   * [MESOS-5001] - The 'allocator/event_queue_dispatches' metric is now

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 70686ad..b0f9cb5 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1181,8 +1181,13 @@ shutting it down (e.g., 60secs, 3mins, etc) (default: 
1mins)
     --executor_shutdown_grace_period=VALUE
   </td>
   <td>
-Amount of time to wait for an executor
-to shut down (e.g., 60secs, 3mins, etc) (default: 5secs)
+Default amount of time to wait for an executor to shut down
+(e.g. 60secs, 3mins, etc). ExecutorInfo.shutdown_grace_period
+overrides this default. Note that the executor must not assume
+that it will always be allotted the full grace period, as the
+agent may decide to allot a shorter period, and failures / forcible
+terminations may occur.
+(default: 5secs)
   </td>
 </tr>
 <tr>

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/include/mesos/executor/executor.proto
----------------------------------------------------------------------
diff --git a/include/mesos/executor/executor.proto 
b/include/mesos/executor/executor.proto
index ae21119..6b4141f 100644
--- a/include/mesos/executor/executor.proto
+++ b/include/mesos/executor/executor.proto
@@ -41,12 +41,21 @@ message Event {
 
     // Received when the agent asks the executor to shutdown/kill itself.
     // The executor is then required to kill all its active tasks, send
-    // `TASK_KILLED` status updates and gracefully exit. If an executor
-    // does not terminate within a `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD`
-    // period (an environment variable set by the agent upon executor
-    // startup), the agent will forcefully destroy the container where the
-    // executor is running. The agent would then send `TASK_LOST` updates
-    // for any remaining active tasks of this executor.
+    // `TASK_KILLED` status updates and gracefully exit. The executor
+    // should terminate within a `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD`
+    // (an environment variable set by the agent upon executor startup);
+    // it can be configured via `ExecutorInfo.shutdown_grace_period`. If
+    // the executor fails to do so, the agent will forcefully destroy the
+    // container where the executor is running. The agent would then send
+    // `TASK_LOST` updates for any remaining active tasks of this executor.
+    //
+    // NOTE: The executor must not assume that it will always be allotted
+    // the full grace period, as the agent may decide to allot a shorter
+    // period and failures / forcible terminations may occur.
+    //
+    // TODO(alexr): Consider adding a duration field into the `Shutdown`
+    // message so that the agent can communicate when a shorter period
+    // has been allotted.
     SHUTDOWN = 7;
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/include/mesos/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/mesos.proto b/include/mesos/mesos.proto
index 59f5d3a..ade6f5a 100644
--- a/include/mesos/mesos.proto
+++ b/include/mesos/mesos.proto
@@ -450,6 +450,14 @@ message ExecutorInfo {
   // discovery system to use this information as needed and to handle
   // executors without service discovery information.
   optional DiscoveryInfo discovery = 12;
+
+  // When shutting down an executor the agent will wait in a
+  // best-effort manner for the grace period specified here
+  // before forcibly destroying the container. The executor
+  // must not assume that it will always be allotted the full
+  // grace period, as the agent may decide to allot a shorter
+  // period and failures / forcible terminations may occur.
+  optional DurationInfo shutdown_grace_period = 13;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/include/mesos/v1/executor/executor.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/executor/executor.proto 
b/include/mesos/v1/executor/executor.proto
index 36a2b3f..d7b1dd5 100644
--- a/include/mesos/v1/executor/executor.proto
+++ b/include/mesos/v1/executor/executor.proto
@@ -41,12 +41,21 @@ message Event {
 
     // Received when the agent asks the executor to shutdown/kill itself.
     // The executor is then required to kill all its active tasks, send
-    // `TASK_KILLED` status updates and gracefully exit. If an executor
-    // does not terminate within a `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD`
-    // period (an environment variable set by the agent upon executor
-    // startup), the agent will forcefully destroy the container where the
-    // executor is running. The agent would then send `TASK_LOST` updates
-    // for any remaining active tasks of this executor.
+    // `TASK_KILLED` status updates and gracefully exit. The executor
+    // should terminate within a `MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD`
+    // (an environment variable set by the agent upon executor startup);
+    // it can be configured via `ExecutorInfo.shutdown_grace_period`. If
+    // the executor fails to do so, the agent will forcefully destroy the
+    // container where the executor is running. The agent would then send
+    // `TASK_LOST` updates for any remaining active tasks of this executor.
+    //
+    // NOTE: The executor must not assume that it will always be allotted
+    // the full grace period, as the agent may decide to allot a shorter
+    // period and failures / forcible terminations may occur.
+    //
+    // TODO(alexr): Consider adding a duration field into the `Shutdown`
+    // message so that the agent can communicate when a shorter period
+    // has been allotted.
     SHUTDOWN = 7;
   }
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/include/mesos/v1/mesos.proto
----------------------------------------------------------------------
diff --git a/include/mesos/v1/mesos.proto b/include/mesos/v1/mesos.proto
index 6556a1c..7a5b680 100644
--- a/include/mesos/v1/mesos.proto
+++ b/include/mesos/v1/mesos.proto
@@ -450,6 +450,14 @@ message ExecutorInfo {
   // discovery system to use this information as needed and to handle
   // executors without service discovery information.
   optional DiscoveryInfo discovery = 12;
+
+  // When shutting down an executor the agent will wait in a
+  // best-effort manner for the grace period specified here
+  // before forcibly destroying the container. The executor
+  // must not assume that it will always be allotted the full
+  // grace period, as the agent may decide to allot a shorter
+  // period and failures / forcible terminations may occur.
+  optional DurationInfo shutdown_grace_period = 13;
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/src/slave/containerizer/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/containerizer.cpp 
b/src/slave/containerizer/containerizer.cpp
index f6fc786..3556040 100644
--- a/src/slave/containerizer/containerizer.cpp
+++ b/src/slave/containerizer/containerizer.cpp
@@ -329,8 +329,17 @@ map<string, string> executorEnvironment(
   environment["MESOS_SLAVE_PID"] = stringify(slavePid);
   environment["MESOS_AGENT_ENDPOINT"] = stringify(slavePid.address);
   environment["MESOS_CHECKPOINT"] = checkpoint ? "1" : "0";
+
+  // Set executor's shutdown grace period. If set, the customized value
+  // from `ExecutorInfo` overrides the default from agent flags.
+  Duration executorShutdownGracePeriod = flags.executor_shutdown_grace_period;
+  if (executorInfo.has_shutdown_grace_period()) {
+    executorShutdownGracePeriod =
+      Nanoseconds(executorInfo.shutdown_grace_period().nanoseconds());
+  }
+
   environment["MESOS_EXECUTOR_SHUTDOWN_GRACE_PERIOD"] =
-    stringify(flags.executor_shutdown_grace_period);
+    stringify(executorShutdownGracePeriod);
 
   if (checkpoint) {
     environment["MESOS_RECOVERY_TIMEOUT"] = stringify(flags.recovery_timeout);

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/src/slave/flags.cpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 71685ce..e831ce7 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -251,8 +251,12 @@ mesos::internal::slave::Flags::Flags()
 
   add(&Flags::executor_shutdown_grace_period,
       "executor_shutdown_grace_period",
-      "Amount of time to wait for an executor\n"
-      "to shut down (e.g., 60secs, 3mins, etc)",
+      "Default amount of time to wait for an executor to shut down\n"
+      "(e.g. 60secs, 3mins, etc). ExecutorInfo.shutdown_grace_period\n"
+      "overrides this default. Note that the executor must not assume\n"
+      "that it will always be allotted the full grace period, as the\n"
+      "agent may decide to allot a shorter period, and failures / forcible\n"
+      "terminations may occur.",
       DEFAULT_EXECUTOR_SHUTDOWN_GRACE_PERIOD);
 
   add(&Flags::gc_delay,

http://git-wip-us.apache.org/repos/asf/mesos/blob/92266d05/src/slave/slave.cpp
----------------------------------------------------------------------
diff --git a/src/slave/slave.cpp b/src/slave/slave.cpp
index 08ff1ed..c6fd810 100644
--- a/src/slave/slave.cpp
+++ b/src/slave/slave.cpp
@@ -4346,8 +4346,16 @@ void Slave::_shutdownExecutor(Framework* framework, 
Executor* executor)
   // will be dropped to the floor!
   executor->send(ShutdownExecutorMessage());
 
+  // If the executor specifies shutdown grace period,
+  // pass it instead of the default.
+  Duration shutdownTimeout = flags.executor_shutdown_grace_period;
+  if (executor->info.has_shutdown_grace_period()) {
+    shutdownTimeout = Nanoseconds(
+        executor->info.shutdown_grace_period().nanoseconds());
+  }
+
   // Prepare for sending a kill if the executor doesn't comply.
-  delay(flags.executor_shutdown_grace_period,
+  delay(shutdownTimeout,
         self(),
         &Slave::shutdownExecutorTimeout,
         framework->id(),

Reply via email to