Repository: aurora Updated Branches: refs/heads/master 13be937c4 -> ea2c9ad24
Allow config of the /quitquitquit and /abortabortabort endpoints. Fix name of test for sigterm being ignored. Based on https://github.com/tellapart/incubator-aurora/commit/c90532d69d69045c545004451e3660f5282e6e34 Testing Done: Unittests and manually in vagrant cluster. Also verified that the flaky tests pass. Bugs closed: AURORA-1257 Reviewed at https://reviews.apache.org/r/32973/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/ea2c9ad2 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/ea2c9ad2 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/ea2c9ad2 Branch: refs/heads/master Commit: ea2c9ad24ce7f489feaa510403f08a3bc5ed6bf9 Parents: 13be937 Author: Brian Brazil <[email protected]> Authored: Tue Jun 16 15:25:17 2015 -0700 Committer: Kevin Sweeney <[email protected]> Committed: Tue Jun 16 15:25:17 2015 -0700 ---------------------------------------------------------------------- docs/configuration-reference.md | 14 ++++++++++++++ src/main/python/apache/aurora/common/http_signaler.py | 6 ------ .../apache/aurora/executor/thermos_task_runner.py | 6 ++++-- src/main/python/apache/thermos/config/schema_base.py | 4 ++++ .../python/apache/aurora/common/test_http_signaler.py | 4 ++-- .../aurora/executor/test_thermos_task_runner.py | 13 ++++++++----- 6 files changed, 32 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/docs/configuration-reference.md ---------------------------------------------------------------------- diff --git a/docs/configuration-reference.md b/docs/configuration-reference.md index fb753ea..7bfd633 100644 --- a/docs/configuration-reference.md +++ b/docs/configuration-reference.md @@ -162,6 +162,8 @@ can be omitted. In Mesos, `resources` is also required. 
```max_failures``` | Integer | Maximum process failures before being considered failed (Default: 1) ```max_concurrency``` | Integer | Maximum number of concurrent processes (Default: 0, unlimited concurrency.) ```finalization_wait``` | Integer | Amount of time allocated for finalizing processes, in seconds. (Default: 30) + ```graceful_shutdown_endpoint``` | String | Endpoint to hit to indicate that a task should gracefully shut down. (Default: /quitquitquit) + ```shutdown_endpoint``` | String | Endpoint to hit to give a task its final warning before being killed. (Default: /abortabortabort) #### name `name` is a string denoting the name of this task. It defaults to the name of the first Process in @@ -277,6 +279,18 @@ Client applications with higher priority may force a shorter finalization wait (e.g. through parameters to `thermos kill`), so this is mostly a best-effort signal. +#### graceful_shutdown_endpoint + +If the Job has a port named `health`, an HTTP POST request will be sent over +localhost to this endpoint to request that the task gracefully shut itself +down. + +#### shutdown_endpoint + +If the Job has a port named `health`, an HTTP POST request will be sent over +localhost to this endpoint as a final warning before the task is shut +down.
+ ### Constraint Object Current constraint objects only support a single ordering constraint, `order`, http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/src/main/python/apache/aurora/common/http_signaler.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/common/http_signaler.py b/src/main/python/apache/aurora/common/http_signaler.py index 531f1fe..a3193f3 100644 --- a/src/main/python/apache/aurora/common/http_signaler.py +++ b/src/main/python/apache/aurora/common/http_signaler.py @@ -103,9 +103,3 @@ class HttpSignaler(object): return (True, None) except self.QueryError as e: return (False, str(e)) - - def quitquitquit(self): - return self('/quitquitquit', use_post_method=True) - - def abortabortabort(self): - return self('/abortabortabort', use_post_method=True) http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/src/main/python/apache/aurora/executor/thermos_task_runner.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/executor/thermos_task_runner.py b/src/main/python/apache/aurora/executor/thermos_task_runner.py index 837ad5c..7bcd6c4 100644 --- a/src/main/python/apache/aurora/executor/thermos_task_runner.py +++ b/src/main/python/apache/aurora/executor/thermos_task_runner.py @@ -118,8 +118,10 @@ class ThermosTaskRunner(TaskRunner): http_signaler = HttpSignaler(self._ports['health']) - for exit_request in [http_signaler.quitquitquit, http_signaler.abortabortabort]: - handled, _ = exit_request() + for exit_endpoint in [ + self._task.graceful_shutdown_endpoint().get(), + self._task.shutdown_endpoint().get()]: + handled, _ = http_signaler(exit_endpoint, use_post_method=True) if handled: self._clock.sleep(self.ESCALATION_WAIT.as_(Time.SECONDS)) if self.status is not None: http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/src/main/python/apache/thermos/config/schema_base.py 
---------------------------------------------------------------------- diff --git a/src/main/python/apache/thermos/config/schema_base.py b/src/main/python/apache/thermos/config/schema_base.py index f9143cc..a85def9 100644 --- a/src/main/python/apache/thermos/config/schema_base.py +++ b/src/main/python/apache/thermos/config/schema_base.py @@ -74,6 +74,10 @@ class Task(Struct): # > 0 is max concurrent processes. finalization_wait = Default(Integer, 30) # the amount of time in seconds we allocate to run the # finalization schedule. + # Endpoint to hit to indicate that a task should gracefully shut down. + graceful_shutdown_endpoint = Default(String, "/quitquitquit") + # Endpoint to hit to give a task its final warning before being killed. + shutdown_endpoint = Default(String, "/abortabortabort") # TODO(jon): remove/replace with proper solution to MESOS-3546 user = String http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/src/test/python/apache/aurora/common/test_http_signaler.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/common/test_http_signaler.py b/src/test/python/apache/aurora/common/test_http_signaler.py index c6a2170..f68c71a 100644 --- a/src/test/python/apache/aurora/common/test_http_signaler.py +++ b/src/test/python/apache/aurora/common/test_http_signaler.py @@ -61,8 +61,8 @@ class TestHttpSignaler(unittest.TestCase): self._mox.ReplayAll() signaler = HttpSignaler(self.PORT) - assert signaler.quitquitquit() == (True, None) - assert signaler.abortabortabort() == (True, None) + assert signaler('/quitquitquit', use_post_method=True) == (True, None) + assert signaler('/abortabortabort', use_post_method=True) == (True, None) def test_health_checks(self): self._mox.StubOutWithMock(urllib_request, 'urlopen') http://git-wip-us.apache.org/repos/asf/aurora/blob/ea2c9ad2/src/test/python/apache/aurora/executor/test_thermos_task_runner.py
---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/executor/test_thermos_task_runner.py b/src/test/python/apache/aurora/executor/test_thermos_task_runner.py index 1e21a11..3569a6a 100644 --- a/src/test/python/apache/aurora/executor/test_thermos_task_runner.py +++ b/src/test/python/apache/aurora/executor/test_thermos_task_runner.py @@ -79,10 +79,12 @@ class TestThermosTaskRunnerIntegration(object): print('Saving executor logs in %s' % cls.LOG_DIR) @contextlib.contextmanager - def yield_runner(self, runner_class, portmap={}, clock=time, **bindings): + def yield_runner(self, runner_class, portmap=None, clock=time, **bindings): with contextlib.nested(temporary_dir(), temporary_dir()) as (td1, td2): sandbox = DirectorySandbox(td1) checkpoint_root = td2 + if not portmap: + portmap = {} task_runner = runner_class( runner_pex=os.path.join('dist', 'thermos_runner.pex'), @@ -195,7 +197,7 @@ class TestThermosTaskRunnerIntegration(object): assert task_runner.status.status == mesos_pb2.TASK_LOST @pytest.mark.skipif('True', reason='Flaky test (AURORA-1054)') - def test_integration_quitquitquit(self): + def test_integration_ignores_sigterm(self): ignorant_script = ';'.join([ 'import time, signal', 'signal.signal(signal.SIGTERM, signal.SIG_IGN)', @@ -218,8 +220,7 @@ class TestThermosTaskRunnerIntegration(object): @patch('apache.aurora.executor.thermos_task_runner.HttpSignaler') def test_integration_http_teardown(self, SignalerClass): signaler = SignalerClass.return_value - signaler.quitquitquit.return_value = (False, 'failed to dispatch') - signaler.abortabortabort.return_value = (True, None) + signaler.side_effect = lambda path, use_post_method: (path != '/quitquitquit', None) clock = Mock(wraps=time) @@ -240,7 +241,9 @@ class TestThermosTaskRunnerIntegration(object): escalation_wait = call(ShortEscalationRunner.ESCALATION_WAIT.as_(Time.SECONDS)) assert clock.sleep.mock_calls.count(escalation_wait) == 1 - assert 
signaler.mock_calls == [call.quitquitquit(), call.abortabortabort()] + assert signaler.mock_calls == [ + call('/quitquitquit', use_post_method=True), + call('/abortabortabort', use_post_method=True)] def test_thermos_normal_exit_status(self): with self.exit_with_status(0, TaskState.SUCCESS) as task_runner:
