Repository: aurora Updated Branches: refs/heads/master ca683cb9e -> ed72b1bf6
Add min_consecutive_health_checks (implemented as min_consecutive_successes) in HealthCheckConfig. HealthCheckConfig should accept a new configuration value that specifies how many consecutive successful health checks an instance requires to move from STARTING to RUNNING. Bugs closed: AURORA-1224 Reviewed at https://reviews.apache.org/r/52094/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/ed72b1bf Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/ed72b1bf Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/ed72b1bf Branch: refs/heads/master Commit: ed72b1bf662d1e29d2bb483b317c787630c26a9e Parents: ca683cb Author: Kai Huang <[email protected]> Authored: Fri Sep 30 14:56:28 2016 -0500 Committer: Joshua Cohen <[email protected]> Committed: Fri Sep 30 14:56:28 2016 -0500 ---------------------------------------------------------------------- docs/reference/configuration.md | 3 +- src/main/python/apache/aurora/client/config.py | 31 ++++++++++++++++---- .../python/apache/aurora/config/schema/base.py | 1 + .../python/apache/aurora/client/test_config.py | 31 +++++++------------- 4 files changed, 39 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/docs/reference/configuration.md ---------------------------------------------------------------------- diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index f2a0b18..71d2ce5 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -379,9 +379,10 @@ Parameters for controlling a task's health checks via HTTP or a shell command. | param | type | description | ------- | :-------: | -------- | ```health_checker``` | HealthCheckerConfig | Configure what kind of health check to use. -| ```initial_interval_secs``` | Integer | Initial delay for performing a health check. 
(Default: 15) +| ```initial_interval_secs``` | Integer | Initial grace period for performing health checks. (Default: 15) | ```interval_secs``` | Integer | Interval on which to check the task's health. (Default: 10) | ```max_consecutive_failures``` | Integer | Maximum number of consecutive failures that will be tolerated before considering a task unhealthy (Default: 0) +| ```min_consecutive_successes``` | Integer | Minimum number of consecutive successful health checks required before considering a task healthy (Default: 1) | ```timeout_secs``` | Integer | Health check timeout. (Default: 1) ### HealthCheckerConfig Objects http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/client/config.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/client/config.py b/src/main/python/apache/aurora/client/config.py index 0186af5..ce4bffe 100644 --- a/src/main/python/apache/aurora/client/config.py +++ b/src/main/python/apache/aurora/client/config.py @@ -92,15 +92,31 @@ health check interval (%d seconds) plus %d consecutive failures at a check inter ''' +INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT = ''' +You have specified an insufficiently short initial interval period (%d seconds) +in your health check configuration. Your health check will always fail. In order for +the health check to pass, HealthCheckConfig.initial_interval_secs must be greater +than the duration of %d consecutive successful health checks at a check interval +of %d seconds. You can either increase initial_interval_secs, decrease interval_secs +or decrease min_consecutive_successes. +''' + + +INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR = ''' +You have specified an invalid min_consecutive_successes value (%d) in your health check +configuration. Your health check will always succeed. In order for the updater to detect +health check failures, HealthCheckConfig.min_consecutive_successes must be a positive value. 
+''' + + def _validate_update_config(config): job_size = config.instances() update_config = config.update_config() health_check_config = config.health_check_config() max_failures = update_config.max_total_failures().get() - watch_secs = update_config.watch_secs().get() initial_interval_secs = health_check_config.initial_interval_secs().get() - max_consecutive_failures = health_check_config.max_consecutive_failures().get() + min_consecutive_successes = health_check_config.min_consecutive_successes().get() interval_secs = health_check_config.interval_secs().get() if max_failures >= job_size: @@ -111,10 +127,13 @@ def _validate_update_config(config): if max_failures < min_failure_threshold: die(UPDATE_CONFIG_DEDICATED_THRESHOLD_ERROR % (job_size, min_failure_threshold)) - target_watch = initial_interval_secs + (max_consecutive_failures * interval_secs) - if watch_secs <= target_watch: - die(WATCH_SECS_INSUFFICIENT_ERROR_FORMAT % - (watch_secs, target_watch, initial_interval_secs, max_consecutive_failures, interval_secs)) + if min_consecutive_successes <= 0: + die(INVALID_MIN_CONSECUTIVE_SUCCESSES_ERROR % min_consecutive_successes) + + target_initial_interval_secs = interval_secs * min_consecutive_successes + if initial_interval_secs <= target_initial_interval_secs: + die(INITIAL_INTERVAL_SECS_INSUFFICIENT_ERROR_FORMAT % + (initial_interval_secs, min_consecutive_successes, interval_secs)) PRODUCTION_DEPRECATED_WARNING = ( http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/main/python/apache/aurora/config/schema/base.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/config/schema/base.py b/src/main/python/apache/aurora/config/schema/base.py index 8451630..baea660 100644 --- a/src/main/python/apache/aurora/config/schema/base.py +++ b/src/main/python/apache/aurora/config/schema/base.py @@ -60,6 +60,7 @@ class HealthCheckConfig(Struct): initial_interval_secs = Default(Float, 15.0) interval_secs 
= Default(Float, 10.0) max_consecutive_failures = Default(Integer, 0) + min_consecutive_successes = Default(Integer, 1) timeout_secs = Default(Float, 1.0) http://git-wip-us.apache.org/repos/asf/aurora/blob/ed72b1bf/src/test/python/apache/aurora/client/test_config.py ---------------------------------------------------------------------- diff --git a/src/test/python/apache/aurora/client/test_config.py b/src/test/python/apache/aurora/client/test_config.py index 5cf68a5..ff46558 100644 --- a/src/test/python/apache/aurora/client/test_config.py +++ b/src/test/python/apache/aurora/client/test_config.py @@ -24,15 +24,7 @@ from apache.aurora.client.config import get_config as get_aurora_config from apache.aurora.client.config import PRODUCTION_DEPRECATED_WARNING from apache.aurora.config import AuroraConfig from apache.aurora.config.loader import AuroraConfigLoader -from apache.aurora.config.schema.base import ( - MB, - Announcer, - HealthCheckConfig, - Job, - Resources, - Task, - UpdateConfig -) +from apache.aurora.config.schema.base import MB, Announcer, HealthCheckConfig, Job, Resources, Task from apache.thermos.config.schema_base import Process MESOS_CONFIG_BASE = """ @@ -192,21 +184,21 @@ def test_update_config_passes_with_default_values(): config._validate_update_config(AuroraConfig(base_job)) -def test_update_config_passes_with_min_requirement_values(): +def test_health_check_config_fails_insufficient_initital_interval_secs_less_than_target(): base_job = Job( name='hello_world', role='john_doe', cluster='test-cluster', - update_config=UpdateConfig(watch_secs=26), - health_check_config=HealthCheckConfig(max_consecutive_failures=1), + health_check_config=HealthCheckConfig(initial_interval_secs=5), task=Task(name='main', processes=[], resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))) - config._validate_update_config(AuroraConfig(base_job)) + with pytest.raises(SystemExit): + config._validate_update_config(AuroraConfig(base_job)) -def 
test_update_config_fails_insufficient_watch_secs_less_than_target(): +def test_health_check_config_fails_insufficient_initital_interval_secs_equal_to_target(): base_job = Job( name='hello_world', role='john_doe', cluster='test-cluster', - update_config=UpdateConfig(watch_secs=10), + health_check_config=HealthCheckConfig(initial_interval_secs=10), task=Task(name='main', processes=[], resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))) @@ -214,16 +206,15 @@ def test_update_config_fails_insufficient_watch_secs_less_than_target(): config._validate_update_config(AuroraConfig(base_job)) -def test_update_config_fails_insufficient_watch_secs_equal_to_target(): +def test_health_check_config_passes_with_min_requirement_values(): base_job = Job( name='hello_world', role='john_doe', cluster='test-cluster', - update_config=UpdateConfig(watch_secs=25), - health_check_config=HealthCheckConfig(max_consecutive_failures=1), + health_check_config=HealthCheckConfig(initial_interval_secs=21, + min_consecutive_successes=2), task=Task(name='main', processes=[], resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))) - with pytest.raises(SystemExit): - config._validate_update_config(AuroraConfig(base_job)) + config._validate_update_config(AuroraConfig(base_job)) def test_validate_deprecated_config_adds_warning_for_production():
