AMBARI-13463. Auto start should allow selection of components that can be auto-started (smohanty)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4ba6aceb Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4ba6aceb Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4ba6aceb Branch: refs/heads/branch-dev-patch-upgrade Commit: 4ba6aceb270037a6e1c33db3ed394b13e2add05c Parents: 4017f06 Author: Sumit Mohanty <[email protected]> Authored: Sun Oct 18 18:32:44 2015 -0700 Committer: Sumit Mohanty <[email protected]> Committed: Sun Oct 18 18:33:18 2015 -0700 ---------------------------------------------------------------------- .../src/main/python/ambari_agent/ActionQueue.py | 11 +- .../main/python/ambari_agent/RecoveryManager.py | 70 +++++++++-- .../test/python/ambari_agent/TestActionQueue.py | 2 +- .../python/ambari_agent/TestRecoveryManager.py | 123 +++++++++++++++---- .../ambari/server/agent/HeartBeatHandler.java | 2 +- .../ambari/server/agent/RecoveryConfig.java | 26 ++++ .../server/configuration/Configuration.java | 30 ++++- 7 files changed, 221 insertions(+), 43 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/main/python/ambari_agent/ActionQueue.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py index 6a64f99..b82afe8 100644 --- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py +++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py @@ -326,7 +326,8 @@ class ActionQueue(threading.Thread): # let recovery manager know the current state if status == self.COMPLETED_STATUS: - if self.controller.recovery_manager.enabled() and command.has_key('roleCommand'): + if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \ + and self.controller.recovery_manager.configured_for_recovery(command['role']): if command['roleCommand'] == self.ROLE_COMMAND_START: self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS) self.controller.recovery_manager.update_config_staleness(command['role'], False) @@ -441,10 +442,14 @@ class ActionQueue(threading.Thread): if component_status_result['exitcode'] == 0: component_status = LiveStatus.LIVE_STATUS - self.controller.recovery_manager.update_current_status(component, component_status) + if self.controller.recovery_manager.enabled() \ + and self.controller.recovery_manager.configured_for_recovery(component): + self.controller.recovery_manager.update_current_status(component, component_status) else: component_status = LiveStatus.DEAD_STATUS - self.controller.recovery_manager.update_current_status(component, component_status) + if self.controller.recovery_manager.enabled() \ + and self.controller.recovery_manager.configured_for_recovery(component): + self.controller.recovery_manager.update_current_status(component, component_status) request_execution_cmd = self.controller.recovery_manager.requires_recovery(component) if component_status_result.has_key('structuredOut'): http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py index 595c4fc..cab81f5 100644 --- a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py +++ b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py @@ -85,6 +85,8 @@ class RecoveryManager: self.id = int(time.time()) self.allowed_desired_states = [self.STARTED, self.INSTALLED] self.allowed_current_states = [self.INIT, self.INSTALLED] + self.enabled_components = [] + self.disabled_components = [] self.actions = {} self.statuses = {} self.__status_lock = threading.RLock() @@ -93,7 +95,7 @@ class RecoveryManager: self.active_command_count = 0 self.paused = False - self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only) + self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, "", "") pass @@ -186,6 +188,21 @@ class RecoveryManager: self.remove_command(component) pass + """ + Whether specific components are enabled/disabled for recovery. Being enabled takes + precedence over being disabled. When specific components are enabled then only + those components are enabled. When specific components are disabled then all of + the other components are enabled. + """ + def configured_for_recovery(self, component): + if len(self.disabled_components) == 0 and len(self.enabled_components) == 0: + return True + if len(self.disabled_components) > 0 and component not in self.disabled_components \ + and len(self.enabled_components) == 0: + return True + if len(self.enabled_components) > 0 and component in self.enabled_components: + return True + return False def requires_recovery(self, component): """ @@ -197,6 +214,9 @@ class RecoveryManager: if not self.enabled(): return False + if not self.configured_for_recovery(component): + return False + if component not in self.statuses: return False @@ -433,7 +453,9 @@ class RecoveryManager: "type" : "DEFAULT|AUTO_START|FULL", "maxCount" : 10, "windowInMinutes" : 60, - "retryGap" : 0 } + "retryGap" : 0, + "disabledComponents" : "a,b", + "enabledComponents" : "c,d"} """ recovery_enabled = False @@ -442,8 +464,12 @@ class RecoveryManager: window_in_min = 60 retry_gap = 5 max_lifetime_count = 12 + enabled_components = "" + disabled_components = "" + if reg_resp and "recoveryConfig" in reg_resp: + logger.info("RecoverConfig = " + pprint.pformat(reg_resp["recoveryConfig"])) config = reg_resp["recoveryConfig"] if "type" in config: if config["type"] in ["AUTO_START", "FULL"]: @@ -458,11 +484,18 @@ class RecoveryManager: retry_gap = self._read_int_(config["retryGap"], retry_gap) if 'maxLifetimeCount' in config: max_lifetime_count = self._read_int_(config['maxLifetimeCount'], max_lifetime_count) - self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only) + + if 'enabledComponents' in config: + enabled_components = config['enabledComponents'] + if 'disabledComponents' in config: + disabled_components = config['disabledComponents'] + self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only, + enabled_components, disabled_components) pass - def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only): + def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, + auto_start_only, enabled_components, disabled_components): """ Update recovery configuration, recovery is disabled if configuration values are not correct @@ -493,6 +526,8 @@ class RecoveryManager: self.retry_gap_in_sec = retry_gap * 60 self.auto_start_only = auto_start_only self.max_lifetime_count = max_lifetime_count + self.disabled_components = [] + self.enabled_components = [] self.allowed_desired_states = [self.STARTED, self.INSTALLED] self.allowed_current_states = [self.INIT, self.INSTALLED, self.STARTED] @@ -501,11 +536,25 @@ class RecoveryManager: self.allowed_desired_states = [self.STARTED] self.allowed_current_states = [self.INSTALLED] + if enabled_components is not None and len(enabled_components) > 0: + components = enabled_components.split(",") + for component in components: + if len(component.strip()) > 0: + self.enabled_components.append(component.strip()) + + if disabled_components is not None and len(disabled_components) > 0: + components = disabled_components.split(",") + for component in components: + if len(component.strip()) > 0: + self.disabled_components.append(component.strip()) + self.recovery_enabled = recovery_enabled if self.recovery_enabled: logger.info( - "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and lifetime max being %s.", - self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count) + "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and" + " lifetime max being %s. Enabled components - %s and Disabled components - %s", + self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count, + ', '.join(self.enabled_components), ', '.join(self.disabled_components)) pass @@ -536,16 +585,19 @@ class RecoveryManager: for command in commands: if self.COMMAND_TYPE in command and command[self.COMMAND_TYPE] == ActionQueue.EXECUTION_COMMAND: if self.ROLE in command: - if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP): + if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP) \ + and self.configured_for_recovery(command[self.ROLE]): self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS) logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of " + command[self.ROLE] + " to " + self.get_desired_status(command[self.ROLE]) ) - elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START: + elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START \ + and self.configured_for_recovery(command[self.ROLE]): self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS) logger.info("Received EXECUTION_COMMAND (START), desired state of " + command[self.ROLE] + " to " + self.get_desired_status(command[self.ROLE]) ) elif command[self.HOST_LEVEL_PARAMS].has_key('custom_command') and \ - command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART: + command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART \ + and self.configured_for_recovery(command[self.ROLE]): self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS) logger.info("Received EXECUTION_COMMAND (RESTART), desired state of " + command[self.ROLE] + " to " + self.get_desired_status(command[self.ROLE]) ) http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py index f5ea107..a583131 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py +++ b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py @@ -341,7 +341,7 @@ class TestActionQueue(TestCase): config.set('agent', 'tolerate_download_failures', "true") dummy_controller = MagicMock() dummy_controller.recovery_manager = RecoveryManager() - dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False) + dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "", "") actionQueue = ActionQueue(config, dummy_controller) unfreeze_flag = threading.Event() http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py index 1669a2c..e6115e3 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py +++ b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py @@ -173,32 +173,32 @@ class TestRecoveryManager(TestCase): rm = RecoveryManager(True, False) self.assertTrue(rm.enabled()) - rm.update_config(0, 60, 5, 12, True, False) + rm.update_config(0, 60, 5, 12, True, False, "", "") self.assertFalse(rm.enabled()) - rm.update_config(6, 60, 5, 12, True, False) + rm.update_config(6, 60, 5, 12, True, False, "", "") self.assertTrue(rm.enabled()) - rm.update_config(6, 0, 5, 12, True, False) + rm.update_config(6, 0, 5, 12, True, False, "", "") self.assertFalse(rm.enabled()) - rm.update_config(6, 60, 0, 12, True, False) + rm.update_config(6, 60, 0, 12, True, False, "", "") self.assertFalse(rm.enabled()) - rm.update_config(6, 60, 1, 12, True, False) + rm.update_config(6, 60, 1, 12, True, False, None, None) self.assertTrue(rm.enabled()) - rm.update_config(6, 60, 61, 12, True, False) + rm.update_config(6, 60, 61, 12, True, False, "", None) self.assertFalse(rm.enabled()) - rm.update_config(6, 60, 5, 0, True, False) + rm.update_config(6, 60, 5, 0, True, False, None, "") self.assertFalse(rm.enabled()) - rm.update_config(6, 60, 5, 4, True, False) + rm.update_config(6, 60, 5, 4, True, False, "", "") self.assertFalse(rm.enabled()) # maximum 2 in 2 minutes and at least 1 minute wait - rm.update_config(2, 5, 1, 4, True, False) + rm.update_config(2, 5, 1, 4, True, False, "", "") self.assertTrue(rm.enabled()) # T = 1000-2 @@ -224,7 +224,7 @@ class TestRecoveryManager(TestCase): self.assertFalse(rm.may_execute("NODEMANAGER")) # too soon # maximum 2 in 2 minutes and no min wait - rm.update_config(2, 5, 1, 5, True, True) + rm.update_config(2, 5, 1, 5, True, True, "", "") # T = 1500-3 self.assertTrue(rm.execute("NODEMANAGER2")) @@ -286,6 +286,55 @@ class TestRecoveryManager(TestCase): rm.update_current_status("NODEMANAGER", "INSTALLED") rm.update_desired_status("NODEMANAGER", "START") self.assertFalse(rm.requires_recovery("NODEMANAGER")) + + pass + + def test_recovery_required2(self): + + rm = RecoveryManager(True, True) + rm.update_config(15, 5, 1, 16, True, False, "", "") + rm.update_current_status("NODEMANAGER", "INSTALLED") + rm.update_desired_status("NODEMANAGER", "STARTED") + self.assertTrue(rm.requires_recovery("NODEMANAGER")) + + rm = RecoveryManager(True, True) + rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "") + rm.update_current_status("NODEMANAGER", "INSTALLED") + rm.update_desired_status("NODEMANAGER", "STARTED") + self.assertTrue(rm.requires_recovery("NODEMANAGER")) + + rm.update_current_status("DATANODE", "INSTALLED") + rm.update_desired_status("DATANODE", "STARTED") + self.assertFalse(rm.requires_recovery("DATANODE")) + + rm = RecoveryManager(True, True) + rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER") + rm.update_current_status("NODEMANAGER", "INSTALLED") + rm.update_desired_status("NODEMANAGER", "STARTED") + self.assertFalse(rm.requires_recovery("NODEMANAGER")) + + rm.update_current_status("DATANODE", "INSTALLED") + rm.update_desired_status("DATANODE", "STARTED") + self.assertTrue(rm.requires_recovery("DATANODE")) + + rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER") + rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "") + rm.update_current_status("NODEMANAGER", "INSTALLED") + rm.update_desired_status("NODEMANAGER", "STARTED") + self.assertTrue(rm.requires_recovery("NODEMANAGER")) + + rm.update_current_status("DATANODE", "INSTALLED") + rm.update_desired_status("DATANODE", "STARTED") + self.assertFalse(rm.requires_recovery("DATANODE")) + + rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "NODEMANAGER") + rm.update_current_status("NODEMANAGER", "INSTALLED") + rm.update_desired_status("NODEMANAGER", "STARTED") + self.assertTrue(rm.requires_recovery("NODEMANAGER")) + + rm.update_current_status("DATANODE", "INSTALLED") + rm.update_desired_status("DATANODE", "STARTED") + self.assertFalse(rm.requires_recovery("DATANODE")) pass @patch('time.time', MagicMock(side_effects=[1])) @@ -343,7 +392,7 @@ class TestRecoveryManager(TestCase): 4200, 4201, 4202, 4300, 4301, 4302] rm = RecoveryManager(True) - rm.update_config(15, 5, 1, 16, True, False) + rm.update_config(15, 5, 1, 16, True, False, "", "") command1 = copy.deepcopy(self.command) @@ -374,14 +423,14 @@ class TestRecoveryManager(TestCase): self.assertEqual(1, len(commands)) self.assertEqual("INSTALL", commands[0]["roleCommand"]) - rm.update_config(2, 5, 1, 5, True, True) + rm.update_config(2, 5, 1, 5, True, True, "", "") rm.update_current_status("NODEMANAGER", "INIT") rm.update_desired_status("NODEMANAGER", "INSTALLED") commands = rm.get_recovery_commands() self.assertEqual(0, len(commands)) - rm.update_config(12, 5, 1, 15, True, False) + rm.update_config(12, 5, 1, 15, True, False, "", "") rm.update_current_status("NODEMANAGER", "INIT") rm.update_desired_status("NODEMANAGER", "INSTALLED") @@ -422,25 +471,25 @@ class TestRecoveryManager(TestCase): def test_update_rm_config(self, mock_uc): rm = RecoveryManager() rm.update_configuration_from_registration(None) - mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)]) + mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")]) mock_uc.reset_mock() rm.update_configuration_from_registration({}) - mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)]) + mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")]) mock_uc.reset_mock() rm.update_configuration_from_registration( {"recoveryConfig": { "type" : "DEFAULT"}} ) - mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)]) + mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")]) mock_uc.reset_mock() rm.update_configuration_from_registration( {"recoveryConfig": { "type" : "FULL"}} ) - mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False)]) + mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False, "", "")]) mock_uc.reset_mock() rm.update_configuration_from_registration( @@ -448,7 +497,7 @@ class TestRecoveryManager(TestCase): "type" : "AUTO_START", "max_count" : "med"}} ) - mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True)]) + mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True, "", "")]) mock_uc.reset_mock() rm.update_configuration_from_registration( @@ -457,9 +506,11 @@ class TestRecoveryManager(TestCase): "maxCount" : "5", "windowInMinutes" : 20, "retryGap" : 2, - "maxLifetimeCount" : 5}} + "maxLifetimeCount" : 5, + "enabledComponents" : " A,B", + "disabledComponents" : "C"}} ) - mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True)]) + mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True, " A,B", "C")]) pass @patch.object(RecoveryManager, "_now_") @@ -471,7 +522,7 @@ class TestRecoveryManager(TestCase): rec_st = rm.get_recovery_status() self.assertEquals(rec_st, {"summary": "DISABLED"}) - rm.update_config(2, 5, 1, 4, True, True) + rm.update_config(2, 5, 1, 4, True, True, "", "") rec_st = rm.get_recovery_status() self.assertEquals(rec_st, {"summary": "RECOVERABLE", "componentReports": []}) @@ -515,7 +566,7 @@ class TestRecoveryManager(TestCase): [1000, 1001, 1002, 1003, 1104, 1105, 1106, 1807, 1808, 1809, 1810, 1811, 1812] rm = RecoveryManager(True) - rm.update_config(5, 5, 1, 11, True, False) + rm.update_config(5, 5, 1, 11, True, False, "", "") command1 = copy.deepcopy(self.command) @@ -552,4 +603,30 @@ class TestRecoveryManager(TestCase): rm.stop_execution_command() self.assertTrue(rm.has_active_command()) rm.stop_execution_command() - self.assertFalse(rm.has_active_command()) \ No newline at end of file + self.assertFalse(rm.has_active_command()) + + def test_configured_for_recovery(self): + rm = RecoveryManager(True) + self.assertTrue(rm.configured_for_recovery("A")) + self.assertTrue(rm.configured_for_recovery("B")) + + rm.update_config(5, 5, 1, 11, True, False, "", "") + self.assertTrue(rm.configured_for_recovery("A")) + self.assertTrue(rm.configured_for_recovery("B")) + + rm.update_config(5, 5, 1, 11, True, False, "A", "") + self.assertTrue(rm.configured_for_recovery("A")) + self.assertFalse(rm.configured_for_recovery("B")) + + rm.update_config(5, 5, 1, 11, True, False, "", "B,C") + self.assertTrue(rm.configured_for_recovery("A")) + self.assertFalse(rm.configured_for_recovery("B")) + self.assertFalse(rm.configured_for_recovery("C")) + + rm.update_config(5, 5, 1, 11, True, False, "A, D, F ", "B,C") + self.assertTrue(rm.configured_for_recovery("A")) + self.assertFalse(rm.configured_for_recovery("B")) + self.assertFalse(rm.configured_for_recovery("C")) + self.assertTrue(rm.configured_for_recovery("D")) + self.assertFalse(rm.configured_for_recovery("E")) + self.assertTrue(rm.configured_for_recovery("F")) http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java index 29364e9..01b6e2c 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java @@ -965,7 +965,7 @@ public class HeartBeatHandler { } response.setRecoveryConfig(RecoveryConfig.getRecoveryConfig(config)); if(response.getRecoveryConfig() != null) { - LOG.debug("Recovery configuration set to " + response.getRecoveryConfig().toString()); + LOG.info("Recovery configuration set to " + response.getRecoveryConfig().toString()); } Long requestId = 0L; http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java index 9ebfb49..3f558eb 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java @@ -48,6 +48,28 @@ public class RecoveryConfig { @SerializedName("maxLifetimeCount") private String maxLifetimeCount; + @SerializedName("enabledComponents") + private String enabledComponents; + + @SerializedName("disabledComponents") + private String disabledComponents; + + + public String getDisabledComponents() { + return disabledComponents; + } + + public void setDisabledComponents(String disabledComponents) { + this.disabledComponents = disabledComponents; + } + + public String getEnabledComponents() { + return enabledComponents; + } + + public void setEnabledComponents(String enabledComponents) { + this.enabledComponents = enabledComponents; + } public String getType() { return type; @@ -96,6 +118,8 @@ public class RecoveryConfig { rc.setRetryGap(conf.getNodeRecoveryRetryGap()); rc.setType(conf.getNodeRecoveryType()); rc.setWindowInMinutes(conf.getNodeRecoveryWindowInMin()); + rc.setDisabledComponents(conf.getDisabledComponents()); + rc.setEnabledComponents(conf.getEnabledComponents()); return rc; } @@ -107,6 +131,8 @@ public class RecoveryConfig { buffer.append(", windowInMinutes=").append(windowInMinutes); buffer.append(", retryGap=").append(retryGap); buffer.append(", maxLifetimeCount=").append(maxLifetimeCount); + buffer.append(", disabledComponents=").append(disabledComponents); + buffer.append(", enabledComponents=").append(enabledComponents); buffer.append('}'); return buffer.toString(); } http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java index 23f9803fe..702e12d 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java @@ -300,6 +300,8 @@ public class Configuration { public static final String RECOVERY_WINDOW_IN_MIN_DEFAULT = "60"; public static final String RECOVERY_RETRY_GAP_KEY = "recovery.retry_interval"; public static final String RECOVERY_RETRY_GAP_DEFAULT = "5"; + public static final String RECOVERY_DISABLED_COMPONENTS_KEY = "recovery.disabled_components"; + public static final String RECOVERY_ENABLED_COMPONENTS_KEY = "recovery.enabled_components"; /** * Allow proxy calls to these hosts and ports only @@ -1672,17 +1674,17 @@ public class Configuration { public Integer getRequestReadTimeout() { return Integer.parseInt(properties.getProperty(REQUEST_READ_TIMEOUT, - REQUEST_READ_TIMEOUT_DEFAULT)); + REQUEST_READ_TIMEOUT_DEFAULT)); } public Integer getRequestConnectTimeout() { return Integer.parseInt(properties.getProperty(REQUEST_CONNECT_TIMEOUT, - REQUEST_CONNECT_TIMEOUT_DEFAULT)); + REQUEST_CONNECT_TIMEOUT_DEFAULT)); } public String getExecutionSchedulerConnections() { return properties.getProperty(EXECUTION_SCHEDULER_CONNECTIONS, - DEFAULT_SCHEDULER_MAX_CONNECTIONS); + DEFAULT_SCHEDULER_MAX_CONNECTIONS); } public Long getExecutionSchedulerMisfireToleration() { @@ -1708,7 +1710,7 @@ public class Configuration { public String getCustomActionDefinitionPath() { return properties.getProperty(CUSTOM_ACTION_DEFINITION_KEY, - CUSTOM_ACTION_DEFINITION_DEF_VALUE); + CUSTOM_ACTION_DEFINITION_DEF_VALUE); } public int getAgentPackageParallelCommandsLimit() { @@ -1757,7 +1759,7 @@ public class Configuration { */ public int getClientThreadPoolSize() { return Integer.parseInt(properties.getProperty( - CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT))); + CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT))); } /** @@ -1795,7 +1797,7 @@ public class Configuration { */ public long getViewExtractionThreadPoolTimeout() { return Long.parseLong(properties.getProperty( - VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT))); + VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT))); } /** @@ -1865,6 +1867,22 @@ public class Configuration { } /** + * Get the components for which recovery is disabled + * @return + */ + public String getDisabledComponents() { + return properties.getProperty(RECOVERY_DISABLED_COMPONENTS_KEY, ""); + } + + /** + * Get the components for which recovery is enabled + * @return + */ + public String getEnabledComponents() { + return properties.getProperty(RECOVERY_ENABLED_COMPONENTS_KEY, ""); + } + + /** * Get the configured retry gap between tries per host component * @return */
