Repository: ambari Updated Branches: refs/heads/branch-2.4 a4ad965c2 -> 93c3c77a2 refs/heads/trunk 3a671d884 -> f80107a7a
AMBARI-16876. Need ability to ignoreBadMounts on a large cluster with bad disks vs workaround of tens of Config Groups (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/f80107a7 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/f80107a7 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/f80107a7 Branch: refs/heads/trunk Commit: f80107a7a604d85bcab9a7d99bfff24b938cdfee Parents: 3a671d8 Author: Andrew Onishuk <[email protected]> Authored: Thu May 26 14:22:38 2016 +0300 Committer: Andrew Onishuk <[email protected]> Committed: Thu May 26 14:22:38 2016 +0300 ---------------------------------------------------------------------- .../resource_management/TestDatanodeHelper.py | 8 +++- .../libraries/functions/mounted_dirs_helper.py | 40 +++++++++++++++----- .../server/upgrade/UpgradeCatalog240.java | 3 ++ .../HDP/2.0.6/configuration/cluster-env.xml | 18 +++++++++ .../python/stacks/2.0.6/configs/default.json | 4 +- .../python/stacks/2.0.6/configs/secured.json | 4 +- 6 files changed, 63 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py b/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py index 4dfada3..9ff2f2d 100644 --- a/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py +++ b/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py @@ -23,6 +23,7 @@ from mock.mock import Mock, MagicMock, patch from resource_management.libraries.functions import mounted_dirs_helper from resource_management.core.logger import Logger from resource_management import Directory +from 
resource_management.libraries.script.script import Script class StubParams(object): @@ -49,6 +50,7 @@ def fake_create_dir(directory): print "Fake function to create directory {0}".format(directory) [email protected](Script, "get_config", new=MagicMock(return_value={'configurations':{'cluster-env': {'ignore_bad_mounts': False}}})) class TestDatanodeHelper(TestCase): """ Test the functionality of the dfs_datanode_helper.py @@ -94,13 +96,14 @@ class TestDatanodeHelper(TestCase): @patch("resource_management.libraries.functions.mounted_dirs_helper.Directory") @patch.object(Logger, "info") + @patch.object(Logger, "warning") @patch.object(Logger, "error") @patch.object(mounted_dirs_helper, "get_dir_to_mount_from_file") @patch.object(mounted_dirs_helper, "get_mount_point_for_dir") @patch.object(os.path, "isdir") @patch.object(os.path, "exists") def test_grid_becomes_unmounted(self, mock_os_exists, mock_os_isdir, mock_get_mount_point, - mock_get_data_dir_to_mount_from_file, log_error, log_info, dir_mock): + mock_get_data_dir_to_mount_from_file, log_error, log_warning, log_info, dir_mock): """ Test when grid2 becomes unmounted """ @@ -127,13 +130,14 @@ class TestDatanodeHelper(TestCase): @patch("resource_management.libraries.functions.mounted_dirs_helper.Directory") @patch.object(Logger, "info") + @patch.object(Logger, "warning") @patch.object(Logger, "error") @patch.object(mounted_dirs_helper, "get_dir_to_mount_from_file") @patch.object(mounted_dirs_helper, "get_mount_point_for_dir") @patch.object(os.path, "isdir") @patch.object(os.path, "exists") def test_grid_becomes_remounted(self, mock_os_exists, mock_os_isdir, mock_get_mount_point, - mock_get_data_dir_to_mount_from_file, log_error, log_info, dir_mock): + mock_get_data_dir_to_mount_from_file, log_error, log_warning, log_info, dir_mock): """ Test when grid2 becomes remounted """ 
http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-common/src/main/python/resource_management/libraries/functions/mounted_dirs_helper.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/mounted_dirs_helper.py b/ambari-common/src/main/python/resource_management/libraries/functions/mounted_dirs_helper.py index dc7a5cb..9574ce5 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/mounted_dirs_helper.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/mounted_dirs_helper.py @@ -26,6 +26,8 @@ import re from resource_management.libraries.functions.file_system import get_mount_point_for_dir, get_and_cache_mount_points from resource_management.core.logger import Logger from resource_management.core.resources.system import Directory +from resource_management.core.exceptions import Fail +from resource_management.libraries.script.script import Script DIR_TO_MOUNT_HEADER = """ # This file keeps track of the last known mount-point for each dir. 
@@ -110,6 +112,7 @@ def handle_mounted_dirs(func, dirs_string, history_filename, update_cache=True): valid_dirs = [] # dirs that have been normalized error_messages = [] # list of error messages to report at the end dirs_unmounted = set() # set of dirs that have become unmounted + valid_existing_dirs = [] dirs_string = ",".join([re.sub(r'^\[.+\]', '', dfs_dir.strip()) for dfs_dir in dirs_string.split(",")]) for dir in dirs_string.split(","): @@ -118,10 +121,22 @@ def handle_mounted_dirs(func, dirs_string, history_filename, update_cache=True): dir = dir.strip() valid_dirs.append(dir) + + if os.path.isdir(dir): + valid_existing_dirs.append(dir) - if not os.path.isdir(dir): + used_mounts = set([get_mount_point_for_dir(dir) for dir in valid_existing_dirs]) + + for dir in valid_dirs: + if not dir in valid_existing_dirs: may_create_this_dir = allowed_to_create_any_dir last_mount_point_for_dir = None + + curr_mount_point = get_mount_point_for_dir(dir) + + # This means that create_this_dir will stay false if the directory became unmounted. + # In other words, allow creating if it was already on /, or it's currently not on / + is_non_root_dir = (curr_mount_point is not None and curr_mount_point != "/") # Determine if should be allowed to create the dir directory. # Either first time, became unmounted, or was just mounted on a drive @@ -129,16 +144,21 @@ def handle_mounted_dirs(func, dirs_string, history_filename, update_cache=True): last_mount_point_for_dir = prev_dir_to_mount_point[dir] if dir in prev_dir_to_mount_point else None if last_mount_point_for_dir is None: - # Couldn't retrieve any information about where this dir used to be mounted, so allow creating the directory to be safe. 
- may_create_this_dir = True + may_create_this_dir = (is_non_root_dir or Script.get_config()['configurations']['cluster-env']['create_dirs_on_root']) else: - curr_mount_point = get_mount_point_for_dir(dir) - - # This means that create_this_dir will stay false if the directory became unmounted. - # In other words, allow creating if it was already on /, or it's currently not on / - if last_mount_point_for_dir == "/" or (curr_mount_point is not None and curr_mount_point != "/"): - may_create_this_dir = True - + may_create_this_dir = (last_mount_point_for_dir == "/" or is_non_root_dir) + + if may_create_this_dir and Script.get_config()['configurations']['cluster-env']['ignore_bad_mounts']: + Logger.warning("Not creating {0} as cluster-env/ignore_bad_mounts is enabled.".format(dir)) + may_create_this_dir = False + + if may_create_this_dir and curr_mount_point in used_mounts: + message = "Trying to create another directory on the following mount: " + curr_mount_point + if Script.get_config()['configurations']['cluster-env']['one_dir_per_partition']: + raise Fail(message + " . 
Please turn off cluster-env/one_dir_per_partition or handle the situation manually.") + else: + Logger.warning(message) + if may_create_this_dir: Logger.info("Forcefully creating directory: {0}".format(dir)) http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog240.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog240.java b/ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog240.java index f4f614e..870fd15 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog240.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog240.java @@ -144,6 +144,7 @@ public class UpgradeCatalog240 extends AbstractUpgradeCatalog { private static final String HIVE_ENV_CONFIG = "hive-env"; private static final String AMS_SITE = "ams-site"; public static final String TIMELINE_METRICS_SINK_COLLECTION_PERIOD = "timeline.metrics.sink.collection.period"; + public static final String ONE_DIR_PER_PARITION_PROPERTY = "one_dir_per_partition"; public static final String VIEWURL_TABLE = "viewurl"; public static final String URL_ID_COLUMN = "url_id"; private static final String PRINCIPAL_TYPE_TABLE = "adminprincipaltype"; @@ -1744,6 +1745,7 @@ public class UpgradeCatalog240 extends AbstractUpgradeCatalog { * Updates {@code cluster-env} in the following ways: * <ul> * <li>Adds {@link ConfigHelper#CLUSTER_ENV_ALERT_REPEAT_TOLERANCE} = 1</li> + * <li>Adds {@link UpgradeCatalog240#ONE_DIR_PER_PARITION_PROPERTY} = false</li> * </ul> * * @throws Exception @@ -1751,6 +1753,7 @@ public class UpgradeCatalog240 extends AbstractUpgradeCatalog { protected void updateClusterEnv() throws AmbariException { Map<String, String> propertyMap = new HashMap<>(); propertyMap.put(ConfigHelper.CLUSTER_ENV_ALERT_REPEAT_TOLERANCE, "1"); + 
propertyMap.put(ONE_DIR_PER_PARITION_PROPERTY, "false"); AmbariManagementController ambariManagementController = injector.getInstance( AmbariManagementController.class); http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-server/src/main/resources/stacks/HDP/2.0.6/configuration/cluster-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/configuration/cluster-env.xml b/ambari-server/src/main/resources/stacks/HDP/2.0.6/configuration/cluster-env.xml index 4f70b5a..31556cb 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/configuration/cluster-env.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/configuration/cluster-env.xml @@ -186,4 +186,22 @@ gpgcheck=0</value> <description>The number of consecutive alerts required to transition an alert from the SOFT to the HARD state.</description> </property> + <property> + <name>ignore_bad_mounts</name> + <value>false</value> + <description>For properties handled by handle_mounted_dirs this will make Ambari not create any directories.</description> + </property> + + <property> + <name>create_dirs_on_root</name> + <value>true</value> + <description>For properties handled by handle_mounted_dirs this will make Ambari create non-existent directories with no known previous mount point on the / partition.</description> + </property> + + <property> + <name>one_dir_per_partition</name> + <value>true</value> + <description>For properties handled by handle_mounted_dirs this will make Ambari fail instead of creating a directory on a mount point that is already used by another of the configured directories.</description> + </property> + + </configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-server/src/test/python/stacks/2.0.6/configs/default.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json index f0e5208..8d04e36 100644 --- 
a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json +++ b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json @@ -596,7 +596,9 @@ "metrics_collector_vip_host": "c6402.ambari.apache.org", "metrics_collector_vip_port": "6189", "override_uid" : "true", - "fetch_nonlocal_groups": "true" + "fetch_nonlocal_groups": "true", + "create_dirs_on_root": "true", + "ignore_bad_mounts": "false" }, "hbase-env": { "hbase_pid_dir": "/var/run/hbase", http://git-wip-us.apache.org/repos/asf/ambari/blob/f80107a7/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json index 38f569f..2957c6f 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json +++ b/ambari-server/src/test/python/stacks/2.0.6/configs/secured.json @@ -609,7 +609,9 @@ "kerberos_domain": "EXAMPLE.COM", "user_group": "hadoop", "smokeuser_keytab": "/etc/security/keytabs/smokeuser.headless.keytab", - "kinit_path_local": "/usr/bin" + "kinit_path_local": "/usr/bin", + "create_dirs_on_root": "true", + "ignore_bad_mounts": "false" }, "hadoop-env": { "hdfs_tmp_dir": "/tmp",
