Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 482631e7b -> 8329d7341

AMBARI-17720: HAWQ fails to execute queries in low-memory environment (mithmatt)

Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/8329d734
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/8329d734
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/8329d734

Branch: refs/heads/branch-2.4
Commit: 8329d73416539539d0918d9b4a06387acd7da6bb
Parents: 482631e
Author: Matt <[email protected]>
Authored: Wed Jul 20 11:04:37 2016 -0700
Committer: Matt <[email protected]>
Committed: Wed Jul 20 11:04:37 2016 -0700

----------------------------------------------------------------------
 .../HAWQ/2.0.0/configuration/hawq-site.xml      | 12 +++
 .../HAWQ/2.0.0/service_advisor.py               | 26 +++++-
 .../PXF/3.0.0/service_advisor.py                |  2 +-
 .../HAWQ/test_service_advisor.py                | 98 ++++++++++++++++++--
 .../common-services/PXF/test_service_advisor.py |  4 +-
 5 files changed, 128 insertions(+), 14 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
index 0bddb07..9baaa1c 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
@@ -310,6 +310,12 @@
 The default set during installation is the number of HAWQ Segments multiplied by 6. Adjust the value when the number of HAWQ Segments changes in the cluster. If parameter value is changed, existing Hash distributed tables must be redistributed.
</description> + <depends-on> + <property> + <type>hawq-site</type> + <name>hawq_rm_memory_limit_perseg</name> + </property> + </depends-on> <value-attributes> <type>int</type> <minimum>1</minimum> @@ -327,6 +333,12 @@ This parameter is used to limit the maximum resource usage on each segment node. The default is 6. Not recommended to change this parameter. </description> + <depends-on> + <property> + <type>hawq-site</type> + <name>hawq_rm_memory_limit_perseg</name> + </property> + </depends-on> <value-attributes> <type>int</type> <minimum>1</minimum> http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py index dffe57d..9f85518 100644 --- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py +++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py @@ -254,6 +254,21 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor): # hawq_rm_memory_limit_perseg does not support decimal value so trim decimal using int putHawqSiteProperty("hawq_rm_memory_limit_perseg", "{0}{1}".format(int(recommended_mem), unit)) + # Set default hawq_rm_nvseg_perquery_perseg_limit to 6, only if value was less than 6 + if "hawq_rm_nvseg_perquery_perseg_limit" in hawq_site and int(hawq_site["hawq_rm_nvseg_perquery_perseg_limit"]) < 6: + putHawqSiteProperty('hawq_rm_nvseg_perquery_perseg_limit', 6) + + if "hawq_global_rm_type" in hawq_site and "hawq_rm_memory_limit_perseg" in hawq_site: + hawq_rm_memory_limit_perseg = hawq_site["hawq_rm_memory_limit_perseg"].strip() + unit = hawq_rm_memory_limit_perseg[-2:] + value = hawq_rm_memory_limit_perseg[:-2] + # For clusters running with hawq_rm_memory_limit_perseg greater 
than or equal to 1GB but less than 2GB + if (unit == "GB" and 1 <= int(value) < 2) or (unit == "MB" and 1024 <= int(value) < 2048): + factor = 4 # Since memory is less drop hawq_rm_nvseg_perquery_perseg_limit to 4 + buckets = min(factor * numSegments, int(hawq_site["default_hash_table_bucket_number"])) if "default_hash_table_bucket_number" in hawq_site else factor * numSegments + putHawqSiteProperty('default_hash_table_bucket_number', buckets) + putHawqSiteProperty('hawq_rm_nvseg_perquery_perseg_limit', factor) + # Show / Hide properties based on the value of hawq_global_rm_type YARN_MODE = True if hawq_site["hawq_global_rm_type"].lower() == "yarn" else False yarn_mode_properties_visibility = { @@ -279,7 +294,7 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor): def getHAWQYARNPropertyMapping(self): return { "hawq_rm_yarn_address": "yarn.resourcemanager.address", "hawq_rm_yarn_scheduler_address": "yarn.resourcemanager.scheduler.address" } - def getConfigurationsValidationItems(self, configurations, recommendedDefaults, services, hosts): + def getServiceConfigurationsValidationItems(self, configurations, recommendedDefaults, services, hosts): siteName = "hawq-site" method = self.validateHAWQSiteConfigurations items = self.validateConfigurationsForSite(configurations, recommendedDefaults, services, hosts, siteName, method) @@ -368,6 +383,15 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor): message = "Default buckets for Hash Distributed tables parameter value should not be greater than the value of Virtual Segments Limit per Query (Total) parameter, currently set to {0}.".format(hawq_site["hawq_rm_nvseg_perquery_limit"]) validationItems.append({"config-name": "default_hash_table_bucket_number", "item": self.getErrorItem(message)}) + if "hawq_global_rm_type" in hawq_site and "hawq_rm_memory_limit_perseg" in hawq_site: + hawq_rm_memory_limit_perseg = hawq_site["hawq_rm_memory_limit_perseg"] + unit = hawq_rm_memory_limit_perseg[-2:] + value = 
hawq_rm_memory_limit_perseg[:-2] + # For clusters running with hawq_rm_memory_limit_perseg less than 1GB + if (unit == "GB" and int(value) < 1) or (unit == "MB" and int(value) < 1024): + message = "HAWQ Segment Memory less than 1GB is not sufficient" + validationItems.append({"config-name": "hawq_global_rm_type", "item": self.getErrorItem(message)}) + return self.toConfigurationValidationProblems(validationItems, "hawq-site") def validateHAWQHdfsClientConfigurations(self, properties, recommendedDefaults, configurations, services, hosts): http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py b/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py index 4089b75..d2a80a9 100644 --- a/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py +++ b/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py @@ -86,7 +86,7 @@ class PXF300ServiceAdvisor(service_advisor.ServiceAdvisor): return self.toConfigurationValidationProblems(validationItems, "hbase-env") - def getConfigurationsValidationItems(self, configurations, recommendedDefaults, services, hosts): + def getServiceConfigurationsValidationItems(self, configurations, recommendedDefaults, services, hosts): siteName = "hbase-env" method = self.validatePXFHBaseEnvConfigurations items = self.validateConfigurationsForSite(configurations, recommendedDefaults, services, hosts, siteName, method) http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py 
b/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py index f8a9468..b3edae4 100644 --- a/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py +++ b/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py @@ -180,7 +180,8 @@ class TestHAWQ200ServiceAdvisor(TestCase): "properties": { "hawq_rm_memory_limit_perseg": "65535MB", "hawq_rm_nvcore_limit_perseg": "16", - "hawq_global_rm_type": "yarn" + "hawq_global_rm_type": "yarn", + "default_hash_table_bucket_number": 18 } }, "hdfs-site": { @@ -356,6 +357,44 @@ class TestHAWQ200ServiceAdvisor(TestCase): self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, None, services, hosts) self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"], "730GB") + ## Test if default_hash_table_bucket_number and hawq_rm_nvseg_perquery_perseg_limit are set correctly based on low hawq_rm_memory_limit_perseg + + # Case 1: When hawq_rm_memory_limit_perseg is between 1GB and 2GB + # Set hawq_rm_nvseg_perquery_perseg_limit to 4 and default_hash_table_bucket_number as hawq_rm_nvseg_perquery_perseg_limit * numSegments + hosts["items"][0]["Hosts"]["total_mem"] = 2097152 + hosts["items"][1]["Hosts"]["total_mem"] = 2097152 + hosts["items"][3]["Hosts"]["total_mem"] = 2097152 + services["configurations"]["hawq-site"]["properties"]["hawq_global_rm_type"] = "none" + services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"] = "512" + self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, None, services, hosts) + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"], "1152MB") + self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"], "8") + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"], "4") + + # Case 2: When hawq_rm_memory_limit_perseg > 2GB + # Set 
hawq_rm_nvseg_perquery_perseg_limit to 6 and default_hash_table_bucket_number as hawq_rm_nvseg_perquery_perseg_limit * numSegments + hosts["items"][0]["Hosts"]["total_mem"] = 1073741824 + hosts["items"][1]["Hosts"]["total_mem"] = 2073741824 + hosts["items"][3]["Hosts"]["total_mem"] = 3073741824 + services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"] = "512" + services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"] = "4" + self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, None, services, hosts) + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"], "730GB") + self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"], "12") + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"], "6") + + # Case 3: When hawq_rm_memory_limit_perseg > 2GB + # Set hawq_rm_nvseg_perquery_perseg_limit to 8 and default_hash_table_bucket_number as hawq_rm_nvseg_perquery_perseg_limit * numSegments + hosts["items"][0]["Hosts"]["total_mem"] = 1073741824 + hosts["items"][1]["Hosts"]["total_mem"] = 2073741824 + hosts["items"][3]["Hosts"]["total_mem"] = 3073741824 + services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"] = "512" + services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"] = "8" + self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, None, services, hosts) + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"], "730GB") + self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"], "12") + self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"], "8") + ## Test if the properties are set to visible / invisible based on the value of hawq_global_rm_type # Case 1: When 
hawq_global_rm_type is yarn @@ -399,6 +438,7 @@ class TestHAWQ200ServiceAdvisor(TestCase): self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, None, services, hosts) self.assertEqual(configurations["hawq-sysctl-env"]["property_attributes"]["vm.overcommit_ratio"]["visible"], "true") + def test_createComponentLayoutRecommendations_hawq_3_Hosts(self): """ Test that HAWQSTANDBY is recommended on a 3-node cluster """ @@ -790,11 +830,16 @@ class TestHAWQ200ServiceAdvisor(TestCase): hawqSegmentComponent = self.getHosts(componentsList, "HAWQSEGMENT") # setup default configuration values - services["configurations"]["hawq-site"] = {"properties": {"default_hash_table_bucket_number": "24", - "hawq_rm_nvseg_perquery_limit": "512", - "hawq_rm_yarn_address": "localhost:8032", - "hawq_rm_yarn_scheduler_address": "localhost:8030", - "hawq_global_rm_type": "none"}} + services["configurations"]["hawq-site"] = { + "properties": { + "default_hash_table_bucket_number": "24", + "hawq_rm_nvseg_perquery_limit": "512", + "hawq_rm_yarn_address": "localhost:8032", + "hawq_rm_yarn_scheduler_address": "localhost:8030", + "hawq_global_rm_type": "none", + "hawq_rm_nvseg_perquery_perseg_limit": "6" + } + } services["configurations"]["hdfs-client"] = {"properties": {"output.replace-datanode-on-failure": "true"}} services["configurations"]["hawq-sysctl-env"] = {"properties": {}} @@ -1009,8 +1054,13 @@ class TestHAWQ200ServiceAdvisor(TestCase): } # setup default configuration values configurations = services["configurations"] - configurations["hawq-site"] = {"properties": {"default_hash_table_bucket_number": "600", - "hawq_rm_nvseg_perquery_limit": "500"}} + configurations["hawq-site"] = { + "properties": { + "default_hash_table_bucket_number": "600", + "hawq_rm_nvseg_perquery_limit": "500", + "hawq_rm_nvseg_perquery_perseg_limit": "6" + } + } properties = configurations["hawq-site"]["properties"] defaults = {} hosts = {} @@ -1026,8 +1076,36 @@ class 
TestHAWQ200ServiceAdvisor(TestCase): self.assertEqual(len(problems), 1) self.assertEqual(problems[0], expected) - configurations["hawq-site"] = {"properties": {"default_hash_table_bucket_number": "500", - "hawq_rm_nvseg_perquery_limit": "500"}} + configurations["hawq-site"] = { + "properties": { + "default_hash_table_bucket_number": "500", + "hawq_rm_nvseg_perquery_limit": "500" + } + } + properties = configurations["hawq-site"]["properties"] + problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts) + self.assertEqual(len(problems), 0) + + configurations["hawq-site"] = { + "properties": + { + "hawq_global_rm_type": "none", + "hawq_rm_memory_limit_perseg": "1023MB" + } + } + expected = { + 'config-type': 'hawq-site', + 'message': 'HAWQ Segment Memory less than 1GB is not sufficient', + 'type': 'configuration', + 'config-name': 'hawq_global_rm_type', + 'level': 'ERROR' + } + properties = configurations["hawq-site"]["properties"] + problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts) + self.assertEqual(len(problems), 1) + self.assertEqual(problems[0], expected) + + configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"] = "1GB" properties = configurations["hawq-site"]["properties"] problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, defaults, configurations, services, hosts) self.assertEqual(len(problems), 0) http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py b/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py index 7510e5f..8b2a4ba 100644 --- a/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py +++ 
b/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py @@ -121,12 +121,12 @@ class TestPXF300ServiceAdvisor(TestCase): "level": "WARN" } ] - items = self.serviceAdvisor.getConfigurationsValidationItems(properties, properties, services, None) + items = self.serviceAdvisor.getServiceConfigurationsValidationItems(properties, properties, services, None) self.assertEquals(items, expected) # Case 2: No warning should be generated if PXF_PATH is present in hbase-env properties = services["configurations"]["hbase-env"]["properties"]["content"] = self.PXF_PATH - items = self.serviceAdvisor.getConfigurationsValidationItems(properties, properties, services, None) + items = self.serviceAdvisor.getServiceConfigurationsValidationItems(properties, properties, services, None) self.assertEquals(items, [])
