Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 482631e7b -> 8329d7341


AMBARI-17720: HAWQ fails to execute queries in low-memory environment (mithmatt)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/8329d734
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/8329d734
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/8329d734

Branch: refs/heads/branch-2.4
Commit: 8329d73416539539d0918d9b4a06387acd7da6bb
Parents: 482631e
Author: Matt <[email protected]>
Authored: Wed Jul 20 11:04:37 2016 -0700
Committer: Matt <[email protected]>
Committed: Wed Jul 20 11:04:37 2016 -0700

----------------------------------------------------------------------
 .../HAWQ/2.0.0/configuration/hawq-site.xml      | 12 +++
 .../HAWQ/2.0.0/service_advisor.py               | 26 +++++-
 .../PXF/3.0.0/service_advisor.py                |  2 +-
 .../HAWQ/test_service_advisor.py                | 98 ++++++++++++++++++--
 .../common-services/PXF/test_service_advisor.py |  4 +-
 5 files changed, 128 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
 
b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
index 0bddb07..9baaa1c 100644
--- 
a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
+++ 
b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/configuration/hawq-site.xml
@@ -310,6 +310,12 @@
       The default set during installation is the number of HAWQ Segments 
multiplied by 6.
       Adjust the value when the number of HAWQ Segments changes in the 
cluster. If parameter value is changed, existing Hash distributed tables must 
be redistributed.
     </description>
+    <depends-on>
+      <property>
+        <type>hawq-site</type>
+        <name>hawq_rm_memory_limit_perseg</name>
+      </property>
+    </depends-on>
     <value-attributes>
       <type>int</type>
       <minimum>1</minimum>
@@ -327,6 +333,12 @@
       This parameter is used to limit the maximum resource usage on each 
segment node.
       The default is 6. Not recommended to change this parameter.
     </description>
+    <depends-on>
+      <property>
+        <type>hawq-site</type>
+        <name>hawq_rm_memory_limit_perseg</name>
+      </property>
+    </depends-on>
     <value-attributes>
       <type>int</type>
       <minimum>1</minimum>

http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
 
b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
index dffe57d..9f85518 100644
--- 
a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
+++ 
b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/service_advisor.py
@@ -254,6 +254,21 @@ class 
HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
         # hawq_rm_memory_limit_perseg does not support decimal value so trim 
decimal using int
         putHawqSiteProperty("hawq_rm_memory_limit_perseg", 
"{0}{1}".format(int(recommended_mem), unit))
 
+      # Set default hawq_rm_nvseg_perquery_perseg_limit to 6, only if value 
was less than 6
+      if "hawq_rm_nvseg_perquery_perseg_limit" in hawq_site and 
int(hawq_site["hawq_rm_nvseg_perquery_perseg_limit"]) < 6:
+        putHawqSiteProperty('hawq_rm_nvseg_perquery_perseg_limit', 6)
+
+      if "hawq_global_rm_type" in hawq_site and "hawq_rm_memory_limit_perseg" 
in hawq_site:
+        hawq_rm_memory_limit_perseg = 
hawq_site["hawq_rm_memory_limit_perseg"].strip()
+        unit = hawq_rm_memory_limit_perseg[-2:]
+        value = hawq_rm_memory_limit_perseg[:-2]
+        # For clusters running with hawq_rm_memory_limit_perseg greater than 
or equal to 1GB but less than 2GB
+        if (unit == "GB" and 1 <= int(value) < 2) or (unit == "MB" and 1024 <= 
int(value) < 2048):
+          factor = 4 # Memory is low, so drop 
hawq_rm_nvseg_perquery_perseg_limit to 4
+          buckets = min(factor * numSegments, 
int(hawq_site["default_hash_table_bucket_number"])) if 
"default_hash_table_bucket_number" in hawq_site else factor * numSegments
+          putHawqSiteProperty('default_hash_table_bucket_number', buckets)
+          putHawqSiteProperty('hawq_rm_nvseg_perquery_perseg_limit', factor)
+
       # Show / Hide properties based on the value of hawq_global_rm_type
       YARN_MODE = True if hawq_site["hawq_global_rm_type"].lower() == "yarn" 
else False
       yarn_mode_properties_visibility = {
@@ -279,7 +294,7 @@ class HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
   def getHAWQYARNPropertyMapping(self):
     return { "hawq_rm_yarn_address": "yarn.resourcemanager.address", 
"hawq_rm_yarn_scheduler_address": "yarn.resourcemanager.scheduler.address" }
 
-  def getConfigurationsValidationItems(self, configurations, 
recommendedDefaults, services, hosts):
+  def getServiceConfigurationsValidationItems(self, configurations, 
recommendedDefaults, services, hosts):
     siteName = "hawq-site"
     method = self.validateHAWQSiteConfigurations
     items = self.validateConfigurationsForSite(configurations, 
recommendedDefaults, services, hosts, siteName, method)
@@ -368,6 +383,15 @@ class 
HAWQ200ServiceAdvisor(service_advisor.ServiceAdvisor):
       message = "Default buckets for Hash Distributed tables parameter value 
should not be greater than the value of Virtual Segments Limit per Query 
(Total) parameter, currently set to 
{0}.".format(hawq_site["hawq_rm_nvseg_perquery_limit"])
       validationItems.append({"config-name": 
"default_hash_table_bucket_number", "item": self.getErrorItem(message)})
 
+    if "hawq_global_rm_type" in hawq_site and "hawq_rm_memory_limit_perseg" in 
hawq_site:
+      hawq_rm_memory_limit_perseg = hawq_site["hawq_rm_memory_limit_perseg"]
+      unit = hawq_rm_memory_limit_perseg[-2:]
+      value = hawq_rm_memory_limit_perseg[:-2]
+      # For clusters running with hawq_rm_memory_limit_perseg less than 1GB
+      if (unit == "GB" and int(value) < 1) or (unit == "MB" and int(value) < 
1024):
+        message = "HAWQ Segment Memory less than 1GB is not sufficient"
+        validationItems.append({"config-name": "hawq_global_rm_type", "item": 
self.getErrorItem(message)})
+
     return self.toConfigurationValidationProblems(validationItems, "hawq-site")
 
   def validateHAWQHdfsClientConfigurations(self, properties, 
recommendedDefaults, configurations, services, hosts):

http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py 
b/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py
index 4089b75..d2a80a9 100644
--- 
a/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py
+++ 
b/ambari-server/src/main/resources/common-services/PXF/3.0.0/service_advisor.py
@@ -86,7 +86,7 @@ class PXF300ServiceAdvisor(service_advisor.ServiceAdvisor):
 
     return self.toConfigurationValidationProblems(validationItems, "hbase-env")
 
-  def getConfigurationsValidationItems(self, configurations, 
recommendedDefaults, services, hosts):
+  def getServiceConfigurationsValidationItems(self, configurations, 
recommendedDefaults, services, hosts):
     siteName = "hbase-env"
     method = self.validatePXFHBaseEnvConfigurations
     items = self.validateConfigurationsForSite(configurations, 
recommendedDefaults, services, hosts, siteName, method)

http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py 
b/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py
index f8a9468..b3edae4 100644
--- a/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py
+++ b/ambari-server/src/test/python/common-services/HAWQ/test_service_advisor.py
@@ -180,7 +180,8 @@ class TestHAWQ200ServiceAdvisor(TestCase):
         "properties": {
           "hawq_rm_memory_limit_perseg": "65535MB",
           "hawq_rm_nvcore_limit_perseg": "16",
-          "hawq_global_rm_type": "yarn"
+          "hawq_global_rm_type": "yarn",
+          "default_hash_table_bucket_number": 18
         }
       },
       "hdfs-site": {
@@ -356,6 +357,44 @@ class TestHAWQ200ServiceAdvisor(TestCase):
     self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, 
None, services, hosts)
     
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"],
 "730GB")
 
+    ## Test if default_hash_table_bucket_number and 
hawq_rm_nvseg_perquery_perseg_limit are set correctly based on low 
hawq_rm_memory_limit_perseg
+
+    # Case 1: When hawq_rm_memory_limit_perseg is between 1GB and 2GB
+    # Set hawq_rm_nvseg_perquery_perseg_limit to 4 and 
default_hash_table_bucket_number as hawq_rm_nvseg_perquery_perseg_limit * 
numSegments
+    hosts["items"][0]["Hosts"]["total_mem"] = 2097152
+    hosts["items"][1]["Hosts"]["total_mem"] = 2097152
+    hosts["items"][3]["Hosts"]["total_mem"] = 2097152
+    
services["configurations"]["hawq-site"]["properties"]["hawq_global_rm_type"] = 
"none"
+    
services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"]
 = "512"
+    self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, 
None, services, hosts)
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"],
 "1152MB")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
 "8")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"],
 "4")
+
+    # Case 2: When hawq_rm_memory_limit_perseg > 2GB
+    # Set hawq_rm_nvseg_perquery_perseg_limit to 6 and 
default_hash_table_bucket_number as hawq_rm_nvseg_perquery_perseg_limit * 
numSegments
+    hosts["items"][0]["Hosts"]["total_mem"] = 1073741824
+    hosts["items"][1]["Hosts"]["total_mem"] = 2073741824
+    hosts["items"][3]["Hosts"]["total_mem"] = 3073741824
+    
services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"]
 = "512"
+    
services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"]
 = "4"
+    self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, 
None, services, hosts)
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"],
 "730GB")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
 "12")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"],
 "6")
+
+    # Case 3: When hawq_rm_memory_limit_perseg > 2GB and 
hawq_rm_nvseg_perquery_perseg_limit is already above the default of 6
+    # hawq_rm_nvseg_perquery_perseg_limit is expected to stay at 8 and 
default_hash_table_bucket_number is expected to be 12
+    hosts["items"][0]["Hosts"]["total_mem"] = 1073741824
+    hosts["items"][1]["Hosts"]["total_mem"] = 2073741824
+    hosts["items"][3]["Hosts"]["total_mem"] = 3073741824
+    
services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_limit"]
 = "512"
+    
services["configurations"]["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"]
 = "8"
+    self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, 
None, services, hosts)
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"],
 "730GB")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["default_hash_table_bucket_number"],
 "12")
+    
self.assertEqual(configurations["hawq-site"]["properties"]["hawq_rm_nvseg_perquery_perseg_limit"],
 "8")
+
     ## Test if the properties are set to visible / invisible based on the 
value of hawq_global_rm_type
 
     # Case 1: When hawq_global_rm_type is yarn
@@ -399,6 +438,7 @@ class TestHAWQ200ServiceAdvisor(TestCase):
     self.serviceAdvisor.getServiceConfigurationRecommendations(configurations, 
None, services, hosts)
     
self.assertEqual(configurations["hawq-sysctl-env"]["property_attributes"]["vm.overcommit_ratio"]["visible"],
 "true")
 
+
   def test_createComponentLayoutRecommendations_hawq_3_Hosts(self):
     """ Test that HAWQSTANDBY is recommended on a 3-node cluster """
 
@@ -790,11 +830,16 @@ class TestHAWQ200ServiceAdvisor(TestCase):
     hawqSegmentComponent = self.getHosts(componentsList, "HAWQSEGMENT")
 
     # setup default configuration values
-    services["configurations"]["hawq-site"] = {"properties": 
{"default_hash_table_bucket_number": "24",
-                                                              
"hawq_rm_nvseg_perquery_limit": "512",
-                                                              
"hawq_rm_yarn_address": "localhost:8032",
-                                                              
"hawq_rm_yarn_scheduler_address": "localhost:8030",
-                                                              
"hawq_global_rm_type":  "none"}}
+    services["configurations"]["hawq-site"] = {
+      "properties": {
+        "default_hash_table_bucket_number": "24",
+        "hawq_rm_nvseg_perquery_limit": "512",
+        "hawq_rm_yarn_address": "localhost:8032",
+        "hawq_rm_yarn_scheduler_address": "localhost:8030",
+        "hawq_global_rm_type":  "none",
+        "hawq_rm_nvseg_perquery_perseg_limit": "6"
+      }
+    }
 
     services["configurations"]["hdfs-client"] = {"properties": 
{"output.replace-datanode-on-failure": "true"}}
     services["configurations"]["hawq-sysctl-env"] = {"properties": {}}
@@ -1009,8 +1054,13 @@ class TestHAWQ200ServiceAdvisor(TestCase):
     }
     # setup default configuration values
     configurations = services["configurations"]
-    configurations["hawq-site"] = {"properties": 
{"default_hash_table_bucket_number": "600",
-                                                  
"hawq_rm_nvseg_perquery_limit": "500"}}
+    configurations["hawq-site"] = {
+      "properties": {
+        "default_hash_table_bucket_number": "600",
+        "hawq_rm_nvseg_perquery_limit": "500",
+        "hawq_rm_nvseg_perquery_perseg_limit": "6"
+      }
+    }
     properties = configurations["hawq-site"]["properties"]
     defaults = {}
     hosts = {}
@@ -1026,8 +1076,36 @@ class TestHAWQ200ServiceAdvisor(TestCase):
     self.assertEqual(len(problems), 1)
     self.assertEqual(problems[0], expected)
 
-    configurations["hawq-site"] = {"properties": 
{"default_hash_table_bucket_number": "500",
-                                                  
"hawq_rm_nvseg_perquery_limit": "500"}}
+    configurations["hawq-site"] = {
+      "properties": {
+        "default_hash_table_bucket_number": "500",
+        "hawq_rm_nvseg_perquery_limit": "500"
+      }
+    }
+    properties = configurations["hawq-site"]["properties"]
+    problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, 
defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 0)
+
+    configurations["hawq-site"] = {
+      "properties":
+        {
+          "hawq_global_rm_type": "none",
+          "hawq_rm_memory_limit_perseg": "1023MB"
+        }
+    }
+    expected = {
+      'config-type': 'hawq-site',
+      'message': 'HAWQ Segment Memory less than 1GB is not sufficient',
+      'type': 'configuration',
+      'config-name': 'hawq_global_rm_type',
+      'level': 'ERROR'
+    }
+    properties = configurations["hawq-site"]["properties"]
+    problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, 
defaults, configurations, services, hosts)
+    self.assertEqual(len(problems), 1)
+    self.assertEqual(problems[0], expected)
+
+    configurations["hawq-site"]["properties"]["hawq_rm_memory_limit_perseg"] = 
"1GB"
     properties = configurations["hawq-site"]["properties"]
     problems = self.serviceAdvisor.validateHAWQSiteConfigurations(properties, 
defaults, configurations, services, hosts)
     self.assertEqual(len(problems), 0)

http://git-wip-us.apache.org/repos/asf/ambari/blob/8329d734/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py 
b/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py
index 7510e5f..8b2a4ba 100644
--- a/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py
+++ b/ambari-server/src/test/python/common-services/PXF/test_service_advisor.py
@@ -121,12 +121,12 @@ class TestPXF300ServiceAdvisor(TestCase):
         "level": "WARN"
       }
     ]
-    items = self.serviceAdvisor.getConfigurationsValidationItems(properties, 
properties, services, None)
+    items = 
self.serviceAdvisor.getServiceConfigurationsValidationItems(properties, 
properties, services, None)
     self.assertEquals(items, expected)
 
     # Case 2: No warning should be generated if PXF_PATH is present in 
hbase-env
     properties = 
services["configurations"]["hbase-env"]["properties"]["content"] = self.PXF_PATH
-    items = self.serviceAdvisor.getConfigurationsValidationItems(properties, 
properties, services, None)
+    items = 
self.serviceAdvisor.getServiceConfigurationsValidationItems(properties, 
properties, services, None)
     self.assertEquals(items, [])
 
 

Reply via email to