Repository: ambari Updated Branches: refs/heads/trunk 55394ec0a -> 6016fc6e8
AMBARI-10998. Stack advisor: hive.tez.container.size and tez.task.resource.memory.mb should have same value/calculation logic (mpapirkovskyy via srimanth) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6016fc6e Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6016fc6e Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6016fc6e Branch: refs/heads/trunk Commit: 6016fc6e8ff30f891fc4d6d3524b396c43a7aa07 Parents: 55394ec Author: Srimanth Gunturi <[email protected]> Authored: Fri May 8 11:49:38 2015 -0700 Committer: Srimanth Gunturi <[email protected]> Committed: Fri May 8 11:49:38 2015 -0700 ---------------------------------------------------------------------- .../services/HIVE/configuration/hive-site.xml | 4 ++ .../2.2/services/TEZ/configuration/tez-site.xml | 10 ++++ .../stacks/HDP/2.2/services/stack_advisor.py | 15 ++++-- .../stacks/2.2/common/test_stack_advisor.py | 55 +++++++++++++++++--- 4 files changed, 73 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/6016fc6e/ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml index ad0b8bb..f0b5a04 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.1/services/HIVE/configuration/hive-site.xml @@ -374,6 +374,10 @@ limitations under the License. <type>yarn-site</type> <name>yarn.scheduler.minimum-allocation-mb</name> </property> + <property> + <type>yarn-site</type> + <name>yarn.scheduler.maximum-allocation-mb</name> + </property> </depends-on> </property> http://git-wip-us.apache.org/repos/asf/ambari/blob/6016fc6e/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml index 24c14bb..72873b7 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/TEZ/configuration/tez-site.xml @@ -90,6 +90,16 @@ <description>The amount of memory to be used by launched tasks. Used only if the value is not specified explicitly by the DAG definition. </description> + <depends-on> + <property> + <type>yarn-site</type> + <name>yarn.scheduler.minimum-allocation-mb</name> + </property> + <property> + <type>yarn-site</type> + <name>yarn.scheduler.maximum-allocation-mb</name> + </property> + </depends-on> </property> <property> http://git-wip-us.apache.org/repos/asf/ambari/blob/6016fc6e/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py index cfcc2b5..24248f2 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/stack_advisor.py @@ -302,10 +302,12 @@ class HDP22StackAdvisor(HDP21StackAdvisor): if not "yarn-site" in configurations: self.recommendYARNConfigurations(configurations, clusterData, services, hosts) + #properties below should be always present as they are provided in HDP206 stack advisor at least + yarnMaxAllocationSize = min(30 * int(configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"]), int(configurations["yarn-site"]["properties"]["yarn.scheduler.maximum-allocation-mb"])) + #duplicate tez task resource calc logic, direct dependency doesn't look good here (in case of Hive without Tez) + container_size = clusterData['mapMemory'] if clusterData['mapMemory'] > 2048 else int(clusterData['reduceMemory']) + container_size = min(clusterData['containers'] * clusterData['ramPerContainer'], container_size, yarnMaxAllocationSize) - if "yarn-site" in configurations and \ - "yarn.scheduler.minimum-allocation-mb" in configurations["yarn-site"]["properties"]: - container_size = configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"] putHiveSiteProperty("hive.tez.container.size", container_size) putHiveSiteProperty("hive.prewarm.enabled", "false") putHiveSiteProperty("hive.prewarm.numcontainers", "3") @@ -528,11 +530,16 @@ class HDP22StackAdvisor(HDP21StackAdvisor): def recommendTezConfigurations(self, configurations, clusterData, services, hosts): + if not "yarn-site" in configurations: + self.recommendYARNConfigurations(configurations, clusterData, services, hosts) + #properties below should be always present as they are provided in HDP206 stack advisor + yarnMaxAllocationSize = min(30 * int(configurations["yarn-site"]["properties"]["yarn.scheduler.minimum-allocation-mb"]), int(configurations["yarn-site"]["properties"]["yarn.scheduler.maximum-allocation-mb"])) + putTezProperty = self.putProperty(configurations, "tez-site") putTezProperty("tez.am.resource.memory.mb", int(clusterData['amMemory']) * 2 if int(clusterData['amMemory']) < 3072 else int(clusterData['amMemory'])) taskResourceMemory = clusterData['mapMemory'] if clusterData['mapMemory'] > 2048 else int(clusterData['reduceMemory']) - taskResourceMemory = min(clusterData['containers'] * clusterData['ramPerContainer'], taskResourceMemory) + taskResourceMemory = min(clusterData['containers'] * clusterData['ramPerContainer'], taskResourceMemory, yarnMaxAllocationSize) putTezProperty("tez.task.resource.memory.mb", taskResourceMemory) putTezProperty("tez.runtime.io.sort.mb", min(int(taskResourceMemory * 0.4), 2047)) putTezProperty("tez.runtime.unordered.output.buffer.size-mb", int(taskResourceMemory * 0.075)) http://git-wip-us.apache.org/repos/asf/ambari/blob/6016fc6e/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py index 8b4918f..51574f3 100644 --- a/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py +++ b/ambari-server/src/test/python/stacks/2.2/common/test_stack_advisor.py @@ -68,7 +68,14 @@ class TestHDP22StackAdvisor(TestCase): return self.get_system_min_uid_real() def test_recommendTezConfigurations(self): - configurations = {} + configurations = { + "yarn-site": { + "properties": { + "yarn.scheduler.minimum-allocation-mb": "256", + "yarn.scheduler.maximum-allocation-mb": "2048", + }, + } + } clusterData = { "mapMemory": 3000, "amMemory": 2000, @@ -84,13 +91,26 @@ class TestHDP22StackAdvisor(TestCase): "tez.runtime.io.sort.mb": "307", "tez.runtime.unordered.output.buffer.size-mb": "57" } + }, + 'yarn-site': { + 'properties': { + 'yarn.scheduler.minimum-allocation-mb': '256', + 'yarn.scheduler.maximum-allocation-mb': '2048' + } } } self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None) self.assertEquals(configurations, expected) def test_recommendTezConfigurations_amMemoryMoreThan3072(self): - configurations = {} + configurations = { + "yarn-site": { + "properties": { + "yarn.scheduler.minimum-allocation-mb": "256", + "yarn.scheduler.maximum-allocation-mb": "2048", + }, + } + } clusterData = { "mapMemory": 4000, "amMemory": 3100, @@ -106,13 +126,26 @@ class TestHDP22StackAdvisor(TestCase): "tez.runtime.io.sort.mb": "307", "tez.runtime.unordered.output.buffer.size-mb": "57" } + }, + 'yarn-site': { + 'properties': { + 'yarn.scheduler.minimum-allocation-mb': '256', + 'yarn.scheduler.maximum-allocation-mb': '2048' + } } } self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None) self.assertEquals(configurations, expected) def test_recommendTezConfigurations_mapMemoryLessThan768(self): - configurations = {} + configurations = { + "yarn-site": { + "properties": { + "yarn.scheduler.minimum-allocation-mb": "256", + "yarn.scheduler.maximum-allocation-mb": "2048", + }, + } + } clusterData = { "mapMemory": 760, "amMemory": 2000, @@ -128,6 +161,12 @@ class TestHDP22StackAdvisor(TestCase): "tez.runtime.io.sort.mb": "304", "tez.runtime.unordered.output.buffer.size-mb": "57" } + }, + 'yarn-site': { + 'properties': { + 'yarn.scheduler.minimum-allocation-mb': '256', + 'yarn.scheduler.maximum-allocation-mb': '2048' + } } } self.stackAdvisor.recommendTezConfigurations(configurations, clusterData, None, None) @@ -872,6 +911,7 @@ class TestHDP22StackAdvisor(TestCase): "yarn-site": { "properties": { "yarn.scheduler.minimum-allocation-mb": "256", + "yarn.scheduler.maximum-allocation-mb": "8192", }, }, "capacity-scheduler": { @@ -896,7 +936,8 @@ class TestHDP22StackAdvisor(TestCase): }, 'yarn-site': { 'properties': { - 'yarn.scheduler.minimum-allocation-mb': '256' + 'yarn.scheduler.minimum-allocation-mb': '256', + 'yarn.scheduler.maximum-allocation-mb': '8192' } }, 'hive-env': { @@ -910,7 +951,7 @@ class TestHDP22StackAdvisor(TestCase): }, 'hive-site': { 'properties': { - 'hive.auto.convert.join.noconditionaltask.size': '89478485', + 'hive.auto.convert.join.noconditionaltask.size': '268435456', 'hive.cbo.enable': 'true', 'hive.compactor.initiator.on': 'false', 'hive.compactor.worker.threads': '0', @@ -936,7 +977,7 @@ class TestHDP22StackAdvisor(TestCase): 'hive.stats.fetch.partition.stats': 'true', 'hive.support.concurrency': 'false', 'hive.tez.auto.reducer.parallelism': 'true', - 'hive.tez.container.size': '256', + 'hive.tez.container.size': '768', 'hive.tez.dynamic.partition.pruning': 'true', 'hive.tez.java.opts': '-server -Xmx615m -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseParallelGC -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps', 'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager', @@ -944,7 +985,7 @@ class TestHDP22StackAdvisor(TestCase): 'hive.vectorized.execution.reduce.enabled': 'false' }, 'property_attributes': { - 'hive.auto.convert.join.noconditionaltask.size': {'maximum': '268435456'}, + 'hive.auto.convert.join.noconditionaltask.size': {'maximum': '805306368'}, 'hive.server2.authentication.pam.services': {'delete': 'true'}, 'hive.server2.custom.authentication.class': {'delete': 'true'}, 'hive.server2.authentication.ldap.baseDN': {'delete': 'true'},
