Repository: ambari Updated Branches: refs/heads/trunk fb9e407ca -> ff0f5c008
AMBARI-17331. Determine Tez for Hive2 config 'tez.am.resource.memory.mb' based on cluster capacity. Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ff0f5c00 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ff0f5c00 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ff0f5c00 Branch: refs/heads/trunk Commit: ff0f5c008a190360c998ee2cca147dd6d0d51063 Parents: fb9e407 Author: Swapan Shridhar <[email protected]> Authored: Tue Jun 21 12:58:24 2016 -0700 Committer: Swapan Shridhar <[email protected]> Committed: Tue Jun 21 12:59:31 2016 -0700 ---------------------------------------------------------------------- .../HIVE/configuration/tez-interactive-site.xml | 31 ++++++++++ .../stacks/HDP/2.5/services/stack_advisor.py | 64 ++++++++++---------- .../stacks/2.5/common/test_stack_advisor.py | 27 ++++++++- 3 files changed, 88 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/ff0f5c00/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml index 3c83c5c..42d3459 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml @@ -76,4 +76,35 @@ </description> <on-ambari-upgrade add="true"/> </property> + <property> + <name>tez.am.resource.memory.mb</name> + <value>1536</value> + <description>The amount of memory to be used by the AppMaster</description> + <value-attributes> + <type>int</type> + </value-attributes> + <depends-on> + <property> + <type>hive-interactive-env</type> + <name>enable_hive_interactive</name> + </property> + <property> + <type>hive-interactive-env</type> + <name>llap_queue_capacity</name> + </property> + <property> + <type>hive-interactive-site</type> + <name>hive.llap.daemon.queue.name</name> + </property> + <property> + <type>capacity-scheduler</type> + <name>yarn.scheduler.capacity.root.queues</name> + </property> + <property> + <type>hive-interactive-site</type> + <name>hive.server2.tez.sessions.per.default.queue</name> + </property> + </depends-on> + <on-ambari-upgrade add="true"/> + </property> </configuration> http://git-wip-us.apache.org/repos/asf/ambari/blob/ff0f5c00/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py index 76654c3..8aa2f5f 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py @@ -470,6 +470,8 @@ class HDP25StackAdvisor(HDP24StackAdvisor): putHiveInteractiveEnvProperty = self.putProperty(configurations, "hive-interactive-env", services) putHiveInteractiveEnvPropertyAttribute = self.putPropertyAttribute(configurations, "hive-interactive-env") + putTezInteractiveSiteProperty = self.putProperty(configurations, "tez-interactive-site", services) + llap_daemon_selected_queue_name = None llap_queue_selected_in_current_call = None LLAP_MAX_CONCURRENCY = 32 # Allow a max of 32 concurrency. @@ -566,7 +568,10 @@ class HDP25StackAdvisor(HDP24StackAdvisor): "yarn_nm_mem_in_mb : {2}".format(total_cluster_capacity, node_manager_cnt, yarn_nm_mem_in_mb)) yarn_min_container_size = self.get_yarn_min_container_size(services, configurations) - tez_am_container_size = self._normalizeUp(self.get_tez_am_container_size(services, configurations), yarn_min_container_size) + tez_am_container_size = self.calculate_tez_am_container_size(long(total_cluster_capacity)) + normalized_tez_am_container_size = self._normalizeUp(tez_am_container_size, yarn_min_container_size) + Logger.info("Calculated normalized_tez_am_container_size : {0}, using following : tez_am_container_size : {1}, " + "total_cluster_capacity : {2}".format(normalized_tez_am_container_size, tez_am_container_size, total_cluster_capacity)) total_llap_queue_size = long(self._normalizeDown((float(current_selected_queue_for_llap_cap) / 100 * total_cluster_capacity), yarn_min_container_size)) # Get calculated value for Slider AM container Size slider_am_container_size = self._normalizeUp(self.calculate_slider_am_size(yarn_min_container_size), yarn_min_container_size) @@ -574,10 +579,10 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Read 'hive.server2.tez.sessions.per.default.queue' prop if it's in changed-configs, else calculate it. if not llap_concurrency_in_changed_configs: # Calculate llap concurrency (i.e. Number of Tez AM's) - llap_concurrency = float(total_llap_queue_size * 0.25 / tez_am_container_size) + llap_concurrency = float(total_llap_queue_size * 0.25 / normalized_tez_am_container_size) llap_concurrency = max(long(llap_concurrency), 1) Logger.info("Calculated llap_concurrency : {0}, using following : total_llap_queue_size : {1}, " - "tez_am_container_size : {2}".format(llap_concurrency, total_llap_queue_size, tez_am_container_size)) + "normalized_tez_am_container_size : {2}".format(llap_concurrency, total_llap_queue_size, normalized_tez_am_container_size)) # Limit 'llap_concurrency' to reach a max. of 32. if llap_concurrency > LLAP_MAX_CONCURRENCY: llap_concurrency = LLAP_MAX_CONCURRENCY @@ -593,13 +598,13 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Calculate 'total memory available for llap daemons' across cluster - total_am_capacity_required = tez_am_container_size * llap_concurrency + slider_am_container_size + total_am_capacity_required = normalized_tez_am_container_size * llap_concurrency + slider_am_container_size cap_available_for_daemons = total_llap_queue_size - total_am_capacity_required Logger.info("Calculated cap_available_for_daemons : {0}, using following : current_selected_queue_for_llap_cap : {1}, " - "yarn_nm_mem_in_mb : {2}, total_cluster_capacity : {3}, total_llap_queue_size : {4}, tez_am_container_size" + "yarn_nm_mem_in_mb : {2}, total_cluster_capacity : {3}, total_llap_queue_size : {4}, normalized_tez_am_container_size" " : {5}, yarn_min_container_size : {6}, llap_concurrency : {7}, total_am_capacity_required : {8}" .format(cap_available_for_daemons, current_selected_queue_for_llap_cap, yarn_nm_mem_in_mb, total_cluster_capacity, - total_llap_queue_size, tez_am_container_size, yarn_min_container_size, llap_concurrency, + total_llap_queue_size, normalized_tez_am_container_size, yarn_min_container_size, llap_concurrency, total_am_capacity_required)) if cap_available_for_daemons < yarn_min_container_size : raise Fail("'Capacity available for LLAP daemons'({0}) < 'YARN minimum container size'({1}). Invalid configuration detected. " @@ -663,12 +668,16 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Updating calculated configs. + normalized_tez_am_container_size = long(normalized_tez_am_container_size) + putTezInteractiveSiteProperty('tez.am.resource.memory.mb', normalized_tez_am_container_size) + Logger.info("'Tez for Hive2' config 'tez.am.resource.memory.mb' updated. Current: {0}".format(normalized_tez_am_container_size)) + if not llap_concurrency_in_changed_configs: min_llap_concurrency = 1 putHiveInteractiveSiteProperty('hive.server2.tez.sessions.per.default.queue', llap_concurrency) putHiveInteractiveSitePropertyAttribute('hive.server2.tez.sessions.per.default.queue', "minimum", min_llap_concurrency) putHiveInteractiveSitePropertyAttribute('hive.server2.tez.sessions.per.default.queue', "maximum", LLAP_MAX_CONCURRENCY) - Logger.info("LLAP config 'hive.server2.tez.sessions.per.default.queue' updated. Min : {0}, Current: {1}, Max: {2}" \ + Logger.info("Hive2 config 'hive.server2.tez.sessions.per.default.queue' updated. Min : {0}, Current: {1}, Max: {2}" \ .format(min_llap_concurrency, llap_concurrency, LLAP_MAX_CONCURRENCY)) num_llap_nodes = long(num_llap_nodes) @@ -708,7 +717,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): llap_io_enabled = 'true' putHiveInteractiveSiteProperty('hive.llap.io.enabled', llap_io_enabled) - Logger.info("HiveServer2 config 'hive.llap.io.enabled' updated to '{0}' as part of " + Logger.info("Hive2 config 'hive.llap.io.enabled' updated to '{0}' as part of " "'hive.llap.io.memory.size' calculation.".format(llap_io_enabled)) llap_xmx = long(llap_xmx) @@ -926,31 +935,21 @@ class HDP25StackAdvisor(HDP24StackAdvisor): return yarn_nm_mem_in_mb """ - Gets Tez App Master container size (tez.am.resource.memory.mb) from tez-site. Takes into account if it has been calculated - as part of current Stack Advisor invocation. + Determines Tez App Master container size (tez.am.resource.memory.mb) for tez_hive2/tez-site based on total cluster capacity. """ - def get_tez_am_container_size(self, services, configurations): - llap_daemon_container_size = None - # Check if 'tez.am.resource.memory.mb' is modified in current ST invocation. - if 'tez-site' in configurations and 'tez.am.resource.memory.mb' in configurations['tez-site']['properties']: - llap_daemon_container_size = float(configurations['tez-site']['properties']['tez.am.resource.memory.mb']) - Logger.info("'tez.am.resource.memory.mb' read from configurations as : {0}".format(llap_daemon_container_size)) - - if not llap_daemon_container_size: - # Check if 'tez.am.resource.memory.mb' is input in services array. - if 'tez-site' in services['configurations'] and \ - 'tez.am.resource.memory.mb' in services['configurations']['tez-site']['properties']: - llap_daemon_container_size = float(services['configurations']['tez-site']['properties']['tez.am.resource.memory.mb']) - Logger.info("'tez.am.resource.memory.mb' read from services as : {0}".format(llap_daemon_container_size)) + def calculate_tez_am_container_size(self, total_cluster_capacity): + if total_cluster_capacity is None or not isinstance(total_cluster_capacity, long): + raise Fail ("Passed-in 'Total Cluster Capacity' is : '{0}'".format(total_cluster_capacity)) + if total_cluster_capacity <= 0: + raise Fail ("Passed-in 'Total Cluster Capacity' ({0}) is Invalid.".format(total_cluster_capacity)) + if total_cluster_capacity <= 4096: + return 256 + elif total_cluster_capacity > 4096 and total_cluster_capacity <= 73728: + return 512 + elif total_cluster_capacity > 73728: + return 1536 - if not llap_daemon_container_size: - raise Fail("Couldn't retrieve Hive Server's 'tez.am.resource.memory.mb' config.") - - assert (llap_daemon_container_size > 0), "'tez.am.resource.memory.mb' current value : {0}. " \ - "Expected value : > 0".format(llap_daemon_container_size) - - return llap_daemon_container_size """ Calculate minimum queue capacity required in order to get LLAP and HIVE2 app into running state. @@ -966,7 +965,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): yarn_min_container_size = self.get_yarn_min_container_size(services, configurations) slider_am_size = self.calculate_slider_am_size(yarn_min_container_size) hive_tez_container_size = self.get_hive_tez_container_size(services, configurations) - tez_am_container_size = self.get_tez_am_container_size(services, configurations) + tez_am_container_size = self.calculate_tez_am_container_size(long(total_cluster_cap)) normalized_val = self._normalizeUp(slider_am_size, yarn_min_container_size) + self._normalizeUp\ (hive_tez_container_size, yarn_min_container_size) + self._normalizeUp(tez_am_container_size, yarn_min_container_size) @@ -1017,6 +1016,9 @@ class HDP25StackAdvisor(HDP24StackAdvisor): llap_slider_cap_percentage = int( services['configurations']['hive-interactive-env']['properties']['llap_queue_capacity']) min_reqd_queue_cap_perc = self.min_queue_perc_reqd_for_llap_and_hive_app(services, hosts, configurations) + if min_reqd_queue_cap_perc > 100: + min_reqd_queue_cap_perc = 100 + Logger.info("Received 'Minimum Required LLAP queue capacity' : {0}% (out of bounds), adjusted it to : 100%".format(min_reqd_queue_cap_perc)) # Adjust 'llap' queue capacity slider value to be minimum required if out of expected bounds. if llap_slider_cap_percentage <= 0 or llap_slider_cap_percentage > 100: http://git-wip-us.apache.org/repos/asf/ambari/blob/ff0f5c00/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py index c9c21ae..fce1c1c 100644 --- a/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py +++ b/ambari-server/src/test/python/stacks/2.5/common/test_stack_advisor.py @@ -3540,6 +3540,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEqual(configurations['hive-interactive-env']['properties']['slider_am_container_size'], '512') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.server2.tez.default.queues'], 'llap') + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '512') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '1024') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '1') @@ -3738,6 +3740,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'visible': 'false'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '512') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '5120') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '1') @@ -3940,6 +3944,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '1024') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '9216') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '1') @@ -4143,6 +4149,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '2048') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '40960') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '4') @@ -4320,7 +4328,7 @@ class TestHDP25StackAdvisor(TestCase): self.stackAdvisor.recommendYARNConfigurations(configurations, clusterData, services, self.hosts) - self.assertEqual(configurations['hive-interactive-site']['properties']['hive.server2.tez.sessions.per.default.queue'], '5') + self.assertEqual(configurations['hive-interactive-site']['properties']['hive.server2.tez.sessions.per.default.queue'], '11') self.assertEquals(configurations['hive-interactive-site']['property_attributes']['hive.server2.tez.sessions.per.default.queue'], {'minimum': '1', 'maximum': '32'}) self.assertEqual(configurations['hive-interactive-env']['properties']['num_llap_nodes'], '2') @@ -4341,6 +4349,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '682') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '10230') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '3') @@ -4536,6 +4546,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '2048') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '61440') @@ -4739,6 +4751,7 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '3072') # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '39936') @@ -4917,7 +4930,7 @@ class TestHDP25StackAdvisor(TestCase): self.stackAdvisor.recommendYARNConfigurations(configurations, clusterData, services, self.hosts) - self.assertEqual(configurations['hive-interactive-site']['properties']['hive.server2.tez.sessions.per.default.queue'], '32') + self.assertEqual(configurations['hive-interactive-site']['properties']['hive.server2.tez.sessions.per.default.queue'], '18') self.assertEquals(configurations['hive-interactive-site']['property_attributes']['hive.server2.tez.sessions.per.default.queue'], {'minimum': '1', 'maximum': '32'}) self.assertEqual(configurations['hive-interactive-env']['properties']['num_llap_nodes'], '3') @@ -4938,6 +4951,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '682') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '10230') @@ -5131,6 +5146,7 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '25', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '82240') # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '164480') @@ -5317,6 +5333,7 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '49', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '164480') # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '164480') @@ -5505,6 +5522,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'maximum': '100', 'minimum': '20', 'visible': 'true'}) + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '82240') + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '246720') @@ -5721,7 +5740,7 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'visible': 'false'}) - + self.assertEqual(configurations['tez-interactive-site']['properties']['tez.am.resource.memory.mb'], '512') # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '10240') @@ -5940,6 +5959,8 @@ class TestHDP25StackAdvisor(TestCase): self.assertEquals(configurations['hive-interactive-env']['property_attributes']['llap_queue_capacity'], {'visible': 'false'}) + self.assertTrue('tez.am.resource.memory.mb' not in configurations['tez-interactive-site']['properties']) + # Check '*_copy' configs, used for displaying it as label on UI. self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.yarn.container.mb_copy'], '512') self.assertEqual(configurations['hive-interactive-site']['properties']['hive.llap.daemon.num.executors_copy'], '0')
