Repository: ambari Updated Branches: refs/heads/branch-2.5 ea4051ab4 -> 31a146077
AMBARI-19547. HSI tez am memory set to 0, incorrect calculations for non llap queues. (Siddharth Seth via Swapan Shridhar). Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/31a14607 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/31a14607 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/31a14607 Branch: refs/heads/branch-2.5 Commit: 31a1460779c8afb6c63db0e62c87f045b399c7f9 Parents: ea4051a Author: Swapan Shridhar <[email protected]> Authored: Mon Jan 16 16:47:10 2017 -0800 Committer: Swapan Shridhar <[email protected]> Committed: Mon Jan 16 16:47:10 2017 -0800 ---------------------------------------------------------------------- .../package/scripts/hive_server_interactive.py | 6 +- .../HIVE/configuration/hive-interactive-env.xml | 22 ++-- .../configuration/hive-interactive-site.xml | 15 ++- .../HIVE/configuration/tez-interactive-site.xml | 2 +- .../stacks/HDP/2.5/services/stack_advisor.py | 106 ++++++++++++++++++- .../configuration/hive-interactive-site.xml | 5 +- 6 files changed, 135 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py index f2f0554..2d7b232 100644 --- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py +++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server_interactive.py @@ -286,6 +286,10 @@ class HiveServerInteractiveDefault(HiveServerInteractive): slider_placement = 0 Logger.info("Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * " "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem)) + else: + Logger.info("Setting slider_placement: 4, as llap_daemon_container_size : {0} <= 0.5 * " + "YARN NodeManager Memory({1})".format(params.llap_daemon_container_size, params.yarn_nm_mem)) + cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances {params.num_llap_nodes}" " --slider-am-container-mb {params.slider_am_container_mb} --size {params.llap_daemon_container_size}m" @@ -599,4 +603,4 @@ class HiveServerInteractiveWindows(HiveServerInteractive): pass if __name__ == "__main__": - HiveServerInteractive().execute() \ No newline at end of file + HiveServerInteractive().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-env.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-env.xml index a1f6d22..89eccc6 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-env.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-env.xml @@ -94,8 +94,8 @@ <property> <name>llap_heap_size</name> <value>0</value> - <description>Heap Size used by LLAP app.</description> - <display-name>LLAP heap size</display-name> + <description>LLAP Daemon Heap Size in MB.</description> + <display-name>LLAP Daemon Heap Size (MB)</display-name> <value-attributes> <type>int</type> </value-attributes> @@ -163,8 +163,12 @@ <property> <name>llap_headroom_space</name> <value>6144</value> - <description>LLAP app headroom space</description> - <display-name>LLAP's reserved headroom for YARN container</display-name> + <description> + Maximum headroom reserved from the YARN container running LLAP daemons. + This is an upper limit used during automatic size calculations, and the actual + value may be lower. + </description> + <display-name>LLAP Daemon Container Max Headroom</display-name> <value-attributes> <type>int</type> <unit>MB</unit> @@ -174,14 +178,14 @@ <property> <name>llap_log_level</name> <value>INFO</value> - <description>LLAP app logging level (WARN/INFO/DEBUG/TRACE)</description> - <display-name>LLAP app logging level (WARN/INFO/DEBUG/TRACE)</display-name> + <description>LLAP daemon log level (WARN/INFO/DEBUG/TRACE)</description> + <display-name>LLAP daemon log level (WARN/INFO/DEBUG/TRACE)</display-name> <on-ambari-upgrade add="true"/> </property> <property> <name>hive_aux_jars</name> <value/> - <description>A list of comma separated JARs</description> + <description>List of jars to be made available to LLAP daemons</description> <display-name>Auxillary JAR list</display-name> <on-ambari-upgrade add="true"/> <value-attributes> @@ -198,8 +202,8 @@ <property> <name>llap_java_opts</name> <value>-XX:+AlwaysPreTouch {% if java_version > 7 %}-XX:+UseG1GC -XX:TLABSize=8m -XX:+ResizeTLAB -XX:+UseNUMA -XX:+AggressiveOpts -XX:MetaspaceSize=1024m -XX:InitiatingHeapOccupancyPercent=80 -XX:MaxGCPauseMillis=200{% else %}-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseParallelGC{% endif %}</value> - <description>Java opts for llap application</description> - <display-name>LLAP app java opts</display-name> + <description>Java opts for llap daemons</description> + <display-name>LLAP daemon java opts</display-name> <on-ambari-upgrade add="true"/> </property> <property> http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-site.xml index 2fb1553..640f30f 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-interactive-site.xml @@ -384,7 +384,7 @@ limitations under the License. <value>@llap0</value> <description> Explicitly specified hosts to use for LLAP scheduling. Useful for testing. By default, - YARN registry is used. + a ZooKeeper based registry is used. </description> <on-ambari-upgrade add="true"/> </property> @@ -434,8 +434,8 @@ limitations under the License. </property> <property> <name>hive.llap.daemon.num.executors</name> - <display-name>Maximum CPUs per Daemon</display-name> - <description>The maximum number of CPUs a single LLAP daemon will use. Usually this should be equal to the number of available CPUs.</description> + <display-name>Number of executors per LLAP Daemon</display-name> + <description>The number of fragments that a single LLAP daemon will run concurrently. Usually, this will be the same as the number of available CPUs</description> <value>1</value> <value-attributes> <type>int</type> @@ -476,8 +476,13 @@ limitations under the License. </property> <property> <name>hive.llap.daemon.yarn.container.mb</name> - <display-name>Memory per daemon</display-name> - <description>Total memory used by individual LLAP daemons. This includes memory for the cache as well as for the query execution.</description> + <display-name>Memory per Daemon</display-name> + <description> + Total memory used by individual LLAP daemons (YARN Container size). This includes memory + for the cache as well as for the query execution. Should be larger than the sum of + the Daemon cache size and the daemon heap size, and should leave some headroom + after this (In most cases: cache size + heap size + headroom = Memory Per Daemon). + </description> <value>0</value> <value-attributes> <type>int</type> http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml index b331736..1c5117e 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/tez-interactive-site.xml @@ -83,7 +83,7 @@ </property> <property> <name>tez.am.resource.memory.mb</name> - <value>0</value> + <value>SET_ON_FIRST_INVOCATION</value> <description>The amount of memory to be used by the AppMaster</description> <depends-on> <property> http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py index b08ce75..2ad35a2 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/stack_advisor.py @@ -650,6 +650,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): return [host['Hosts']['host_name'] for host in phoenix_query_server_hosts] def recommendHIVEConfigurations(self, configurations, clusterData, services, hosts): + Logger.info("DBG: Invoked recommendHiveConfiguration") super(HDP25StackAdvisor, self).recommendHIVEConfigurations(configurations, clusterData, services, hosts) putHiveInteractiveEnvProperty = self.putProperty(configurations, "hive-interactive-env", services) putHiveInteractiveSiteProperty = self.putProperty(configurations, self.HIVE_INTERACTIVE_SITE, services) @@ -676,6 +677,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): putHiveInteractiveSiteProperty("hive.server2.tez.default.queues", hive_tez_default_queue) Logger.debug("Updated 'hive.server2.tez.default.queues' config : '{0}'".format(hive_tez_default_queue)) else: + Logger.info("DBG: Setting visibility for num_llap_nodes to false") putHiveInteractiveEnvProperty('enable_hive_interactive', 'false') putHiveInteractiveEnvPropertyAttribute("num_llap_nodes", "visible", "false") @@ -736,6 +738,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Note: All memory calculations are in MB, unless specified otherwise. """ + Logger.info("DBG: Entered updateLlapConfigs"); putHiveInteractiveSiteProperty = self.putProperty(configurations, self.HIVE_INTERACTIVE_SITE, services) putHiveInteractiveSitePropertyAttribute = self.putPropertyAttribute(configurations, self.HIVE_INTERACTIVE_SITE) putHiveInteractiveEnvProperty = self.putProperty(configurations, "hive-interactive-env", services) @@ -786,14 +789,21 @@ class HDP25StackAdvisor(HDP24StackAdvisor): putHiveInteractiveSiteProperty('hive.llap.daemon.queue.name', first_leaf_queue) putHiveInteractiveSiteProperty('hive.server2.tez.default.queues', first_leaf_queue) llap_named_queue_selected_in_curr_invocation = False + Logger.info("DBG: llap_named_queue_selected_in_curr_invocation = {0}".format(llap_named_queue_selected_in_curr_invocation)) if (len(leafQueueNames) == 2 and (llap_daemon_selected_queue_name and llap_daemon_selected_queue_name == llap_queue_name) or llap_named_queue_selected_in_curr_invocation) or \ (len(leafQueueNames) == 1 and llap_daemon_selected_queue_name == 'default' and llap_named_queue_selected_in_curr_invocation): + Logger.info("Setting visibility of num_llap_nodes to true.") putHiveInteractiveEnvPropertyAttribute("num_llap_nodes", "visible", "true") selected_queue_is_ambari_managed_llap = True + Logger.info("DBG: Selected YARN queue for LLAP is : '{0}'. Current YARN queues : {1}. Setting 'Number of LLAP nodes' " + "slider visibility to 'True'".format(llap_queue_name, list(leafQueueNames))) else: + Logger.info("Setting visibility of num_llap_nodes to false.") putHiveInteractiveEnvPropertyAttribute("num_llap_nodes", "visible", "false") + Logger.info("Selected YARN queue for LLAP is : '{0}'. Current YARN queues : {1}. Setting 'Number of LLAP nodes' " + "visibility to 'False'.".format(llap_daemon_selected_queue_name, list(leafQueueNames))) selected_queue_is_ambari_managed_llap = False if not llap_named_queue_selected_in_curr_invocation: # We would be creating the 'llap' queue later. Thus, cap-sched doesn't have @@ -834,13 +844,17 @@ class HDP25StackAdvisor(HDP24StackAdvisor): if not changed_configs_in_hive_int_env and not llap_concurrency_in_changed_configs and \ not llap_daemon_queue_in_changed_configs and services["changed-configurations"]: - + Logger.info("DBG: LLAP parameters not modified. Not adjusting LLAP configs.") + Logger.info("DBG: Current 'changed-configuration' received is : {0}".format(services["changed-configurations"])) return + Logger.info("\nDBG: Performing LLAP config calculations ......") node_manager_host_list = self.getHostsForComponent(services, "YARN", "NODEMANAGER") node_manager_cnt = len(node_manager_host_list) yarn_nm_mem_in_mb = self.get_yarn_nm_mem_in_mb(services, configurations) total_cluster_capacity = node_manager_cnt * yarn_nm_mem_in_mb + Logger.info("DBG: Calculated total_cluster_capacity : {0}, using following : node_manager_cnt : {1}, " + "yarn_nm_mem_in_mb : {2}".format(total_cluster_capacity, node_manager_cnt, yarn_nm_mem_in_mb)) yarn_min_container_size = float(self.get_yarn_min_container_size(services, configurations)) tez_am_container_size = self.calculate_tez_am_container_size(services, long(total_cluster_capacity)) normalized_tez_am_container_size = self._normalizeUp(tez_am_container_size, yarn_min_container_size) @@ -850,10 +864,16 @@ class HDP25StackAdvisor(HDP24StackAdvisor): else: self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG Calculated normalized_tez_am_container_size : {0}, using following : tez_am_container_size : {1}, " + "total_cluster_capacity : {2}".format(normalized_tez_am_container_size, tez_am_container_size, + total_cluster_capacity)) # Calculate the available memory for LLAP app yarn_nm_mem_in_mb_normalized = self._normalizeDown(yarn_nm_mem_in_mb, yarn_min_container_size) mem_per_thread_for_llap = self.calculate_mem_per_thread_for_llap(services, yarn_nm_mem_in_mb_normalized, cpu_per_nm_host) + Logger.info("DBG: Calculated mem_per_thread_for_llap : {0}, using following: yarn_nm_mem_in_mb_normalized : {1}, " + "cpu_per_nm_host : {2}".format(mem_per_thread_for_llap, yarn_nm_mem_in_mb_normalized, cpu_per_nm_host)) + if mem_per_thread_for_llap is None: self.recommendDefaultLlapConfiguration(configurations, services, hosts) @@ -861,6 +881,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): mem_per_thread_for_llap = float(mem_per_thread_for_llap) + Logger.info("DBG: selected_queue_is_ambari_managed_llap = {0}".format(selected_queue_is_ambari_managed_llap)) if not selected_queue_is_ambari_managed_llap: llap_daemon_selected_queue_cap = self.__getSelectedQueueTotalCap(capacity_scheduler_properties, llap_daemon_selected_queue_name, total_cluster_capacity) @@ -871,16 +892,29 @@ class HDP25StackAdvisor(HDP24StackAdvisor): return total_llap_mem_normalized = self._normalizeDown(llap_daemon_selected_queue_cap, yarn_min_container_size) - num_llap_nodes_requested = math.floor(total_llap_mem_normalized / yarn_nm_mem_in_mb_normalized) + Logger.info("DBG: Calculated '{0}' queue available capacity : {1}, using following: llap_daemon_selected_queue_cap : {2}, " + "yarn_min_container_size : {3}".format(llap_daemon_selected_queue_name, total_llap_mem_normalized, + llap_daemon_selected_queue_cap, yarn_min_container_size)) + '''Rounding up numNodes so that we run more daemons, and utilitze more CPUs. The rest of the calcaulkations will take care of cutting this down if required''' + num_llap_nodes_requested = math.ceil(total_llap_mem_normalized / yarn_nm_mem_in_mb_normalized) + Logger.info("DBG: Calculated 'num_llap_nodes_requested' : {0}, using following: total_llap_mem_normalized : {1}, " + "yarn_nm_mem_in_mb_normalized : {2}".format(num_llap_nodes_requested, total_llap_mem_normalized, yarn_nm_mem_in_mb_normalized)) queue_am_fraction_perc = float(self.__getQueueAmFractionFromCapacityScheduler(capacity_scheduler_properties, llap_daemon_selected_queue_name)) hive_tez_am_cap_available = queue_am_fraction_perc * total_llap_mem_normalized + Logger.info("DBG: Calculated 'hive_tez_am_cap_available' : {0}, using following: queue_am_fraction_perc : {1}, " + "total_llap_mem_normalized : {2}".format(hive_tez_am_cap_available, queue_am_fraction_perc, total_llap_mem_normalized)) else: # Ambari managed 'llap' named queue at root level. num_llap_nodes_requested = self.get_num_llap_nodes(services, configurations) #Input total_llap_mem = num_llap_nodes_requested * yarn_nm_mem_in_mb_normalized + Logger.info("DBG: Calculated 'total_llap_mem' : {0}, using following: num_llap_nodes_requested : {1}, " + "yarn_nm_mem_in_mb_normalized : {2}".format(total_llap_mem, num_llap_nodes_requested, yarn_nm_mem_in_mb_normalized)) total_llap_mem_normalized = float(self._normalizeDown(total_llap_mem, yarn_min_container_size)) + Logger.info("DBG: Calculated 'total_llap_mem_normalized' : {0}, using following: total_llap_mem : {1}, " + "yarn_min_container_size : {2}".format(total_llap_mem_normalized, total_llap_mem, yarn_min_container_size)) # What percent is 'total_llap_mem' of 'total_cluster_capacity' ? llap_named_queue_cap_fraction = math.ceil(total_llap_mem_normalized / total_cluster_capacity * 100) + Logger.info("DBG: Calculated '{0}' queue capacity percent = {1}.".format(llap_queue_name, llap_named_queue_cap_fraction)) if llap_named_queue_cap_fraction > 100: Logger.warning("Calculated '{0}' queue size = {1}. Cannot be > 100.".format(llap_queue_name, llap_named_queue_cap_fraction)) @@ -890,14 +924,19 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Adjust capacity scheduler for the 'llap' named queue. self.checkAndManageLlapQueue(services, configurations, hosts, llap_queue_name, llap_named_queue_cap_fraction) hive_tez_am_cap_available = total_llap_mem_normalized + Logger.info("DBG: hive_tez_am_cap_available : {0}".format(hive_tez_am_cap_available)) # Common calculations now, irrespective of the queue selected. # Get calculated value for Slider AM container Size slider_am_container_size = self._normalizeUp(self.calculate_slider_am_size(yarn_min_container_size), yarn_min_container_size) + Logger.info("DBG: Calculated 'slider_am_container_size' : {0}, using following: yarn_min_container_size : " + "{1}".format(slider_am_container_size, yarn_min_container_size)) llap_mem_for_tezAm_and_daemons = total_llap_mem_normalized - slider_am_container_size + Logger.info("DBG: Calculated 'llap_mem_for_tezAm_and_daemons' : {0}, using following : total_llap_mem_normalized : {1}, " + "slider_am_container_size : {2}".format(llap_mem_for_tezAm_and_daemons, total_llap_mem_normalized, slider_am_container_size)) if llap_mem_for_tezAm_and_daemons < 2 * yarn_min_container_size: Logger.warning("Not enough capacity available on the cluster to run LLAP") @@ -914,12 +953,21 @@ class HDP25StackAdvisor(HDP24StackAdvisor): self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Calculated 'max_executors_per_node' : {0}, using following: yarn_nm_mem_in_mb_normalized : {1}, cpu_per_nm_host : {2}, " + "mem_per_thread_for_llap: {3}".format(max_executors_per_node, yarn_nm_mem_in_mb_normalized, cpu_per_nm_host, mem_per_thread_for_llap)) + # Default 1 AM for every 20 executor threads. # The second part of the min calculates based on mem required for DEFAULT_EXECUTOR_TO_AM_RATIO executors + 1 AM, # making use of total memory. However, it's possible that total memory will not be used - and the numExecutors is # instead limited by #CPUs. Use maxPerNode to factor this in. llap_concurreny_limit = min(math.floor(max_executors_per_node * num_llap_nodes_requested / DEFAULT_EXECUTOR_TO_AM_RATIO), MAX_CONCURRENT_QUERIES) + Logger.info("DBG: Calculated 'llap_concurreny_limit' : {0}, using following : max_executors_per_node : {1}, num_llap_nodes_requested : {2}, DEFAULT_EXECUTOR_TO_AM_RATIO " + ": {3}, MAX_CONCURRENT_QUERIES : {4}".format(llap_concurreny_limit, max_executors_per_node, num_llap_nodes_requested, DEFAULT_EXECUTOR_TO_AM_RATIO, MAX_CONCURRENT_QUERIES)) llap_concurrency = min(llap_concurreny_limit, math.floor(llap_mem_for_tezAm_and_daemons / (DEFAULT_EXECUTOR_TO_AM_RATIO * mem_per_thread_for_llap + normalized_tez_am_container_size))) + Logger.info("DBG: Calculated 'llap_concurrency' : {0}, using following : llap_concurreny_limit : {1}, llap_mem_for_tezAm_and_daemons : " + "{2}, DEFAULT_EXECUTOR_TO_AM_RATIO : {3}, mem_per_thread_for_llap : {4}, normalized_tez_am_container_size : " + "{5}".format(llap_concurrency, llap_concurreny_limit, llap_mem_for_tezAm_and_daemons, DEFAULT_EXECUTOR_TO_AM_RATIO, + mem_per_thread_for_llap, normalized_tez_am_container_size)) if llap_concurrency == 0: llap_concurrency = 1 @@ -930,6 +978,8 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Logger.warning("Calculated 'LLAP Concurrent Queries' = {0}. Expected value >= 1.".format(llap_concurrency)) self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Adjusted 'llap_concurrency' : {0}, using following: hive_tez_am_cap_available : {1}, normalized_tez_am_container_size: " + "{2}".format(llap_concurrency, hive_tez_am_cap_available, normalized_tez_am_container_size)) else: # Read current value if 'hive.server2.tez.sessions.per.default.queue' in hsi_site: @@ -938,6 +988,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Logger.warning("'hive.server2.tez.sessions.per.default.queue' current value : {0}. Expected value : >= 1".format(llap_concurrency)) self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Read 'llap_concurrency' : {0}".format(llap_concurrency )) else: llap_concurrency = 1 Logger.warning("Couldn't retrieve Hive Server interactive's 'hive.server2.tez.sessions.per.default.queue' config. Setting default value 1.") @@ -946,10 +997,19 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Calculate 'Max LLAP Consurrency', irrespective of whether 'llap_concurrency' was read or calculated. max_llap_concurreny_limit = min(math.floor(max_executors_per_node * num_llap_nodes_requested / MIN_EXECUTOR_TO_AM_RATIO), MAX_CONCURRENT_QUERIES) + Logger.info("DBG: Calculated 'max_llap_concurreny_limit' : {0}, using following : max_executors_per_node : {1}, num_llap_nodes_requested " + ": {2}, MIN_EXECUTOR_TO_AM_RATIO : {3}, MAX_CONCURRENT_QUERIES : {4}".format(max_llap_concurreny_limit, max_executors_per_node, + num_llap_nodes_requested, MIN_EXECUTOR_TO_AM_RATIO, + MAX_CONCURRENT_QUERIES)) max_llap_concurreny = min(max_llap_concurreny_limit, math.floor(llap_mem_for_tezAm_and_daemons / (MIN_EXECUTOR_TO_AM_RATIO * mem_per_thread_for_llap + normalized_tez_am_container_size))) + Logger.info("DBG: Calculated 'max_llap_concurreny' : {0}, using following : max_llap_concurreny_limit : {1}, llap_mem_for_tezAm_and_daemons : " + "{2}, MIN_EXECUTOR_TO_AM_RATIO : {3}, mem_per_thread_for_llap : {4}, normalized_tez_am_container_size : " + "{5}".format(max_llap_concurreny, max_llap_concurreny_limit, llap_mem_for_tezAm_and_daemons, MIN_EXECUTOR_TO_AM_RATIO, + mem_per_thread_for_llap, normalized_tez_am_container_size)) if max_llap_concurreny == 0: max_llap_concurreny = 1 + Logger.info("DBG: Adjusted 'max_llap_concurreny' : 1.") if (max_llap_concurreny * normalized_tez_am_container_size) > hive_tez_am_cap_available: max_llap_concurreny = math.floor(hive_tez_am_cap_available / normalized_tez_am_container_size) @@ -957,9 +1017,13 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Logger.warning("Calculated 'Max. LLAP Concurrent Queries' = {0}. Expected value > 1".format(max_llap_concurreny)) self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Adjusted 'max_llap_concurreny' : {0}, using following: hive_tez_am_cap_available : {1}, normalized_tez_am_container_size: " + "{2}".format(max_llap_concurreny, hive_tez_am_cap_available, normalized_tez_am_container_size)) # Calculate value for 'num_llap_nodes', an across cluster config. tez_am_memory_required = llap_concurrency * normalized_tez_am_container_size + Logger.info("DBG: Calculated 'tez_am_memory_required' : {0}, using following : llap_concurrency : {1}, normalized_tez_am_container_size : " + "{2}".format(tez_am_memory_required, llap_concurrency, normalized_tez_am_container_size)) llap_mem_daemon_size = llap_mem_for_tezAm_and_daemons - tez_am_memory_required if llap_mem_daemon_size < yarn_min_container_size: @@ -972,25 +1036,36 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Logger.warning("Not enough memory available for executors.") self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Calculated 'llap_mem_daemon_size' : {0}, using following : llap_mem_for_tezAm_and_daemons : {1}, tez_am_memory_required : " + "{2}".format(llap_mem_daemon_size, llap_mem_for_tezAm_and_daemons, tez_am_memory_required)) llap_daemon_mem_per_node = self._normalizeDown(llap_mem_daemon_size / num_llap_nodes_requested, yarn_min_container_size) + Logger.info("DBG: Calculated 'llap_daemon_mem_per_node' : {0}, using following : llap_mem_daemon_size : {1}, num_llap_nodes_requested : {2}, " + "yarn_min_container_size: {3}".format(llap_daemon_mem_per_node, llap_mem_daemon_size, num_llap_nodes_requested, yarn_min_container_size)) if llap_daemon_mem_per_node == 0: # Small cluster. No capacity left on a node after running AMs. llap_daemon_mem_per_node = mem_per_thread_for_llap num_llap_nodes = math.floor(llap_mem_daemon_size / mem_per_thread_for_llap) + Logger.info("DBG: 'llap_daemon_mem_per_node' : 0, adjusted 'llap_daemon_mem_per_node' : {0}, 'num_llap_nodes' : {1}, using following: llap_mem_daemon_size : {2}, " + "mem_per_thread_for_llap : {3}".format(llap_daemon_mem_per_node, num_llap_nodes, llap_mem_daemon_size, mem_per_thread_for_llap)) elif llap_daemon_mem_per_node < mem_per_thread_for_llap: # Previously computed value of memory per thread may be too high. Cut the number of nodes. (Alternately reduce memory per node) llap_daemon_mem_per_node = mem_per_thread_for_llap num_llap_nodes = math.floor(llap_mem_daemon_size / mem_per_thread_for_llap) + Logger.info("DBG: 'llap_daemon_mem_per_node'({0}) < mem_per_thread_for_llap({1}), adjusted 'llap_daemon_mem_per_node' " + ": {2}".format(llap_daemon_mem_per_node, mem_per_thread_for_llap, llap_daemon_mem_per_node)) else: # All good. We have a proper value for memoryPerNode. num_llap_nodes = num_llap_nodes_requested + Logger.info("DBG: num_llap_nodes : {0}".format(num_llap_nodes)) num_executors_per_node_max = self.get_max_executors_per_node(yarn_nm_mem_in_mb_normalized, cpu_per_nm_host, mem_per_thread_for_llap) if num_executors_per_node_max < 1: Logger.warning("Calculated 'Max. Executors per Node' = {0}. Expected values >= 1.".format(num_executors_per_node_max)) self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Calculated 'num_executors_per_node_max' : {0}, using following : yarn_nm_mem_in_mb_normalized : {1}, cpu_per_nm_host : {2}, " + "mem_per_thread_for_llap: {3}".format(num_executors_per_node_max, yarn_nm_mem_in_mb_normalized, cpu_per_nm_host, mem_per_thread_for_llap)) # NumExecutorsPerNode is not necessarily max - since some capacity would have been reserved for AMs, if this value were based on mem. num_executors_per_node = min(math.floor(llap_daemon_mem_per_node / mem_per_thread_for_llap), num_executors_per_node_max) @@ -998,6 +1073,8 @@ class HDP25StackAdvisor(HDP24StackAdvisor): Logger.warning("Calculated 'Number of Executors Per Node' = {0}. Expected value >= 1".format(num_executors_per_node)) self.recommendDefaultLlapConfiguration(configurations, services, hosts) return + Logger.info("DBG: Calculated 'num_executors_per_node' : {0}, using following : llap_daemon_mem_per_node : {1}, num_executors_per_node_max : {2}, " + "mem_per_thread_for_llap: {3}".format(num_executors_per_node, llap_daemon_mem_per_node, num_executors_per_node_max, mem_per_thread_for_llap)) # Now figure out how much of the memory will be used by the executors, and how much will be used by the cache. total_mem_for_executors_per_node = num_executors_per_node * mem_per_thread_for_llap @@ -1010,6 +1087,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Calculate value for prop 'llap_heap_size' llap_xmx = max(total_mem_for_executors_per_node * 0.8, total_mem_for_executors_per_node - self.get_llap_headroom_space(services, configurations)) + Logger.info("DBG: Calculated llap_app_heap_size : {0}, using following : total_mem_for_executors : {1}".format(llap_xmx, total_mem_for_executors_per_node)) # Calculate 'hive_heapsize' for Hive2/HiveServer2 (HSI) hive_server_interactive_heapsize = None @@ -1020,8 +1098,11 @@ class HDP25StackAdvisor(HDP24StackAdvisor): if hive_server_interactive_hosts is not None and len(hive_server_interactive_hosts) > 0: host_mem = long(hive_server_interactive_hosts[0]["Hosts"]["total_mem"]) hive_server_interactive_heapsize = min(max(2048.0, 400.0*llap_concurrency), 3.0/8 * host_mem) + Logger.info("DBG: Calculated 'hive_server_interactive_heapsize' : {0}, using following : llap_concurrency : {1}, host_mem : " + "{2}".format(hive_server_interactive_heapsize, llap_concurrency, host_mem)) # Done with calculations, updating calculated configs. + Logger.info("DBG: Applying the calculated values....") normalized_tez_am_container_size = long(normalized_tez_am_container_size) putTezInteractiveSiteProperty('tez.am.resource.memory.mb', normalized_tez_am_container_size) @@ -1037,6 +1118,11 @@ class HDP25StackAdvisor(HDP24StackAdvisor): num_llap_nodes = long(num_llap_nodes) putHiveInteractiveEnvPropertyAttribute('num_llap_nodes', "minimum", 1) putHiveInteractiveEnvPropertyAttribute('num_llap_nodes', "maximum", node_manager_cnt) + #TODO A single value is not being set for numNodes in case of a custom queue. Also the attribute is set to non-visible, so the UI likely ends up using an old cached value + if (num_llap_nodes != num_llap_nodes_requested): + Logger.info("User requested num_llap_nodes : {0}, but used/adjusted value for calculations is : {1}".format(num_llap_nodes_requested, num_llap_nodes)) + else: + Logger.info("Used num_llap_nodes for calculations : {0}".format(num_llap_nodes_requested)) llap_container_size = long(llap_daemon_mem_per_node) putHiveInteractiveSiteProperty('hive.llap.daemon.yarn.container.mb', llap_container_size) @@ -1075,8 +1161,11 @@ class HDP25StackAdvisor(HDP24StackAdvisor): putHiveInteractiveEnvProperty('llap_heap_size', long(llap_xmx)) putHiveInteractiveEnvProperty('slider_am_container_mb', long(slider_am_container_size)) + Logger.info("DBG: Done putting all configs") + #TODO: What is this doing? What error will be displayed on the UI if something like this is hit? def recommendDefaultLlapConfiguration(self, configurations, services, hosts): + Logger.info("DBG: Something likely went wrong. recommendDefaultLlapConfiguration") putHiveInteractiveSiteProperty = self.putProperty(configurations, self.HIVE_INTERACTIVE_SITE, services) putHiveInteractiveSitePropertyAttribute = self.putPropertyAttribute(configurations, self.HIVE_INTERACTIVE_SITE) @@ -1181,8 +1270,10 @@ class HDP25StackAdvisor(HDP24StackAdvisor): else: calculated_hive_tez_container_size = 4096 + Logger.info("DBG: Calculated and returning 'hive_tez_container_size' : {0}".format(calculated_hive_tez_container_size)) return calculated_hive_tez_container_size else: + Logger.info("DBG: Returning 'hive_tez_container_size' : {0}".format(hive_tez_container_size)) return hive_tez_container_size def get_hive_tez_container_size(self, services): @@ -1217,6 +1308,7 @@ class HDP25StackAdvisor(HDP24StackAdvisor): return llap_headroom_space + #TODO Convert this to a helper. It can apply to any property. Check config, or check if in the list of changed configurations and read the latest value def get_yarn_min_container_size(self, services, configurations): """ Gets YARN's minimum container size (yarn.scheduler.minimum-allocation-mb). @@ -1241,10 +1333,12 @@ class HDP25StackAdvisor(HDP24StackAdvisor): # Check if services["changed-configurations"] is empty and 'yarn.scheduler.minimum-allocation-mb' is modified in current ST invocation. if not services["changed-configurations"] and yarn_site and yarn_min_allocation_property in yarn_site: yarn_min_container_size = yarn_site[yarn_min_allocation_property] + Logger.info("DBG: 'yarn.scheduler.minimum-allocation-mb' read from configurations as : {0}".format(yarn_min_container_size)) # Check if 'yarn.scheduler.minimum-allocation-mb' is input in services array. elif yarn_site_properties and yarn_min_allocation_property in yarn_site_properties: yarn_min_container_size = yarn_site_properties[yarn_min_allocation_property] + Logger.info("DBG: 'yarn.scheduler.minimum-allocation-mb' read from services as : {0}".format(yarn_min_container_size)) if not yarn_min_container_size: Logger.error("{0} was not found in the configuration".format(yarn_min_allocation_property)) @@ -1308,8 +1402,10 @@ class HDP25StackAdvisor(HDP24StackAdvisor): elif total_cluster_capacity > 73728: calculated_tez_am_resource_memory_mb = 1536 + Logger.info("DBG: Calculated and returning 'tez_am_resource_memory_mb' as : {0}".format(calculated_tez_am_resource_memory_mb)) return float(calculated_tez_am_resource_memory_mb) else: + Logger.info("DBG: Returning 'tez_am_resource_memory_mb' as : {0}".format(tez_am_resource_memory_mb)) return float(tez_am_resource_memory_mb) def get_tez_am_resource_memory_mb(self, services): @@ -1572,6 +1668,7 @@ yarn.scheduler.capacity.root.{0}.maximum-am-resource-percent=1""".format(llap_qu account that 'capacity-scheduler' may have changed (got updated) in current Stack Advisor invocation. """ Logger.info("Determining 'hive.llap.daemon.queue.name' config Property Attributes.") + #TODO Determine if this is doing the right thing if some queue is setup with capacity=0, or is STOPPED. Maybe don't list it. putHiveInteractiveSitePropertyAttribute = self.putPropertyAttribute(configurations, self.HIVE_INTERACTIVE_SITE) capacity_scheduler_properties = dict() @@ -1636,7 +1733,8 @@ yarn.scheduler.capacity.root.{0}.maximum-am-resource-percent=1""".format(llap_qu current_selected_queue_for_llap_cap = None for key in cap_sched_keys: # Expected capacity prop key is of form : 'yarn.scheduler.capacity.<one or more queues in path separated by '.'>.[llap_daemon_selected_queue_name].capacity' - if key.endswith(llap_daemon_selected_queue_name+".capacity"): + if key.endswith(llap_daemon_selected_queue_name+".capacity") and key.startswith("yarn.scheduler.capacity.root"): + Logger.info("DBG: Selected queue name as: " + key) llap_selected_queue_cap_key = key break; return llap_selected_queue_cap_key @@ -1683,7 +1781,7 @@ yarn.scheduler.capacity.root.{0}.maximum-am-resource-percent=1""".format(llap_qu """ Calculates the total available capacity for the passed-in YARN queue of any level based on the percentages. """ - Logger.info("Entered __getSelectedQueueTotalCap fn().") + Logger.info("Entered __getSelectedQueueTotalCap fn() with llap_daemon_selected_queue_name= '{0}'.".format(llap_daemon_selected_queue_name)) available_capacity = total_cluster_capacity queue_cap_key = self.__getQueueCapacityKeyFromCapacityScheduler(capacity_scheduler_properties, llap_daemon_selected_queue_name) if queue_cap_key: http://git-wip-us.apache.org/repos/asf/ambari/blob/31a14607/ambari-server/src/main/resources/stacks/HDP/2.6/services/HIVE/configuration/hive-interactive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.6/services/HIVE/configuration/hive-interactive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.6/services/HIVE/configuration/hive-interactive-site.xml index 1c6bc3f..101de44 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.6/services/HIVE/configuration/hive-interactive-site.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.6/services/HIVE/configuration/hive-interactive-site.xml @@ -42,7 +42,10 @@ limitations under the License. <property> <name>hive.mapjoin.hybridgrace.hashtable</name> <value>true</value> - <description>Whether to use hybrid grace hash join as the join method for mapjoin. Tez only.</description> + <description>Whether to use hybrid grace hash join as the join method for mapjoin. + Applies to dynamically partitioned joins when running in LLAP, but not to regular + broadcast(map) joins. hive.llap.enable.grace.join.in.llap is used for this. + </description> <on-ambari-upgrade add="true"/> </property>
