http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/metainfo.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/metainfo.xml b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/metainfo.xml
new file mode 100644
index 0000000..48352e8
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/metainfo.xml
@@ -0,0 +1,383 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements. See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<metainfo>
+  <schemaVersion>2.0</schemaVersion>
+  <services>
+    <service>
+      <name>YARN</name>
+      <displayName>YARN</displayName>
+      <comment>Apache Hadoop NextGen MapReduce (YARN)</comment>
+      <version>3.0.0.3.0</version>
+      <components>
+
+        <component>
+          <name>APP_TIMELINE_SERVER</name>
+          <displayName>App Timeline Server</displayName>
+          <category>MASTER</category>
+          <cardinality>1</cardinality>
+          <versionAdvertised>true</versionAdvertised>
+          <reassignAllowed>true</reassignAllowed>
+
+          <commandScript>
+            <script>scripts/application_timeline_server.py</script>
+            <scriptType>PYTHON</scriptType>
+            <timeout>1200</timeout>
+          </commandScript>
+
+          <dependencies>
+            <dependency>
+              <name>TEZ/TEZ_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+              </auto-deploy>
+            </dependency>
+            <dependency>
+              <name>SPARK/SPARK_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+              </auto-deploy>
+            </dependency>
+          </dependencies>
+        </component>
+
+        <component>
+          <name>RESOURCEMANAGER</name>
+          <displayName>ResourceManager</displayName>
+          <category>MASTER</category>
+          <cardinality>1-2</cardinality>
+          <versionAdvertised>true</versionAdvertised>
+          <reassignAllowed>true</reassignAllowed>
+          <commandScript>
+            <script>scripts/resourcemanager.py</script>
+            <scriptType>PYTHON</scriptType>
+            <timeout>1200</timeout>
+          </commandScript>
+          <dependencies>
+            <dependency>
+              <name>TEZ/TEZ_CLIENT</name>
+              <scope>host</scope>
+              <auto-deploy>
+                <enabled>true</enabled>
+              </auto-deploy>
+            </dependency>
+          </dependencies>
+          <logs>
+            <log>
+              <logId>yarn_resourcemanager</logId>
+              <primary>true</primary>
+            </log>
+            <log>
+              <logId>yarn_historyserver</logId>
+            </log>
+            <log>
+              <logId>yarn_jobsummary</logId>
+            </log>
+          </logs>
+          <customCommands>
+            <customCommand>
+              <name>DECOMMISSION</name>
+              <commandScript>
+                <script>scripts/resourcemanager.py</script>
+                <scriptType>PYTHON</scriptType>
+                <timeout>600</timeout>
+              </commandScript>
+            </customCommand>
+            <customCommand>
+              <name>REFRESHQUEUES</name>
+              <commandScript>
+                <script>scripts/resourcemanager.py</script>
+                <scriptType>PYTHON</scriptType>
+                <timeout>600</timeout>
+              </commandScript>
+            </customCommand>
+          </customCommands>
+          
<configuration-dependencies> + <config-type>capacity-scheduler</config-type> + <config-type>hdfs-site</config-type> + </configuration-dependencies> + </component> + + <component> + <name>NODEMANAGER</name> + <displayName>NodeManager</displayName> + <category>SLAVE</category> + <cardinality>1+</cardinality> + <versionAdvertised>true</versionAdvertised> + <decommissionAllowed>true</decommissionAllowed> + <commandScript> + <script>scripts/nodemanager.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <bulkCommands> + <displayName>NodeManagers</displayName> + <!-- Used by decommission and recommission --> + <masterComponent>RESOURCEMANAGER</masterComponent> + </bulkCommands> + <logs> + <log> + <logId>yarn_nodemanager</logId> + </log> + </logs> + </component> + + <component> + <name>YARN_CLIENT</name> + <displayName>YARN Client</displayName> + <category>CLIENT</category> + <cardinality>1+</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/yarn_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>yarn-site.xml</fileName> + <dictionaryName>yarn-site</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>yarn-env.sh</fileName> + <dictionaryName>yarn-env</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>log4j.properties</fileName> + <dictionaryName>hdfs-log4j,yarn-log4j</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>capacity-scheduler.xml</fileName> + <dictionaryName>capacity-scheduler</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-yarn</name> + </package> + <package> + <name>hadoop-hdfs</name> + </package> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <commandScript> + <script>scripts/service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>HDFS</service> + <service>MAPREDUCE2</service> + </requiredServices> + + <themes> + <theme> + <fileName>theme.json</fileName> + <default>true</default> + </theme> + </themes> + + <quickLinksConfigurations> + <quickLinksConfiguration> + <fileName>quicklinks.json</fileName> + <default>true</default> + </quickLinksConfiguration> + </quickLinksConfigurations> + + <configuration-dependencies> + <config-type>yarn-site</config-type> + <config-type>yarn-env</config-type> + <config-type>hdfs-site</config-type> + <config-type>hadoop-env</config-type> + <config-type>core-site</config-type> + <config-type>mapred-site</config-type> + <config-type>yarn-log4j</config-type> + <config-type>ams-ssl-client</config-type> + <config-type>ranger-yarn-plugin-properties</config-type> + <config-type>ranger-yarn-audit</config-type> + <config-type>ranger-yarn-policymgr-ssl</config-type> + <config-type>ranger-yarn-security</config-type> + </configuration-dependencies> + + <widgetsFileName>YARN_widgets.json</widgetsFileName> + <metricsFileName>YARN_metrics.json</metricsFileName> + </service> + + <service> + <name>MAPREDUCE2</name> + <displayName>MapReduce2</displayName> + <comment>Apache Hadoop NextGen MapReduce 
(YARN)</comment> + <version>2.1.0.2.0.6.0</version> + + <components> + <component> + <name>HISTORYSERVER</name> + <displayName>History Server</displayName> + <category>MASTER</category> + <cardinality>1</cardinality> + <versionAdvertised>true</versionAdvertised> + <reassignAllowed>true</reassignAllowed> + <auto-deploy> + <enabled>true</enabled> + <co-locate>YARN/RESOURCEMANAGER</co-locate> + </auto-deploy> + <dependencies> + <dependency> + <name>HDFS/HDFS_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>TEZ/TEZ_CLIENT</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + <dependency> + <name>SLIDER/SLIDER</name> + <scope>host</scope> + <auto-deploy> + <enabled>true</enabled> + </auto-deploy> + </dependency> + </dependencies> + <commandScript> + <script>scripts/historyserver.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <logs> + <log> + <logId>mapred_historyserver</logId> + <primary>true</primary> + </log> + </logs> + </component> + + <component> + <name>MAPREDUCE2_CLIENT</name> + <displayName>MapReduce2 Client</displayName> + <category>CLIENT</category> + <cardinality>0+</cardinality> + <versionAdvertised>true</versionAdvertised> + <commandScript> + <script>scripts/mapreduce2_client.py</script> + <scriptType>PYTHON</scriptType> + <timeout>1200</timeout> + </commandScript> + <configFiles> + <configFile> + <type>xml</type> + <fileName>mapred-site.xml</fileName> + <dictionaryName>mapred-site</dictionaryName> + </configFile> + <configFile> + <type>xml</type> + <fileName>core-site.xml</fileName> + <dictionaryName>core-site</dictionaryName> + </configFile> + <configFile> + <type>env</type> + <fileName>mapred-env.sh</fileName> + <dictionaryName>mapred-env</dictionaryName> + </configFile> + </configFiles> + </component> + </components> + + <osSpecifics> + <osSpecific> + <osFamily>any</osFamily> + <packages> + <package> + <name>hadoop-mapreduce</name> + </package> + </packages> + </osSpecific> + </osSpecifics> + + <commandScript> + <script>scripts/mapred_service_check.py</script> + <scriptType>PYTHON</scriptType> + <timeout>300</timeout> + </commandScript> + + <requiredServices> + <service>YARN</service> + </requiredServices> + + <themes-dir>themes-mapred</themes-dir> + <themes> + <theme> + <fileName>theme.json</fileName> + <default>true</default> + </theme> + </themes> + + <quickLinksConfigurations-dir>quicklinks-mapred</quickLinksConfigurations-dir> + <quickLinksConfigurations> + <quickLinksConfiguration> + <fileName>quicklinks.json</fileName> + <default>true</default> + </quickLinksConfiguration> + </quickLinksConfigurations> + + <configuration-dir>configuration-mapred</configuration-dir> + + <configuration-dependencies> + <config-type>hdfs-site</config-type> + <config-type>hadoop-env</config-type> + <config-type>core-site</config-type> + <config-type>mapred-site</config-type> + <config-type>mapred-env</config-type> + <config-type>ssl-client</config-type> + <config-type>ssl-server</config-type> + <config-type>ams-ssl-client</config-type> + </configuration-dependencies> + <restartRequiredAfterRackChange>true</restartRequiredAfterRackChange> + <widgetsFileName>MAPREDUCE2_widgets.json</widgetsFileName> + <metricsFileName>MAPREDUCE2_metrics.json</metricsFileName> + </service> + </services> +</metainfo>
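The Python scripts referenced by the metainfo above are added next. The two alert scripts that follow both implement Ambari's standard alert-script contract, which is visible in their own docstrings: get_tokens() lists the {{site/property}} tokens the alert needs, the agent resolves those tokens against live cluster configuration, and execute() returns a (result_code, [label]) tuple. A minimal sketch of that contract, for orientation only; the property key and messages here are illustrative, and the real implementations follow below.

  # Minimal sketch of an Ambari alert script (illustrative, not part of the patch).
  EXAMPLE_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}'

  def get_tokens():
    # tokens the agent resolves from live configuration before calling execute()
    return (EXAMPLE_KEY,)

  def execute(configurations={}, parameters={}, host_name=None):
    # must return a (result_code, [label]) tuple, e.g. ('OK', ['...'])
    if configurations is None or EXAMPLE_KEY not in configurations:
      return ('UNKNOWN', ['There were no configurations supplied to the script.'])
    return ('OK', ['NodeManager web UI address: {0}'.format(configurations[EXAMPLE_KEY])])
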
http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanager_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanager_health.py new file mode 100644 index 0000000..d7159e4 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanager_health.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. +import socket +import urllib2 +import logging +import traceback +from ambari_commons import OSCheck +from ambari_commons.inet_utils import resolve_address +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER +from resource_management.core.environment import Environment + +RESULT_CODE_OK = 'OK' +RESULT_CODE_CRITICAL = 'CRITICAL' +RESULT_CODE_UNKNOWN = 'UNKNOWN' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +OK_MESSAGE = 'NodeManager Healthy' +CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0} ({1})' +CRITICAL_HTTP_STATUS_MESSAGE = 'HTTP {0} returned from {1} ({2}) \n{3}' +CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected status of "{0}"' +CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager health from unexpected JSON response' + +KERBEROS_KEYTAB = '{{yarn-site/yarn.nodemanager.webapp.spnego-keytab-file}}' +KERBEROS_PRINCIPAL = '{{yarn-site/yarn.nodemanager.webapp.spnego-principal}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +EXECUTABLE_SEARCH_PATHS = '{{kerberos-env/executable_search_paths}}' + +NODEMANAGER_DEFAULT_PORT = 8042 + +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 + +LOGGER_EXCEPTION_MESSAGE = "[Alert] NodeManager Health on {0} fails:" +logger = logging.getLogger('ambari_alerts') + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return 
(NODEMANAGER_HTTP_ADDRESS_KEY, NODEMANAGER_HTTPS_ADDRESS_KEY, EXECUTABLE_SEARCH_PATHS,
+  YARN_HTTP_POLICY_KEY, SMOKEUSER_KEY, KERBEROS_KEYTAB, KERBEROS_PRINCIPAL, SECURITY_ENABLED_KEY)
+
+
+def execute(configurations={}, parameters={}, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  result_code = RESULT_CODE_UNKNOWN
+
+  if configurations is None:
+    return (result_code, ['There were no configurations supplied to the script.'])
+
+  if host_name is None:
+    host_name = socket.getfqdn()
+
+  scheme = 'http'
+  http_uri = None
+  https_uri = None
+  http_policy = 'HTTP_ONLY'
+
+  if SMOKEUSER_KEY in configurations:
+    smokeuser = configurations[SMOKEUSER_KEY]
+
+  executable_paths = None
+  if EXECUTABLE_SEARCH_PATHS in configurations:
+    executable_paths = configurations[EXECUTABLE_SEARCH_PATHS]
+
+  security_enabled = False
+  if SECURITY_ENABLED_KEY in configurations:
+    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+
+  kerberos_keytab = None
+  if KERBEROS_KEYTAB in configurations:
+    kerberos_keytab = configurations[KERBEROS_KEYTAB]
+
+  kerberos_principal = None
+  if KERBEROS_PRINCIPAL in configurations:
+    kerberos_principal = configurations[KERBEROS_PRINCIPAL]
+    kerberos_principal = kerberos_principal.replace('_HOST', host_name)
+
+  if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+    http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
+
+  if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+    https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+  if YARN_HTTP_POLICY_KEY in configurations:
+    http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+
+  # parse script arguments
+  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+  if CONNECTION_TIMEOUT_KEY in parameters:
+    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+
+  # determine the right URI and whether to use SSL
+  host_port = http_uri
+  if http_policy == 'HTTPS_ONLY':
+    scheme = 'https'
+
+    if https_uri is not None:
+      host_port = https_uri
+
+  label = ''
+  url_response = None
+  node_healthy = 'false'
+  total_time = 0
+
+  # replace the configured hostname with this host's FQDN so the check works in all environments
+  if host_port is not None:
+    if ":" in host_port:
+      uri_host, uri_port = host_port.split(':')
+      host_port = '{0}:{1}'.format(host_name, uri_port)
+    else:
+      host_port = host_name
+
+  # some yarn-site structures don't have the web ui address
+  if host_port is None:
+    host_port = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
+
+  query = "{0}://{1}/ws/v1/node/info".format(scheme, host_port)
+
+  try:
+    if kerberos_principal is not None and kerberos_keytab is not None and security_enabled:
+      env = Environment.get_instance()
+
+      # curl requires an integer timeout
+      curl_connection_timeout = int(connection_timeout)
+
+      kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS)
+
+      url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal,
+        query, "nm_health_alert", executable_paths, False, "NodeManager Health", smokeuser,
+        connection_timeout=curl_connection_timeout, kinit_timer_ms = kinit_timer_ms)
+
+      json_response = json.loads(url_response)
+    else:
+      # execute the query for the JSON that includes the NodeManager status
+      url_response = 
urllib2.urlopen(query, timeout=connection_timeout) + json_response = json.loads(url_response.read()) + except urllib2.HTTPError, httpError: + label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query, + str(httpError), traceback.format_exc()) + + return (RESULT_CODE_CRITICAL, [label]) + except: + label = CRITICAL_CONNECTION_MESSAGE.format(query, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, [label]) + + # URL response received, parse it + try: + node_healthy = json_response['nodeInfo']['nodeHealthy'] + node_healthy_report = json_response['nodeInfo']['healthReport'] + + # convert boolean to string + node_healthy = str(node_healthy) + except: + return (RESULT_CODE_CRITICAL, [query + "\n" + traceback.format_exc()]) + finally: + if url_response is not None: + try: + url_response.close() + except: + pass + + # proper JSON received, compare against known value + if node_healthy.lower() == 'true': + result_code = RESULT_CODE_OK + label = OK_MESSAGE + elif node_healthy.lower() == 'false': + result_code = RESULT_CODE_CRITICAL + label = node_healthy_report + else: + result_code = RESULT_CODE_CRITICAL + label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy) + + return (result_code, [label]) http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanagers_summary.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanagers_summary.py new file mode 100644 index 0000000..adf27ec --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/alerts/alert_nodemanagers_summary.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import urllib2 +import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set. +import logging +import traceback + +from ambari_commons.urllib_handlers import RefreshHeaderProcessor +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER +from resource_management.core.environment import Environment + +ERROR_LABEL = '{0} NodeManager{1} {2} unhealthy.' 
+OK_LABEL = 'All NodeManagers are healthy' + +NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}}' +NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}' +YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}' + +KERBEROS_KEYTAB = '{{yarn-site/yarn.nodemanager.webapp.spnego-keytab-file}}' +KERBEROS_PRINCIPAL = '{{yarn-site/yarn.nodemanager.webapp.spnego-principal}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +EXECUTABLE_SEARCH_PATHS = '{{kerberos-env/executable_search_paths}}' + +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 + +LOGGER_EXCEPTION_MESSAGE = "[Alert] NodeManager Health Summary on {0} fails:" +logger = logging.getLogger('ambari_alerts') + +QRY = "Hadoop:service=ResourceManager,name=RMNMInfo" + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return NODEMANAGER_HTTP_ADDRESS_KEY, NODEMANAGER_HTTPS_ADDRESS_KEY, EXECUTABLE_SEARCH_PATHS, \ + YARN_HTTP_POLICY_KEY, SMOKEUSER_KEY, KERBEROS_KEYTAB, KERBEROS_PRINCIPAL, SECURITY_ENABLED_KEY + + +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if configurations is None: + return (('UNKNOWN', ['There were no configurations supplied to the script.'])) + + scheme = 'http' + http_uri = None + https_uri = None + http_policy = 'HTTP_ONLY' + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + executable_paths = None + if EXECUTABLE_SEARCH_PATHS in configurations: + executable_paths = configurations[EXECUTABLE_SEARCH_PATHS] + + kerberos_keytab = None + if KERBEROS_KEYTAB in configurations: + kerberos_keytab = configurations[KERBEROS_KEYTAB] + + kerberos_principal = None + if KERBEROS_PRINCIPAL in configurations: + kerberos_principal = configurations[KERBEROS_PRINCIPAL] + kerberos_principal = kerberos_principal.replace('_HOST', host_name) + + if NODEMANAGER_HTTP_ADDRESS_KEY in configurations: + http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY] + + if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations: + https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in configurations: + http_policy = configurations[YARN_HTTP_POLICY_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + + # determine the right URI and whether to use SSL + uri = http_uri + if http_policy == 'HTTPS_ONLY': + scheme = 'https' + + if https_uri is not None: + uri = https_uri + + uri = str(host_name) + ":" + uri.split(":")[1] + live_nodemanagers_qry = "{0}://{1}/jmx?qry={2}".format(scheme, uri, QRY) + convert_to_json_failed = False + response_code = None + try: + if kerberos_principal is not None and kerberos_keytab is not 
None and security_enabled: + env = Environment.get_instance() + + # curl requires an integer timeout + curl_connection_timeout = int(connection_timeout) + + url_response, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, + live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, False, + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + + try: + url_response_json = json.loads(url_response) + live_nodemanagers = json.loads(find_value_in_jmx(url_response_json, "LiveNodeManagers", live_nodemanagers_qry)) + except ValueError, error: + convert_to_json_failed = True + logger.exception("[Alert][{0}] Convert response to json failed or json doesn't contain needed data: {1}". + format("NodeManager Health Summary", str(error))) + + if convert_to_json_failed: + response_code, error_msg, time_millis = curl_krb_request(env.tmp_dir, kerberos_keytab, kerberos_principal, + live_nodemanagers_qry, "nm_health_summary_alert", executable_paths, True, + "NodeManager Health Summary", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + else: + live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, + "LiveNodeManagers", connection_timeout)) + + if kerberos_principal is not None and kerberos_keytab is not None and security_enabled: + if response_code in [200, 307] and convert_to_json_failed: + return ('UNKNOWN', ['HTTP {0} response (metrics unavailable)'.format(str(response_code))]) + elif convert_to_json_failed and response_code not in [200, 307]: + raise Exception("[Alert][NodeManager Health Summary] Getting data from {0} failed with http code {1}".format( + str(live_nodemanagers_qry), str(response_code))) + + unhealthy_count = 0 + + for nodemanager in live_nodemanagers: + health_report = nodemanager['State'] + if health_report == 'UNHEALTHY': + unhealthy_count += 1 + + if unhealthy_count == 0: + result_code = 'OK' + label = OK_LABEL + else: + result_code = 'CRITICAL' + if unhealthy_count == 1: + label = ERROR_LABEL.format(unhealthy_count, '', 'is') + else: + label = ERROR_LABEL.format(unhealthy_count, 's', 'are') + + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return (result_code, [label]) + + +def get_value_from_jmx(query, jmx_property, connection_timeout): + response = None + + try: + # use a customer header process that will look for the non-standard + # "Refresh" header and attempt to follow the redirect + url_opener = urllib2.build_opener(RefreshHeaderProcessor()) + response = url_opener.open(query, timeout=connection_timeout) + + data = response.read() + data_dict = json.loads(data) + return find_value_in_jmx(data_dict, jmx_property, query) + finally: + if response is not None: + try: + response.close() + except: + pass + + +def find_value_in_jmx(data_dict, jmx_property, query): + json_data = data_dict["beans"][0] + + if jmx_property not in json_data: + beans = data_dict['beans'] + for jmx_prop_list_item in beans: + if "name" in jmx_prop_list_item and jmx_prop_list_item["name"] == QRY: + if jmx_property not in jmx_prop_list_item: + raise Exception("Unable to find {0} in JSON from {1} ".format(jmx_property, query)) + json_data = jmx_prop_list_item + + return json_data[jmx_property] \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/files/validateYarnComponentStatusWindows.py 
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/files/validateYarnComponentStatusWindows.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/files/validateYarnComponentStatusWindows.py
new file mode 100644
index 0000000..5e2b4d9
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/files/validateYarnComponentStatusWindows.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import optparse
+import subprocess
+import ambari_simplejson as json # simplejson is much faster comparing to Python 2.6 json module and has the same functions set.
+import urllib2
+
+RESOURCEMANAGER = 'rm'
+NODEMANAGER = 'nm'
+HISTORYSERVER = 'hs'
+
+STARTED_STATE = 'STARTED'
+RUNNING_STATE = 'RUNNING'
+
+#Return response for given path and address
+def getResponse(path, address, ssl_enabled):
+  if ssl_enabled:
+    url = 'https://' + address + path
+  else:
+    url = 'http://' + address + path
+
+  try:
+    handle = urllib2.urlopen(url)
+    output = handle.read()
+    handle.close()
+    response = json.loads(output)
+    if response is None:
+      print 'There is no response for url: ' + str(url)
+      exit(1)
+    return response
+  except Exception as e:
+    print 'Error getting response for url:' + str(url), e
+    exit(1)
+
+#Verify that REST api is available for given component
+def validateAvailability(component, path, address, ssl_enabled):
+
+  try:
+    response = getResponse(path, address, ssl_enabled)
+    is_valid = validateAvailabilityResponse(component, response)
+    if not is_valid:
+      exit(1)
+  except Exception as e:
+    print 'Error checking availability status of component', e
+    exit(1)
+
+#Validate component-specific response
+def validateAvailabilityResponse(component, response):
+  try:
+    if component == RESOURCEMANAGER:
+      rm_state = response['clusterInfo']['state']
+      if rm_state == STARTED_STATE:
+        return True
+      else:
+        print 'Resourcemanager is not started'
+        return False
+
+    elif component == NODEMANAGER:
+      node_healthy = bool(response['nodeInfo']['nodeHealthy'])
+      if node_healthy:
+        return True
+      else:
+        return False
+    elif component == HISTORYSERVER:
+      hs_start_time = response['historyInfo']['startedOn']
+      if hs_start_time > 0:
+        return True
+      else:
+        return False
+    else:
+      return False
+  except Exception as e:
+    print 'Error validation of availability response for ' + str(component), e
+    return False
+
+#Verify that component has required resources to work
+def validateAbility(component, path, address, ssl_enabled):
+
+  try:
+    response = getResponse(path, address, ssl_enabled)
+    is_valid = validateAbilityResponse(component, response)
+    if not is_valid:
+      exit(1)
+  except Exception as e:
+    print 'Error checking ability of component', e
+    exit(1)
+
+#Validate component-specific response that it has required resources to work
+def validateAbilityResponse(component, response):
+  try:
+    if component == RESOURCEMANAGER:
+      nodes = []
+      if response.has_key('nodes') and response['nodes'] is not None and response['nodes'].has_key('node'):
+        nodes = response['nodes']['node']
+        connected_nodes_count = len(nodes)
+        if connected_nodes_count == 0:
+          print 'There are no nodemanagers connected to resourcemanager'
+          return False
+        active_nodes = filter(lambda x: x['state'] == RUNNING_STATE, nodes)
+        active_nodes_count = len(active_nodes)
+
+        if active_nodes_count == 0:
+          print 'There are no active nodemanagers connected to resourcemanager'
+          return False
+        else:
+          return True
+      else:
+        return False
+  except Exception as e:
+    print 'Error validation of ability response', e
+    return False
+
+#
+# Main.
+#
+def main():
+  parser = optparse.OptionParser(usage="usage: %prog [options] component ")
+  parser.add_option("-p", "--port", dest="address", help="Host:Port for REST API of a desired component")
+  parser.add_option("-s", "--ssl", dest="ssl_enabled", help="Is SSL enabled for UI of component")
+
+  (options, args) = parser.parse_args()
+
+  component = args[0]
+
+  address = options.address
+  ssl_enabled = options.ssl_enabled == 'true'
+  if component == RESOURCEMANAGER:
+    path = '/ws/v1/cluster/info'
+  elif component == NODEMANAGER:
+    path = '/ws/v1/node/info'
+  elif component == HISTORYSERVER:
+    path = '/ws/v1/history/info'
+  else:
+    parser.error("Invalid component")
+
+  validateAvailability(component, path, address, ssl_enabled)
+
+  if component == RESOURCEMANAGER:
+    path = '/ws/v1/cluster/nodes'
+    validateAbility(component, path, address, ssl_enabled)
+
+if __name__ == "__main__":
+  main()
http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/__init__.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/__init__.py
new file mode 100644
index 0000000..35de4bb
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/__init__.py
@@ -0,0 +1,20 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+ +Ambari Agent + +""" http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/application_timeline_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/application_timeline_server.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/application_timeline_server.py new file mode 100644 index 0000000..03fff21 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/application_timeline_server.py @@ -0,0 +1,162 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +from resource_management.libraries.script.script import Script +from resource_management.libraries.functions import conf_select, stack_select +from resource_management.libraries.functions.constants import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.libraries.functions import check_process_status +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties,\ + FILE_TYPE_XML +from resource_management.libraries.functions.format import format +from resource_management.core.logger import Logger +from resource_management.core.resources.system import Execute + +from yarn import yarn +from service import service +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl + + +class ApplicationTimelineServer(Script): + def install(self, env): + self.install_packages(env) + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + service('timelineserver', action='start') + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('timelineserver', action='stop') + + def configure(self, env): + import params + env.set_params(params) + yarn(name='apptimelineserver') + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class ApplicationTimelineServerWindows(ApplicationTimelineServer): + def status(self, env): + service('timelineserver', action='status') + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class ApplicationTimelineServerDefault(ApplicationTimelineServer): + def get_component_name(self): + return "hadoop-yarn-timelineserver" + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + 
conf_select.select(params.stack_name, "hadoop", params.version)
+      stack_select.select("hadoop-yarn-timelineserver", params.version)
+
+  def status(self, env):
+    import status_params
+    env.set_params(status_params)
+    check_process_status(status_params.yarn_historyserver_pid_file)
+
+  def security_status(self, env):
+    import status_params
+    env.set_params(status_params)
+    if status_params.security_enabled:
+      props_value_check = {"yarn.timeline-service.enabled": "true",
+                           "yarn.timeline-service.http-authentication.type": "kerberos",
+                           "yarn.acl.enable": "true"}
+      props_empty_check = ["yarn.timeline-service.principal",
+                           "yarn.timeline-service.keytab",
+                           "yarn.timeline-service.http-authentication.kerberos.principal",
+                           "yarn.timeline-service.http-authentication.kerberos.keytab"]
+
+      props_read_check = ["yarn.timeline-service.keytab",
+                          "yarn.timeline-service.http-authentication.kerberos.keytab"]
+      yarn_site_props = build_expectations('yarn-site', props_value_check, props_empty_check,
+                                           props_read_check)
+
+      yarn_expectations = {}
+      yarn_expectations.update(yarn_site_props)
+
+      security_params = get_params_from_filesystem(status_params.hadoop_conf_dir,
+                                                   {'yarn-site.xml': FILE_TYPE_XML})
+      result_issues = validate_security_config_properties(security_params, yarn_expectations)
+      if not result_issues: # If all validations passed successfully
+        try:
+          # Double check the dict before calling execute
+          if ( 'yarn-site' not in security_params
+               or 'yarn.timeline-service.keytab' not in security_params['yarn-site']
+               or 'yarn.timeline-service.principal' not in security_params['yarn-site']
+               or 'yarn.timeline-service.http-authentication.kerberos.keytab' not in security_params['yarn-site']
+               or 'yarn.timeline-service.http-authentication.kerberos.principal' not in security_params['yarn-site']):
+            self.put_structured_out({"securityState": "UNSECURED"})
+            self.put_structured_out(
+              {"securityIssuesFound": "Keytab file or principal are not set properly."})
+            return
+
+          cached_kinit_executor(status_params.kinit_path_local,
+                                status_params.yarn_user,
+                                security_params['yarn-site']['yarn.timeline-service.keytab'],
+                                security_params['yarn-site']['yarn.timeline-service.principal'],
+                                status_params.hostname,
+                                status_params.tmp_dir)
+          cached_kinit_executor(status_params.kinit_path_local,
+                                status_params.yarn_user,
+                                security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.keytab'],
+                                security_params['yarn-site']['yarn.timeline-service.http-authentication.kerberos.principal'],
+                                status_params.hostname,
+                                status_params.tmp_dir)
+          self.put_structured_out({"securityState": "SECURED_KERBEROS"})
+        except Exception as e:
+          self.put_structured_out({"securityState": "ERROR"})
+          self.put_structured_out({"securityStateErrorInfo": str(e)})
+      else:
+        issues = []
+        for cf in result_issues:
+          issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf]))
+        self.put_structured_out({"securityIssuesFound": ". 
".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + def get_log_folder(self): + import params + return params.yarn_log_dir + + def get_user(self): + import params + return params.yarn_user + + def get_pid_files(self): + import status_params + Execute(format("mv {status_params.yarn_historyserver_pid_file_old} {status_params.yarn_historyserver_pid_file}"), + only_if = format("test -e {status_params.yarn_historyserver_pid_file_old}", user=status_params.yarn_user)) + return [status_params.yarn_historyserver_pid_file] + +if __name__ == "__main__": + ApplicationTimelineServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/historyserver.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/historyserver.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/historyserver.py new file mode 100644 index 0000000..8f5d380 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/historyserver.py @@ -0,0 +1,192 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" + +from resource_management.libraries.script.script import Script +from resource_management.libraries.resources.hdfs_resource import HdfsResource +from resource_management.libraries.functions import conf_select, stack_select +from resource_management.libraries.functions.constants import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.libraries.functions.check_process_status import check_process_status +from resource_management.libraries.functions.copy_tarball import copy_to_hdfs +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties, \ + FILE_TYPE_XML +from resource_management.core.source import Template +from resource_management.core.logger import Logger + +from install_jars import install_tez_jars +from yarn import yarn +from service import service +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl + + +class HistoryServer(Script): + def install(self, env): + self.install_packages(env) + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('historyserver', action='stop', serviceName='mapreduce') + + def configure(self, env): + import params + env.set_params(params) + yarn(name="historyserver") + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class HistoryserverWindows(HistoryServer): + def start(self, env): + import params + env.set_params(params) + self.configure(env) + service('historyserver', action='start', serviceName='mapreduce') + + def status(self, env): + service('historyserver', action='status') + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class HistoryServerDefault(HistoryServer): + def get_component_name(self): + return "hadoop-mapreduce-historyserver" + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-mapreduce-historyserver", params.version) + # MC Hammer said, "Can't touch this" + copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs) + copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs) + copy_to_hdfs("slider", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs) + params.HdfsResource(None, action="execute") + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + + if params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major): + # MC Hammer said, "Can't touch this" + resource_created = copy_to_hdfs( + "mapreduce", + params.user_group, + params.hdfs_user, + skip=params.sysprep_skip_copy_tarballs_hdfs) + resource_created = copy_to_hdfs( + "tez", + params.user_group, + params.hdfs_user, + skip=params.sysprep_skip_copy_tarballs_hdfs) or resource_created + resource_created = copy_to_hdfs( + "slider", + params.user_group, + params.hdfs_user, + skip=params.sysprep_skip_copy_tarballs_hdfs) or resource_created + if resource_created: + params.HdfsResource(None, action="execute") + else: + # In stack versions 
before copy_tarball_to_hdfs support tez.tar.gz was copied to a different folder in HDFS. + install_tez_jars() + + service('historyserver', action='start', serviceName='mapreduce') + + def status(self, env): + import status_params + env.set_params(status_params) + check_process_status(status_params.mapred_historyserver_pid_file) + + def security_status(self, env): + import status_params + env.set_params(status_params) + if status_params.security_enabled: + expectations = {} + expectations.update(build_expectations('mapred-site', + None, + [ + 'mapreduce.jobhistory.keytab', + 'mapreduce.jobhistory.principal', + 'mapreduce.jobhistory.webapp.spnego-keytab-file', + 'mapreduce.jobhistory.webapp.spnego-principal' + ], + None)) + + security_params = get_params_from_filesystem(status_params.hadoop_conf_dir, + {'mapred-site.xml': FILE_TYPE_XML}) + result_issues = validate_security_config_properties(security_params, expectations) + if not result_issues: # If all validations passed successfully + try: + # Double check the dict before calling execute + if ( 'mapred-site' not in security_params or + 'mapreduce.jobhistory.keytab' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.principal' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.webapp.spnego-keytab-file' not in security_params['mapred-site'] or + 'mapreduce.jobhistory.webapp.spnego-principal' not in security_params['mapred-site']): + self.put_structured_out({"securityState": "UNSECURED"}) + self.put_structured_out( + {"securityIssuesFound": "Keytab file or principal not set."}) + return + + cached_kinit_executor(status_params.kinit_path_local, + status_params.mapred_user, + security_params['mapred-site']['mapreduce.jobhistory.keytab'], + security_params['mapred-site']['mapreduce.jobhistory.principal'], + status_params.hostname, + status_params.tmp_dir) + cached_kinit_executor(status_params.kinit_path_local, + status_params.mapred_user, + security_params['mapred-site']['mapreduce.jobhistory.webapp.spnego-keytab-file'], + security_params['mapred-site']['mapreduce.jobhistory.webapp.spnego-principal'], + status_params.hostname, + status_params.tmp_dir) + self.put_structured_out({"securityState": "SECURED_KERBEROS"}) + except Exception as e: + self.put_structured_out({"securityState": "ERROR"}) + self.put_structured_out({"securityStateErrorInfo": str(e)}) + else: + issues = [] + for cf in result_issues: + issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf])) + self.put_structured_out({"securityIssuesFound": ". 
".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + def get_log_folder(self): + import params + return params.mapred_log_dir + + def get_user(self): + import params + return params.mapred_user + + def get_pid_files(self): + import status_params + return [status_params.mapred_historyserver_pid_file] + +if __name__ == "__main__": + HistoryServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/install_jars.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/install_jars.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/install_jars.py new file mode 100644 index 0000000..728a014 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/install_jars.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +""" + +from resource_management.libraries.functions.format import format +import os +import glob + +def install_tez_jars(): + import params + + destination_hdfs_dirs = get_tez_hdfs_dir_paths(params.tez_lib_uris) + + # If tez libraries are to be stored in hdfs + if destination_hdfs_dirs: + for hdfs_dir in destination_hdfs_dirs: + params.HdfsResource(hdfs_dir, + type="directory", + action="create_on_execute", + owner=params.tez_user, + mode=0755 + ) + + app_dir_path = None + lib_dir_path = None + + if len(destination_hdfs_dirs) > 0: + for path in destination_hdfs_dirs: + if 'lib' in path: + lib_dir_path = path + else: + app_dir_path = path + pass + pass + pass + + tez_jars = {} + if app_dir_path: + tez_jars[params.tez_local_api_jars] = app_dir_path + if lib_dir_path: + tez_jars[params.tez_local_lib_jars] = lib_dir_path + + for src_file_regex, dest_dir in tez_jars.iteritems(): + for src_filepath in glob.glob(src_file_regex): + src_filename = os.path.basename(src_filepath) + params.HdfsResource(format("{dest_dir}/{src_filename}"), + type="file", + action="create_on_execute", + source=src_filepath, + mode=0755, + owner=params.tez_user + ) + + for src_file_regex, dest_dir in tez_jars.iteritems(): + for src_filepath in glob.glob(src_file_regex): + src_filename = os.path.basename(src_filepath) + params.HdfsResource(format("{dest_dir}/{src_filename}"), + type="file", + action="create_on_execute", + source=src_filepath, + mode=0755, + owner=params.tez_user + ) + params.HdfsResource(None, action="execute") + + +def get_tez_hdfs_dir_paths(tez_lib_uris = None): + hdfs_path_prefix = 'hdfs://' + lib_dir_paths = [] + if tez_lib_uris and tez_lib_uris.strip().find(hdfs_path_prefix, 0) != -1: + dir_paths = tez_lib_uris.split(',') + for path in dir_paths: + if not "tez.tar.gz" in path: + lib_dir_path = path.replace(hdfs_path_prefix, '') + lib_dir_path = lib_dir_path if lib_dir_path.endswith(os.sep) else lib_dir_path + os.sep + lib_dir_paths.append(lib_dir_path) + else: + lib_dir_path = path.replace(hdfs_path_prefix, '') + lib_dir_paths.append(os.path.dirname(lib_dir_path)) + pass + pass + + return lib_dir_paths http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapred_service_check.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapred_service_check.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapred_service_check.py new file mode 100644 index 0000000..6288ac0 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapred_service_check.py @@ -0,0 +1,172 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+
+Ambari Agent
+
+"""
+
+import os
+import sys
+from resource_management.libraries.script.script import Script
+from resource_management.libraries.resources.execute_hadoop import ExecuteHadoop
+from resource_management.libraries.functions.format import format
+from resource_management.core.resources.system import Execute, File
+from resource_management.core.source import StaticFile
+from ambari_commons import OSConst
+from ambari_commons.os_family_impl import OsFamilyImpl
+from resource_management.core.logger import Logger
+
+
+class MapReduce2ServiceCheck(Script):
+  def service_check(self, env):
+    pass
+
+
+@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY)
+class MapReduce2ServiceCheckWindows(MapReduce2ServiceCheck):
+  def service_check(self, env):
+    import params
+
+    env.set_params(params)
+
+    component_type = 'hs'
+    # the same configured history server web UI address is used whether or not SSL is enabled
+    component_address = params.hs_webui_address
+
+    validateStatusFileName = "validateYarnComponentStatusWindows.py"
+    validateStatusFilePath = os.path.join(os.path.dirname(params.hadoop_home), "temp", validateStatusFileName)
+    python_executable = sys.executable
+    validateStatusCmd = "{0} {1} {2} -p {3} -s {4}".format(
+      python_executable, validateStatusFilePath, component_type, component_address, params.hadoop_ssl_enabled)
+
+    if params.security_enabled:
+      kinit_cmd = "{0} -kt {1} {2};".format(params.kinit_path_local, params.smoke_user_keytab, params.smokeuser)
+      smoke_cmd = kinit_cmd + validateStatusCmd
+    else:
+      smoke_cmd = validateStatusCmd
+
+    File(validateStatusFilePath,
+         content=StaticFile(validateStatusFileName)
+    )
+
+    Execute(smoke_cmd,
+            tries=3,
+            try_sleep=5,
+            logoutput=True
+    )
+
+    # hadoop_exe = os.path.join(params.hadoop_home, "bin", "hadoop")
+    #
+    # tested_file = os.path.join(params.hadoop_home, "bin", "hadoop.cmd")
+    # jar_path = os.path.join(params.hadoop_mapred2_jar_location, params.hadoopMapredExamplesJarName)
+    # input_file = format("/user/hadoop/mapredsmokeinput")
+    # output_file = format("/user/hadoop/mapredsmokeoutput")
+    # cleanup_cmd = format("cmd /C {hadoop_exe} fs -rm -r -f {output_file} {input_file}")
+    # create_file_cmd = format("cmd /C {hadoop_exe} fs -put {tested_file} {input_file}")
+    # run_wordcount_job = format("cmd /C {hadoop_exe} jar {jar_path} wordcount {input_file} {output_file}")
+    # test_cmd = format("cmd /C {hadoop_exe} fs -test -e {output_file}")
+    #
+    # if params.security_enabled:
+    #   kinit_cmd = "{0} -kt {1} {2};".format(kinit_path_local, smoke_user_keytab, smokeuser)
+    #   Execute(kinit_cmd)
+    #
+    # Execute(cleanup_cmd,
+    #         tries=1,
+    #         try_sleep=5,
+    #         logoutput=True,
+    #         user=params.hdfs_user
+    # )
+    #
+    # Execute(create_file_cmd,
+    #         tries=1,
+    #         try_sleep=5,
+    #         logoutput=True,
+    #         user=params.hdfs_user
+    # )
+    #
+    # Execute(run_wordcount_job,
+    #         tries=1,
+    #         try_sleep=5,
+    #         logoutput=True,
+    #         user=params.hdfs_user
+    # )
+    #
+    # Execute(test_cmd,
+    #         logoutput=True,
+    #         user=params.hdfs_user
+    # )
+
+
+@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT)
+class MapReduce2ServiceCheckDefault(MapReduce2ServiceCheck):
+  def service_check(self, env):
+    import params
+    env.set_params(params)
+
+    jar_path = format("{hadoop_mapred2_jar_location}/{hadoopMapredExamplesJarName}")
+    input_file = format("/user/{smokeuser}/mapredsmokeinput")
+    output_file = format("/user/{smokeuser}/mapredsmokeoutput")
+
+    test_cmd = format("fs -test -e {output_file}")
+    run_wordcount_job = format("jar {jar_path} wordcount {input_file} {output_file}")
+
+    
params.HdfsResource(format("/user/{smokeuser}"), + type="directory", + action="create_on_execute", + owner=params.smokeuser, + mode=params.smoke_hdfs_user_mode, + ) + params.HdfsResource(output_file, + action = "delete_on_execute", + type = "directory", + dfs_type = params.dfs_type, + ) + params.HdfsResource(input_file, + action = "create_on_execute", + type = "file", + source = "/etc/passwd", + dfs_type = params.dfs_type, + ) + params.HdfsResource(None, action="execute") + + # initialize the ticket + if params.security_enabled: + kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};") + Execute(kinit_cmd, user=params.smokeuser) + + ExecuteHadoop(run_wordcount_job, + tries=1, + try_sleep=5, + user=params.smokeuser, + bin_dir=params.execute_path, + conf_dir=params.hadoop_conf_dir, + logoutput=True) + + # the ticket may have expired, so re-initialize + if params.security_enabled: + kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};") + Execute(kinit_cmd, user=params.smokeuser) + + ExecuteHadoop(test_cmd, + user=params.smokeuser, + bin_dir=params.execute_path, + conf_dir=params.hadoop_conf_dir) + + +if __name__ == "__main__": + MapReduce2ServiceCheck().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapreduce2_client.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapreduce2_client.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapreduce2_client.py new file mode 100644 index 0000000..424157b --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/mapreduce2_client.py @@ -0,0 +1,98 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" +# Python imports +import os +import sys + +# Local imports +from resource_management.libraries.script.script import Script +from resource_management.libraries.functions import conf_select, stack_select +from resource_management.libraries.functions.constants import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.core.exceptions import ClientComponentHasNoStatus +from yarn import yarn +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl +from resource_management.core.logger import Logger + + +class MapReduce2Client(Script): + def install(self, env): + import params + self.install_packages(env) + self.configure(env) + + def configure(self, env, config_dir=None, upgrade_type=None): + """ + :param env: Python environment + :param config_dir: During rolling upgrade, which config directory to save configs to. + """ + import params + env.set_params(params) + yarn(config_dir=config_dir) + + def status(self, env): + raise ClientComponentHasNoStatus() + + def stack_upgrade_save_new_config(self, env): + """ + Because this gets called during a Rolling Upgrade, the new mapreduce configs have already been saved, so we must be + careful to only call configure() on the directory of the new version. + :param env: + """ + import params + env.set_params(params) + + conf_select_name = "hadoop" + base_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + config_dir = self.get_config_dir_during_stack_upgrade(env, base_dir, conf_select_name) + + if config_dir: + Logger.info("stack_upgrade_save_new_config(): Calling conf-select on %s using version %s" % (conf_select_name, str(params.version))) + + # Because this script was called from ru_execute_tasks.py which already enters an Environment with its own basedir, + # must change it now so this function can find the Jinja Templates for the service. + env.config.basedir = base_dir + conf_select.select(params.stack_name, conf_select_name, params.version) + self.configure(env, config_dir=config_dir) + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class MapReduce2ClientWindows(MapReduce2Client): + pass + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class MapReduce2ClientDefault(MapReduce2Client): + def get_component_name(self): + return "hadoop-client" + + def pre_upgrade_restart(self, env, upgrade_type=None): + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-client", params.version) + + +if __name__ == "__main__": + MapReduce2Client().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager.py new file mode 100644 index 0000000..133d2e1 --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager.py @@ -0,0 +1,166 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. 
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +import nodemanager_upgrade + +from resource_management.libraries.script.script import Script +from resource_management.libraries.functions import conf_select, stack_select +from resource_management.libraries.functions.constants import StackFeature +from resource_management.libraries.functions.stack_features import check_stack_feature +from resource_management.libraries.functions.check_process_status import check_process_status +from resource_management.libraries.functions.format import format +from resource_management.libraries.functions.security_commons import build_expectations, \ + cached_kinit_executor, get_params_from_filesystem, validate_security_config_properties, \ + FILE_TYPE_XML +from resource_management.core.logger import Logger +from yarn import yarn +from service import service +from ambari_commons import OSConst +from ambari_commons.os_family_impl import OsFamilyImpl + + +class Nodemanager(Script): + def install(self, env): + self.install_packages(env) + + def stop(self, env, upgrade_type=None): + import params + env.set_params(params) + service('nodemanager',action='stop') + + def start(self, env, upgrade_type=None): + import params + env.set_params(params) + self.configure(env) # FOR SECURITY + service('nodemanager',action='start') + + def configure(self, env): + import params + env.set_params(params) + yarn(name="nodemanager") + + +@OsFamilyImpl(os_family=OSConst.WINSRV_FAMILY) +class NodemanagerWindows(Nodemanager): + def status(self, env): + service('nodemanager', action='status') + + +@OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT) +class NodemanagerDefault(Nodemanager): + def get_component_name(self): + return "hadoop-yarn-nodemanager" + + def pre_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing NodeManager Stack Upgrade pre-restart") + import params + env.set_params(params) + + if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): + conf_select.select(params.stack_name, "hadoop", params.version) + stack_select.select("hadoop-yarn-nodemanager", params.version) + + def post_upgrade_restart(self, env, upgrade_type=None): + Logger.info("Executing NodeManager Stack Upgrade post-restart") + import params + env.set_params(params) + + nodemanager_upgrade.post_upgrade_check() + + def status(self, env): + import status_params + env.set_params(status_params) + check_process_status(status_params.nodemanager_pid_file) + + def security_status(self, env): + import status_params + env.set_params(status_params) + if status_params.security_enabled: + props_value_check = {"yarn.timeline-service.http-authentication.type": "kerberos", + "yarn.acl.enable": "true"} + props_empty_check = ["yarn.nodemanager.principal", + "yarn.nodemanager.keytab", + "yarn.nodemanager.webapp.spnego-principal", + "yarn.nodemanager.webapp.spnego-keytab-file"] + + props_read_check 
= ["yarn.nodemanager.keytab", + "yarn.nodemanager.webapp.spnego-keytab-file"] + yarn_site_props = build_expectations('yarn-site', props_value_check, props_empty_check, + props_read_check) + + yarn_expectations ={} + yarn_expectations.update(yarn_site_props) + + security_params = get_params_from_filesystem(status_params.hadoop_conf_dir, + {'yarn-site.xml': FILE_TYPE_XML}) + result_issues = validate_security_config_properties(security_params, yarn_site_props) + if not result_issues: # If all validations passed successfully + try: + # Double check the dict before calling execute + if ( 'yarn-site' not in security_params + or 'yarn.nodemanager.keytab' not in security_params['yarn-site'] + or 'yarn.nodemanager.principal' not in security_params['yarn-site']) \ + or 'yarn.nodemanager.webapp.spnego-keytab-file' not in security_params['yarn-site'] \ + or 'yarn.nodemanager.webapp.spnego-principal' not in security_params['yarn-site']: + self.put_structured_out({"securityState": "UNSECURED"}) + self.put_structured_out( + {"securityIssuesFound": "Keytab file or principal are not set property."}) + return + + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.nodemanager.keytab'], + security_params['yarn-site']['yarn.nodemanager.principal'], + status_params.hostname, + status_params.tmp_dir) + cached_kinit_executor(status_params.kinit_path_local, + status_params.yarn_user, + security_params['yarn-site']['yarn.nodemanager.webapp.spnego-keytab-file'], + security_params['yarn-site']['yarn.nodemanager.webapp.spnego-principal'], + status_params.hostname, + status_params.tmp_dir) + self.put_structured_out({"securityState": "SECURED_KERBEROS"}) + except Exception as e: + self.put_structured_out({"securityState": "ERROR"}) + self.put_structured_out({"securityStateErrorInfo": str(e)}) + else: + issues = [] + for cf in result_issues: + issues.append("Configuration file %s did not pass the validation. Reason: %s" % (cf, result_issues[cf])) + self.put_structured_out({"securityIssuesFound": ". ".join(issues)}) + self.put_structured_out({"securityState": "UNSECURED"}) + else: + self.put_structured_out({"securityState": "UNSECURED"}) + + def get_log_folder(self): + import params + return params.yarn_log_dir + + def get_user(self): + import params + return params.yarn_user + + def get_pid_files(self): + import status_params + return [status_params.nodemanager_pid_file] + +if __name__ == "__main__": + Nodemanager().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py new file mode 100644 index 0000000..22cd8cc --- /dev/null +++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py @@ -0,0 +1,74 @@ +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py
new file mode 100644
index 0000000..22cd8cc
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/nodemanager_upgrade.py
@@ -0,0 +1,74 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+"""
+
+import subprocess
+
+from resource_management.core.logger import Logger
+from resource_management.core.exceptions import Fail
+from resource_management.core.resources.system import Execute
+from resource_management.core import shell
+from resource_management.libraries.functions.decorator import retry
+from resource_management.libraries.functions.show_logs import show_logs
+from resource_management.libraries.functions.format import format
+
+
+def post_upgrade_check():
+  '''
+  Checks that the NodeManager has rejoined the cluster.
+  This function will obtain the Kerberos ticket if security is enabled.
+  :return:
+  '''
+  import params
+
+  Logger.info('NodeManager executing "yarn node -list -states=RUNNING" to verify the node has rejoined the cluster...')
+  if params.security_enabled and params.nodemanager_kinit_cmd:
+    Execute(params.nodemanager_kinit_cmd, user=params.yarn_user)
+
+  try:
+    _check_nodemanager_startup()
+  except Fail:
+    show_logs(params.yarn_log_dir, params.yarn_user)
+    raise
+
+
+@retry(times=30, sleep_time=10, err_class=Fail)
+def _check_nodemanager_startup():
+  '''
+  Checks that a NodeManager is in a RUNNING state in the cluster via
+  "yarn node -list -states=RUNNING" command. Once the NodeManager is found to be
+  alive this method will return, otherwise it will raise a Fail(...) and retry
+  automatically.
+  :return:
+  '''
+  import params
+  import socket
+
+  command = 'yarn node -list -states=RUNNING'
+  return_code, yarn_output = shell.checked_call(command, user=params.yarn_user)
+
+  hostname = params.hostname.lower()
+  hostname_ip = socket.gethostbyname(params.hostname.lower())
+  nodemanager_address = params.nm_address.lower()
+  yarn_output = yarn_output.lower()
+
+  if hostname in yarn_output or nodemanager_address in yarn_output or hostname_ip in yarn_output:
+    Logger.info('NodeManager with ID \'{0}\' has rejoined the cluster.'.format(nodemanager_address))
+    return
+  else:
+    raise Fail('NodeManager with ID \'{0}\' was not found in the list of running NodeManagers. \'{1}\' output was:\n{2}'.format(nodemanager_address, command, yarn_output))
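
(Aside: the @retry decorator above gives _check_nodemanager_startup() up to 30
attempts with a 10-second sleep between them -- roughly five minutes -- before
the final Fail propagates to post_upgrade_check(), which then dumps the YARN
logs. The generic shape of such a decorator, as a simplified sketch rather
than Ambari's actual implementation:)

import time

def retry_sketch(times, sleep_time, err_class):
  # Simplified stand-in for resource_management's @retry decorator.
  def decorator(func):
    def wrapper(*args, **kwargs):
      for attempt in range(times):
        try:
          return func(*args, **kwargs)
        except err_class:
          if attempt == times - 1:
            raise  # out of attempts; let the failure propagate
          time.sleep(sleep_time)
    return wrapper
  return decorator
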
http://git-wip-us.apache.org/repos/asf/ambari/blob/ab4b864c/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params.py b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params.py
new file mode 100644
index 0000000..d0ad6f6
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/YARN/3.0.0.3.0/package/scripts/params.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+from ambari_commons import OSCheck
+from resource_management.libraries.functions.default import default
+from resource_management.libraries.functions.copy_tarball import get_sysprep_skip_copy_tarballs_hdfs
+
+if OSCheck.is_windows_family():
+  from params_windows import *
+else:
+  from params_linux import *
+
+sysprep_skip_copy_tarballs_hdfs = get_sysprep_skip_copy_tarballs_hdfs()
+retryAble = default("/commandParams/command_retry_enabled", False)
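
(Aside: default() reads a value out of the command JSON the Ambari server sends
to the agent, returning the fallback when the key path is absent, so retryAble
stays False unless the server set /commandParams/command_retry_enabled. A
minimal sketch of that lookup against a hypothetical command dictionary:)

def default_sketch(config, path, fallback):
  # Walk a '/sep/arated' key path through nested dicts; return the fallback
  # on the first miss, mirroring how default() treats Script's command JSON.
  node = config
  for key in path.strip('/').split('/'):
    if not isinstance(node, dict) or key not in node:
      return fallback
    node = node[key]
  return node

command_json = {'commandParams': {'command_retry_enabled': 'true'}}  # hypothetical
print default_sketch(command_json, '/commandParams/command_retry_enabled', False)  # 'true'
print default_sketch({}, '/commandParams/command_retry_enabled', False)            # False
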
