Repository: ambari Updated Branches: refs/heads/trunk ce8e87616 -> febdf2132
AMBARI-17352: A command line script to run pre-install checks and summarize the results (dili) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/febdf213 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/febdf213 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/febdf213 Branch: refs/heads/trunk Commit: febdf21327ac676ff8aa8a1331ec82348317a0e1 Parents: ce8e876 Author: Di Li <[email protected]> Authored: Thu Jun 30 09:59:46 2016 -0400 Committer: Di Li <[email protected]> Committed: Thu Jun 30 09:59:46 2016 -0400 ---------------------------------------------------------------------- .../src/main/python/preinstall_checker.py | 964 +++++++++++++++++++ 1 file changed, 964 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/febdf213/contrib/utils/preinstall-check/src/main/python/preinstall_checker.py ---------------------------------------------------------------------- diff --git a/contrib/utils/preinstall-check/src/main/python/preinstall_checker.py b/contrib/utils/preinstall-check/src/main/python/preinstall_checker.py new file mode 100644 index 0000000..7ff7e4c --- /dev/null +++ b/contrib/utils/preinstall-check/src/main/python/preinstall_checker.py @@ -0,0 +1,964 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import time
import json
import logging
import optparse
import shutil
import sys
import subprocess
import os.path
import socket

logger = logging.getLogger('Precheck')
# Flipped to True by print_check_results() as soon as one check reports a
# warning / an error.
has_warnings = False
has_errors = False

# Types treated as "a single text message" (as opposed to a list of messages).
try:
    _STRING_TYPES = (basestring,)  # Python 2
except NameError:
    _STRING_TYPES = (str, bytes)  # Python 3

# request types
HTTP_REQUEST_GET = 'GET'
HTTP_REQUEST_POST = 'POST'

# defaults
EXIT_MESSAGE = "Make sure to provide correct cluster information including port, admin user name and password. Default values will be used if you omit the command parameters."
DEFAULT_HTTP_PORT = 8080
DEFAULT_ADMIN_USER = 'admin'
DEFAULT_LOG_DIR = '/tmp/preinstall_checks'
DEFAULT_LOG_FILE = 'preinstall_checks.log'
DEFAULT_HTTP_REQUEST_TYPE = HTTP_REQUEST_GET
DEFAULT_AMBARI_SERVER_PROPERTIES = '/etc/ambari-server/conf/ambari.properties'
DEFAULT_MAX_COUNTER = 4  # connection attempts made by the retry helper
DEFAULT_TIMER_LONG = 5  # seconds slept between two connection attempts
DEFAULT_POLLING_TIMER_REQUEST = 10  # seconds between two request status polls
DEFAULT_MINDISKSPACE = 2.0  # in GB
DEFAULT_MINDISKSPACEUSRLIB = 1.0  # in GB

# ops
OPERATION_CHECK = 'check'
OPERATIONS = [OPERATION_CHECK]

# codes
CODE_SUCCESS = 0
CODE_ERROR = 1
CODE_WARNING = 2
CODE_CONNECTION_REFUSED = 7  # compared against the exit code returned by curl
HTTP_FORBIDDEN = 403

# labels
LABEL_OK = '[ OK ]'
LABEL_WARNING = '[WARNING]'
LABEL_ERROR = '[ ERROR ]'

# status
STATUS_ACCEPTED = 'Accepted'
STATUS_COMPLETED = 'COMPLETED'
STATUS_PASSED = 'PASSED'
# The two values below were swapped, so a request the server reported as
# FAILED was never recognised as finished and failures printed as warnings.
STATUS_WARNING = 'WARNING'
STATUS_FAILED = 'FAILED'
STATUS_ABORTED = 'ABORTED'
STATUS_IN_PROGRESS = 'IN_PROGRESS'
STATUS_PENDING = 'PENDING'
# list of status values indicating the operation has finished
LIST_FINISHED_REQUEST_STATUS = [STATUS_FAILED, STATUS_COMPLETED, STATUS_ABORTED]


def init_parser_options(parser):
    """Register the command line options of the script on *parser*.

    @param parser: optparse.OptionParser (or compatible) to add the options to
    """
    parser.add_option('-p', '--port',
                      dest="port", default=DEFAULT_HTTP_PORT,
                      help="Ambari Server port corrsponding to the network protocol. Default port is {0} for an HTTP connection".format(DEFAULT_HTTP_PORT))
    parser.add_option('-u', '--user',
                      dest="user", default=DEFAULT_ADMIN_USER,
                      help="Ambari admin user. Default user name is {0}".format(DEFAULT_ADMIN_USER))
    parser.add_option('-a', '--password',
                      dest="password",
                      help="Ambari admin user password.")
    parser.add_option('-l', '--log',
                      dest="log",
                      default=DEFAULT_LOG_DIR,
                      help="The log file home location. Default log file home is {0}.".format(DEFAULT_LOG_DIR),
                      metavar="DIR")
    parser.add_option('--operation',
                      dest='operation', default=OPERATION_CHECK,
                      help='Operation can one of the following {0}'.format(', '.join(OPERATIONS)))
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Output verbosity.")


def validate_options(options):
    """Validate parameters passed in from the command line.

    @param options: object exposing port, user, password, log and operation
    @return: tuple (message, code); code is CODE_SUCCESS when every parameter
             is present and the operation is known, CODE_ERROR otherwise
    """
    errors = []

    # General parameters that must be passed in via command line or set with
    # a default value.
    if not options.port:
        errors.append("No Ambari server port provided.")
    if not options.user:
        errors.append("No Ambari admin user name provided.")
    if not options.password:
        errors.append("No Ambari admin user passsword provided.")
    if not options.log:
        errors.append("No log patch provided.")

    # General check for operations.
    if not options.operation:
        errors.append('No operation provided')
    elif options.operation not in OPERATIONS:
        errors.append('Unknow operation {0}. Specify one of the following operations: {1}'.format(options.operation, ', '.join(OPERATIONS)))

    if errors:
        return 'Parameters validation finished with error(s). {0}'.format('. '.join(errors)), CODE_ERROR
    return 'Parameters validation finished successfully', CODE_SUCCESS


def get_log_file(log_home):
    """Return the path of the log file located in directory *log_home*."""
    return '{0}/{1}'.format(log_home, DEFAULT_LOG_FILE)


def init_logger(options):
    """Configure the module logger to write to the log file and to stdout.

    Creates options.log when it does not exist, truncates the log file and
    replaces any handler previously attached to the logger.

    @param options: object exposing log (directory) and verbose (bool)
    """
    log_dir = options.log
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    logger.setLevel(logging.DEBUG if options.verbose else logging.INFO)
    logger.handlers = []

    formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
    handlers = (
        logging.FileHandler(get_log_file(log_dir), mode='w'),
        logging.StreamHandler(sys.stdout),
    )
    for handler in handlers:
        handler.setFormatter(formatter)
        logger.addHandler(handler)


def backup_log(filePath):
    """Back up the log location if it already exists.

    The existing path is renamed to "<filePath>.<YYYYmmddHHMMSS>".

    @param filePath: path to back up; None or a missing path is a no-op
    @return: tuple (backup path, code); ('', CODE_WARNING) when the move
             failed and ('', CODE_SUCCESS) when there was nothing to back up
    """
    if filePath is None or not os.path.exists(filePath):
        return '', CODE_SUCCESS

    backup_file = filePath + "." + datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    try:
        shutil.move(filePath, backup_file)
    except EnvironmentError as err:
        # Best effort: the logger may not be configured yet, hence print().
        print('Failed to backup "{0}": {1}'.format(str(filePath), str(err)))
        return '', CODE_WARNING
    return backup_file, CODE_SUCCESS


def get_current_time():
    """Return the current local time formatted as 'YYYY-mm-dd HH:MM:SS'."""
    return datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')


def step(msg):
    """Log a banner line announcing the check named *msg*."""
    logger.info('')
    if len(msg) >= 43:
        logger.info('******** Check: {0} ********'.format(msg))
    else:
        # Floor division: str.ljust() rejects the float produced by "/" on
        # Python 3.
        spaces = ' '.ljust((50 - len(msg)) // 2)
        logger.info('{0}{2}Check: {1}{2}{0}'.format('********', msg, spaces))


def _clean_messages(msgs):
    """Return *msgs* as a list of stripped, non-empty text lines.

    Accepts None, a single string or an iterable of strings; None entries and
    blank entries are dropped.
    """
    if not msgs:
        return []
    if isinstance(msgs, _STRING_TYPES):
        # A bare string (e.g. a task's stderr) must not be iterated per character.
        msgs = [msgs]
    cleaned = []
    for msg in msgs:
        if msg is None:
            continue
        text = msg.strip() if isinstance(msg, _STRING_TYPES) else str(msg).strip()
        if text:
            cleaned.append(text)
    return cleaned


def print_check_result(check, msgs, code):
    """Log the outcome line of one check followed by its detail messages.

    @param check: Text identifying the check (or the host) on the result line
    @param msgs: Detail messages: None, one string or a list of strings.
                 They are only logged for warnings and errors.
    @param code: CODE_SUCCESS, CODE_WARNING or anything else for an error
    """
    padding = ' '.ljust(20) if len(check) >= 43 else ' '.ljust(63 - len(check))
    if code == CODE_SUCCESS:
        logger.info('{0}{1}{2}'.format(check, padding, LABEL_OK))
    elif code == CODE_WARNING:
        logger.info('{0}{1}{2}'.format(check, padding, LABEL_WARNING))
        for msg in _clean_messages(msgs):
            logger.warning('\t{0}'.format(msg))
    else:
        logger.info('{0}{1}{2}'.format(check, padding, LABEL_ERROR))
        for msg in _clean_messages(msgs):
            logger.error('\t{0}'.format(msg))


def print_check_results(results):
    """Log every entry of *results* and record whether warnings/errors occurred.

    @param results: list of dicts holding 'key', 'status' and optionally
                    'warning' / 'error'. Any status other than STATUS_PASSED
                    and STATUS_WARNING is reported as an error.
    """
    global has_warnings
    global has_errors
    for result in results:
        status = result['status']
        if STATUS_PASSED == status:
            print_check_result(result['key'], None, CODE_SUCCESS)
        elif STATUS_WARNING == status:
            has_warnings = True
            print_check_result(result['key'], result.get('warning'), CODE_WARNING)
        else:
            has_errors = True
            # Not every producer supplies details, hence .get().
            print_check_result(result['key'], result.get('error'), CODE_ERROR)


def dump_parameters_to_log(options):
    """Log the parameters the script runs with (the password is not logged)."""
    server_url = get_server_url(options.port)

    logger.info('/******************************************************************************/')
    logger.info(' Parameters used for script run ')
    logger.info('Cluster parameters')
    logger.info("Server URL: {0}".format(server_url))
    logger.info("Port: {0}".format(options.port))
    logger.info("User: {0}".format(options.user))
    logger.info('')
    logger.info('Operation info')
    logger.info("Operation: {0}".format(options.operation))
    logger.info("Log Home Dir: {0}".format(options.log))
    logger.info("Log File: {0}".format(get_log_file(options.log)))
    logger.info('/******************************************************************************/')


def get_ambari_server_property(key):
    """Retrieve a property value from the Ambari Server properties file.

    The file is scanned from the bottom up, so the last definition of *key*
    wins. Only the first '=' separates name and value.

    @param key: Property name to look up
    @return: the property value, or None when the file cannot be read or the
             key is not defined
    """
    try:
        with open(DEFAULT_AMBARI_SERVER_PROPERTIES, 'r') as property_file:
            lines = property_file.read().splitlines()
    except (EnvironmentError, ValueError) as err:
        logger.error(str(err))
        return None

    for line in reversed(lines):
        name, separator, value = line.partition('=')
        if separator and name.strip() == key:
            return value.strip()
    return None


def get_server_protocol():
    """Return "https" when the 'api.ssl' server property is "true", else "http"."""
    return "https" if get_ambari_server_property('api.ssl') == "true" else "http"


def get_admin_server_fqdn():
    """Return the fully qualified domain name of the local host."""
    return socket.getfqdn()


def get_server_url(port):
    """Return the base URL <protocol>://<local fqdn>:<port> of the Ambari Server."""
    return "{0}://{1}:{2}".format(get_server_protocol(), get_admin_server_fqdn(), str(port))


def execute_curl_command(url, headers=None, request_type=DEFAULT_HTTP_REQUEST_TYPE, request_body=None, user=DEFAULT_ADMIN_USER, password=None):
    """Submit a REST API call to the Ambari Server through the curl binary.

    @param url: REST URL
    @param headers: Optional. Headers to be included in the REST API call
    @param request_type: HTTP request type (GET/POST/PUT/DELETE). Use HTTP GET as the default.
    @param request_body: Data to be submitted for HTTP POST and PUT requests
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @return: tuple (stdout, stderr, exit code) of the curl process
    """
    curl_cmd_array = ["curl", "-v", "-u", "{0}:{1}".format(user, password), "-k", "-H", "X-Requested-By: ambari"]
    for header in (headers or []):
        curl_cmd_array.extend(['-H', header])
    curl_cmd_array.extend(['-s', '-X', request_type])
    if request_type in ('PUT', 'POST') and request_body:
        curl_cmd_array.extend(["-d", request_body])
    curl_cmd_array.append(url)

    # Never write the password to the log: mask the credentials argument.
    loggable_cmd = list(curl_cmd_array)
    loggable_cmd[3] = '{0}:********'.format(user)
    logger.debug('Curl command: {0}'.format(' '.join(loggable_cmd)))

    # universal_newlines makes the pipes return text rather than bytes on Python 3.
    exeProcess = subprocess.Popen(curl_cmd_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    out, err = exeProcess.communicate()
    return out, err, exeProcess.returncode


def is_erroneous_response_by_server(json_str):
    """Determine if Ambari Server responded with an error message for the REST API call.

    @param json_str: Response body; an empty body is not considered an error
    @return: tuple (is_error, status code, message). A response counts as an
             error when it carries a 'status' or a 'message' field.
    """
    if not json_str:
        return False, 0, ''
    response = json.loads(json_str)
    status_code = response.get('status', -1)
    message = response.get('message', None)
    if -1 == status_code and not message:
        return False, 0, ''
    return True, int(status_code), message


def is_request_accepted(json_str):
    """Determine if Ambari Server has accepted the REST API call.

    @return: True when the 'Requests' section has status STATUS_ACCEPTED
    """
    logger.debug("Checking request in {0}".format(json_str))
    if not json_str:
        return False
    summary = json.loads(json_str).get('Requests', {})
    if not summary:
        return False
    return STATUS_ACCEPTED == summary.get('status', None)


def get_request_url(json_str, summary_only=True):
    """Extract the request URL ('href') from a server response.

    @param json_str: Response body returned when the request was submitted
    @param summary_only: When True, "?fields=Requests" is appended to the URL
    @return: the URL, or None when the body is empty or has no 'href'
    """
    if not json_str:
        return None
    href = json.loads(json_str).get('href', None)
    if not href:
        logger.error("Failed to obtain request url. {0} does not contain 'href' section".format(json_str))
        return None
    url_str = str(href)
    if summary_only:
        return '{0}?fields=Requests'.format(url_str)
    return url_str


def is_request_finished(json_str):
    """Determine if the request is finished.

    @param json_str: Body of a request status response
    @return: tuple (is_finished, is_finished_successfully, progress percent);
             the percentage is -1 when the response does not report it
    """
    request_status = get_request_status(json_str)
    is_finished = (request_status in LIST_FINISHED_REQUEST_STATUS)
    is_finished_successfully = (STATUS_COMPLETED == request_status)
    summary = json.loads(json_str).get('Requests', {})
    progress_percent = summary.get('progress_percent', '-1')
    return is_finished, is_finished_successfully, int(progress_percent)


def is_request_finished_successfully(json_str):
    """Return True when the request status in *json_str* is STATUS_COMPLETED."""
    return STATUS_COMPLETED == get_request_status(json_str)


def get_request_status(json_str):
    """Return 'Requests/request_status' from *json_str*, or None when absent."""
    if not json_str:
        return None
    summary = json.loads(json_str).get('Requests', {})
    request_status = summary.get('request_status', None)
    if request_status:
        return request_status
    logger.error("Failed to determin request state. {0} does not contain 'Requests' section".format(json_str))
    return None


def polling_request(url, user=None, password=None, timer=DEFAULT_POLLING_TIMER_REQUEST):
    """Check request status based on the time interval.

    Polls until the request reaches a status listed in
    LIST_FINISHED_REQUEST_STATUS or curl exits with a non-zero code.

    @param url: Request URL returned by the Ambari Server
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @param timer: Time interval between two check status REST API call. Default is 10 seconds.
    @return: tuple (stdout, stderr, exit code) of the last curl call
    """
    out = None
    err = None
    ec = 0

    logger.debug("Polling status for {0} every {1} seconds".format(url, timer))
    logger.debug("Polling started at {0}".format(str(get_current_time())))

    previous_percentage = 0
    while True:
        out, err, ec = execute_curl_command(url, user=user, password=password)
        if CODE_SUCCESS != ec:
            logger.debug('Server became offline')
            break
        is_finished, _, percentage = is_request_finished(out)
        if percentage >= 0:
            if percentage != previous_percentage:
                previous_percentage = percentage
                logger.debug(' {0}%'.format(percentage))
            else:
                logger.debug('.')
        if is_finished:
            break
        time.sleep(timer)
    logger.debug("Stopped polling {0} at {1}. Request finished.".format(url, str(get_current_time())))
    return out, err, ec


def get_host(json_str):
    """Return 'Tasks/host_name' from a task response, or None when absent."""
    if not json_str:
        return None
    return json.loads(json_str).get('Tasks', {}).get('host_name', None)


def summarize_tasks_by_hosts(request_url, user, password):
    """Summarize results of all the tasks executed in the request.

    @param request_url: Request URL returned by the Ambari Server
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @return: tuple (task_results_by_host, results_to_print): the raw task
             response body per host name, and the failures met on the way
    """
    task_results_by_host = {}
    out, err, ec = execute_curl_command(request_url, user=user, password=password)
    if CODE_SUCCESS != ec:
        return task_results_by_host, [{'key': 'Connection refused', 'status': STATUS_FAILED, 'error': [err]}]
    if not out:
        return task_results_by_host, [{'key': 'Empty response from server', 'status': STATUS_FAILED}]

    is_erroneous_response, http_ec, http_err = is_erroneous_response_by_server(out)
    if is_erroneous_response:
        return task_results_by_host, [{'key': 'Error response from server', 'status': http_ec, 'error': [http_err]}]

    urls = get_tasks_urls(out)
    if not urls:
        return task_results_by_host, [{'key': 'Empty task list', 'status': STATUS_FAILED}]

    results_to_print = []
    for task_url in urls:
        task_out, err, ec = execute_curl_command(task_url, user=user, password=password)
        logger.debug(task_out)
        if CODE_SUCCESS != ec:
            # Stop at the first unreachable task; results gathered so far are kept.
            results_to_print = [{'key': 'Connection refused', 'status': STATUS_FAILED, 'error': [err]}]
            break
        host = get_host(task_out)
        if host:
            task_results_by_host[host] = task_out
    return task_results_by_host, results_to_print


def get_tasks_urls(json_str):
    """Return the set of 'href' values of the entries under 'tasks' in *json_str*."""
    tasks = json.loads(json_str).get('tasks', [])
    return set(task.get('href', None) for task in tasks if task.get('href', None))


def server_reachable_by_credentials_with_retry(server_url, user, password):
    """Check if the script can log in Ambari Server REST API, retrying on refusal.

    Up to DEFAULT_MAX_COUNTER attempts are made, DEFAULT_TIMER_LONG seconds
    apart, as long as the outcome is CODE_CONNECTION_REFUSED.

    @param server_url: Basic server url to connect and log in
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @return: tuple (message, code) of the last attempt
    """
    retry_counter = 0
    out = None
    ec = CODE_SUCCESS
    while retry_counter < DEFAULT_MAX_COUNTER:
        out, ec = server_reachable_by_credentials(server_url, user, password)
        if CODE_CONNECTION_REFUSED != ec:
            logger.debug('Connected to server.')
            break
        retry_counter = retry_counter + 1
        logger.debug('Server may have not become fully online yet, try to reconnect in {0} seconds'.format(DEFAULT_TIMER_LONG))
        time.sleep(DEFAULT_TIMER_LONG)
    if CODE_CONNECTION_REFUSED == ec:
        message = 'Server did not become fully online in {0} seconds.'.format(str(DEFAULT_MAX_COUNTER * DEFAULT_TIMER_LONG))
        logger.debug(message)
    return out, ec


def server_reachable_by_credentials(server_url, user, password):
    """Check if the script can log in Ambari Server REST API via user and password provided.

    @param server_url: Basic server url to connect and log in
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @return: tuple (message, code): curl's stderr and exit code when curl
             failed, the server's message and status when it answered with an
             error, ('', CODE_SUCCESS) otherwise
    """
    url = '{0}/api/v1/requests'.format(server_url)
    out, err, ec = execute_curl_command(url, user=user, password=password)
    if ec != CODE_SUCCESS:
        return err, ec
    is_erroneous_response, http_ec, http_err = is_erroneous_response_by_server(out)
    if is_erroneous_response:
        return http_err, http_ec
    return '', CODE_SUCCESS


def get_ambari_agent_nodes(server_url, user, password):
    """Obtain a list of Ambari Agents registered to the host via a REST API call.

    @param server_url: Basic server url to connect and log in
    @param user: User for Ambari REST API authentication
    @param password: Password for the user used to authenticate the Ambari REST API call
    @return: set of host names; empty when the call failed
    """
    url = "{0}/api/v1/hosts".format(server_url)
    hosts = set()
    out, err, ec = execute_curl_command(url, user=user, password=password)
    if CODE_SUCCESS != ec or not out:
        # Without this guard an empty body reached json.loads() and raised.
        logger.error("Failed to retrieve hosts from {0}: {1}".format(url, err))
        return hosts

    is_erroneous_response, http_ec, http_err = is_erroneous_response_by_server(out)
    if is_erroneous_response:
        logger.error("HTTP {0}:{1}".format(http_ec, http_err))
        return hosts

    for item in json.loads(out).get('items', []):
        host_name = item.get('Hosts', {}).get('host_name', None)
        if host_name:
            hosts.add(host_name)
    return hosts


def _passed(key):
    """Build the result entry of a successful check for host/check *key*."""
    return {'key': key, 'status': STATUS_PASSED}


def _warned(key, warnings):
    """Build the result entry of a check that raised *warnings* for *key*."""
    return {'key': key, 'status': STATUS_WARNING, 'warning': warnings}


def _failed(key, errors):
    """Build the result entry of a check that failed with *errors* for *key*."""
    return {'key': key, 'status': STATUS_FAILED, 'error': errors}


def run_check(options, url, label_check, data):
    """Run host checks.

    Submits the check request, waits for its completion and collects the
    task results. Failures are returned as result entries (never raised or
    returned as a bare code) so that callers can always unpack two values.

    @param options: Parameters passed in from the command line
    @param url: Ambari Server URL
    @param label_check: Text to display for the check result section
    @param data: Data to be submitted to the Ambari Server via a REST API call
    @return: tuple (task_results_by_host, results_to_print)
    """
    out, err, ec = execute_curl_command(url, request_type=HTTP_REQUEST_POST, request_body=data, user=options.user, password=options.password)
    if CODE_SUCCESS != ec or not out:
        logger.debug(out)
        logger.debug(ec)
        logger.debug(err)
        return {}, [_failed(label_check, ['Failed to connect to Ambari server'])]

    is_erroneous_response, http_ec, http_err = is_erroneous_response_by_server(out)
    if is_erroneous_response:
        return {}, [_failed(label_check, [http_err])]
    if not is_request_accepted(out):
        return {}, [_failed(label_check, [out])]

    finalresult, err, ec = polling_request(get_request_url(out), options.user, options.password)
    logger.debug(finalresult)
    if not is_request_finished_successfully(finalresult):
        return {}, [_failed(label_check, [err])]

    request_url = get_request_url(out, summary_only=False)
    return summarize_tasks_by_hosts(request_url, options.user, options.password)


def basic_task_result_parser(json_str, results):
    """Return the structured output of a task, recording the task if it failed.

    @param json_str: Body of a task response
    @param results: List receiving an error entry when the task status is not
                    STATUS_COMPLETED
    @return: 'Tasks/structured_out' for a completed task, {} otherwise
    """
    task_result = json.loads(json_str).get('Tasks', {})
    status = task_result.get('status', None)
    if STATUS_COMPLETED != status:
        results.append({'key': task_result.get('host_name', None), 'status': status, 'error': task_result.get('stderr', None)})
        return {}
    return task_result.get('structured_out', {})


def _structured_out_by_host(task_results_by_host, results_to_print):
    """Yield (host, structured_out) for every task that produced structured output.

    Tasks that did not complete are recorded in *results_to_print* by
    basic_task_result_parser() and skipped, so no parser reports a second,
    contradictory result for the same host.
    """
    for host in (task_results_by_host or {}):
        structured_out = basic_task_result_parser(task_results_by_host[host], results_to_print)
        if structured_out:
            yield host, structured_out


def _exit_code_check_parser(task_results_by_hosts, results_to_print, check_name):
    """Report PASSED/FAILED per host from the 'exit_code' of section *check_name*."""
    for key, structured_out in _structured_out_by_host(task_results_by_hosts, results_to_print):
        check_result = structured_out.get(check_name, {})
        if CODE_SUCCESS == check_result.get('exit_code', -1):
            results_to_print.append(_passed(key))
        else:
            results_to_print.append(_failed(key, [check_result.get('message', None)]))


def host_check_parser(task_results_by_hosts, results_to_print):
    """Append the outcome of 'host_resolution_check' per host to *results_to_print*."""
    _exit_code_check_parser(task_results_by_hosts, results_to_print, 'host_resolution_check')


def run_host_checks(options, agents, server_url):
    """Host name resolution check.

    @param options: Parameters passed in from the command line
    @param agents: Host names of the agents to check
    @param server_url: Basic server url
    """
    label_check = 'Host name resolution'
    step(label_check)
    url = '{0}/api/v1/requests'.format(server_url)
    data = '{{"RequestInfo":{{"action":"check_host","context":"Check host","parameters":{{"check_execute_list":"host_resolution_check","jdk_location":"{0}/resources/","threshold":"20","hosts":"{1}"}}}},"Requests/resource_filters":[{{"hosts":"{1}"}}]}}'.format(server_url, ','.join(agents))
    logger.debug('Host resolution check data {0}'.format(data))
    task_results_by_hosts, results_to_print = run_check(options, url, label_check, data)
    host_check_parser(task_results_by_hosts, results_to_print)
    print_check_results(results_to_print)


def java_home_check_parser(task_results_by_hosts, results_to_print):
    """Append the outcome of 'java_home_check' per host to *results_to_print*."""
    _exit_code_check_parser(task_results_by_hosts, results_to_print, 'java_home_check')


def run_java_home_checks(options, agents, server_url):
    """Java home path check.

    The path checked on the agents is the 'java.home' server property.

    @param options: Parameters passed in from the command line
    @param agents: Host names of the agents to check
    @param server_url: Basic server url
    """
    label_check = 'Java Home location'
    step(label_check)
    url = '{0}/api/v1/requests'.format(server_url)
    java_home = get_ambari_server_property('java.home')
    logger.info('Ambari server java home: {0}'.format(java_home))
    data = '{{"RequestInfo":{{"context":"Check hosts","action":"check_host","parameters":{{"threshold":"60","java_home":"{0}","jdk_location":"{1}/resources/","check_execute_list":"java_home_check"}}}},"Requests/resource_filters":[{{"hosts":"{2}"}}]}}'.format(java_home, server_url, ','.join(agents))
    logger.debug('Java home check data {0}'.format(data))
    task_results_by_hosts, results_to_print = run_check(options, url, label_check, data)
    java_home_check_parser(task_results_by_hosts, results_to_print)
    print_check_results(results_to_print)


def thp_checks_parser(task_results_by_hosts, results_to_print):
    """Warn for every host whose 'transparentHugePage' message is 'always'."""
    for key, structured_out in _structured_out_by_host(task_results_by_hosts, results_to_print):
        thp_message = structured_out.get('transparentHugePage', {}).get('message', None)
        if thp_message == 'always':
            results_to_print.append(_warned(key, ['Transparent Huge Pages (THP) is enabled', 'THP should be disabled to avoid potential Hadoop performance issues.']))
        else:
            results_to_print.append(_passed(key))


def _as_number(value, default=-1):
    """Return *value* as a float, or *default* when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def disk_space_checks_parser(host_info_by_host, results_to_print):
    """Check the free space of mountpoints /, /usr and /usr/lib on every host.

    One result is appended per relevant mountpoint found in 'Hosts/disk_info'.

    @param host_info_by_host: Host entry of the hosts REST response, by host name
    @param results_to_print: List receiving the results
    """
    # NOTE(review): 'available' is compared against GB * 1024 * 1024, i.e. it
    # is presumably expressed in KB -- confirm against the REST API.
    requirements = {
        '/': (DEFAULT_MINDISKSPACE, '/'),
        '/usr': (DEFAULT_MINDISKSPACEUSRLIB, '/usr or /usr/lib'),
        '/usr/lib': (DEFAULT_MINDISKSPACEUSRLIB, '/usr or /usr/lib'),
    }
    for key in host_info_by_host:
        info = host_info_by_host[key].get('Hosts', {})
        for disk in info.get('disk_info', []):
            requirement = requirements.get(disk.get('mountpoint', None))
            if requirement is None:
                continue
            min_gb, label = requirement
            # The value may arrive as a string; convert it so the comparison
            # below is numeric.
            free_space = _as_number(disk.get('available', -1))
            if free_space == -1:
                results_to_print.append(_failed(key, ['Failed to obtain free space for mountpoint {0}'.format(label)]))
            elif free_space < min_gb * 1024 * 1024:
                results_to_print.append(_failed(key, ['A miminum of {0} GB free space for mountpoint {1}'.format(min_gb, label)]))
            else:
                results_to_print.append(_passed(key))


def get_last_agent_env(host_info):
    """Return 'Hosts/last_agent_env' from a host entry, or {} when absent."""
    return host_info.get('Hosts', {}).get('last_agent_env', {})


def firewall_checks_parser(task_results_by_host, results_to_print):
    """Warn for every host reporting 'firewallRunning'; fail when it is not reported."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        if 'firewallRunning' not in last_agent_env:
            results_to_print.append(_failed(key, ['Failed to determine if firewall is running on the host']))
        elif last_agent_env['firewallRunning']:
            results_to_print.append(_warned(key, ['Firewall is running on the host', 'Please configure the firewall to allow communications on the ports documented in the Configuring Ports section of the Ambari documentation.']))
        else:
            results_to_print.append(_passed(key))


def java_process_checks_parser(task_results_by_host, results_to_print):
    """Warn about every process listed under 'hostHealth/activeJavaProcs'."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        host_health = structured_out.get('last_agent_env_check', {}).get('hostHealth', {})
        warnings = ['Process {0} under user {1} should not be running'.format(process['pid'], process['user'])
                    for process in host_health.get('activeJavaProcs', [])]
        results_to_print.append(_warned(key, warnings) if warnings else _passed(key))


def install_packages_checks_parser(task_results_by_host, results_to_print):
    """Warn about every package listed under 'installed_packages'."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        warnings = ['{0} (version {1}) is installed from repo {2}. It should be removed before deploying the cluster.'.format(package['name'], package['version'], package['repoName'])
                    for package in structured_out.get('installed_packages', [])]
        results_to_print.append(_warned(key, warnings) if warnings else _passed(key))


def file_and_folder_checks_parser(task_results_by_host, results_to_print):
    """Warn about every entry listed under 'stackFoldersAndFiles'."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        warnings = ['{0} {1} should not exist.'.format(item['type'].title(), item['name'])
                    for item in last_agent_env.get('stackFoldersAndFiles', [])]
        results_to_print.append(_warned(key, warnings) if warnings else _passed(key))


def live_services_checks_parser(task_results_by_host, results_to_print):
    """Warn about every entry of 'hostHealth/liveServices' whose status is 'Unhealthy'."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        host_health = structured_out.get('last_agent_env_check', {}).get('hostHealth', {})
        warnings = ['Service {0} shoud be up.'.format(service['name'])
                    for service in host_health.get('liveServices', [])
                    if 'Unhealthy' == service['status']]
        results_to_print.append(_warned(key, warnings) if warnings else _passed(key))


def default_user_ids_checks_parser(task_results_by_host, results_to_print):
    """Warn about every user listed under 'existingUsers'; pass when there is none."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        messages = ['User {0} with home directory {1} exists.'.format(user['name'], user['homeDir'])
                    for user in last_agent_env.get('existingUsers', [])]
        results_to_print.append(_warned(key, messages) if messages else _passed(key))


def umask_checks_parser(task_results_by_host, results_to_print):
    """Warn when the reported 'umask' is greater than 23; fail when it is not reported."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        if 'umask' not in last_agent_env:
            results_to_print.append(_failed(key, ['Failed to obtain umask value on the host.']))
            continue
        umask = int(last_agent_env['umask'])
        if umask > 23:
            results_to_print.append(_warned(key, ['Umask is {0}. Consider update it.'.format(umask)]))
        else:
            results_to_print.append(_passed(key))


def alternatives_checks_parser(task_results_by_host, results_to_print):
    """Warn about every entry listed under 'alternatives'."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        warnings = ['Existing /etc/alternativies entry: {0} points to {1}'.format(alternative['name'], alternative['target'])
                    for alternative in last_agent_env.get('alternatives', [])]
        results_to_print.append(_warned(key, warnings) if warnings else _passed(key))


def reverse_lookup_checks_parser(task_results_by_host, results_to_print):
    """Warn when 'reverseLookup' is falsy; fail when it is not reported."""
    for key, structured_out in _structured_out_by_host(task_results_by_host, results_to_print):
        last_agent_env = structured_out.get('last_agent_env_check', {})
        if 'reverseLookup' not in last_agent_env:
            results_to_print.append(_failed(key, ['Failed to determine if DNS reverse lookup is configured on the host']))
        elif last_agent_env['reverseLookup']:
            results_to_print.append(_passed(key))
        else:
            results_to_print.append(_warned(key, ['The hostname was not found in the reverse DNS lookup', 'This may result in incorrect behavior. Please check the DNS setup and fix the issue.']))


def run_agent_checks(options, agents, server_url):
    """Agent last environment check.

    Submits one 'check_host' request covering the agent environment, then
    reports each aspect of the collected data as its own check section. The
    disk space section is fed by a separate hosts query.

    @param options: Parameters passed in from the command line
    @param agents: Host names of the agents to check
    @param server_url: Basic server url
    """
    logger.info('')
    logger.info('Prepare for Ambari Agent host check')
    label_check = 'Ambari Agent host check'
    url = '{0}/api/v1/requests'.format(server_url)
    data = '{{"RequestInfo":{{"action":"check_host","context":"Check host","parameters":{{"check_execute_list":"last_agent_env_check,installed_packages,existing_repos,transparentHugePage","jdk_location":"{0}/resources/","threshold":"20"}}}},"Requests/resource_filters":[{{"hosts":"{1}"}}]}}'.format(server_url, ','.join(agents))
    logger.debug('Agent enviornment check data to submit {0}'.format(data))
    task_results_by_host, results_to_print = run_check(options, url, label_check, data)

    # Failures met while running the request itself are reported in this
    # first section.
    step('Transparent Huge Pages')
    thp_checks_parser(task_results_by_host, results_to_print)
    print_check_results(results_to_print)

    host_info_url = '{0}/api/v1/hosts?fields=Hosts/total_mem,Hosts/cpu_count,Hosts/disk_info,Hosts/last_agent_env,Hosts/host_name,Hosts/os_type,Hosts/os_arch,Hosts/os_family,Hosts/ip'.format(server_url)
    out, err, ec = execute_curl_command(host_info_url, user=options.user, password=options.password)
    logger.debug('Agent host information {0}'.format(out))
    host_info_by_host = {}
    if out:
        for item in json.loads(out).get('items', {}):
            host_name = item.get('Hosts', {}).get('host_name', None)
            if host_name:
                host_info_by_host[host_name] = item
    if host_info_by_host:
        step('Disk space')
        results_to_print = []
        disk_space_checks_parser(host_info_by_host, results_to_print)
        print_check_results(results_to_print)

    sections = (
        ('Firewall enabled', firewall_checks_parser),
        ('Java processes', java_process_checks_parser),
        ('Installed packages', install_packages_checks_parser),
        ('Stack files and directories', file_and_folder_checks_parser),
        ('Live services', live_services_checks_parser),
        ('Default user names', default_user_ids_checks_parser),
        ('Umask', umask_checks_parser),
        ('Alternatives', alternatives_checks_parser),
        ('Reverse lookup', reverse_lookup_checks_parser),
    )
    for title, parser in sections:
        step(title)
        results_to_print = []
        parser(task_results_by_host, results_to_print)
        print_check_results(results_to_print)


# NOTE(review): the original continues here with "def run(options)" ("Execute
# the operation passed in from the command line"). Only the first statements of
# that function are visible in this excerpt, so it is not reproduced here and
# must be carried over unchanged from the original file.
ec) + return ec + agents = get_ambari_agent_nodes(server_url, options.user, options.password) + logger.info('Total number of agents {0}'.format(len(agents))) + + if OPERATION_CHECK == options.operation: + run_host_checks(options, agents, server_url) + run_java_home_checks(options, agents, server_url) + run_agent_checks(options, agents, server_url) + logger.info('') + global has_warnings + global has_errors + if has_errors: + logger.error('Checks finished with errors') + elif has_warnings: + logger.warning('Checks finished with warnings') + else: + logger.info('Checks finished') + return CODE_SUCCESS + else: + logger.error('Unknown operation {0}'.options.operation) + return CODE_ERROR + +def main(): + parser = optparse.OptionParser(usage="usage: %prog [option] arg ... [option] arg",) + init_parser_options(parser) + (options, args) = parser.parse_args() + + backup_file, ec = backup_log(options.log) + + init_logger(options) + + if backup_file: + logger.info('Previous logs backed up as {0}'.format(backup_file)) + + out, ec = validate_options(options) + if CODE_ERROR == ec: + logger.error(out) + sys.exit(ec) + else: + dump_parameters_to_log(options) + try: + ec = run(options) + sys.exit(ec) + except Exception, e: + logger.exception(e) + sys.exit(CODE_ERROR) + +if __name__ == "__main__": + try: + main() + except (KeyboardInterrupt, EOFError): + print("Aborting ... Keyboard Interrupt.") + sys.exit(1)
