Repository: incubator-impala
Updated Branches:
  refs/heads/master 46c3e43ed -> ff0cd823c

CDH-39818: Add Breakpad minidump collection script

Add two scripts:
collect_minidumps.py - generates a compressed tarball that contains minidumps
generated by Breakpad.
generate_minidump_collection_testdata.py - generates testdata for the above script.

Change-Id: I85b3643133e28eca07507ac2a79acbf73128456f
Reviewed-on: http://gerrit.cloudera.org:8080/2997
Reviewed-by: Taras Bobrovytsky <[email protected]>
Reviewed-by: Michael Brown <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ff0cd823
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ff0cd823
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ff0cd823

Branch: refs/heads/master
Commit: ff0cd823cc65b393e919511ef3239532b635a3b5
Parents: 1aeda14
Author: Taras Bobrovytsky <[email protected]>
Authored: Thu May 5 19:04:04 2016 -0700
Committer: Tim Armstrong <[email protected]>
Committed: Fri May 13 15:52:53 2016 -0700

----------------------------------------------------------------------
 bin/collect_minidumps.py                     | 206 ++++++++++++++++++++++
 bin/generate_minidump_collection_testdata.py | 137 ++++++++++++++
 2 files changed, 343 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ff0cd823/bin/collect_minidumps.py
----------------------------------------------------------------------
diff --git a/bin/collect_minidumps.py b/bin/collect_minidumps.py
new file mode 100755
index 0000000..1fe2301
--- /dev/null
+++ b/bin/collect_minidumps.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+# Copyright (c) 2016, Cloudera, inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script is to be called by Cloudera Manager to collect Breakpad minidump files up to
+# a specified date/time. A compressed tarball is created in the user-specified location.
+# We try to fit as many files as possible into the tarball until a size limit is reached.
+# Example invocation by CM:
+# ./collect_minidumps.py --conf_dir=/var/run/.../5555-impala-STATESTORE/impala-conf \
+#   --role_name=statestored --max_output_size=50000000 --end_time=1463033495 \
+#   --output_file_path=/tmp/minidump_package.tar.gz
+
+import os
+import re
+import sys
+import tarfile
+
+from optparse import OptionParser
+
+class FileArchiver(object):
+  '''This is a generic class that makes a tarball out of files in the source_dir. We
+  assume that source_dir contains only files. The resulting file will be compressed with
+  gzip and placed into output_file_path. If a file with that name already exists, it will
+  be deleted and re-created. max_output_size is the maximum allowed size of the resulting
+  tarball. If all files in the source_dir can't fit into the allowed size, the most recent
+  files will be preferred. The start_time and end_time parameters allow us to specify an
+  interval of time for which to consider the files.
+  '''
+
+  def __init__(self,
+               source_dir,
+               output_file_path,
+               max_output_size,
+               start_time=None,
+               end_time=None):
+    self.source_dir = source_dir
+    self.max_output_size = max_output_size
+    self.start_time = start_time
+    self.end_time = end_time
+    self.output_file_path = output_file_path
+    # Maps the number of files in the tarball to the resulting size (in bytes).
+    self.resulting_sizes = {}
+    self.file_list = []
+
+  def _remove_output_file(self):
+    try:
+      os.remove(self.output_file_path)
+    except OSError:
+      pass
+
+  def _tar_files(self, num_files=None):
+    '''Make a tarball with the num_files most recent files in the file_list. Record the
+    resulting size in the resulting_sizes map and return it.
+    '''
+    num_files = num_files or len(self.file_list)
+    self._remove_output_file()
+    if num_files == 0:
+      size = 0
+    else:
+      with tarfile.open(self.output_file_path, mode='w:gz') as out:
+        for i in xrange(num_files):
+          out.add(self.file_list[i])
+      size = os.stat(self.output_file_path).st_size
+    self.resulting_sizes[num_files] = size
+    return size
+
+  def _compute_file_list(self):
+    '''Computes a sorted list of eligible files in the source directory by filtering out
+    files with a modification time outside the desired time range. Directories and other
+    non-files are ignored.
+    '''
+    file_list = []
+    for f in os.listdir(self.source_dir):
+      full_path = os.path.join(self.source_dir, f)
+      if not os.path.isfile(full_path):
+        continue
+      time_modified = os.stat(full_path).st_mtime
+      if self.start_time and self.start_time > time_modified:
+        continue
+      if self.end_time and self.end_time < time_modified:
+        continue
+      file_list.append(full_path)
+    self.file_list = sorted(file_list, key=lambda f: os.stat(f).st_mtime, reverse=True)
+
+  def _binary_search(self):
+    '''Calculates the maximum number of files that can be collected such that the tarball
+    size does not exceed max_output_size.
+    '''
+    min_num = 0
+    max_num = len(self.file_list)
+    while max_num - min_num > 1:
+      mid = (min_num + max_num) // 2
+      if self._tar_files(mid) <= self.max_output_size:
+        min_num = mid
+      else:
+        max_num = mid
+    return min_num
+
+  def make_tarball(self):
+    '''Make a tarball with the maximum number of files such that the size of the tarball
+    is less than or equal to max_output_size. Returns a pair (status (int), message
+    (str)). status represents the result of the operation and follows the Unix convention
+    where 0 equals success. message provides additional information. A status of 1 is
+    returned if source_dir is not empty and no files were able to fit into the tarball.
+    '''
+    self._compute_file_list()
+    if len(self.file_list) == 0:
+      status = 0
+      msg = 'No files found in "{0}".'
+      return status, msg.format(self.source_dir)
+    output_size = self._tar_files()
+    if output_size <= self.max_output_size:
+      status = 0
+      msg = 'Success, archived all {0} files in "{1}".'
+      return status, msg.format(len(self.file_list), self.source_dir)
+    else:
+      max_num_files = self._binary_search()
+      if max_num_files == 0:
+        self._remove_output_file()
+        status = 1
+        msg = ('Unable to archive any files in "{0}". '
+               'Increase max_output_size to at least {1} bytes.')
+        # If max_num_files is 0, we are guaranteed that the binary search tried making a
+        # tarball with 1 file.
+        return status, msg.format(self.source_dir, self.resulting_sizes[1])
+      else:
+        self._tar_files(max_num_files)
+        status = 0
+        msg = 'Success. Archived {0} out of {1} files in "{2}".'
+        return status, msg.format(max_num_files, len(self.file_list), self.source_dir)
+
+def get_minidump_dir(conf_dir, role_name):
+  '''Extracts the minidump directory path for a given role from the configuration file.'''
+  ROLE_FLAGFILE_MAP = {
+      'impalad': 'impalad_flags',
+      'statestored': 'state_store_flags',
+      'catalogd': 'catalogserver_flags'}
+  result = None
+  try:
+    file_path = os.path.join(conf_dir, ROLE_FLAGFILE_MAP[role_name])
+    with open(file_path, 'r') as f:
+      for line in f:
+        m = re.match('-minidump_path=(.*)', line)
+        if m:
+          result = m.group(1)
+  except IOError as e:
+    print >> sys.stderr, 'Error: Unable to open "{0}".'.format(file_path)
+    sys.exit(1)
+  if result:
+    result = os.path.join(result, role_name)
+    if not os.path.isdir(result):
+      sys.exit(0)
+  else:
+    msg = 'Error: "{0}" does not contain a "-minidump_path" flag.'
+    print >> sys.stderr, msg.format(file_path)
+    sys.exit(1)
+  return result
+
+def main():
+  parser = OptionParser()
+  parser.add_option('--conf_dir',
+      help='Directory in which to look for the config file with startup flags.')
+  parser.add_option('--role_name', type='choice',
+      choices=['impalad', 'statestored', 'catalogd'], default='impalad',
+      help='The role for which to collect the minidumps.')
+  parser.add_option('--max_output_size', default=40*1024*1024, type='int',
+      help='The maximum size of the resulting tarball, in bytes. If the total size '
+           'exceeds this value, the most recent files will be preferred.')
+  parser.add_option('--start_time', default=None, type='int',
+      help='Interval start time (in epoch seconds UTC).')
+  parser.add_option('--end_time', default=None, type='int',
+      help='Interval end time, until when to collect the minidump files '
+           '(in epoch seconds UTC).')
+  parser.add_option('--output_file_path', help='The full path of the output file.')
+  options, args = parser.parse_args()
+  if not options.conf_dir:
+    msg = 'Error: conf_dir is not specified.'
+    print >> sys.stderr, msg
+    sys.exit(1)
+  if not options.output_file_path:
+    msg = 'Error: output_file_path is not specified.'
+    print >> sys.stderr, msg
+    sys.exit(1)
+
+  minidump_dir = get_minidump_dir(options.conf_dir, options.role_name)
+  file_archiver = FileArchiver(source_dir=minidump_dir,
+      max_output_size=options.max_output_size,
+      start_time=options.start_time,
+      end_time=options.end_time,
+      output_file_path=options.output_file_path)
+  status, msg = file_archiver.make_tarball()
+  print >> sys.stderr, msg
+  sys.exit(status)
+
+if __name__ == '__main__':
+  main()


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ff0cd823/bin/generate_minidump_collection_testdata.py
----------------------------------------------------------------------
diff --git a/bin/generate_minidump_collection_testdata.py b/bin/generate_minidump_collection_testdata.py
new file mode 100755
index 0000000..862fdb4
--- /dev/null
+++ b/bin/generate_minidump_collection_testdata.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env impala-python
+# Copyright (c) 2016, Cloudera, inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script generates testdata for collect_minidumps.py. 3 text files will be created
+# containing startup flags for each of the services in (conf_dir)/impalad_flags,
+# (conf_dir)/state_store_flags, and (conf_dir)/catalogserver_flags. Each of those files
+# will have a parameter -minidump_path. Each path will look like (minidump_dir)/impalad,
+# (minidump_dir)/catalogd, (minidump_dir)/statestored. Sample minidump files will be
+# generated and placed into each of those directories. It is possible to control the
+# minidump file timestamps by specifying the start_time and end_time. The timestamps will
+# be spaced evenly in the interval. Alternatively, a duration can be specified, which will
+# create the files in the interval [now - duration, now]. Minidumps are simulated by
+# making the files easily compressible through some repeated data.
+
+import errno
+import os
+import random
+import shutil
+import time
+
+from optparse import OptionParser
+
+parser = OptionParser()
+parser.add_option('--conf_dir', default='/tmp/impala-conf')
+parser.add_option('--minidump_dir', default='/tmp/minidumps')
+parser.add_option('--start_time', default=None, type='int')
+parser.add_option('--end_time', default=None, type='int')
+parser.add_option('--duration', default=3600, type='int',
+    help="If start and end time are not set, they will be calculated based on this value.")
+parser.add_option('--num_minidumps', default=20, type='int')
+
+options, args = parser.parse_args()
+
+CONFIG_FILE = '''-beeswax_port=21000
+-fe_port=21000
+-be_port=22000
+-llama_callback_port=28000
+-hs2_port=21050
+-enable_webserver=true
+-mem_limit=108232130560
+-max_log_files=10
+-webserver_port=25000
+-max_result_cache_size=100000
+-state_store_subscriber_port=23000
+-statestore_subscriber_timeout_seconds=30
+-scratch_dirs=/data/1/impala/impalad,/data/10/impala/impalad,/data/11/impala/impalad
+-default_query_options
+-log_filename=impalad
+-audit_event_log_dir=/var/log/impalad/audit
+-max_audit_event_log_file_size=5000
+-abort_on_failed_audit_event=false
+-lineage_event_log_dir=/var/log/impalad/lineage
+-minidump_path={0}
+-max_lineage_log_file_size=5000
+-hostname=vb0204.halxg.cloudera.com
+-state_store_host=vb0202.halxg.cloudera.com
+-enable_rm=false
+-state_store_port=24000
+-catalog_service_host=vb0202.halxg.cloudera.com
+-catalog_service_port=26000
+-local_library_dir=/var/lib/impala/udfs
+-disable_admission_control=true
+-disk_spill_encryption=false
+-abort_on_config_error=true'''
+
+ROLE_NAMES = {'impalad': 'impalad_flags',
+              'statestored': 'state_store_flags',
+              'catalogd': 'catalogserver_flags'}
+
+def generate_conf_files():
+  try:
+    os.makedirs(options.conf_dir)
+  except OSError as e:
+    if e.errno == errno.EEXIST and os.path.isdir(options.conf_dir):
+      pass
+    else:
+      raise e
+  for role_name in ROLE_NAMES:
+    with open(os.path.join(options.conf_dir, ROLE_NAMES[role_name]), 'w') as f:
+      f.write(CONFIG_FILE.format(options.minidump_dir))
+
+def random_bytes(num):
+  return ''.join(chr(random.randint(0, 255)) for _ in range(num))
+
+def write_minidump(common_data, timestamp, target_dir):
+  '''Generate and write the minidump into the target_dir. atime and mtime of the minidump
+  will be set to timestamp.'''
+  file_name = ''.join(random.choice('abcdefghijklmnopqrstuvwxyz') for _ in xrange(10))
+  with open(os.path.join(target_dir, file_name), 'wb') as f:
+    # We want the minidumps to be pretty similar to each other. The number 8192 was chosen
+    # arbitrarily and seemed like a reasonable guess.
+    unique_data = random_bytes(8192)
+    f.write(unique_data)
+    f.write(common_data)
+  os.utime(os.path.join(target_dir, file_name), (timestamp, timestamp))
+
+def generate_minidumps():
+  if options.start_time is None or options.end_time is None:
+    start_timestamp = time.time() - options.duration
+    end_timestamp = time.time()
+  else:
+    start_timestamp = options.start_time
+    end_timestamp = options.end_time
+  if os.path.exists(options.minidump_dir):
+    shutil.rmtree(options.minidump_dir)
+  for role_name in ROLE_NAMES:
+    os.makedirs(os.path.join(options.minidump_dir, role_name))
+    # We want the files to have a high compression ratio and be several megabytes in size.
+    # The parameters below should accomplish this.
+    repeated_token = random_bytes(256)
+    common_data = repeated_token * 40000
+    if options.num_minidumps == 1:
+      interval = 0
+    else:
+      interval = (end_timestamp - start_timestamp) // (options.num_minidumps - 1)
+    for i in xrange(options.num_minidumps):
+      write_minidump(common_data,
+                     start_timestamp + interval * i,
+                     os.path.join(options.minidump_dir, role_name))
+
+def main():
+  generate_conf_files()
+  generate_minidumps()
+
+if __name__ == '__main__':
+  main()
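
A quick way to poke at the archiver by hand is to drive FileArchiver directly. The
snippet below is only a sketch and not part of this change: it assumes
collect_minidumps.py is importable from the working directory, and the source
directory and the 10 MB limit are made-up example values.

# Sketch only: exercise FileArchiver directly (paths and size limit are examples).
import sys

from collect_minidumps import FileArchiver

archiver = FileArchiver(source_dir='/tmp/minidumps/impalad',
                        output_file_path='/tmp/minidump_package.tar.gz',
                        max_output_size=10 * 1024 * 1024)
status, msg = archiver.make_tarball()
print >> sys.stderr, msg
sys.exit(status)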
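
For an end-to-end check against generated test data, something along these lines
should work. Again a sketch under assumptions: it is run from the Impala source
root, it relies on the /tmp/impala-conf and /tmp/minidumps defaults baked into
generate_minidump_collection_testdata.py, and the 5 MB limit is arbitrary.

# Sketch only: generate fake minidumps, archive the impalad ones, list the result.
import subprocess
import tarfile

subprocess.check_call(['bin/generate_minidump_collection_testdata.py'])
status = subprocess.call(['bin/collect_minidumps.py',
                          '--conf_dir=/tmp/impala-conf',
                          '--role_name=impalad',
                          '--max_output_size=5000000',
                          '--output_file_path=/tmp/minidump_package.tar.gz'])
print 'collect_minidumps.py exited with status', status
if status == 0:
  with tarfile.open('/tmp/minidump_package.tar.gz') as tar:
    print '\n'.join(tar.getnames())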
