Philip Zeyliger has posted comments on this change. ( http://gerrit.cloudera.org:8080/10056 )
Change subject: IMPALA-6747: Automate diagnostics collection. ...................................................................... Patch Set 1: Code-Review+2 This is an unrevert. I helped review the original diff, so here's the diff-of-diffs. The diff looks reasonable to me. $git reset --hard 322bdc0aeb7b3a7e7dc92ae1438bb8920b8a3a3b^; git cherry-pick asf-gerrit-reviews/56/10056/1; git diff 322bdc0aeb7b3a7e7dc92ae1438bb8920b8a3a3b..HEAD | cat HEAD is now at eb585b7 IMPALA-6694: fix "buffer pool" child profile order [zz 794f7e9] IMPALA-6747: Automate diagnostics collection. Author: Bharath Vissapragada <bhara...@cloudera.com> Date: Thu Apr 12 21:52:29 2018 -0700 5 files changed, 647 insertions(+) create mode 100644 bin/diagnostics/__init__.py create mode 100644 bin/diagnostics/collect_diagnostics.py create mode 100755 bin/diagnostics/collect_shared_libs.sh create mode 100644 tests/unittests/test_command.py diff --git bin/diagnostics/collect_diagnostics.py bin/diagnostics/collect_diagnostics.py index 6abc30a..8fe4561 100644 --- bin/diagnostics/collect_diagnostics.py +++ bin/diagnostics/collect_diagnostics.py @@ -26,11 +26,13 @@ import os import shutil import subprocess import sys +import tarfile import time import tempfile import traceback from collections import namedtuple +from contextlib import closing from struct import Struct from threading import Timer @@ -54,6 +56,7 @@ from threading import Timer # versions without full breakpad support (<= release 2.6) will reliably crash if # we attempt to do that since those versions do not have the corresponding signal # handler. Hence it is suggested to run this script only on releases 2.7 and later. +# 4. python >= 2.6 # # Usage: python collect_diagnostics.py --help # @@ -162,9 +165,11 @@ class ImpalaDiagnosticsHandler(object): def get_num_child_proc(self, name): """Returns number of processes with the given name and target Impala pid as parent.""" - cmd = Command(["pgrep", "-c", "-P", str(self.args.pid), name]) + # Not all pgrep versions support -c parameter. So fetch the stdout and + # count the number of items in the list. + cmd = Command(["pgrep", "-P", str(self.args.pid), name]) cmd.run() - return int(cmd.stdout.strip()) + return len(cmd.stdout.split("\n")) - 1 def get_java_home_from_env(self): """Returns JAVA_HOME set in the env of the target process.""" @@ -218,7 +223,7 @@ class ImpalaDiagnosticsHandler(object): # Otherwise the subsequent check on the process count could pass even when the # fork didn't succeed. This sleep reduces the likelihood of such race. time.sleep(1) - if self.get_num_child_proc(self.target_process_name) == 1: + if self.get_num_child_proc(self.target_process_name) == 0: break return @@ -395,6 +400,8 @@ class ImpalaDiagnosticsHandler(object): for profile_path in sorted_profiles: try: file_size = os.path.getsize(profile_path) + if file_size == 0: + continue if profile_size_included_so_far + file_size > self.args.profiles_max_size_limit: # Copying the whole file violates profiles_max_size_limit. Copy a part of it. # Profile logs are newline delimited with a single profile per line. @@ -402,13 +409,13 @@ class ImpalaDiagnosticsHandler(object): self.args.profiles_max_size_limit - profile_size_included_so_far file_name = os.path.basename(profile_path) copied_bytes = 0 - with open(profile_path, "rb") as in_file,\ - open(os.path.join(out_dir, file_name), "wb") as out_file: - for line in in_file.readlines(): - if copied_bytes + len(line) > num_bytes_to_copy: - break - out_file.write(line) - copied_bytes += len(line) + with open(profile_path, "rb") as in_file: + with open(os.path.join(out_dir, file_name), "wb") as out_file: + for line in in_file.readlines(): + if copied_bytes + len(line) > num_bytes_to_copy: + break + out_file.write(line) + copied_bytes += len(line) return profile_size_included_so_far += file_size shutil.copy2(profile_path, out_dir) @@ -421,12 +428,33 @@ class ImpalaDiagnosticsHandler(object): # Shared libs are collected if either of core dump or minidumps are enabled. if not (self.args.gcore or self.args.minidumps_dir): return + # If gdb binary is missing, we cannot extract the shared library list + if not self.gdb_cmd: + logging.info("'gdb' executable missing. Skipping shared library collection.") + return + out_dir = os.path.join(self.collection_root_dir, "shared_libs") script_path = os.path.join(self.script_dir, "collect_shared_libs.sh") cmd_args = [script_path, self.gdb_cmd, str(self.args.pid), out_dir] Command(cmd_args, self.args.timeout).run() + def archive_diagnostics(self): + """Creates a gztar of the collected diagnostics and cleans up the original + directory. Returns True if successful, False otherwise.""" + try: + # tarfile does not support context managers in python 2.6. We use closing() to work + # around that. + with closing(tarfile.open(self.collection_root_dir + '.tar.gz', mode='w:gz')) as\ + archive: + archive.add(self.collection_root_dir) + return True + except Exception: + logging.exception("Encountered an exception archiving diagnostics, cleaning up.") + return False + finally: + self.cleanup() + def cleanup(self): """Cleans up the directory to which diagnostics were written.""" shutil.rmtree(self.collection_root_dir, ignore_errors=True) @@ -466,9 +494,8 @@ class ImpalaDiagnosticsHandler(object): # Archive the directory, even if it is partial. archive_path = self.collection_root_dir + ".tar.gz" logging.info("Archiving diagnostics to path: %s" % archive_path) - shutil.make_archive(self.collection_root_dir, "gztar", self.collection_root_dir) - self.cleanup() - logging.info("Diagnostics collected at path: %s" % archive_path) + if self.archive_diagnostics(): + logging.info("Diagnostics collected at path: %s" % archive_path) return not exception_encountered def get_args_parser(): -- To view, visit http://gerrit.cloudera.org:8080/10056 To unsubscribe, visit http://gerrit.cloudera.org:8080/settings Gerrit-Project: Impala-ASF Gerrit-Branch: master Gerrit-MessageType: comment Gerrit-Change-Id: I166e726f1dd1ce81187616e4f06d2404fa379bf8 Gerrit-Change-Number: 10056 Gerrit-PatchSet: 1 Gerrit-Owner: Bharath Vissapragada <bhara...@cloudera.com> Gerrit-Reviewer: Bharath Vissapragada <bhara...@cloudera.com> Gerrit-Reviewer: Philip Zeyliger <phi...@cloudera.com> Gerrit-Comment-Date: Fri, 13 Apr 2018 16:19:28 +0000 Gerrit-HasComments: No