Philip Zeyliger has posted comments on this change. ( 
http://gerrit.cloudera.org:8080/10056 )

Change subject: IMPALA-6747: Automate diagnostics collection.
......................................................................


Patch Set 1: Code-Review+2

This is an unrevert. I helped review the original diff, so here's the 
diff-of-diffs.

The diff looks reasonable to me.

$git reset --hard 322bdc0aeb7b3a7e7dc92ae1438bb8920b8a3a3b^; git cherry-pick 
asf-gerrit-reviews/56/10056/1; git diff 
322bdc0aeb7b3a7e7dc92ae1438bb8920b8a3a3b..HEAD | cat
HEAD is now at eb585b7 IMPALA-6694: fix "buffer pool" child profile order
[zz 794f7e9] IMPALA-6747: Automate diagnostics collection.
 Author: Bharath Vissapragada <bhara...@cloudera.com>
 Date: Thu Apr 12 21:52:29 2018 -0700
 5 files changed, 647 insertions(+)
 create mode 100644 bin/diagnostics/__init__.py
 create mode 100644 bin/diagnostics/collect_diagnostics.py
 create mode 100755 bin/diagnostics/collect_shared_libs.sh
 create mode 100644 tests/unittests/test_command.py
diff --git bin/diagnostics/collect_diagnostics.py 
bin/diagnostics/collect_diagnostics.py
index 6abc30a..8fe4561 100644
--- bin/diagnostics/collect_diagnostics.py
+++ bin/diagnostics/collect_diagnostics.py
@@ -26,11 +26,13 @@ import os
 import shutil
 import subprocess
 import sys
+import tarfile
 import time
 import tempfile
 import traceback

 from collections import namedtuple
+from contextlib import closing
 from struct import Struct
 from threading import Timer

@@ -54,6 +56,7 @@ from threading import Timer
 #    versions without full breakpad support (<= release 2.6) will reliably 
crash if
 #    we attempt to do that since those versions do not have the corresponding 
signal
 #    handler. Hence it is suggested to run this script only on releases 2.7 
and later.
+# 4. python >= 2.6
 #
 # Usage: python collect_diagnostics.py --help
 #
@@ -162,9 +165,11 @@ class ImpalaDiagnosticsHandler(object):
   def get_num_child_proc(self, name):
     """Returns number of processes with the given name and target Impala pid
     as parent."""
-    cmd = Command(["pgrep", "-c", "-P", str(self.args.pid), name])
+    # Not all pgrep versions support -c parameter. So fetch the stdout and
+    # count the number of items in the list.
+    cmd = Command(["pgrep", "-P", str(self.args.pid), name])
     cmd.run()
-    return int(cmd.stdout.strip())
+    return len(cmd.stdout.split("\n")) - 1

   def get_java_home_from_env(self):
     """Returns JAVA_HOME set in the env of the target process."""
@@ -218,7 +223,7 @@ class ImpalaDiagnosticsHandler(object):
       # Otherwise the subsequent check on the process count could pass even 
when the
       # fork didn't succeed. This sleep reduces the likelihood of such race.
       time.sleep(1)
-      if self.get_num_child_proc(self.target_process_name) == 1:
+      if self.get_num_child_proc(self.target_process_name) == 0:
         break
     return

@@ -395,6 +400,8 @@ class ImpalaDiagnosticsHandler(object):
     for profile_path in sorted_profiles:
       try:
         file_size = os.path.getsize(profile_path)
+        if file_size == 0:
+          continue
         if profile_size_included_so_far + file_size > 
self.args.profiles_max_size_limit:
           # Copying the whole file violates profiles_max_size_limit. Copy a 
part of it.
           # Profile logs are newline delimited with a single profile per line.
@@ -402,13 +409,13 @@ class ImpalaDiagnosticsHandler(object):
               self.args.profiles_max_size_limit - profile_size_included_so_far
           file_name = os.path.basename(profile_path)
           copied_bytes = 0
-          with open(profile_path, "rb") as in_file,\
-              open(os.path.join(out_dir, file_name), "wb") as out_file:
-            for line in in_file.readlines():
-              if copied_bytes + len(line) > num_bytes_to_copy:
-                break
-              out_file.write(line)
-              copied_bytes += len(line)
+          with open(profile_path, "rb") as in_file:
+            with open(os.path.join(out_dir, file_name), "wb") as out_file:
+              for line in in_file.readlines():
+                if copied_bytes + len(line) > num_bytes_to_copy:
+                  break
+                out_file.write(line)
+                copied_bytes += len(line)
           return
         profile_size_included_so_far += file_size
         shutil.copy2(profile_path, out_dir)
@@ -421,12 +428,33 @@ class ImpalaDiagnosticsHandler(object):
     # Shared libs are collected if either of core dump or minidumps are 
enabled.
     if not (self.args.gcore or self.args.minidumps_dir):
       return
+    # If gdb binary is missing, we cannot extract the shared library list
+    if not self.gdb_cmd:
+      logging.info("'gdb' executable missing. Skipping shared library 
collection.")
+      return
+
     out_dir = os.path.join(self.collection_root_dir, "shared_libs")

     script_path = os.path.join(self.script_dir, "collect_shared_libs.sh")
     cmd_args = [script_path, self.gdb_cmd, str(self.args.pid), out_dir]
     Command(cmd_args, self.args.timeout).run()

+  def archive_diagnostics(self):
+    """Creates a gztar of the collected diagnostics and cleans up the original
+    directory. Returns True if successful, False otherwise."""
+    try:
+      # tarfile does not support context managers in python 2.6. We use 
closing() to work
+      # around that.
+      with closing(tarfile.open(self.collection_root_dir + '.tar.gz', 
mode='w:gz')) as\
+          archive:
+        archive.add(self.collection_root_dir)
+      return True
+    except Exception:
+      logging.exception("Encountered an exception archiving diagnostics, 
cleaning up.")
+      return False
+    finally:
+      self.cleanup()
+
   def cleanup(self):
     """Cleans up the directory to which diagnostics were written."""
     shutil.rmtree(self.collection_root_dir, ignore_errors=True)
@@ -466,9 +494,8 @@ class ImpalaDiagnosticsHandler(object):
     # Archive the directory, even if it is partial.
     archive_path = self.collection_root_dir + ".tar.gz"
     logging.info("Archiving diagnostics to path: %s" % archive_path)
-    shutil.make_archive(self.collection_root_dir, "gztar", 
self.collection_root_dir)
-    self.cleanup()
-    logging.info("Diagnostics collected at path: %s" % archive_path)
+    if self.archive_diagnostics():
+      logging.info("Diagnostics collected at path: %s" % archive_path)
     return not exception_encountered

 def get_args_parser():


--
To view, visit http://gerrit.cloudera.org:8080/10056
To unsubscribe, visit http://gerrit.cloudera.org:8080/settings

Gerrit-Project: Impala-ASF
Gerrit-Branch: master
Gerrit-MessageType: comment
Gerrit-Change-Id: I166e726f1dd1ce81187616e4f06d2404fa379bf8
Gerrit-Change-Number: 10056
Gerrit-PatchSet: 1
Gerrit-Owner: Bharath Vissapragada <bhara...@cloudera.com>
Gerrit-Reviewer: Bharath Vissapragada <bhara...@cloudera.com>
Gerrit-Reviewer: Philip Zeyliger <phi...@cloudera.com>
Gerrit-Comment-Date: Fri, 13 Apr 2018 16:19:28 +0000
Gerrit-HasComments: No

Reply via email to