IMPALA-5769: Add periodic minidump cleanup

Minidumps can be written by sending SIGUSR1 to our daemon processes.
That way, an arbitrary number of minidump files can be created. This
change adds minidump cleanup to the periodic log file cleanup to
effectively bound the maximum number of minidumps we keep around.

Change-Id: Ie02ff2271412d814f84a4ff42ccbca51d91bf980
Reviewed-on: http://gerrit.cloudera.org:8080/7605
Reviewed-by: Lars Volker <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/294d42ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/294d42ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/294d42ad

Branch: refs/heads/master
Commit: 294d42adc117046f975665834af03ddaa53ec27e
Parents: f74f665
Author: Lars Volker <[email protected]>
Authored: Mon Aug 7 11:53:27 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Wed Aug 16 08:41:16 2017 +0000

----------------------------------------------------------------------
 be/src/common/init.cc                 | 13 +++++++----
 be/src/util/minidump.cc               |  9 +-------
 be/src/util/minidump.h                |  4 ++++
 tests/custom_cluster/test_breakpad.py | 36 +++++++++++++++++++++++++-----
 4 files changed, 45 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/common/init.cc
----------------------------------------------------------------------
diff --git a/be/src/common/init.cc b/be/src/common/init.cc
index 523796a..d778523 100644
--- a/be/src/common/init.cc
+++ b/be/src/common/init.cc
@@ -55,12 +55,13 @@
 
 using namespace impala;
 
+DECLARE_bool(enable_process_lifetime_heap_profiling);
+DECLARE_string(heap_profile_dir);
 DECLARE_string(hostname);
-DECLARE_string(redaction_rules_file);
-// TODO: renamed this to be more generic when we have a good CM release to do 
so.
+// TODO: rename this to be more generic when we have a good CM release to do 
so.
 DECLARE_int32(logbufsecs);
-DECLARE_string(heap_profile_dir);
-DECLARE_bool(enable_process_lifetime_heap_profiling);
+DECLARE_int32(max_minidumps);
+DECLARE_string(redaction_rules_file);
 
 DEFINE_int32(max_log_files, 10, "Maximum number of log files to retain per 
severity "
     "level. The most recent log files are retained. If set to 0, all log files 
are "
@@ -121,6 +122,10 @@ static scoped_ptr<impala::Thread> pause_monitor;
     impala::CheckAndRotateLogFiles(FLAGS_max_log_files);
     // Check for audit event log rotation in every interval of the maintenance 
thread
     impala::CheckAndRotateAuditEventLogFiles(FLAGS_max_audit_event_log_files);
+    // Check for minidump rotation in every interval of the maintenance 
thread. This is
+    // necessary since an arbitrary number of minidumps can be written by 
sending SIGUSR1
+    // to the process.
+    impala::CheckAndRotateMinidumps(FLAGS_max_minidumps);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/util/minidump.cc
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.cc b/be/src/util/minidump.cc
index f722b20..dbaa0cc 100644
--- a/be/src/util/minidump.cc
+++ b/be/src/util/minidump.cc
@@ -116,9 +116,7 @@ static void SetupSigUSR1Handler(bool minidumps_enabled) {
   sigaction(SIGUSR1, &sig_action, NULL);
 }
 
-/// Check the number of minidump files and removes the oldest ones to maintain 
an upper
-/// bound on the number of files.
-static void CheckAndRemoveMinidumps(int max_minidumps) {
+void CheckAndRotateMinidumps(int max_minidumps) {
   // Disable rotation if 0 or wrong input
   if (max_minidumps <= 0) return;
 
@@ -225,11 +223,6 @@ Status RegisterMinidump(const char* cmd_line_path) {
     return Status(ss.str());
   }
 
-  // Rotate old minidump files. We only need to do this on startup (in 
contrast to
-  // periodically) because only process crashes will trigger the creation of 
new minidump
-  // files.
-  CheckAndRemoveMinidumps(FLAGS_max_minidumps);
-
   google_breakpad::MinidumpDescriptor desc(FLAGS_minidump_path.c_str());
 
   // Limit filesize if configured.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/be/src/util/minidump.h
----------------------------------------------------------------------
diff --git a/be/src/util/minidump.h b/be/src/util/minidump.h
index b2c7160..158e43e 100644
--- a/be/src/util/minidump.h
+++ b/be/src/util/minidump.h
@@ -30,6 +30,10 @@ Status RegisterMinidump(const char* cmd_line_path);
 /// tests that deliberately trigger DCHECKs. Returns true if minidumps were 
previously
 /// enabled or false otherwise.
 bool EnableMinidumpsForTest(bool enabled);
+
+/// Checks the number of minidump files and removes the oldest ones to 
maintain an upper
+/// bound on the number of files.
+void CheckAndRotateMinidumps(int max_minidumps);
 }
 
 #endif

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/294d42ad/tests/custom_cluster/test_breakpad.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_breakpad.py 
b/tests/custom_cluster/test_breakpad.py
index 79ba70c..424e1c0 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -117,7 +117,7 @@ class TestBreakpad(CustomClusterTestSuite):
       return self.cluster.statestored and 1 or 0
     raise RuntimeError("Unknown daemon name: %s" % daemon)
 
-  def wait_for_num_processes(self, daemon, num_expected, timeout=60):
+  def wait_for_num_processes(self, daemon, num_expected, timeout=5):
     end = time.time() + timeout
     self.cluster.refresh()
     num_processes = self.get_num_processes(daemon)
@@ -146,7 +146,6 @@ class TestBreakpad(CustomClusterTestSuite):
     assert self.count_minidumps('statestored', base_dir) == 1
     assert self.count_minidumps('catalogd', base_dir) == 1
 
-
   def assert_num_logfile_entries(self, expected_count):
     self.assert_impalad_log_contains('INFO', 'Wrote minidump to ',
         expected_count=expected_count)
@@ -172,9 +171,9 @@ class TestBreakpad(CustomClusterTestSuite):
     cluster_size = self.get_num_processes('impalad')
     self.kill_cluster(SIGUSR1)
     # Breakpad forks to write its minidump files, wait for all the clones to 
terminate.
-    assert self.wait_for_num_processes('impalad', cluster_size, 5) == 
cluster_size
-    assert self.wait_for_num_processes('catalogd', 1, 5) == 1
-    assert self.wait_for_num_processes('statestored', 1, 5) == 1
+    assert self.wait_for_num_processes('impalad', cluster_size) == cluster_size
+    assert self.wait_for_num_processes('catalogd', 1) == 1
+    assert self.wait_for_num_processes('statestored', 1) == 1
     # Make sure impalad still answers queries.
     client = self.create_impala_client()
     self.execute_query_expect_success(client, "SELECT COUNT(*) FROM 
functional.alltypes")
@@ -229,6 +228,33 @@ class TestBreakpad(CustomClusterTestSuite):
     assert self.count_minidumps('catalogd') == 1
 
   @pytest.mark.execute_serially
+  def test_minidump_cleanup_thread(self):
+    """Check that periodic rotation preserves a limited number of minidumps."""
+    assert self.count_all_minidumps() == 0
+    # Maximum number of minidump that the impalads should keep for this test.
+    max_minidumps = 2
+    # Sleep interval for the log rotation thread.
+    rotation_interval = 1
+    self.start_cluster_with_args(minidump_path=self.tmp_dir,
+                                 max_minidumps=max_minidumps,
+                                 logbufsecs=rotation_interval)
+    cluster_size = self.get_num_processes('impalad')
+    # We trigger several rounds of minidump creation to make sure that all 
daemons wrote
+    # enough files to trigger rotation.
+    for i in xrange(max_minidumps + 1):
+      self.kill_cluster(SIGUSR1)
+      # Breakpad forks to write its minidump files, wait for all the clones to 
terminate.
+      assert self.wait_for_num_processes('impalad', cluster_size) == 
cluster_size
+      assert self.wait_for_num_processes('catalogd', 1) == 1
+      assert self.wait_for_num_processes('statestored', 1) == 1
+      self.assert_num_logfile_entries(i + 1)
+    # Sleep long enough for log cleaning to take effect.
+    time.sleep(rotation_interval + 1)
+    assert self.count_minidumps('impalad') == min(cluster_size, max_minidumps)
+    assert self.count_minidumps('statestored') == max_minidumps
+    assert self.count_minidumps('catalogd') == max_minidumps
+
+  @pytest.mark.execute_serially
   def test_disable_minidumps(self):
     """Check that setting enable_minidumps to false disables minidump 
creation."""
     assert self.count_all_minidumps() == 0

Reply via email to