Add periodic CPython garbage collector statistics logging to aid in
tracking down a memory leak and as a general health beacon for the long
running process.

The interval at which stats are logged is configurable.

Signed-off-by: Gregory Smith <[email protected]>

--- autotest/global_config.ini  2009-12-18 02:24:44.000000000 -0800
+++ autotest/global_config.ini  2009-12-18 02:24:44.000000000 -0800
@@ -70,6 +70,7 @@
 secs_to_wait_for_atomic_group_hosts: 600
 pidfile_timeout_mins: 300
 max_pidfile_refreshes: 2000
+gc_stats_interval_mins: 360
 
 [HOSTS]
 wait_up_processes:
--- /dev/null   2009-12-17 12:29:38.000000000 -0800
+++ autotest/scheduler/gc_stats.py      2009-12-18 02:24:44.000000000 -0800
@@ -0,0 +1,89 @@
+# Compute and gather statistics about garbage collection in this process.
+# This module depends on the CPython gc module and garbage collection behavior.
+
+import gc, logging, pprint
+
+
+verbose = False
+
+
+# A mapping from type objects to a count of instances of those types in the
+# garbage collector's all-objects list on the previous call to
+# _log_garbage_collector_stats().
+_previous_obj_type_map = {}
+
+
+# A set of object ids for everything in the all objects list on the
+# previous call to _log_garbage_collector_stats().
+_previous_obj_ids = set()
+
+
+def _log_garbage_collector_stats(minimum_count=10):
+    """
+    Log statistics about how many of what type of Python object exist in this
+    process.
+
+    @param minimum_count: The minimum number of instances of a type for it
+            to be considered worthy of logging.
+    """
+    global _previous_obj_type_map
+    global _previous_obj_ids
+
+    # We get all objects -before- creating any new objects within this function.
+    # to avoid having our own local instances in the list.
+    all_objects = gc.get_objects()
+    obj = None
+    new_objects = []
+    try:
+        obj_type_map = {}
+        object_ids = set()
+        for obj in all_objects:
+            obj_type = type(obj)
+            obj_type_map.setdefault(obj_type, 0)
+            obj_type_map[obj_type] += 1
+            object_ids.add(id(obj))
+        whats_new_big_str = ''
+        if verbose and _previous_obj_ids:
+            new_object_ids = object_ids - _previous_obj_ids
+            for obj in all_objects:
+                if id(obj) in new_object_ids:
+                    new_objects.append(obj)
+            whats_new_big_str = pprint.pformat(new_objects, indent=1)
+    finally:
+        # Never keep references to stuff returned by gc.get_objects() around
+        # or it'll just make the future cyclic gc runs more difficult.
+        del all_objects
+        del obj
+        del new_objects
+
+
+    delta = {}
+    for obj_type, count in obj_type_map.iteritems():
+        if obj_type not in _previous_obj_type_map:
+            delta[obj_type] = count
+        elif _previous_obj_type_map[obj_type] != count:
+            delta[obj_type] = count - _previous_obj_type_map[obj_type]
+
+    sorted_stats = reversed(sorted(
+            (count, obj_type) for obj_type, count in obj_type_map.iteritems()))
+    sorted_delta = reversed(sorted(
+            (count, obj_type) for obj_type, count in delta.iteritems()))
+
+    logging.debug('Garbage collector object type counts:')
+    for count, obj_type in sorted_stats:
+        if count >= minimum_count:
+            logging.debug('  %d\t%s', count, obj_type)
+
+    logging.info('Change in object counts since previous GC stats:')
+    for change, obj_type in sorted_delta:
+        if obj_type_map[obj_type] > minimum_count:
+            logging.info('  %+d\t%s\tto %d', change, obj_type,
+                         obj_type_map[obj_type])
+
+    if verbose and whats_new_big_str:
+        logging.debug('Pretty printed representation of the new objects:')
+        logging.debug(whats_new_big_str)
+
+    _previous_obj_type_map = obj_type_map
+    if verbose:
+        _previous_obj_ids = object_ids
--- /dev/null   2009-12-17 12:29:38.000000000 -0800
+++ autotest/scheduler/gc_stats_unittest.py     2009-12-18 02:24:44.000000000 -0800
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import gc
+import logging
+
+import common
+from autotest_lib.client.common_lib.test_utils import mock
+from autotest_lib.client.common_lib.test_utils import unittest
+from autotest_lib.scheduler import gc_stats
+
+
+class TestGcStats(unittest.TestCase):
+    def setUp(self):
+        self.god = mock.mock_god()
+
+
+    def tearDown(self):
+        self.god.unstub_all()
+
+
+    def test_log_garbage_collector_stats(self):
+        # Call this for code coverage.
+        # Prevent log spam from this test but do test that the log
+        # message formats correctly.
+        def _mock_logging_func(message, *args):
+            if args:
+                message %= args
+        self.god.stub_with(logging, 'debug', _mock_logging_func)
+        self.god.stub_with(logging, 'info', _mock_logging_func)
+        gc_stats._log_garbage_collector_stats()
+        # Add a new dict, exercise the delta counting & printing code.
+        y = {}
+        gc_stats._log_garbage_collector_stats(1)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- autotest/scheduler/monitor_db.py    2009-12-18 02:24:44.000000000 -0800
+++ autotest/scheduler/monitor_db.py    2009-12-18 02:24:44.000000000 -0800
@@ -7,7 +7,7 @@
 
 import datetime, errno, optparse, os, pwd, Queue, re, shutil, signal
 import smtplib, socket, stat, subprocess, sys, tempfile, time, traceback
-import itertools, logging, weakref
+import itertools, logging, weakref, gc
 import common
 import MySQLdb
 from autotest_lib.scheduler import scheduler_logging_config
@@ -19,6 +19,7 @@
 from autotest_lib.scheduler import drone_manager, drones, email_manager
 from autotest_lib.scheduler import monitor_db_cleanup
 from autotest_lib.scheduler import status_server, scheduler_config
+from autotest_lib.scheduler import gc_stats
 
 BABYSITTER_PID_FILE_PREFIX = 'monitor_db_babysitter'
 PID_FILE_PREFIX = 'monitor_db'
@@ -631,6 +632,12 @@
         self._24hr_upkeep = monitor_db_cleanup.TwentyFourHourUpkeep(_db)
         self._host_agents = {}
         self._queue_entry_agents = {}
+        self._tick_count = 0
+        self._last_garbage_stats_time = time.time()
+        self._seconds_between_garbage_stats = 60 * (
+                global_config.global_config.get_config_value(
+                        scheduler_config.CONFIG_SECTION,
+                        'gc_stats_interval_mins', type=int, default=6*60))
 
 
     def initialize(self, recover_hosts=True):
@@ -645,6 +652,7 @@
 
 
     def tick(self):
+        self._garbage_collection()
         _drone_manager.refresh()
         self._run_cleanup()
         self._find_aborting()
@@ -656,6 +664,7 @@
         self._handle_agents()
         _drone_manager.execute_actions()
         email_manager.manager.send_queued_emails()
+        self._tick_count += 1
 
 
     def _run_cleanup(self):
@@ -663,6 +672,21 @@
         self._24hr_upkeep.run_cleanup_maybe()
 
 
+    def _garbage_collection(self):
+        threshold_time = time.time() - self._seconds_between_garbage_stats
+        if threshold_time < self._last_garbage_stats_time:
+            # Don't generate these reports very often.
+            return
+
+        self._last_garbage_stats_time = time.time()
+        # Force a full level 0 collection (because we can, it doesn't hurt
+        # at this interval).
+        gc.collect()
+        logging.info('Logging garbage collector stats on tick %d.',
+                     self._tick_count)
+        gc_stats._log_garbage_collector_stats()
+
+
     def _register_agent_for_ids(self, agent_dict, object_ids, agent):
         for object_id in object_ids:
             agent_dict.setdefault(object_id, set()).add(agent)
--- autotest/scheduler/monitor_db_unittest.py   2009-12-18 02:24:44.000000000 -0800
+++ autotest/scheduler/monitor_db_unittest.py   2009-12-18 02:24:44.000000000 -0800
@@ -1,18 +1,19 @@
 #!/usr/bin/python
 
-import unittest, time, subprocess, os, StringIO, tempfile, datetime, shutil
-import logging
+import time, subprocess, os, StringIO, tempfile, datetime, shutil
+import gc, logging
 import common
 import MySQLdb
 from autotest_lib.frontend import setup_django_environment
 from autotest_lib.frontend.afe import frontend_test_utils
 from autotest_lib.client.common_lib import global_config, host_protections
 from autotest_lib.client.common_lib.test_utils import mock
+from autotest_lib.client.common_lib.test_utils import unittest
 from autotest_lib.database import database_connection, migrate
 from autotest_lib.frontend import thread_local
 from autotest_lib.frontend.afe import models
 from autotest_lib.scheduler import monitor_db, drone_manager, email_manager
-from autotest_lib.scheduler import scheduler_config
+from autotest_lib.scheduler import scheduler_config, gc_stats
 
 _DEBUG = False
 
@@ -771,6 +772,22 @@
         self._check_for_extra_schedulings()
 
 
+    def test_garbage_collection(self):
+        self.god.stub_with(self._dispatcher, '_seconds_between_garbage_stats',
+                           999999)
+        self.god.stub_function(gc, 'collect')
+        self.god.stub_function(gc_stats, '_log_garbage_collector_stats')
+        gc.collect.expect_call().and_return(0)
+        gc_stats._log_garbage_collector_stats.expect_call()
+        # Force a garbage collection run
+        self._dispatcher._last_garbage_stats_time = 0
+        self._dispatcher._garbage_collection()
+        # The previous call should have reset the time, it won't do anything
+        # the second time.  If it does, we'll get an unexpected call.
+        self._dispatcher._garbage_collection()
+
+
+
 class DispatcherThrottlingTest(BaseSchedulerTest):
     """
     Test that the dispatcher throttles:
_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest

Reply via email to