[Autotest] [PATCH] Improve the robustness of the server-side status log processing

John Admanski Wed, 23 Dec 2009 11:39:03 -0800

Makes two major changes to the status log processing in autotest.py:
  1) Adjusts the processing of warnings and status lines to remove
     entries from their buffers as the values are written out, rather
     than removing them all after processing an entire batch.
  2) Changes the error handling during status log processing to
     log-and-ignore exceptions that occur while talking to the remote
     machine, rather than propagating exceptions.


These changes are needed to eliminate runaway failure logging that can
occur when an exception occurs during the remote status log processing.
If such a failure occurs and persists, then you can get into a loop
where a client-disconnect WARN message is logged, and then another error
will happen during the logging leading to another WARN being logged, and
so on as long as the failure keeps occuring (which can be forever in
some cases). The buffering fixed by 1) makes it even worse, since if an
error occurs the next logging pass will re-log everything in the buffer
which never gets cleared due to the error (so you get 1 WARN, then 2,
then 3, then 4, ...)

Risk: High
Visibility: Fixed the error handling of the status log processing in
    autotest.py to be much more robust in the face of remote errors.

Signed-off-by: John Admanski <[email protected]>

--- autotest/server/autotest.py 2009-12-18 00:27:21.000000000 -0800
+++ autotest/server/autotest.py 2009-12-23 11:37:11.000000000 -0800
@@ -920,8 +920,12 @@
         elif test_complete_match:
             self._process_logs()
             fifo_path, = test_complete_match.groups()
-            self.log_collector.collect_client_job_results()
-            self.host.run("echo A > %s" % fifo_path)
+            try:
+                self.log_collector.collect_client_job_results()
+                self.host.run("echo A > %s" % fifo_path)
+            except Exception:
+                msg = "Post-test log collection failed, continuing anyway"
+                logging.exception(msg)
         elif fetch_package_match:
             pkg_name, dest_path, fifo_path = fetch_package_match.groups()
             serve_packages = global_config.global_config.get_config_value(
@@ -932,7 +936,11 @@
                 except Exception:
                     msg = "Package tarball creation failed, continuing anyway"
                     logging.exception(msg)
-            self.host.run("echo B > %s" % fifo_path)
+            try:
+                self.host.run("echo B > %s" % fifo_path)
+            except Exception:
+                msg = "Package tarball installation failed, continuing anyway"
+                logging.exception(msg)
         else:
             logging.info(line)
 
@@ -1007,25 +1015,29 @@
 
     def write(self, data):
         # first check for any new console warnings
-        warnings = self.job._read_warnings() + self.server_warnings
+        self.server_warnings = self.job._read_warnings() + self.server_warnings
+        warnings = self.server_warnings
         warnings.sort()  # sort into timestamp order
-        # now process the newest data written out
+        # now start processing the existng buffer and the new data
         data = self.leftover + data
-        lines = data.split("\n")
-        # process every line but the last one
-        for line in lines[:-1]:
-            self._update_timestamp(line)
-            # output any warnings between now and the next status line
-            old_warnings = [(timestamp, msg) for timestamp, msg in warnings
-                            if timestamp < self.newest_timestamp]
-            self._process_warnings(self.last_line, self.logs, warnings)
-            del warnings[:len(old_warnings)]
-            self._process_line(line)
-        # save off any warnings not yet logged for later processing
-        self.server_warnings = warnings
-        # save the last line for later processing
-        # since we may not have the whole line yet
-        self.leftover = lines[-1]
+        lines = data.split('\n')
+        processed_lines = 0
+        try:
+            # process all the buffered data except the last line
+            # ignore the last line since we may not have all of it yet
+            for line in lines[:-1]:
+                self._update_timestamp(line)
+                # output any warnings between now and the next status line
+                old_warnings = [(timestamp, msg) for timestamp, msg in warnings
+                                if timestamp < self.newest_timestamp]
+                self._process_warnings(self.last_line, self.logs, warnings)
+                del warnings[:len(old_warnings)]
+                # now process the line itself
+                self._process_line(line)
+                processed_lines += 1
+        finally:
+            # save any unprocessed lines for future processing
+            self.leftover = '\n'.join(lines[processed_lines:])
 
 
     def flush(self):
--- autotest/server/autotest_unittest.py        2009-12-23 11:37:11.000000000 
-0800
+++ autotest/server/autotest_unittest.py        2009-12-23 11:37:11.000000000 
-0800
@@ -2,7 +2,7 @@
 
 __author__ = "[email protected] (Travis Miller)"
 
-import unittest, os, tempfile
+import unittest, os, tempfile, logging
 import common
 from autotest_lib.server import autotest, utils, hosts, server_job, profilers
 from autotest_lib.client.bin import sysinfo
@@ -51,6 +51,7 @@
         self.god.stub_function(autotest, "open")
         self.god.stub_function(autotest.global_config.global_config,
                                "get_config_value")
+        self.god.stub_function(logging, "exception")
         self.god.stub_class(autotest, "_Run")
         self.god.stub_class(autotest, "log_collector")
 
@@ -289,5 +290,75 @@
         self.assertEquals(install_dir, '/another/path')
 
 
+    def test_client_logger_process_line_log_copy_collection_failure(self):
+        collector = autotest.log_collector.expect_new(self.host, '', '')
+        logger = autotest.client_logger(self.host, '', '')
+        collector.collect_client_job_results.expect_call().and_raises(
+                Exception('log copy failure'))
+        logging.exception.expect_call(mock.is_string_comparator())
+        logger._process_line('AUTOTEST_TEST_COMPLETE:/autotest/fifo1')
+
+
+    def test_client_logger_process_line_log_copy_fifo_failure(self):
+        collector = autotest.log_collector.expect_new(self.host, '', '')
+        logger = autotest.client_logger(self.host, '', '')
+        collector.collect_client_job_results.expect_call()
+        self.host.run.expect_call('echo A > /autotest/fifo2').and_raises(
+                Exception('fifo failure'))
+        logging.exception.expect_call(mock.is_string_comparator())
+        logger._process_line('AUTOTEST_TEST_COMPLETE:/autotest/fifo2')
+
+
+    def test_client_logger_process_line_package_install_fifo_failure(self):
+        collector = autotest.log_collector.expect_new(self.host, '', '')
+        logger = autotest.client_logger(self.host, '', '')
+        self.god.stub_function(logger, '_send_tarball')
+
+        c = autotest.global_config.global_config
+        c.get_config_value.expect_call('PACKAGES',
+                                       'serve_packages_from_autoserv',
+                                       type=bool).and_return(True)
+        logger._send_tarball.expect_call('pkgname.tar.bz2', '/autotest/dest/')
+        
+        self.host.run.expect_call('echo B > /autotest/fifo3').and_raises(
+                Exception('fifo failure'))
+        logging.exception.expect_call(mock.is_string_comparator())
+        logger._process_line('AUTOTEST_FETCH_PACKAGE:pkgname.tar.bz2:'
+                             '/autotest/dest/:/autotest/fifo3')
+
+
+    def test_client_logger_write_handles_process_line_failures(self):
+        collector = autotest.log_collector.expect_new(self.host, '', '')
+        logger = autotest.client_logger(self.host, '', '')
+        logger.server_warnings = [(x, 'warn%d' % x) for x in xrange(5)]
+        self.god.stub_function(logger, '_process_warnings')
+        self.god.stub_function(logger, '_process_line')
+        def _update_timestamp(line):
+            logger.newest_timestamp += 2
+        class ProcessingException(Exception):
+            pass
+        def _read_warnings():
+            return [(5, 'warn5')]
+        logger._update_timestamp = _update_timestamp
+        logger.newest_timestamp = 0
+        self.host.job._read_warnings = _read_warnings
+        # process line1, newest_timestamp -> 2
+        logger._process_warnings.expect_call(
+                '', {}, [(0, 'warn0'), (1, 'warn1')])
+        logger._process_line.expect_call('line1')
+        # process line2, newest_timestamp -> 4, failure occurs during process
+        logger._process_warnings.expect_call(
+                '', {}, [(2, 'warn2'), (3, 'warn3')])
+        logger._process_line.expect_call('line2').and_raises(
+                ProcessingException('line processing failure'))
+        # when we call write with data we should get an exception
+        self.assertRaises(ProcessingException, logger.write,
+                          'line1\nline2\nline3\nline4')
+        # but, after the exception, the server_warnings and leftover buffers
+        # should contain the unprocessed data, and ONLY the unprocessed data
+        self.assertEqual(logger.server_warnings, [(4, 'warn4'), (5, 'warn5')])
+        self.assertEqual(logger.leftover, 'line2\nline3\nline4')
+
+
 if __name__ == "__main__":
     unittest.main()
_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest

[Autotest] [PATCH] Improve the robustness of the server-side status log processing

Reply via email to