[Autotest] [PATCH] virt: Update and enhance save_restore test

Chris Evich Fri, 13 Jan 2012 15:03:07 -0800

* Cleaned up subtest config comments and lifecycle variants

* Fixed a race condition between ssh session recovery and host tcp
  recovery.  Moral: Don't leave ssh sessions open during save/restore.


* Enhanced test to run a backgrounded command on the guest that dirties
  memory and keeps at least one guest CPU 99% busy. An attempt is
  made to stop the background job when testing is done. A warning is
  issued if it will not be possible to kill the background process.

Note: Smoke-tested fine on an F16 host; there's a problem/bug on
      RHEL hosts which I'm still tracking down.

Signed-off-by: Chris Evich <[email protected]>
---
 client/virt/subtests.cfg.sample   |   17 ++--------
 client/virt/tests/save_restore.py |   66 ++++++++++++++++++++++++++++++------
 2 files changed, 58 insertions(+), 25 deletions(-)

diff --git a/client/virt/subtests.cfg.sample b/client/virt/subtests.cfg.sample
index 843de30..d1973d1 100644
--- a/client/virt/subtests.cfg.sample
+++ b/client/virt/subtests.cfg.sample
@@ -409,6 +409,9 @@ variants:
         save_restore_delay = 1.0
         # Maximum time test allowed to run
         save_restore_duration = 60.0
+        # Guest command to run, e.g.: Keep one CPU busy and memory dirty.
+        # Note: Command will be backgrounded by appending a '&'
+        save_restore_bg_command = '( mkdir -p /x && mount -t tmpfs -o size=75% 
x /x && while true; do dd if=/dev/urandom of=/x/x; done; umount /x && rmdir /x 
)'
         save_restore_path = "/tmp"
         kill_unresponsive_vms = no
         restart_vm = no
@@ -417,29 +420,15 @@ variants:
             # Tests assumed to include install and boot before,
             # and a shutdown after the lifecycle test
             - lifecycle_short: unattended_install.cdrom boot
-                type = save_restore
                 save_restore_repeat = 25
-                save_restore_start_delay = 1.0
-                save_restore_delay = 1.0
                 save_restore_duration = 1200.0
-                save_restore_path = "/tmp"
-                kill_vm = no
             - lifecycle_medium: unattended_install.cdrom boot
-                type = save_restore
                 save_restore_repeat = 50
-                save_restore_start_delay = 1.0
                 save_restore_delay = 1.0
                 save_restore_duration = 2600.0
-                save_restore_path = "/tmp"
-                kill_vm = no
             - lifecycle_long: unattended_install.cdrom boot
-                type = save_restore
                 save_restore_repeat = 100
-                save_restore_start_delay = 1.0
-                save_restore_delay = 1.0
                 save_restore_duration = 5400.0
-                save_restore_path = "/tmp"
-                kill_vm = no
 
     - autotest: install setup image_copy unattended_install.cdrom
         only Linux
diff --git a/client/virt/tests/save_restore.py b/client/virt/tests/save_restore.py
index 86a76a2..b3da92f 100644
--- a/client/virt/tests/save_restore.py
+++ b/client/virt/tests/save_restore.py
@@ -32,6 +32,7 @@ def run_save_restore(test, params, env):
         os.close(fd)
         return filename
 
+
     def nuke_filename(filename):
         """
         Try to unlink filename, ignore any os errors.
@@ -41,41 +42,75 @@ def run_save_restore(test, params, env):
         except OSError:
             pass
 
+
+    def check_system(vm, timeout):
+        """
+        Raise TestFail if system is not in expected state
+        """
+        session = None
+        try:
+            session = vm.wait_for_login(timeout=timeout)
+            result = session.is_responsive(timeout=timeout/10.0)
+            if not result:
+                logging.warning("Login session established, but 
non-responsive")
+                # assume guest is just busy with stuff
+        except:
+            raise error.TestFail("VM check timed out and/or VM non-responsive")
+        finally:
+            del session
+
+
     vm = env.get_vm(params["main_vm"])
-    # TODO: Verify initial VM state
-    session = vm.wait_for_login()
-    # FIXME: If VM already running, it gets paused for some reason.
+    session = vm.wait_for_login(timeout=600)
+
     start_delay = float(params.get("save_restore_start_delay", "10.0"))
     restore_delay = float(params.get("save_restore_delay", "0.0"))
+    save_restore_duration = float(params.get("save_restore_duration", "60.0"))
+    repeat = int(params.get("save_restore_repeat","1"))
+
     path = os.path.abspath(params.get("save_restore_path", "/tmp"))
     file_pfx = vm.name+'-'
+    save_file = get_save_filename(path, file_pfx)
+
+    save_restore_bg_command = params.get("save_restore_bg_command")
+    if save_restore_bg_command:
+        session.cmd(save_restore_bg_command + ' &')
+        try:
+            # assume sh-like shell, try to get background process's pid
+            bg_command_pid = int(session.cmd('jobs -rp'))
+        except ValueError:
+            logging.warning("Background guest command 'job -rp' output not 
PID")
+            bg_command_pid = none
+    del session # don't leave stray ssh session lying around over save/restore
+
     start_time = time.time()
-    now = time_to_stop = (start_time +
-                          float(params.get("save_restore_duration", "60.0")))
-    repeat = int(params.get("save_restore_repeat","1"))
+    # 'now' needs outside scope for error.TestFail() at end
+    # especially if exception thrown in loop before completion
+    now = time_to_stop = (start_time + save_restore_duration)
     while True:
         try:
-            if not session.is_responsive():
-                raise error.TestFail("Guest shell session is non-responsive")
+            vm.verify_kernel_crash()
+            check_system(vm,120) # networking needs time to recover
             logging.info("Save/restores left: %d (or %0.4f more seconds)" %
                          (repeat, (time_to_stop - time.time())))
-            # TODO: Start some background test or load within VM
             if start_delay:
                 logging.debug("Sleeping %0.4f seconds start_delay" %
                               start_delay)
                 time.sleep(start_delay)
             vm.pause()
+            vm.verify_kernel_crash()
             save_file = get_save_filename(path, file_pfx)
             vm.save_to_file(save_file)
+            vm.verify_kernel_crash()
             if restore_delay:
                 logging.debug("Sleeping %0.4f seconds restore_delay" %
                               restore_delay)
                 time.sleep(restore_delay)
             vm.restore_from_file(save_file)
+            vm.verify_kernel_crash()
             vm.resume() # make sure some work gets done
             vm.verify_kernel_crash()
             now = time.time()
-            # TODO: Examine background test/load completion/success status
         finally:
             if save_file:
                 nuke_filename(save_file) # make sure these are cleaned up
@@ -83,9 +118,18 @@ def run_save_restore(test, params, env):
         repeat -= 1
         if (now >= time_to_stop) or (repeat <= 0):#TODO: or BG test status==foo
             break
+        save_file = get_save_filename(path, file_pfx)
+    # Check the final save/restore cycle
+    check_system(vm,120) # networking needs time to recover
     logging.info("Save/Restore itteration(s) complete.")
+    if save_restore_bg_command and bg_command_pid:
+        session = vm.wait_for_login(timeout=120)
+        status = session.cmd_status('kill %d' % bg_command_pid)
+        if status != 0:
+            logging.warning("Background guest command kill %d failed" %\
+                            bg_command_pid)
+        del session
     if repeat > 0: # time_to_stop reached but itterations didn't complete
         raise error.TestFail("Save/Restore save_restore_duration"
                              " exceeded by %0.4f seconds with %d itterations"
                              " remaining." % (now-time_to_stop, repeat+1))
-    # TODO: Check for any other failure condition
-- 
1.7.1

_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest

[Autotest] [PATCH] virt: Update and enhance save_restore test

Reply via email to