* Cleaned up subtest config comments and lifecycle variants
* Fixed a race condition between ssh session recovery and host tcp
recovery. Moral: Don't leave ssh sessions open during save/restore.
* Enhanced test to run a backgrounded command on the guest that dirties
memory and keeps at least one CPU 99% busy in the guest. An attempt is
made to stop the background job when testing is done. A warning is issued
if it will not be possible to kill the background process.
Note: Smoke-tested fine on an f16 host; there's a problem/bug on
RHEL hosts which I'm still tracking down.
Signed-off-by: Chris Evich <[email protected]>
---
client/virt/subtests.cfg.sample | 17 ++--------
client/virt/tests/save_restore.py | 66 ++++++++++++++++++++++++++++++------
2 files changed, 58 insertions(+), 25 deletions(-)
diff --git a/client/virt/subtests.cfg.sample b/client/virt/subtests.cfg.sample
index 843de30..d1973d1 100644
--- a/client/virt/subtests.cfg.sample
+++ b/client/virt/subtests.cfg.sample
@@ -409,6 +409,9 @@ variants:
save_restore_delay = 1.0
# Maximum time test allowed to run
save_restore_duration = 60.0
+ # Guest command to run, e.g.: Keep one CPU busy and memory dirty.
+ # Note: Command will be backgrounded by appending a '&'
+ save_restore_bg_command = '( mkdir -p /x && mount -t tmpfs -o size=75%
x /x && while true; do dd if=/dev/urandom of=/x/x; done; umount /x && rmdir /x
)'
save_restore_path = "/tmp"
kill_unresponsive_vms = no
restart_vm = no
@@ -417,29 +420,15 @@ variants:
# Tests assumed to include install and boot before,
# and a shutdown after the lifecycle test
- lifecycle_short: unattended_install.cdrom boot
- type = save_restore
save_restore_repeat = 25
- save_restore_start_delay = 1.0
- save_restore_delay = 1.0
save_restore_duration = 1200.0
- save_restore_path = "/tmp"
- kill_vm = no
- lifecycle_medium: unattended_install.cdrom boot
- type = save_restore
save_restore_repeat = 50
- save_restore_start_delay = 1.0
save_restore_delay = 1.0
save_restore_duration = 2600.0
- save_restore_path = "/tmp"
- kill_vm = no
- lifecycle_long: unattended_install.cdrom boot
- type = save_restore
save_restore_repeat = 100
- save_restore_start_delay = 1.0
- save_restore_delay = 1.0
save_restore_duration = 5400.0
- save_restore_path = "/tmp"
- kill_vm = no
- autotest: install setup image_copy unattended_install.cdrom
only Linux
diff --git a/client/virt/tests/save_restore.py
b/client/virt/tests/save_restore.py
index 86a76a2..b3da92f 100644
--- a/client/virt/tests/save_restore.py
+++ b/client/virt/tests/save_restore.py
@@ -32,6 +32,7 @@ def run_save_restore(test, params, env):
os.close(fd)
return filename
+
def nuke_filename(filename):
"""
Try to unlink filename, ignore any os errors.
@@ -41,41 +42,75 @@ def run_save_restore(test, params, env):
except OSError:
pass
+
+ def check_system(vm, timeout):
+ """
+ Raise TestFail if system is not in expected state
+ """
+ session = None
+ try:
+ session = vm.wait_for_login(timeout=timeout)
+ result = session.is_responsive(timeout=timeout/10.0)
+ if not result:
+ logging.warning("Login session established, but
non-responsive")
+ # assume guest is just busy with stuff
+ except:
+ raise error.TestFail("VM check timed out and/or VM non-responsive")
+ finally:
+ del session
+
+
vm = env.get_vm(params["main_vm"])
- # TODO: Verify initial VM state
- session = vm.wait_for_login()
- # FIXME: If VM already running, it gets paused for some reason.
+ session = vm.wait_for_login(timeout=600)
+
start_delay = float(params.get("save_restore_start_delay", "10.0"))
restore_delay = float(params.get("save_restore_delay", "0.0"))
+ save_restore_duration = float(params.get("save_restore_duration", "60.0"))
+ repeat = int(params.get("save_restore_repeat","1"))
+
path = os.path.abspath(params.get("save_restore_path", "/tmp"))
file_pfx = vm.name+'-'
+ save_file = get_save_filename(path, file_pfx)
+
+ save_restore_bg_command = params.get("save_restore_bg_command")
+ if save_restore_bg_command:
+ session.cmd(save_restore_bg_command + ' &')
+ try:
+ # assume sh-like shell, try to get background process's pid
+ bg_command_pid = int(session.cmd('jobs -rp'))
+ except ValueError:
+ logging.warning("Background guest command 'job -rp' output not
PID")
+ bg_command_pid = none
+ del session # don't leave stray ssh session lying around over save/restore
+
start_time = time.time()
- now = time_to_stop = (start_time +
- float(params.get("save_restore_duration", "60.0")))
- repeat = int(params.get("save_restore_repeat","1"))
+ # 'now' needs outside scope for error.TestFail() at end
+ # especially if exception thrown in loop before completion
+ now = time_to_stop = (start_time + save_restore_duration)
while True:
try:
- if not session.is_responsive():
- raise error.TestFail("Guest shell session is non-responsive")
+ vm.verify_kernel_crash()
+ check_system(vm,120) # networking needs time to recover
logging.info("Save/restores left: %d (or %0.4f more seconds)" %
(repeat, (time_to_stop - time.time())))
- # TODO: Start some background test or load within VM
if start_delay:
logging.debug("Sleeping %0.4f seconds start_delay" %
start_delay)
time.sleep(start_delay)
vm.pause()
+ vm.verify_kernel_crash()
save_file = get_save_filename(path, file_pfx)
vm.save_to_file(save_file)
+ vm.verify_kernel_crash()
if restore_delay:
logging.debug("Sleeping %0.4f seconds restore_delay" %
restore_delay)
time.sleep(restore_delay)
vm.restore_from_file(save_file)
+ vm.verify_kernel_crash()
vm.resume() # make sure some work gets done
vm.verify_kernel_crash()
now = time.time()
- # TODO: Examine background test/load completion/success status
finally:
if save_file:
nuke_filename(save_file) # make sure these are cleaned up
@@ -83,9 +118,18 @@ def run_save_restore(test, params, env):
repeat -= 1
if (now >= time_to_stop) or (repeat <= 0):#TODO: or BG test status==foo
break
+ save_file = get_save_filename(path, file_pfx)
+ # Check the final save/restore cycle
+ check_system(vm,120) # networking needs time to recover
logging.info("Save/Restore itteration(s) complete.")
+ if save_restore_bg_command and bg_command_pid:
+ session = vm.wait_for_login(timeout=120)
+ status = session.cmd_status('kill %d' % bg_command_pid)
+ if status != 0:
+ logging.warning("Background guest command kill %d failed" %\
+ bg_command_pid)
+ del session
if repeat > 0: # time_to_stop reached but itterations didn't complete
raise error.TestFail("Save/Restore save_restore_duration"
" exceeded by %0.4f seconds with %d itterations"
" remaining." % (now-time_to_stop, repeat+1))
- # TODO: Check for any other failure condition
--
1.7.1
_______________________________________________
Autotest mailing list
[email protected]
http://test.kernel.org/cgi-bin/mailman/listinfo/autotest