Oren Laadan [[email protected]] wrote:
|
| I just posted v14-rc3 which includes the c/r of restart-blocks.
| That should improve the situation.
|
| However, depending on which syscalls one uses, process may still
| seem "stuck" after restart because the current code still does
| not save signals nor task timers; If a signal was pending (SIGALRM
| for example) after freezing but before checkpoint, it will be lost.
| If a timer was set at checkpoint, it will not be restored.
|
| So depending on your program, you may still experience issues
| until I add patches to handle that.
OK. Just an FYI: the original program seemed to work fine, but when
I try to restart a small process tree, it gets stuck on restart again.
I am running the v14-rc3 branch. Could this have anything to do with a
pending SIGCHLD? It seems to be easier to reproduce with larger process
trees (2 children per process, 4 or more levels deep).
Test programs (attached) (they need some cleanup though)
ptree2.c
p2.loop
--------- Processes after restart:
$ ps -ef|grep ptree
root 10461 10459 0 22:07 pts/0 00:00:00 ./ptree2 -n 1 -d 2
root 10465 10461 0 22:07 pts/0 00:00:00 ./ptree2 -n 1 -d 2
root 10466 10465 0 22:07 pts/0 00:00:00 [ptree2] <defunct>
root 10479 8220 0 22:09 pts/1 00:00:00 grep ptree
---------- Process stacks
ptree2 S f6270a90 0 10461 10459
f5e59380 00000082 08048a86 f6270a90 f6270bfc c2b32260 00000000 0000d9d3
f5f423b0 00000000 ffffffff 00000000 00000000 00000001 00000000 f6270a88
00000000 f6270a90 00000000 c02243aa 00000004 00000003 0000000c 00000006
Call Trace:
[<c02243aa>] do_wait+0x1dd/0x2f6
[<c021cd14>] default_wake_function+0x0/0x8
[<c0224542>] sys_wait4+0x7f/0x92
[<c0224568>] sys_waitpid+0x13/0x17
[<c0202ce5>] sysenter_do_call+0x12/0x25
[<c0510000>] rtl8139_init_one+0x5ae/0x887
ptree2 S f5f423b0 0 10465 10461
f6002180 00000082 c2b265c8 f5f423b0 f5f4251c c2b29260 f67b1f44 e06d0177
00000282 c023363c c2b265c8 00000000 00000282 0000c350 00000001 0000c350
00000001 f67b1f44 0000c350 c051be99 00000000 00000001 0000c350 bf9d0e04
Call Trace:
[<c023363c>] hrtimer_start_range_ns+0x105/0x111
[<c051be99>] do_nanosleep+0x54/0x8c
[<c02336d7>] hrtimer_nanosleep+0x8f/0xee
[<c02332b8>] hrtimer_wakeup+0x0/0x18
[<c051be7f>] do_nanosleep+0x3a/0x8c
[<c0233777>] sys_nanosleep+0x41/0x51
[<c0202ce5>] sysenter_do_call+0x12/0x25
ptree2 ? f6bee040 0 10466 10465
f638cb80 00000046 00200200 f6bee040 f6bee1ac c2b17260 f6bee038 0000dd77
00000000 c022f576 ffffffff 00000303 00000000 00000001 00000000 00000012
f5a61e84 f6bee040 f6bee038 c0224c29 f6270a90 00000001 f6bee038 f5a61f88
Call Trace:
[<c022f576>] wakeme_after_rcu+0x0/0x8
[<c0224c29>] do_exit+0x638/0x63c
[<c0224c87>] do_group_exit+0x5a/0x83
[<c0224cbd>] sys_exit_group+0xd/0x10
[<c0202ce5>] sysenter_do_call+0x12/0x25
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wait.h>
/* Depth of the process tree; main() overrides it with the -d option. */
int max_depth = 3;
/* Number of children each process forks; main() overrides it with -n. */
int num_children = 3;
/* Marker file created by checkpoint_ready() once the tree has been forked. */
#define CKPT_READY "checkpoint-ready"
/* Marker file polled by checkpoint_done(). */
#define CKPT_DONE "checkpoint-done"
/* Marker file polled by test_done(); its presence ends every process's loop. */
#define TEST_DONE "test-done"
/* Name of the main log file and prefix of the per-process log files. */
#define LOG_FILE "log-ptree2"
/*
 * sys_getgpid: pid used in the per-process log lines.
 *
 * SYS_GETGPID is force-undefined here, so sys_getgpid is currently just
 * another name for getpid().  The alternative branch issues raw syscall
 * number 335 (presumably an out-of-tree "getgpid" syscall -- TODO confirm).
 */
#undef SYS_GETGPID

#ifndef SYS_GETGPID
#define sys_getgpid getpid
#else
#define __NR_getgpid 335
static inline int sys_getgpid(void)
{
	return syscall(__NR_getgpid);
}
#endif
/* Log stream used by every function in this file; NULL until it is opened. */
FILE *logfp;

/*
 * Terminate the calling process with exit code @status.
 *
 * If the log stream is open it is flushed and closed first; the process
 * then leaves through _Exit().
 */
void do_exit(int status)
{
	FILE *fp = logfp;

	if (fp != NULL) {
		fflush(fp);
		fclose(fp);
	}

	_Exit(status);
}
/*
 * Report whether the TEST_DONE marker file exists.
 *
 * Returns 1 if the file is present and 0 if access() fails with ENOENT.
 * Any other access() error is logged and the process exits with status 1.
 */
int test_done(void)
{
	FILE *out;

	if (access(TEST_DONE, F_OK) == 0)
		return 1;
	if (errno == ENOENT)
		return 0;

	/*
	 * This function can run before the log file has been opened, in
	 * which case logfp is still NULL; report on stderr instead of
	 * handing fprintf() a NULL stream.
	 */
	out = logfp ? logfp : stderr;
	fprintf(out, "access(%s) failed, %s\n", TEST_DONE, strerror(errno));
	do_exit(1);
	return 0;	/* not reached: do_exit() terminates the process */
}
/*
 * Report whether the CKPT_DONE marker file exists.
 *
 * Returns 1 when the file is present, 0 when access() reports ENOENT.
 * Any other failure is written to the log and ends the process with
 * status 1.
 */
int checkpoint_done()
{
	if (access(CKPT_DONE, F_OK) != 0) {
		if (errno == ENOENT)
			return 0;

		fprintf(logfp, "access(%s) failed, %s\n", CKPT_DONE, strerror(errno));
		do_exit(1);
	}

	return 1;
}
/*
 * Create the CKPT_READY marker file (mode 0666 before umask) and close it
 * again.  On failure the error is logged and the process exits with
 * status 1.
 */
void checkpoint_ready()
{
	int fd;

	/*
	 * creat() takes exactly (path, mode); the stray third argument the
	 * call used to pass has been dropped.
	 * NOTE(review): creat() is declared in <fcntl.h>, which this file
	 * does not include -- confirm and add the header.
	 */
	fd = creat(CKPT_READY, 0666);
	if (fd < 0) {
		fprintf(logfp, "creat(%s) failed, %s\n", CKPT_READY,
			strerror(errno));
		do_exit(1);
	}
	close(fd);
}
/*
 * Log how child @pid terminated, decoding the waitpid() @status word:
 * the exit code for a normal exit, the signal number for a signal death,
 * and a generic note for anything else.
 *
 * Declared void explicitly: the function never returned a value and
 * implicit int is not valid since C99.
 */
void print_exit_status(int pid, int status)
{
	fprintf(logfp, "Pid %d unexpected exit - ", pid);

	if (WIFEXITED(status)) {
		fprintf(logfp, "exit status %d\n", WEXITSTATUS(status));
	} else if (WIFSIGNALED(status)) {
		fprintf(logfp, "got signal %d\n", WTERMSIG(status));
	} else {
		fprintf(logfp, "stopped/continued ?\n");
	}
}
/*
 * Reap every child of the calling process, then exit.
 *
 * Children that did not exit normally with status 0 are reported through
 * print_exit_status().  The loop ends when waitpid() fails; anything other
 * than ECHILD ("no more children") is treated as fatal.  This function
 * never returns: it ends in do_exit(0) on success or do_exit(1) on error.
 */
void do_wait()
{
	int rc;
	int n = 0;	/* number of children reaped so far */
	int status;

	for (;;) {
		rc = waitpid(-1, &status, 0);
		if (rc < 0)
			break;

		n++;
		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
			print_exit_status(rc, status);
	}

	/* errno still belongs to the failed waitpid() call above */
	if (errno != ECHILD) {
		fprintf(logfp, "waitpid(%d) failed, error %s\n",
			rc, strerror(errno));
		do_exit(1);
	}

	/*
	 * Only the process with pid 1 checks the total.
	 * NOTE(review): the expected count num_children * max_depth is kept
	 * as written -- confirm it matches the shape of the process tree.
	 */
	if (getpid() == 1 && n != num_children * max_depth) {
		/* report the number actually reaped, not num_children */
		fprintf(logfp, "Only %d of %d children exited ?\n",
			n, num_children * max_depth);
		do_exit(1);
	}

	do_exit(0);
}
static int do_child(int depth, char *suffix);

/*
 * Fork num_children children at tree level @depth.
 *
 * Child number i gets the log-file suffix "<parent_suffix>-<i>" and runs
 * do_child(); the parent carries on forking the remaining children.  A
 * fork() failure is logged and ends the calling process with status 1.
 *
 * Declared void explicitly: no value was ever returned and implicit int
 * is not valid since C99.
 */
void create_children(int depth, char *parent_suffix)
{
	int i;
	int child_pid;
	char suffix[1024];

	for (i = 0; i < num_children; i++) {
		/* bounded formatting: never write past the end of suffix[] */
		snprintf(suffix, sizeof(suffix), "%s-%d", parent_suffix, i);

		child_pid = fork();
		if (child_pid == 0) {
			do_child(depth, suffix);
		} else if (child_pid < 0) {
			fprintf(logfp, "fork() failed, depth %d, "
				"child %d, error %s\n", depth, i,
				strerror(errno));
			do_exit(1);
		}
	}
}
/*
 * Body of every forked process in the tree.
 *
 * First forks the next level of the tree (while @depth is below
 * max_depth), then appends a pair of lines per second to its own log file
 * "<LOG_FILE><suffix>" until the TEST_DONE marker appears, and finally
 * reaps its children through do_wait().  The process ends inside
 * do_wait()/do_exit(), so the return value is never observed.
 */
static int do_child(int depth, char *suffix)
{
	int i = 0;
	int len;
	FILE *cfp;
	char cfile[256];
	char *mode = "w";

	/*
	 * create_children() calls back into do_child() in each new child;
	 * both the parent and the children execute the code below.
	 */
	if (depth < max_depth)
		create_children(depth+1, suffix);

	/* bounded formatting: refuse a name that does not fit in cfile[] */
	len = snprintf(cfile, sizeof(cfile), "%s%s", LOG_FILE, suffix);
	if (len < 0 || (size_t)len >= sizeof(cfile)) {
		fprintf(logfp, "log file name %s%s is too long\n",
			LOG_FILE, suffix);
		do_exit(1);
	}

	while (!test_done()) {
		/* truncate the first time, append after that */
		cfp = fopen(cfile, mode);
		mode = "a";
		if (!cfp) {
			fprintf(logfp, "fopen(%s) failed, error %s\n", cfile,
				strerror(errno));
			do_exit(1);
		}

		fprintf(cfp, "gpid %d, pid %d: i %d\n", sys_getgpid(),
			getpid(), i++);
		fflush(cfp);

		sleep(1);

		fprintf(cfp, "gpid %d: woke up from sleep(1)\n", sys_getgpid());
		fflush(cfp);
		fclose(cfp);
	}

	/* Wait for any children that pre-deceased us */
	do_wait();
	do_exit(0);
	return 0;	/* not reached */
}
/*
 * Print the command-line synopsis on stdout and exit with status 1.
 * @argv: the program's argument vector; argv[0] is used as program name.
 */
static void usage(char *argv[])
{
	/* synopsis now matches the getopt string "hd:n:" and the text below */
	printf("%s [-h] [-d max-depth] [-n num-children]\n", argv[0]);
	printf("\t <max-depth> max depth of process tree, default 3\n");
	printf("\t <num-children> # of children per process, default 3\n");

	/*
	 * do_exit() leaves through _Exit(), which is not guaranteed to
	 * flush stdio buffers; flush here so the text is not lost when
	 * stdout is a pipe or a file.
	 */
	fflush(stdout);
	do_exit(1);
}
/*
 * Parse a non-negative decimal option value.
 * Prints the usage text and exits when @arg is empty, has trailing
 * characters, is negative, or does not fit in an int.
 */
static int parse_count(const char *arg, char *argv[])
{
	char *end;
	long val;

	errno = 0;
	val = strtol(arg, &end, 10);
	if (errno != 0 || end == arg || *end != '\0' ||
	    val < 0 || val > INT_MAX)
		usage(argv);

	return (int)val;
}

/*
 * Entry point: build the process tree, announce that it is ready to be
 * checkpointed, then wait for all children.
 *
 * Options: -d <max-depth>, -n <num-children>, -h (usage).
 * The process ends inside do_wait(), which exits with 0 on success and 1
 * on error.
 */
int main(int argc, char *argv[])
{
	int c;

	if (test_done()) {
		printf("Remove %s before running test\n", TEST_DONE);
		/* do_exit() uses _Exit(); flush so the message is not lost */
		fflush(stdout);
		do_exit(1);
	}

	/* getopt() signals the end of the options with -1 */
	while ((c = getopt(argc, argv, "hd:n:")) != -1) {
		switch (c) {
		case 'd':
			max_depth = parse_count(optarg, argv);
			break;
		case 'n':
			num_children = parse_count(optarg, argv);
			break;
		case 'h':
		default:
			usage(argv);
		}
	}

	logfp = fopen(LOG_FILE, "w");
	if (!logfp) {
		fprintf(stderr, "fopen(%s) failed, %s\n", LOG_FILE,
			strerror(errno));
		fflush(stderr);
		do_exit(1);
	}

	/* From here on only the log file is used for output */
	close(0);
	close(1);
	close(2);

	create_children(1, "");

	/*
	 * Now that we closed the special files and created process tree
	 * tell any wrapper scripts, we are ready for checkpoint
	 */
	checkpoint_ready();

#if 0
	while (!checkpoint_done())
		sleep(1);
#endif

	do_wait();
	return 0;	/* not reached: do_wait() exits the process */
}
#!/bin/bash
# Configuration for the ptree2 checkpoint/restart loop.

# Directory under which freeze(), unfreeze() and cleancgroup() look for
# the per-container cgroup directories.
freezermountpoint=/cgroups
# Base directory of the checkpoint/restart tools used below.
CHECKPOINT=".."
NS_EXEC="$CHECKPOINT/bin/ns_exec"
CR="$CHECKPOINT/bin/cr"
RSTR="$CHECKPOINT/bin/rstr"
MKTREE="$CHECKPOINT/bin/mktree"
# All script messages go through this; -e enables escapes such as \t.
ECHO="/bin/echo -e"
# Test program and its arguments.
TEST_CMD="./ptree2"
TEST_ARGS="-n 1 -d 2" # -n: children per process, -d: depth of process tree
# Log file written by this script (ps output, restart output).
SCRIPT_LOG="log-p2-loop"
# File passed to ns_exec -P and read back to obtain the container-init pid
# (presumably ns_exec writes the pid there -- TODO confirm).
TEST_PID_FILE="pid.ptree2";
# NOTE(review): not referenced elsewhere in the visible script.
LOG_FILE="loop-ptree2.log"
# Directory holding a copy of the test's log files taken at checkpoint time.
SNAPSHOT_DIR="snap1"
# Marker file touched by the script to make the test program finish.
TEST_DONE="test-done"
# Checkpoint image written by $CR and fed to $MKTREE on restart.
CHECKPOINT_FILE="checkpoint-ptree2";
# Marker file the script waits for before freezing the container.
CHECKPOINT_READY="checkpoint-ready"
# Marker file touched by the script after the checkpoint was taken.
CHECKPOINT_DONE="checkpoint-done"
# Prefix of the log files copied into and out of $SNAPSHOT_DIR.
TEST_LOG_PREFIX="log-ptree2"
# NOTE(review): not referenced elsewhere in the visible script.
TEST_LOG_SNAP="${TEST_LOG_PREFIX}.snap"
# Freeze the cgroup named $1 by writing FROZEN to its freezer.state file.
# A failed write is reported but is not fatal.  Sets the global $ret to
# the exit status of the write.
freeze()
{
	# $state was referenced in the failure message but never assigned
	local state="${freezermountpoint}/$1/freezer.state"

	$ECHO "\t - Freezing $1"
	$ECHO FROZEN > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		$ECHO "***** FAIL: 'echo FROZEN > $state' returned $ret"
	fi
}
# Thaw the cgroup named $1 by writing THAWED to its freezer.state file.
# A failed write is reported but is not fatal.  Sets the global $ret to
# the exit status of the write.
unfreeze()
{
	# $state was referenced in the failure message but never assigned
	local state="${freezermountpoint}/$1/freezer.state"

	$ECHO "\t - Unfreezing $1"
	$ECHO THAWED > "$state"
	ret=$?
	if [ $ret -ne 0 ]; then
		$ECHO "***** FAIL: 'echo THAWED > $state' returned $ret"
	fi
}
# Remove the cgroup directory named $1 and warn if it is still there
# afterwards.
cleancgroup()
{
	$ECHO "\t - Clean cgroup of $1"
	rmdir ${freezermountpoint}/$1
	if [ -d ${freezermountpoint}/$1 ]; then
		# Quoted: an unquoted ***** is glob-expanded to the names of
		# the files in the current directory.
		$ECHO "***** WARNING ${freezermountpoint}/$1 remains"
	fi
}
# Checkpoint the task tree rooted at pid $1 into $CHECKPOINT_FILE using $CR.
# On failure the matching ps lines are appended to $SCRIPT_LOG and the
# script exits with status 1.  The exit status of $CR is left in $ret.
checkpoint()
{
	local pid=$1

	$ECHO "Checkpoint: $CR $pid $CHECKPOINT_FILE"
	$CR $pid $CHECKPOINT_FILE
	ret=$?

	if [ $ret -eq 0 ]; then
		return 0
	fi

	$ECHO "***** FAIL: Checkpoint of $pid failed"
	ps aux |grep $TEST_CMD >> $SCRIPT_LOG
	exit 1
}
# Start the test program under $NS_EXEC in the background, wait until it
# has created $CHECKPOINT_READY, and print the pid read from
# $TEST_PID_FILE on stdout.
#
# stdout carries ONLY the pid, so the function can be used as
# pid=`create_container`; progress and error messages go to stderr.
# NOTE(review): when called through command substitution, 'exit 1' ends
# only the subshell -- the caller must check for an empty result.
# NOTE(review): the ns_exec flags here (-cpmP) differ from the ones the
# main loop uses (-cpumP) -- confirm which set is intended.
function create_container
{
	local pid;

	$ECHO "\t - $NS_EXEC -cpmP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS" >&2
	$NS_EXEC -cpmP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS &

	# Wait for test to finish setup
	while [ ! -f $CHECKPOINT_READY ]; do
		# "/bin/$ECHO -e" expanded to the non-existent /bin//bin/echo
		$ECHO "\t - Waiting for $CHECKPOINT_READY" >&2
		sleep 1;
	done;

	# Find global pid of container-init
	pid=`cat $TEST_PID_FILE`;
	if [ "x$pid" == "x" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid" >&2
		ps -ef |grep $TEST_CMD >> $SCRIPT_LOG
		exit 1
	fi

	$ECHO "Created container with pid $pid" >> $SCRIPT_LOG
	echo $pid
}
# Restart the container from $CHECKPOINT_FILE: run $MKTREE under $NS_EXEC
# in the background with the checkpoint image on stdin and all output
# appended to $SCRIPT_LOG.
function restart_container
{
	local ret;

	$ECHO "\t - Exec $NS_EXEC -cpuim -- $MKTREE --no-pids < $CHECKPOINT_FILE"
	sleep 1

	# Must stay on ONE line: when "2>&1 &" is wrapped onto its own line
	# the restart runs in the foreground and stderr is not captured.
	$NS_EXEC -cpuim -- $MKTREE --no-pids < $CHECKPOINT_FILE >> $SCRIPT_LOG 2>&1 &
	ret=$?

	# For a background command $? only tells whether the job was
	# launched, not whether the restart itself succeeded.
	if [ $ret -ne 0 ]; then
		$ECHO "***** FAIL: Restart of $pid failed"
		ps aux |grep $TEST_CMD >> $SCRIPT_LOG
		exit 1;
	fi
}
# Preflight: the freezer cgroup must be mounted, and the mount line must
# also mention the ns subsystem.
line=`grep freezer /proc/mounts`
if ! $ECHO $line | grep "\<ns\>"; then
	$ECHO "please mount freezer and ns cgroups"
	$ECHO " mkdir /cgroups"
	$ECHO " mount -t cgroup -o freezer,ns cgroup /cgroups"
	exit 1
fi

#freezermountpoint=`$ECHO $line | awk '{ print $2 '}`

# Kill any test process left over from an earlier run; killall's output
# goes to the script log.
killall $TEST_CMD > $SCRIPT_LOG 2>&1
# Main loop.  Each iteration starts the test in a new container,
# freezes and checkpoints it, kills it, restarts it from the checkpoint
# image and finally lets it run to completion.  The loop only ends
# through one of the 'exit 1' failure paths.
cnt=1
while [ 1 ]; do
	# Start each iteration with an empty script log and kernel log
	> $SCRIPT_LOG;
	dmesg -c > /dev/null

	$ECHO "===== Iteration $cnt"

	# Remove any 'state' files, start the app and let it tell us
	# when it is ready
	rm -f $CHECKPOINT_READY $TEST_DONE $TEST_PID_FILE

	$NS_EXEC -cpumP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS&
	$ECHO "\t - $NS_EXEC -cpumP $TEST_PID_FILE -- $TEST_CMD $TEST_ARGS"

	# Wait for test to finish setup
	while [ ! -f $CHECKPOINT_READY ]; do
		$ECHO "\t - Waiting for $CHECKPOINT_READY"
		sleep 1;
	done;

	ps -ef |grep ptree2 >> $SCRIPT_LOG

	# Find global pid of container-init
	pid=`cat $TEST_PID_FILE`;
	if [ "x$pid" == "x" ]; then
		$ECHO "***** FAIL: Invalid container-init pid $pid"
		ps -ef |grep $TEST_CMD
		exit 1
	fi
	$ECHO $pid

	#pid=`create_container`

	$ECHO "\t - Done creating container"

	# Prepare for snapshot: keep the previous snapshot as <dir>.prev and
	# always start with an existing, empty snapshot directory.  (The
	# mkdir used to sit inside the 'if', so on the very first run the
	# directory was never created and the cp below had no directory to
	# copy into.)
	if [ -d $SNAPSHOT_DIR ]; then
		rm -rf ${SNAPSHOT_DIR}.prev
		mv $SNAPSHOT_DIR ${SNAPSHOT_DIR}.prev
	fi
	mkdir -p $SNAPSHOT_DIR

	freeze $pid

	num_pids1=`ps -ef |grep $TEST_CMD | wc -l`

	checkpoint $pid

	#$ECHO t > /proc/sysrq-trigger
	#dmesg > dmesg-1.out

	# Snapshot the log files
	cp ${TEST_LOG_PREFIX}* $SNAPSHOT_DIR

	touch $CHECKPOINT_DONE

	# Kill the original tree; the signal takes effect once it is thawed
	killall -9 `basename $TEST_CMD`

	unfreeze $pid

	sleep 3

	cleancgroup $pid

	# Restore the snapshot after the main process has been killed
	/bin/cp ${SNAPSHOT_DIR}/* .

	# Restart.
	restart_container

	sleep 3;

	num_pids2=`ps -ef |grep $TEST_CMD | wc -l`
	ps -ef |grep ptree2 >> $SCRIPT_LOG
	$ECHO "\t - num_pids1 $num_pids1, num_pids2 $num_pids2";

	# Find global-pid of container-init
	nspid=`pidof $NS_EXEC`
	if [ "x$nspid" == "x" ]; then
		$ECHO "***** FAIL: Can't find pid of $NS_EXEC"
		exit 1;
	fi

	# End test gracefully
	touch $TEST_DONE

	# Single-line message (it had been split in two by mail wrapping)
	$ECHO "\t - Restart: Waiting for container-init (gloabl-pid $nspid) to exit"
	wait $nspid;
	ret=$?

	$ECHO "Container-init (global-pid $nspid) exited, status $ret"

	# Use the configured mount point rather than a hard-coded /cgroups
	if [ -d ${freezermountpoint}/$pid ]; then
		cleancgroup $pid
	fi

	cnt=$((cnt+1))
done
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel