If pgid, or sid, or ppid is zero, it means that they point to a task
from the ancestor pid-ns. Make sure 'mktree' handles these correctly.

If a 0 pid (pgid/sid/ppid) exists, then force the --pidns option to be
on - to ensure that we have a new pid-ns (and thus a parent pid-ns).

Ignore pid = 0 when setting up new tasks in ckpt_setup_task().

Improve sanity checks for pid 0 and tgid 0 (both are impossible).

Allow ppid = 0 only for root_task. For others, allow sid = 0.

Introduce dummy zero_task that represents the parent of root_task,
likely from ancestor pid-ns. Setting a creator to root_task simplifies
the logic.

Also, ensure that a the tgid of threads is valid (has an entry in the
pids array) even if the thread group leader died.

Signed-off-by: Oren Laadan <[email protected]>
---
 restart.c |  126 +++++++++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/restart.c b/restart.c
index a7f9d22..3128d4e 100644
--- a/restart.c
+++ b/restart.c
@@ -63,8 +63,8 @@ static char usage_str[] =
  * the time of the checkpoint may be already in use then. Therefore,
  * by default, 'mktree' creates an equivalen tree without restoring
  * the original pids, assuming that the application can tolerate this.
- * For this, the 'ckpt_hdr_pids' array is transformed on-the-fly
- * before it is fed to the kernel.
+ * For this, the 'ckpt_pids' array is transformed on-the-fly before it
+ * is fed to the kernel.
  *
  * By default, "--pids" implied "--pidns" and vice-versa. The user can
  * use "--pids --no-pidns" for a restart in the currnet namespace -
@@ -144,6 +144,9 @@ struct task {
        pid_t real_parent;      /* pid of task's real parent */
 };
 
+/* zero_task represents creator of root_task (all pids 0) */
+struct task zero_task;
+
 #define TASK_ROOT      0x1     /* root task */
 #define TASK_GHOST     0x2     /* dead task (pid used as sid/pgid) */
 #define TASK_THREAD    0x4     /* thread (non leader) */
@@ -153,7 +156,7 @@ struct task {
 #define TASK_DEAD      0x40    /* dead task (dummy) */
 
 struct ckpt_ctx {
-       pid_t init_pid;
+       pid_t root_pid;
        int pipe_in;
        int pipe_out;
        int pids_nr;
@@ -196,6 +199,7 @@ int global_sent_sigint;
 
 static int ckpt_build_tree(struct ckpt_ctx *ctx);
 static int ckpt_init_tree(struct ckpt_ctx *ctx);
+static int ckpt_need_pidns(struct ckpt_ctx *ctx);
 static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task);
 static int ckpt_placeholder_task(struct ckpt_ctx *ctx, struct task *task);
 static int ckpt_propagate_session(struct ckpt_ctx *ctx, struct task *session);
@@ -545,6 +549,16 @@ int main(int argc, char *argv[])
        if (ret < 0)
                exit(1);
 
+       /*
+        * For a pgid/sid == 0, the corresponding restarting task will
+        * expect to reference the parent pid-ns (of entire restart).
+        * We ensure that one does exist by setting ctx.args->pidns.
+        */
+       if (!ctx.args->pidns && ckpt_need_pidns(&ctx)) {
+               ckpt_dbg("found pgid/sid 0, need pidns\n");
+               ctx.args->pidns = 1;
+       }
+
        if (ctx.args->pidns && ctx.tasks_arr[0].pid != 1) {
                ckpt_dbg("new pidns without init\n");
                if (global_send_sigint == -1)
@@ -821,8 +835,6 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
        /* assign a creator to each task */
        for (i = 0; i < ctx->tasks_nr; i++) {
                task = &ctx->tasks_arr[i];
-               if (task == ckpt_init_task(ctx))
-                       continue;
                if (task->creator)
                        continue;
                if (ckpt_set_creator(ctx, task) < 0) {
@@ -837,7 +849,7 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
                task = &ctx->tasks_arr[i];
                ckpt_dbg("[%d] pid %d ppid %d sid %d creator %d",
                         i, task->pid, task->ppid, task->sid,
-                      task->creator ? task->creator->pid : 0);
+                        task->creator->pid);
                if (task->next_sib)
                        ckpt_dbg_cont(" next %d", task->next_sib->pid);
                if (task->prev_sib)
@@ -862,7 +874,10 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t 
pid, pid_t ppid)
 {
        struct task *task;
 
-       if (hash_lookup(ctx, pid))
+       if (pid == 0)  /* ignore if outside namespace */
+               return 0;
+
+       if (hash_lookup(ctx, pid))  /* already handled */
                return 0;
 
        task = &ctx->tasks_arr[ctx->tasks_nr++];
@@ -896,7 +911,7 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, 
pid_t ppid)
 
 static int ckpt_init_tree(struct ckpt_ctx *ctx)
 {
-       struct ckpt_hdr_pids *pids_arr = ctx->pids_arr;
+       struct ckpt_pids *pids_arr = ctx->pids_arr;
        int pids_nr = ctx->pids_nr;
        struct task *task;
        pid_t root_sid;
@@ -908,16 +923,6 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
        root_sid = pids_arr[0].vsid;
        root_pgid = pids_arr[0].vpgid;
 
-       /* XXX for out-of-container subtrees */
-       for (i = 0; i < pids_nr; i++) {
-               if (pids_arr[i].vsid == root_sid)
-                       pids_arr[i].vsid = root_pid;
-               if (pids_arr[i].vpgid == root_sid)
-                       pids_arr[i].vpgid = root_pid;
-               if (pids_arr[i].vpgid == root_pgid)
-                       pids_arr[i].vpgid = root_pid;
-       }
-
        /* populate with known tasks */
        for (i = 0; i < pids_nr; i++) {
                task = &ctx->tasks_arr[i];
@@ -938,6 +943,14 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
                task->rpid = -1;
                task->ctx = ctx;
 
+               if (task->pid == 0) {
+                       ckpt_err("Invalid pid 0 for task#%d\n", i);
+                       return -1;
+               } else if (task->tgid == 0) {
+                       ckpt_err("Invalid tgid 0 for task#%d\n", i);
+                       return -1;
+               }
+
                if (hash_insert(ctx, task->pid, task) < 0)
                        return -1;
        }
@@ -946,31 +959,60 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
 
        /* add pids unaccounted for (no tasks) */
        for (i = 0; i < pids_nr; i++) {
-               /* session leader's parent is root task */
-               if (ckpt_setup_task(ctx, pids_arr->vsid, root_pid) < 0)
+               pid_t sid;
+
+               sid = pids_arr[i].vsid;
+
+               /*
+                * An unaccounted-for sid belongs to a task that was a
+                * session leader and died. We can safe set its parent
+                * (and creator) to be the root task.
+                */
+               if (ckpt_setup_task(ctx, sid, root_pid) < 0)
                        return -1;
 
                /*
-                * If pgrp != sid, pgrp owner's parent is sid. Other
-                * tasks with same pgrp will need to have threir sid
-                * matching, too, when the kernel restores their pgrp.
-                * If pgrp == sid, then the call above would have
-                * ensured that the pid is hashed: ckpt_setup_task()
-                * will return promptly.
+                * If a pid belongs to a dead thread group leader, we
+                * need to add it with the same sid as current (and
+                * other) threads.
                 */
-               if (ckpt_setup_task(ctx, pids_arr->vpgid, pids_arr->vsid) < 0)
+               if (ckpt_setup_task(ctx, pids_arr[i].vtgid, sid) < 0)
                        return -1;
 
-               pids_arr++;
+               /*
+                * If pgrp == sid, then the pgrp/sid will already have
+                * been hashed by now (e.g. by the call above) and the
+                * ckpt_setup_task() will return promptly.
+                * If pgrp != sid, then the pgrp 'owner' must have the
+                * same sid as us: all tasks with same pgrp must have
+                * their sid matching.
+                */
+               if (ckpt_setup_task(ctx, pids_arr[i].vpgid, sid) < 0)
+                       return -1;
        }
 
-       /* mark root task(s) */
-       ctx->tasks_arr[0].flags |= TASK_ROOT;
+       /* mark root task(s), and set its "creator" to be zero_task */
+       ckpt_init_task(ctx)->flags |= TASK_ROOT;
+       ckpt_init_task(ctx)->creator = &zero_task;
 
        ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
        return 0;
 }
 
+static int ckpt_need_pidns(struct ckpt_ctx *ctx)
+{
+       int i;
+
+       for (i = 0; i < ctx->pids_nr; i++) {
+               if (ctx->pids_arr[i].vpid == 0 ||
+                   ctx->pids_arr[i].vpgid == 0 ||
+                   ctx->pids_arr[i].vsid == 0)
+                       return 1;
+       }
+
+       return 0;
+}
+
 /*
  * Algorithm DumpForest
  * "Transparent Checkpoint/Restart of Multiple Processes on Commodity
@@ -1043,10 +1085,20 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, 
struct task *task)
        struct task *creator;
 
        if (task == ckpt_init_task(ctx)) {
-               ckpt_err("pid %d: no init creator\n", ckpt_init_task(ctx)->pid);
+               ckpt_err("pid %d: logical error\n", ckpt_init_task(ctx)->pid);
                return -1;
        }
 
+       /* only root_task can have ppid == 0, parent must always exist */
+       if (task->ppid == 0 || !parent) {
+               ckpt_err("pid %d: invalid ppid %d\n", task->pid, task->ppid);
+               return -1;
+       }
+
+       /* sid == 0 must have been inherited from outside the container */
+       if (task->sid == 0)
+               session = ckpt_init_task(ctx);
+
        if (task->tgid != task->pid) {
                /* thread: creator is thread-group-leader */
                ckpt_dbg("pid %d: thread tgid %d\n", task->pid, task->tgid);
@@ -1073,7 +1125,7 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, struct 
task *task)
                creator = session->phantom;
        } else {
                /* first make sure we know the session's creator */
-               if (!session->creator && session != ckpt_init_task(ctx)) {
+               if (!session->creator) {
                        /* (non-session-leader) recursive: session's creator */
                        ckpt_dbg("pid %d: recursive session creator %d\n",
                               task->pid, task->sid);
@@ -1206,7 +1258,7 @@ static int ckpt_propagate_session(struct ckpt_ctx *ctx, 
struct task *task)
                ckpt_dbg("pid %d: moving up to %d\n", task->pid, creator->pid);
                task = creator;
 
-               if(!task->creator && task != ckpt_init_task(ctx)) {
+               if(!task->creator) {
                        if (ckpt_set_creator(ctx, task) < 0)
                                return -1;
                }
@@ -1389,7 +1441,7 @@ int ckpt_fork_stub(void *data)
 
        /* root has some extra work */
        if (task->flags & TASK_ROOT) {
-               ctx->init_pid = _getpid();
+               ctx->root_pid = _getpid();
                ckpt_dbg("root task pid %d\n", _getpid());
        }
 
@@ -1513,13 +1565,13 @@ static int ckpt_fork_feeder(struct ckpt_ctx *ctx)
 static void ckpt_abort(struct ckpt_ctx *ctx, char *str)
 {
        perror(str);
-       kill(-(ctx->init_pid), SIGKILL);
+       kill(-(ctx->root_pid), SIGKILL);
        exit(1);
 }
 
 /*
  * feeder process: delegates checkpoint image stream to the kernel.
- * In '--no-pids' mode, transform the pids array (struct ckpt_hdr_pids)
+ * In '--no-pids' mode, transform the pids array (struct ckpt_pids)
  * on the fly and feed the result to the "init" task of the restart
  */
 static int ckpt_do_feeder(void *data)
@@ -1567,7 +1619,7 @@ static int ckpt_do_feeder(void *data)
 }
 
 /*
- * ckpt_adjust_pids: transform the pids array (struct ckpt_hdr_pids) by
+ * ckpt_adjust_pids: transform the pids array (struct ckpt_pids) by
  * substituing actual pid values for original pid values.
  *
  * Collect pids reported by the newly created tasks; each task sends
-- 
1.6.0.4

_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to