Signed-off-by: Oren Laadan <[email protected]>
---
 mktree.c |  123 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/mktree.c b/mktree.c
index e42407f..6c6eb98 100644
--- a/mktree.c
+++ b/mktree.c
@@ -144,12 +144,13 @@ struct task {
        pid_t real_parent;      /* pid of task's real parent */
 };
 
-#define TASK_ROOT      0x1     /* */
-#define TASK_DEAD      0x2     /* */
-#define TASK_THREAD    0x4     /* */
-#define TASK_SIBLING   0x8     /* */
-#define TASK_SESSION   0x10    /* */
-#define TASK_NEWPID    0x20    /* */
+#define TASK_ROOT      0x1     /* root task */
+#define TASK_GHOST     0x2     /* dead task (pid used as sid/pgid) */
+#define TASK_THREAD    0x4     /* thread (non leader) */
+#define TASK_SIBLING   0x8     /* creator's sibling (use CLONE_PARENT) */
+#define TASK_SESSION   0x10    /* inherits creator's original sid */
+#define TASK_NEWPID    0x20    /* starts a new pid namespace */
+#define TASK_DEAD      0x40    /* dead task (dummy) */
 
 struct ckpt_ctx {
        pid_t init_pid;
@@ -843,10 +844,12 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
                        ckpt_dbg_cont(" prev %d", task->prev_sib->pid);
                if (task->phantom)
                        ckpt_dbg_cont(" placeholder %d", task->phantom->pid);
-               ckpt_dbg_cont(" %c%c%c%c",
+               ckpt_dbg_cont(" %c%c%c%c%c%c",
                       (task->flags & TASK_THREAD) ? 'T' : ' ',
                       (task->flags & TASK_SIBLING) ? 'P' : ' ',
                       (task->flags & TASK_SESSION) ? 'S' : ' ',
+                      (task->flags & TASK_NEWPID) ? 'N' : ' ',
+                      (task->flags & TASK_GHOST) ? 'G' : ' ',
                       (task->flags & TASK_DEAD) ? 'D' : ' ');
                ckpt_dbg_cont("\n");
        }
@@ -855,7 +858,7 @@ static int ckpt_build_tree(struct ckpt_ctx *ctx)
        return 0;
 }              
 
-static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
+static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid, pid_t ppid)
 {
        struct task *task;
 
@@ -864,12 +867,13 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
 
        task = &ctx->tasks_arr[ctx->tasks_nr++];
 
-       task->flags = TASK_DEAD;
+       task->flags = TASK_GHOST;
 
+       /* ghost: own thread-group leader, child of @ppid, sid set to @ppid */
        task->pid = pid;
-       task->ppid = ckpt_init_task(ctx)->pid;
+       task->ppid = ppid;
        task->tgid = pid;
-       task->sid = pid;
+       task->sid = ppid;
 
        task->children = NULL;
        task->next_sib = NULL;
@@ -892,36 +896,38 @@ static int ckpt_setup_task(struct ckpt_ctx *ctx, pid_t pid)
 
 static int ckpt_init_tree(struct ckpt_ctx *ctx)
 {
+       struct ckpt_hdr_pids *pids_arr = ctx->pids_arr;
+       int pids_nr = ctx->pids_nr;
        struct task *task;
-       pid_t init_sid;
-       pid_t init_pid;
-       pid_t init_pgid;
+       pid_t root_sid;
+       pid_t root_pid;
+       pid_t root_pgid;
        int i;
 
-       init_pid = ctx->pids_arr[0].vpid;
-       init_sid = ctx->pids_arr[0].vsid;
-       init_pgid = ctx->pids_arr[0].vpgid;
+       root_pid = pids_arr[0].vpid;
+       root_sid = pids_arr[0].vsid;
+       root_pgid = pids_arr[0].vpgid;
 
        /* XXX for out-of-container subtrees */
-       for (i = 0; i < ctx->pids_nr; i++) {
-               if (ctx->pids_arr[i].vsid == init_sid)
-                       ctx->pids_arr[i].vsid = init_pid;
-               if (ctx->pids_arr[i].vpgid == init_sid)
-                       ctx->pids_arr[i].vpgid = init_pid;
-               if (ctx->pids_arr[i].vpgid == init_pgid)
-                       ctx->pids_arr[i].vpgid = init_pid;
+       for (i = 0; i < pids_nr; i++) {
+               if (pids_arr[i].vsid == root_sid)
+                       pids_arr[i].vsid = root_pid;
+               if (pids_arr[i].vpgid == root_sid)
+                       pids_arr[i].vpgid = root_pid;
+               if (pids_arr[i].vpgid == root_pgid)
+                       pids_arr[i].vpgid = root_pid;
        }
 
        /* populate with known tasks */
-       for (i = 0; i < ctx->pids_nr; i++) {
+       for (i = 0; i < pids_nr; i++) {
                task = &ctx->tasks_arr[i];
 
                task->flags = 0;
 
-               task->pid = ctx->pids_arr[i].vpid;
-               task->ppid = ctx->pids_arr[i].vppid;
-               task->tgid = ctx->pids_arr[i].vtgid;
-               task->sid = ctx->pids_arr[i].vsid;
+               task->pid = pids_arr[i].vpid;
+               task->ppid = pids_arr[i].vppid;
+               task->tgid = pids_arr[i].vtgid;
+               task->sid = pids_arr[i].vsid;
 
                task->children = NULL;
                task->next_sib = NULL;
@@ -936,19 +942,32 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
                        return -1;
        }
 
-       ctx->tasks_nr = ctx->pids_nr;
+       ctx->tasks_nr = pids_nr;
 
        /* add pids unaccounted for (no tasks) */
-       for (i = 0; i < ctx->pids_nr; i++) {
-               if (ckpt_setup_task(ctx, ctx->pids_arr[i].vsid) < 0)
+       for (i = 0; i < pids_nr; i++) {
+               /* session leader's parent is root task */
+               if (ckpt_setup_task(ctx, pids_arr->vsid, root_pid) < 0)
                        return -1;
-               if (ckpt_setup_task(ctx, ctx->pids_arr[i].vpgid) < 0)
+
+               /*
+                * If pgrp != sid, pgrp owner's parent is sid. Other
+                * tasks with same pgrp will need to have their sid
+                * matching, too, when the kernel restores their pgrp.
+                * If pgrp == sid, then the call above would have
+                * ensured that the pid is hashed: ckpt_setup_task()
+                * will return promptly.
+                */
+               if (ckpt_setup_task(ctx, pids_arr->vpgid, pids_arr->vsid) < 0)
                        return -1;
+
+               pids_arr++;
        }
 
        /* mark root task(s) */
        ctx->tasks_arr[0].flags |= TASK_ROOT;
 
+       ckpt_dbg("total tasks (including ghosts): %d\n", ctx->tasks_nr);
        return 0;
 }
 
@@ -970,10 +989,11 @@ static int ckpt_init_tree(struct ckpt_ctx *ctx)
  * flags, pid, tgid, sid, pgid, and pointers to the a creator, next
  * and previous sibling, and first child task. Note that the creator
  * may not necessarily correspond to the parent. The possible flags
- * are TASK_DEAD, TASK_THREAD, TASK_SESSION (that asks inherit a
- * session id), and TASK_SIBLING (that asks to inherit the parent via
- * CLONE_PARENT). The algorithm loops through all the entries in the
- * table:
+ * are TASK_ROOT, TASK_GHOST, TASK_THREAD, TASK_SIBLING (that asks to
+ * inherit the parent via CLONE_PARENT), TASK_SESSION (that asks to
+ * inherit a session id), TASK_NEWPID (that asks to start a new pid
+ * namespace), and TASK_DEAD. The algorithm loops through all the
+ * entries in the table:
  *
  * If the entry is a thread and not the thread group leader, we set
  * the creator to be the thread group leader and set TASK_THREAD.
@@ -1023,7 +1043,7 @@ static int ckpt_set_creator(struct ckpt_ctx *ctx, struct task *task)
        struct task *creator;
 
        if (task == ckpt_init_task(ctx)) {
-               ckpt_err("pid %d: init - no creator\n", ckpt_init_task(ctx)->pid);
+               ckpt_err("pid %d: no init creator\n", ckpt_init_task(ctx)->pid);
                return -1;
        }
 
@@ -1230,6 +1250,7 @@ static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
 {
        struct task *child;
        struct pid_swap swap;
+       unsigned long flags = 0;
        pid_t newpid;
        int ret;
 
@@ -1307,9 +1328,20 @@ static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
        }
        close(ctx->pipe_out);
 
+       /*
+        * Ghost tasks are not restarted and end up dead, but their
+        * pids are referred to by other tasks' pgids (also sids, that
+        * are already properly set by now). Therefore, they stick
+        * around until those tasks actually restore their pgrp, and
+        * then exit (more precisely, are killed). The RESTART_GHOST flag
+        * tells the kernel that they are not to be restored.
+        */
+       if (task->flags & TASK_GHOST)
+               flags |= RESTART_GHOST;
+
        /* on success this doesn't return */
-       ckpt_dbg("about to call sys_restart()\n");
-       ret = restart(0, STDIN_FILENO, 0);
+       ckpt_dbg("about to call sys_restart(), flags %#lx\n", flags);
+       ret = restart(0, STDIN_FILENO, flags);
        if (ret < 0)
                perror("task restore failed");
        return ret;
@@ -1561,7 +1593,10 @@ static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
        memcpy(ctx->copy_arr, ctx->pids_arr, len);
 
        /* read in 'pid_swap' data and adjust ctx->pids_arr */
-       for (n = 0; n < ctx->pids_nr; n++) {
+       for (n = 0; n < ctx->tasks_nr; n++) {
+               /* don't expect data from dead tasks */
+               if (ctx->tasks_arr[n].flags & TASK_DEAD)
+                       continue;
                ret = read(ctx->pipe_in, &swap, sizeof(swap));
                if (ret < 0)
                        ckpt_abort(ctx, "read pipe");
@@ -1576,8 +1611,10 @@ static int ckpt_adjust_pids(struct ckpt_ctx *ctx)
                                ctx->copy_arr[m].vpid = swap.new;
                        if (ctx->pids_arr[m].vtgid == swap.old)
                                ctx->copy_arr[m].vtgid = swap.new;
-                       if (ctx->pids_arr[m].vppid == swap.old)
-                               ctx->copy_arr[m].vppid = swap.new;
+                       if (ctx->pids_arr[m].vpgid == swap.old)
+                               ctx->copy_arr[m].vpgid = swap.new;
+                       else if (ctx->pids_arr[m].vpgid == -swap.old)
+                               ctx->copy_arr[m].vpgid = -swap.new;
                }
        }
 
-- 
1.6.0.4

_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to