Introduce a helper to iterate over the descendants of a given
task. The prototype is:

int walk_task_subtree(struct task_struct *root,
                      int (*func)(struct task_struct *, void *),
                      void *data)

The function will start with @root, and iterate through all the
descendants, including threads, in a DFS manner. Children of a task
are traversed before proceeding to the next thread of that task.

For each task, the callback @func will be called providing the task
pointer and the @data. The callback is invoked while holding the
tasklist_lock for reading. If the callback fails it should return a
negative error, and the traversal ends. If the callback succeeds, it
returns a non-negative number, and these values are summed.

On success, walk_task_subtree() returns the total summed. On failure,
it returns a negative value.

The code in checkpoint/checkpoint.c and checkpoint/restart.c has
been converted to use this helper.

Signed-off-by: Oren Laadan <[email protected]>
---
 checkpoint/checkpoint.c    |   97 ++++++++++++++--------------------------
 checkpoint/restart.c       |  105 +++++++++++++++++++-------------------------
 checkpoint/sys.c           |   55 +++++++++++++++++++++++
 include/linux/checkpoint.h |    3 +
 4 files changed, 137 insertions(+), 123 deletions(-)

diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index dbe9e10..1eeb557 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -520,80 +520,51 @@ static int collect_objects(struct ckpt_ctx *ctx)
        return ret;
 }
 
+struct ckpt_cnt_tasks {
+       struct ckpt_ctx *ctx;
+       int nr;
+};
+
 /* count number of tasks in tree (and optionally fill pid's in array) */
-static int tree_count_tasks(struct ckpt_ctx *ctx)
+static int __tree_count_tasks(struct task_struct *task, void *data)
 {
-       struct task_struct *root;
-       struct task_struct *task;
-       struct task_struct *parent;
-       struct task_struct **tasks_arr = ctx->tasks_arr;
-       int nr_tasks = ctx->nr_tasks;
-       int nr = 0;
+       struct ckpt_cnt_tasks *d = (struct ckpt_cnt_tasks *) data;
+       struct ckpt_ctx *ctx = d->ctx;
        int ret;
 
-       read_lock(&tasklist_lock);
-
-       /* we hold the lock, so root_task->real_parent can't change */
-       task = ctx->root_task;
-       if (ctx->root_init) {
-               /* container-init: start from container parent */
-               parent = task->real_parent;
-               root = parent;
-       } else {
-               /* non-container-init: start from root task and down */
-               parent = NULL;
-               root = task;
-       }
-
-       /* count tasks via DFS scan of the tree */
-       while (1) {
-               ctx->tsk = task;  /* (for ckpt_write_err) */
+       ctx->tsk = task;  /* (for ckpt_write_err) */
 
-               /* is this task cool ? */
-               ret = may_checkpoint_task(ctx, task);
-               if (ret < 0) {
-                       nr = ret;
-                       break;
-               }
-               if (tasks_arr) {
-                       /* unlikely... but if so then try again later */
-                       if (nr == nr_tasks) {
-                               nr = -EBUSY; /* cleanup in ckpt_ctx_free() */
-                               break;
-                       }
-                       tasks_arr[nr] = task;
-                       get_task_struct(task);
-               }
-               nr++;
-               /* if has children - proceed with child */
-               if (!list_empty(&task->children)) {
-                       parent = task;
-                       task = list_entry(task->children.next,
-                                         struct task_struct, sibling);
-                       continue;
-               }
-               while (task != root) {
-                       /* if has sibling - proceed with sibling */
-                       if (!list_is_last(&task->sibling, &parent->children)) {
-                               task = list_entry(task->sibling.next,
-                                                 struct task_struct, sibling);
-                               break;
-                       }
+       /* is this task cool ? */
+       ret = may_checkpoint_task(ctx, task);
+       if (ret < 0)
+               goto out;
 
-                       /* else, trace back to parent and proceed */
-                       task = parent;
-                       parent = parent->real_parent;
+       if (ctx->tasks_arr) {
+               if (d->nr == ctx->nr_tasks) {  /* unlikely... try again later */
+                       __ckpt_write_err(ctx, "TE", "bad task count\n", -EBUSY);
+                       ret = -EBUSY;
+                       goto out;
                }
-               if (task == root)
-                       break;
+               ctx->tasks_arr[d->nr++] = task;
+               get_task_struct(task);
        }
+
+       ret = 1;
+ out:
+       if (ret < 0)
+               ckpt_write_err(ctx, "", NULL);
        ctx->tsk = NULL;
+       return ret;
+}
 
-       read_unlock(&tasklist_lock);
+static int tree_count_tasks(struct ckpt_ctx *ctx)
+{
+       struct ckpt_cnt_tasks data;
 
-       if (nr < 0)
-               ckpt_write_err(ctx, "", NULL);
-       return nr;
+       data.ctx = ctx;
+       data.nr = 0;
+
+       return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data);
 }
 
 /*
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 37454c5..c021eaf 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -804,77 +804,56 @@ static int do_restore_task(void)
  * Called by the coodinator to set the ->checkpoint_ctx pointer of the
  * root task and all its descendants.
  */
-static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
+static int __prepare_descendants(struct task_struct *task, void *data)
 {
-       struct task_struct *leader = root;
-       struct task_struct *parent = NULL;
-       struct task_struct *task = root;
-       int nr_pids = 0;
-       int ret = -EBUSY;
+       struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
 
-       read_lock(&tasklist_lock);
-       while (1) {
-               ckpt_debug("consider task %d\n", task_pid_vnr(task));
-               if (task_ptrace(task) & PT_PTRACED)
-                       break;
-               /*
-                * Set task->checkpoint_ctx of all non-zombie descendants.
-                * If a descendant already has a ->checkpoint_ctx, it
-                * must be a coordinator (for a different restart ?) so
-                * we fail.
-                *
-                * Note that own ancestors cannot interfere since they
-                * won't descend past us, as own ->checkpoint_ctx must
-                * already be set.
-                */
-               if (!task->exit_state) {
-                       if (!set_task_ctx(task, ctx))
-                               break;
-                       ckpt_debug("prepare task %d\n", task_pid_vnr(task));
-                       wake_up_process(task);
-                       nr_pids++;
-               }
+       ckpt_debug("consider task %d\n", task_pid_vnr(task));
 
-               /* if has children - proceed with child */
-               if (!list_empty(&task->children)) {
-                       parent = task;
-                       task = list_entry(task->children.next,
-                                         struct task_struct, sibling);
-                       continue;
-               }
-               while (task != root) {
-                       /* if has sibling - proceed with sibling */
-                       if (!list_is_last(&task->sibling, &parent->children)) {
-                               task = list_entry(task->sibling.next,
-                                                 struct task_struct, sibling);
-                               break;
-                       }
-
-                       /* else, trace back to parent and proceed */
-                       task = parent;
-                       parent = parent->real_parent;
-               }
-               if (task == root) {
-                       /* in case root task is multi-threaded */
-                       root = task = next_thread(task);
-                       if (root == leader) {
-                               ret = 0;
-                               break;
-                       }
-               }
+       if (task_ptrace(task) & PT_PTRACED) {
+               ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
+               return -EBUSY;
        }
-       read_unlock(&tasklist_lock);
-       ckpt_debug("nr %d/%d  ret %d\n", ctx->nr_pids, nr_pids, ret);
+
+       /*
+        * Set task->checkpoint_ctx of all non-zombie descendants.
+        * If a descendant already has a ->checkpoint_ctx, it
+        * must be a coordinator (for a different restart ?) so
+        * we fail.
+        *
+        * Note that own ancestors cannot interfere since they
+        * won't descend past us, as own ->checkpoint_ctx must
+        * already be set.
+        */
+       if (!task->exit_state) {
+               if (!set_task_ctx(task, ctx))
+                       return -EBUSY;
+               ckpt_debug("prepare task %d\n", task_pid_vnr(task));
+               wake_up_process(task);
+               return 1;
+       }
+
+       return 0;
+}
+
+static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
+{
+       int nr_pids;
+
+       nr_pids = walk_task_subtree(root, __prepare_descendants, ctx);
+       ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids);
+       if (nr_pids < 0)
+               return nr_pids;
 
        /*
         * Actual tasks count may exceed ctx->nr_pids due of 'dead'
         * tasks used as place-holders for PGIDs, but not fall short.
         */
-       if (!ret && (nr_pids < ctx->nr_pids))
-               ret = -ESRCH;
+       if (nr_pids < ctx->nr_pids)
+               return -ESRCH;
 
        atomic_set(&ctx->nr_total, nr_pids);
-       return ret;
+       return nr_pids;
 }
 
 static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
@@ -991,6 +970,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
pid)
                return -EBUSY;
        }
 
+       /*
+        * From now on we are committed to the restart. If anything
+        * fails, we'll wipe out the entire subtree below us, to
+        * ensure proper cleanup.
+        */
+
        if (ctx->uflags & RESTART_TASKSELF) {
                ret = restore_task(ctx);
                ckpt_debug("restore task: %d\n", ret);
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 77613d7..7604089 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -276,6 +276,61 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx)
                ckpt_ctx_free(ctx);
 }
 
+int walk_task_subtree(struct task_struct *root,
+                     int (*func)(struct task_struct *, void *),
+                     void *data)
+{
+
+       struct task_struct *leader = root;
+       struct task_struct *parent = NULL;
+       struct task_struct *task = root;
+       int total = 0;
+       int ret;
+
+       read_lock(&tasklist_lock);
+       while (1) {
+               /* invoke callback on this task */
+               ret = func(task, data);
+               if (ret < 0)
+                       break;
+
+               total += ret;
+
+               /* if has children - proceed with child */
+               if (!list_empty(&task->children)) {
+                       parent = task;
+                       task = list_entry(task->children.next,
+                                         struct task_struct, sibling);
+                       continue;
+               }
+
+               while (task != root) {
+                       /* if has sibling - proceed with sibling */
+                       if (!list_is_last(&task->sibling, &parent->children)) {
+                               task = list_entry(task->sibling.next,
+                                                 struct task_struct, sibling);
+                               break;
+                       }
+
+                       /* else, trace back to parent and proceed */
+                       task = parent;
+                       parent = parent->real_parent;
+               }
+
+               if (task == root) {
+                       /* in case root task is multi-threaded */
+                       root = task = next_thread(task);
+                       if (root == leader)
+                               break;
+               }
+       }
+       read_unlock(&tasklist_lock);
+
+       ckpt_debug("total %d ret %d\n", total, ret);
+       return (ret < 0 ? ret : total);
+}
+
+
 /**
  * sys_checkpoint - checkpoint a container
  * @pid: pid of the container init(1) process
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index b7f1796..12210e4 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -50,6 +50,9 @@
         RESTART_FROZEN | \
         RESTART_GHOST)
 
+extern int walk_task_subtree(struct task_struct *task,
+                            int (*func)(struct task_struct *, void *),
+                            void *data);
 extern void exit_checkpoint(struct task_struct *tsk);
 
 extern int ckpt_kwrite(struct ckpt_ctx *ctx, void *buf, int count);
-- 
1.6.0.4

_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to