Quoting Oren Laadan ([email protected]):
> Introduce a helper to iterate over the descendants of a given
> task. The prototype is:
> 
> int walk_task_subtree(struct task_struct *root,
>                     int (*func)(struct task_struct *, void *),
>                             void *data)
> 
> The function will start with @root, and iterate through all the
> descendants, including threads, in a DFS manner. Children of a task
> are traversed before proceeding to the next thread of that task.
> 
> For each task, the callback @func will be called providing the task
> pointer and the @data. The callback is invoked while holding the
> tasklist_lock for reading. If the callback fails it should return a
> negative error, and the traversal ends. If the callback succeeds, it
> returns a non-negative number, and these values are summed.
> 
> On success, walk_task_subtree() returns the total summed. On failure,
> it returns a negative value.
> 
> The code in checkpoint/checkpoint.c and checkpoint/restart.c has
> been converted to use this helper.
> 
> Signed-off-by: Oren Laadan <[email protected]>

Acked-by: Serge Hallyn <[email protected]>

> ---
>  checkpoint/checkpoint.c    |   97 ++++++++++++++--------------------------
>  checkpoint/restart.c       |  105 
> +++++++++++++++++++-------------------------
>  checkpoint/sys.c           |   55 +++++++++++++++++++++++
>  include/linux/checkpoint.h |    3 +
>  4 files changed, 137 insertions(+), 123 deletions(-)
> 
> diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
> index dbe9e10..1eeb557 100644
> --- a/checkpoint/checkpoint.c
> +++ b/checkpoint/checkpoint.c
> @@ -520,80 +520,51 @@ static int collect_objects(struct ckpt_ctx *ctx)
>       return ret;
>  }
> 
> +struct ckpt_cnt_tasks {
> +     struct ckpt_ctx *ctx;
> +     int nr;
> +};
> +
>  /* count number of tasks in tree (and optionally fill pid's in array) */
> -static int tree_count_tasks(struct ckpt_ctx *ctx)
> +static int __tree_count_tasks(struct task_struct *task, void *data)
>  {
> -     struct task_struct *root;
> -     struct task_struct *task;
> -     struct task_struct *parent;
> -     struct task_struct **tasks_arr = ctx->tasks_arr;
> -     int nr_tasks = ctx->nr_tasks;
> -     int nr = 0;
> +     struct ckpt_cnt_tasks *d = (struct ckpt_cnt_tasks *) data;
> +     struct ckpt_ctx *ctx = d->ctx;
>       int ret;
> 
> -     read_lock(&tasklist_lock);
> -
> -     /* we hold the lock, so root_task->real_parent can't change */
> -     task = ctx->root_task;
> -     if (ctx->root_init) {
> -             /* container-init: start from container parent */
> -             parent = task->real_parent;
> -             root = parent;
> -     } else {
> -             /* non-container-init: start from root task and down */
> -             parent = NULL;
> -             root = task;
> -     }
> -
> -     /* count tasks via DFS scan of the tree */
> -     while (1) {
> -             ctx->tsk = task;  /* (for ckpt_write_err) */
> +     ctx->tsk = task;  /* (for ckpt_write_err) */
> 
> -             /* is this task cool ? */
> -             ret = may_checkpoint_task(ctx, task);
> -             if (ret < 0) {
> -                     nr = ret;
> -                     break;
> -             }
> -             if (tasks_arr) {
> -                     /* unlikely... but if so then try again later */
> -                     if (nr == nr_tasks) {
> -                             nr = -EBUSY; /* cleanup in ckpt_ctx_free() */
> -                             break;
> -                     }
> -                     tasks_arr[nr] = task;
> -                     get_task_struct(task);
> -             }
> -             nr++;
> -             /* if has children - proceed with child */
> -             if (!list_empty(&task->children)) {
> -                     parent = task;
> -                     task = list_entry(task->children.next,
> -                                       struct task_struct, sibling);
> -                     continue;
> -             }
> -             while (task != root) {
> -                     /* if has sibling - proceed with sibling */
> -                     if (!list_is_last(&task->sibling, &parent->children)) {
> -                             task = list_entry(task->sibling.next,
> -                                               struct task_struct, sibling);
> -                             break;
> -                     }
> +     /* is this task cool ? */
> +     ret = may_checkpoint_task(ctx, task);
> +     if (ret < 0)
> +             goto out;
> 
> -                     /* else, trace back to parent and proceed */
> -                     task = parent;
> -                     parent = parent->real_parent;
> +     if (ctx->tasks_arr) {
> +             if (d->nr == ctx->nr_tasks) {  /* unlikely... try again later */
> +                     __ckpt_write_err(ctx, "TE", "bad task count\n", -EBUSY);
> +                     ret = -EBUSY;
> +                     goto out;
>               }
> -             if (task == root)
> -                     break;
> +             ctx->tasks_arr[d->nr++] = task;
> +             get_task_struct(task);
>       }
> +
> +     ret = 1;
> + out:
> +     if (ret < 0)
> +             ckpt_write_err(ctx, "", NULL);
>       ctx->tsk = NULL;
> +     return ret;
> +}
> 
> -     read_unlock(&tasklist_lock);
> +static int tree_count_tasks(struct ckpt_ctx *ctx)
> +{
> +     struct ckpt_cnt_tasks data;
> 
> -     if (nr < 0)
> -             ckpt_write_err(ctx, "", NULL);
> -     return nr;
> +     data.ctx = ctx;
> +     data.nr = 0;
> +
> +     return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data);
>  }
> 
>  /*
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 37454c5..c021eaf 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -804,77 +804,56 @@ static int do_restore_task(void)
>   * Called by the coodinator to set the ->checkpoint_ctx pointer of the
>   * root task and all its descendants.
>   */
> -static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct 
> *root)
> +static int __prepare_descendants(struct task_struct *task, void *data)
>  {
> -     struct task_struct *leader = root;
> -     struct task_struct *parent = NULL;
> -     struct task_struct *task = root;
> -     int nr_pids = 0;
> -     int ret = -EBUSY;
> +     struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
> 
> -     read_lock(&tasklist_lock);
> -     while (1) {
> -             ckpt_debug("consider task %d\n", task_pid_vnr(task));
> -             if (task_ptrace(task) & PT_PTRACED)
> -                     break;
> -             /*
> -              * Set task->checkpoint_ctx of all non-zombie descendants.
> -              * If a descendant already has a ->checkpoint_ctx, it
> -              * must be a coordinator (for a different restart ?) so
> -              * we fail.
> -              *
> -              * Note that own ancestors cannot interfere since they
> -              * won't descend past us, as own ->checkpoint_ctx must
> -              * already be set.
> -              */
> -             if (!task->exit_state) {
> -                     if (!set_task_ctx(task, ctx))
> -                             break;
> -                     ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> -                     wake_up_process(task);
> -                     nr_pids++;
> -             }
> +     ckpt_debug("consider task %d\n", task_pid_vnr(task));
> 
> -             /* if has children - proceed with child */
> -             if (!list_empty(&task->children)) {
> -                     parent = task;
> -                     task = list_entry(task->children.next,
> -                                       struct task_struct, sibling);
> -                     continue;
> -             }
> -             while (task != root) {
> -                     /* if has sibling - proceed with sibling */
> -                     if (!list_is_last(&task->sibling, &parent->children)) {
> -                             task = list_entry(task->sibling.next,
> -                                               struct task_struct, sibling);
> -                             break;
> -                     }
> -
> -                     /* else, trace back to parent and proceed */
> -                     task = parent;
> -                     parent = parent->real_parent;
> -             }
> -             if (task == root) {
> -                     /* in case root task is multi-threaded */
> -                     root = task = next_thread(task);
> -                     if (root == leader) {
> -                             ret = 0;
> -                             break;
> -                     }
> -             }
> +     if (task_ptrace(task) & PT_PTRACED) {
> +             ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
> +             return -EBUSY;
>       }
> -     read_unlock(&tasklist_lock);
> -     ckpt_debug("nr %d/%d  ret %d\n", ctx->nr_pids, nr_pids, ret);
> +
> +     /*
> +      * Set task->checkpoint_ctx of all non-zombie descendants.
> +      * If a descendant already has a ->checkpoint_ctx, it
> +      * must be a coordinator (for a different restart ?) so
> +      * we fail.
> +      *
> +      * Note that own ancestors cannot interfere since they
> +      * won't descend past us, as own ->checkpoint_ctx must
> +      * already be set.
> +      */
> +     if (!task->exit_state) {
> +             if (!set_task_ctx(task, ctx))
> +                     return -EBUSY;
> +             ckpt_debug("prepare task %d\n", task_pid_vnr(task));
> +             wake_up_process(task);
> +             return 1;
> +     }
> +
> +     return 0;
> +}
> +
> +static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct 
> *root)
> +{
> +     int nr_pids;
> +
> +     nr_pids = walk_task_subtree(root, __prepare_descendants, ctx);
> +     ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids);
> +     if (nr_pids < 0)
> +             return nr_pids;
> 
>       /*
>        * Actual tasks count may exceed ctx->nr_pids due of 'dead'
>        * tasks used as place-holders for PGIDs, but not fall short.
>        */
> -     if (!ret && (nr_pids < ctx->nr_pids))
> -             ret = -ESRCH;
> +     if (nr_pids < ctx->nr_pids)
> +             return -ESRCH;
> 
>       atomic_set(&ctx->nr_total, nr_pids);
> -     return ret;
> +     return nr_pids;
>  }
> 
>  static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
> @@ -991,6 +970,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
> pid)
>               return -EBUSY;
>       }
> 
> +     /*
> +      * From now on we are committed to the restart. If anything
> +      * fails, we'll wipe out the entire subtree below us, to
> +      * ensure proper cleanup.
> +      */
> +
>       if (ctx->uflags & RESTART_TASKSELF) {
>               ret = restore_task(ctx);
>               ckpt_debug("restore task: %d\n", ret);
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 77613d7..7604089 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -276,6 +276,61 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx)
>               ckpt_ctx_free(ctx);
>  }
> 
> +int walk_task_subtree(struct task_struct *root,
> +                   int (*func)(struct task_struct *, void *),
> +                   void *data)
> +{
> +
> +     struct task_struct *leader = root;
> +     struct task_struct *parent = NULL;
> +     struct task_struct *task = root;
> +     int total = 0;
> +     int ret;
> +
> +     read_lock(&tasklist_lock);
> +     while (1) {
> +             /* invoke callback on this task */
> +             ret = func(task, data);
> +             if (ret < 0)
> +                     break;
> +
> +             total += ret;
> +
> +             /* if has children - proceed with child */
> +             if (!list_empty(&task->children)) {
> +                     parent = task;
> +                     task = list_entry(task->children.next,
> +                                       struct task_struct, sibling);
> +                     continue;
> +             }
> +
> +             while (task != root) {
> +                     /* if has sibling - proceed with sibling */
> +                     if (!list_is_last(&task->sibling, &parent->children)) {
> +                             task = list_entry(task->sibling.next,
> +                                               struct task_struct, sibling);
> +                             break;
> +                     }
> +
> +                     /* else, trace back to parent and proceed */
> +                     task = parent;
> +                     parent = parent->real_parent;
> +             }
> +
> +             if (task == root) {
> +                     /* in case root task is multi-threaded */
> +                     root = task = next_thread(task);
> +                     if (root == leader)
> +                             break;
> +             }
> +     }
> +     read_unlock(&tasklist_lock);
> +
> +     ckpt_debug("total %d ret %d\n", total, ret);
> +     return (ret < 0 ? ret : total);
> +}
> +
> +
>  /**
>   * sys_checkpoint - checkpoint a container
>   * @pid: pid of the container init(1) process
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index b7f1796..12210e4 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -50,6 +50,9 @@
>        RESTART_FROZEN | \
>        RESTART_GHOST)
> 
> +extern int walk_task_subtree(struct task_struct *task,
> +                          int (*func)(struct task_struct *, void *),
> +                          void *data);
>  extern void exit_checkpoint(struct task_struct *tsk);
> 
>  extern int ckpt_kwrite(struct ckpt_ctx *ctx, void *buf, int count);
> -- 
> 1.6.0.4
> 
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to