Quoting Oren Laadan ([email protected]):
> Instead of playing tricks with xchg() of task->checkpoint_ctx, use the
> task_{lock,unlock} to protect changes to that pointer. This simplifies
> the logic since we no longer need to check for races (and old_ctx).
> 
> The remaining changes include cleanup, and unification of common code
> to handle errors during restart, and some debug statements.
> 
> Signed-off-by: Oren Laadan <[email protected]>
> ---
>  checkpoint/restart.c       |  170 
> ++++++++++++++++++++++----------------------
>  checkpoint/sys.c           |    6 +-
>  include/linux/checkpoint.h |    2 +-
>  kernel/fork.c              |    6 +-
>  4 files changed, 92 insertions(+), 92 deletions(-)
> 
> diff --git a/checkpoint/restart.c b/checkpoint/restart.c
> index 543b380..5d936cf 100644
> --- a/checkpoint/restart.c
> +++ b/checkpoint/restart.c
> @@ -529,17 +529,54 @@ static inline int is_task_active(struct ckpt_ctx *ctx, 
> pid_t pid)
> 
>  static inline void _restore_notify_error(struct ckpt_ctx *ctx, int errno)
>  {
> -     ckpt_set_ctx_error(ctx, errno);
> -     complete(&ctx->complete);
> +     /* first to fail: notify everyone (racy but harmless) */
> +     if (!ckpt_test_ctx_error(ctx)) {
> +             ckpt_debug("setting restart error %d\n", errno); \
> +             ckpt_set_ctx_error(ctx, errno);
> +             complete(&ctx->complete);
> +             wake_up_all(&ctx->waitq);
> +             wake_up_all(&ctx->ghostq);
> +     }
>  }
> 
>  /* Need to call ckpt_debug such that it will get the correct source location 
> */
>  #define restore_notify_error(ctx, errno) \
>  do { \
> -     ckpt_debug("ctx root pid %d err %d", ctx->root_pid, errno); \
> +     ckpt_debug("restart error %d, root pid %d\n", errno, ctx->root_pid); \
>       _restore_notify_error(ctx, errno); \
>  } while(0)
> 

Maybe make a note that these can't be called under
write_lock_irq(&tasklist_lock)?  Also, update the comment above
task_lock() to add checkpoint_ctx to the list of things protected
by it.

> +static inline struct ckpt_ctx *get_task_ctx(struct task_struct *task)
> +{
> +     struct ckpt_ctx *ctx;
> +
> +     task_lock(task);
> +     ctx = ckpt_ctx_get(task->checkpoint_ctx);
> +     task_unlock(task);
> +     return ctx;
> +}
> +
> +/* returns 1 on success, 0 otherwise */

This works, but it's more confusing than it needs to be.  I think using
two helpers, 'set_task_ctx(tsk, ctx)' and 'clear_task-ctx(tsk)', where
set_task_ctx always bails if task->checkpoint_ctx is set, would be much
easier to read.

But

Acked-by: Serge Hallyn <[email protected]>

> +static int set_task_ctx(struct task_struct *task, struct ckpt_ctx *ctx)
> +{
> +     struct ckpt_ctx *old;
> +     int ret = 1;
> +
> +     task_lock(task);
> +     if (!ctx || !task->checkpoint_ctx) {
> +             old = task->checkpoint_ctx;
> +             task->checkpoint_ctx = ckpt_ctx_get(ctx);
> +     } else {
> +             ckpt_debug("task %d has prior checkpoint_ctx\n",
> +                        task_pid_vnr(task));
> +             old = NULL;
> +             ret = 0;
> +     }
> +     task_unlock(task);
> +     ckpt_ctx_put(old);
> +     return ret;
> +}
> +
>  static void restore_task_done(struct ckpt_ctx *ctx)
>  {
>       if (atomic_dec_and_test(&ctx->nr_total))
> @@ -570,6 +607,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
>               rcu_read_unlock();
> 
>               if (!task) {
> +                     ckpt_debug("could not find task %d\n", pid);
>                       restore_notify_error(ctx, -ESRCH);
>                       return -ESRCH;
>               }
> @@ -590,12 +628,10 @@ static int wait_task_active(struct ckpt_ctx *ctx)
>       ret = wait_event_interruptible(ctx->waitq,
>                                      is_task_active(ctx, pid) ||
>                                      ckpt_test_ctx_error(ctx));
> -     ckpt_debug("active %d < %d (ret %d)\n",
> -                ctx->active_pid, ctx->nr_pids, ret);
> -     if (!ret && ckpt_test_ctx_error(ctx)) {
> -             force_sig(SIGKILL, current);
> +     ckpt_debug("active %d < %d (ret %d, errno %d)\n",
> +                ctx->active_pid, ctx->nr_pids, ret, ctx->errno);
> +     if (!ret && ckpt_test_ctx_error(ctx))
>               ret = -EBUSY;
> -     }
>       return ret;
>  }
> 
> @@ -603,17 +639,17 @@ static int wait_task_sync(struct ckpt_ctx *ctx)
>  {
>       ckpt_debug("pid %d syncing\n", task_pid_vnr(current));
>       wait_event_interruptible(ctx->waitq, ckpt_test_ctx_complete(ctx));
> -     if (ckpt_test_ctx_error(ctx)) {
> -             force_sig(SIGKILL, current);
> +     ckpt_debug("task sync done (errno %d)\n", ctx->errno);
> +     if (ckpt_test_ctx_error(ctx))
>               return -EBUSY;
> -     }
>       return 0;
>  }
> 
> +/* grabs a reference to the @ctx on success; caller should free */
>  static struct ckpt_ctx *wait_checkpoint_ctx(void)
>  {
>       DECLARE_WAIT_QUEUE_HEAD(waitq);
> -     struct ckpt_ctx *ctx, *old_ctx;
> +     struct ckpt_ctx *ctx;
>       int ret;
> 
>       /*
> @@ -621,32 +657,15 @@ static struct ckpt_ctx *wait_checkpoint_ctx(void)
>        * reference to its restart context.
>        */
>       ret = wait_event_interruptible(waitq, current->checkpoint_ctx);
> -     if (ret < 0)
> +     if (ret < 0) {
> +             ckpt_debug("wait_checkpoint_ctx: failed (%d)\n", ret);
>               return ERR_PTR(ret);
> +     }
> 
> -     ctx = xchg(&current->checkpoint_ctx, NULL);
> -     if (!ctx)
> +     ctx = get_task_ctx(current);
> +     if (!ctx) {
> +             ckpt_debug("wait_checkpoint_ctx: checkpoint_ctx missing\n");
>               return ERR_PTR(-EAGAIN);
> -     ckpt_ctx_get(ctx);
> -
> -     /*
> -      * Put the @ctx back on our task_struct. If an ancestor tried
> -      * to prepare_descendants() on us (although extremly unlikely)
> -      * we will encounter the ctx that he xchg()ed there and bail.
> -      */
> -     old_ctx = xchg(&current->checkpoint_ctx, ctx);
> -     if (old_ctx) {
> -             ckpt_debug("self-set of checkpoint_ctx failed\n");
> -
> -             /* alert coordinator of unexpected ctx */
> -             restore_notify_error(old_ctx, -EAGAIN);
> -             ckpt_ctx_put(old_ctx);
> -
> -             /* alert our coordinator that we bail */
> -             restore_notify_error(ctx, -EAGAIN);
> -             ckpt_ctx_put(ctx);
> -
> -             ctx = ERR_PTR(-EAGAIN);
>       }
> 
>       return ctx;
> @@ -655,6 +674,7 @@ static struct ckpt_ctx *wait_checkpoint_ctx(void)
>  static int do_ghost_task(void)
>  {
>       struct ckpt_ctx *ctx;
> +     int ret;
> 
>       ctx = wait_checkpoint_ctx();
>       if (IS_ERR(ctx))
> @@ -662,9 +682,11 @@ static int do_ghost_task(void)
> 
>       current->flags |= PF_RESTARTING;
> 
> -     wait_event_interruptible(ctx->ghostq,
> -                              all_tasks_activated(ctx) ||
> -                              ckpt_test_ctx_error(ctx));
> +     ret = wait_event_interruptible(ctx->ghostq,
> +                                    all_tasks_activated(ctx) ||
> +                                    ckpt_test_ctx_error(ctx));
> +     if (ret < 0)
> +             restore_notify_error(ctx, ret);
> 
>       current->exit_signal = -1;
>       ckpt_ctx_put(ctx);
> @@ -675,7 +697,7 @@ static int do_ghost_task(void)
> 
>  static int do_restore_task(void)
>  {
> -     struct ckpt_ctx *ctx, *old_ctx;
> +     struct ckpt_ctx *ctx;
>       int zombie, ret;
> 
>       ctx = wait_checkpoint_ctx();
> @@ -713,18 +735,11 @@ static int do_restore_task(void)
>       restore_task_done(ctx);
>       ret = wait_task_sync(ctx);
>   out:
> -     old_ctx = xchg(&current->checkpoint_ctx, NULL);
> -     if (old_ctx)
> -             ckpt_ctx_put(old_ctx);
> -
> -     /* if we're first to fail - notify others */
> -     if (ret < 0 && !ckpt_test_ctx_error(ctx)) {
> +     if (ret < 0)
>               restore_notify_error(ctx, ret);
> -             wake_up_all(&ctx->waitq);
> -             wake_up_all(&ctx->ghostq);
> -     }
> 
>       current->flags &= ~PF_RESTARTING;
> +     set_task_ctx(current, NULL);
>       ckpt_ctx_put(ctx);
>       return ret;
>  }
> @@ -742,17 +757,14 @@ static int prepare_descendants(struct ckpt_ctx *ctx, 
> struct task_struct *root)
>       struct task_struct *leader = root;
>       struct task_struct *parent = NULL;
>       struct task_struct *task = root;
> -     struct ckpt_ctx *old_ctx;
>       int nr_pids = 0;
> -     int ret = 0;
> +     int ret = -EBUSY;
> 
>       read_lock(&tasklist_lock);
>       while (1) {
>               ckpt_debug("consider task %d\n", task_pid_vnr(task));
> -             if (task_ptrace(task) & PT_PTRACED) {
> -                     ret = -EBUSY;
> +             if (task_ptrace(task) & PT_PTRACED)
>                       break;
> -             }
>               /*
>                * Set task->checkpoint_ctx of all non-zombie descendants.
>                * If a descendant already has a ->checkpoint_ctx, it
> @@ -764,14 +776,8 @@ static int prepare_descendants(struct ckpt_ctx *ctx, 
> struct task_struct *root)
>                * already be set.
>                */
>               if (!task->exit_state) {
> -                     ckpt_ctx_get(ctx);
> -                     old_ctx = xchg(&task->checkpoint_ctx, ctx);
> -                     if (old_ctx) {
> -                             ckpt_debug("bad task %d\n",task_pid_vnr(task));
> -                             ckpt_ctx_put(old_ctx);
> -                             ret = -EAGAIN;
> +                     if (!set_task_ctx(task, ctx))
>                               break;
> -                     }
>                       ckpt_debug("prepare task %d\n", task_pid_vnr(task));
>                       wake_up_process(task);
>                       nr_pids++;
> @@ -799,8 +805,10 @@ static int prepare_descendants(struct ckpt_ctx *ctx, 
> struct task_struct *root)
>               if (task == root) {
>                       /* in case root task is multi-threaded */
>                       root = task = next_thread(task);
> -                     if (root == leader)
> +                     if (root == leader) {
> +                             ret = 0;
>                               break;
> +                     }
>               }
>       }
>       read_unlock(&tasklist_lock);
> @@ -829,8 +837,7 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
>               return ret;
> 
>       ret = wait_for_completion_interruptible(&ctx->complete);
> -
> -     ckpt_debug("final sync kflags %#lx\n", ctx->kflags);
> +     ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
>       /*
>        * Usually when restart fails, the restarting task will first
>        * set @ctx->errno before waking us up. In the rare event that
> @@ -899,13 +906,14 @@ static int init_restart_ctx(struct ckpt_ctx *ctx, pid_t 
> pid)
> 
>  static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
>  {
> -     struct ckpt_ctx *old_ctx;
>       int ret;
> 
>       ret = restore_read_header(ctx);
> +     ckpt_debug("restore header: %d\n", ret);
>       if (ret < 0)
>               return ret;
>       ret = restore_read_tree(ctx);
> +     ckpt_debug("restore tree: %d\n", ret);
>       if (ret < 0)
>               return ret;
> 
> @@ -920,45 +928,42 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
> pid)
>        * Populate own ->checkpoint_ctx: if an ancestor attempts to
>        * prepare_descendants() on us, it will fail. Furthermore,
>        * that ancestor won't proceed deeper to interfere with our
> -      * descendants that are restarting (e.g. by xchg()ing their
> -      * ->checkpoint_ctx pointer temporarily).
> +      * descendants that are restarting.
>        */
> -     ckpt_ctx_get(ctx);
> -     old_ctx = xchg(&current->checkpoint_ctx, ctx);
> -     if (old_ctx) {
> +     if (!set_task_ctx(current, ctx)) {
>               /*
>                * We are a bad-behaving descendant: an ancestor must
> -              * have done prepare_descendants() on us as part of a
> -              * restart. Oh, well ... alert ancestor (coordinator)
> -              * with an error on @old_ctx.
> +              * have prepare_descendants() us as part of a restart.
>                */
> -             ckpt_debug("bad behaving checkpoint_ctx\n");
> -             restore_notify_error(old_ctx, -EBUSY);
> -             ckpt_ctx_put(old_ctx);
> -             ret = -EBUSY;
> -             goto out;
> +             ckpt_debug("coord already has checkpoint_ctx\n");
> +             return -EBUSY;
>       }
> 
>       if (ctx->uflags & RESTART_TASKSELF) {
>               ret = restore_task(ctx);
> +             ckpt_debug("restore task: %d\n", ret);
>               if (ret < 0)
>                       goto out;
>       } else {
>               /* prepare descendants' t->checkpoint_ctx point to coord */
>               ret = prepare_descendants(ctx, ctx->root_task);
> +             ckpt_debug("restore prepare: %d\n", ret);
>               if (ret < 0)
>                       goto out;
>               /* wait for all other tasks to complete do_restore_task() */
>               ret = wait_all_tasks_finish(ctx);
> +             ckpt_debug("restore finish: %d\n", ret);
>               if (ret < 0)
>                       goto out;
>       }
> 
>       ret = deferqueue_run(ctx->deferqueue);  /* run deferred work */
> +     ckpt_debug("restore deferqueue: %d\n", ret);
>       if (ret < 0)
>               goto out;
> 
>       ret = restore_read_tail(ctx);
> +     ckpt_debug("restore tail: %d\n", ret);
>       if (ret < 0)
>               goto out;
> 
> @@ -974,14 +979,8 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t 
> pid)
> 
>       if (!(ctx->uflags & RESTART_TASKSELF))
>               wake_up_all(&ctx->waitq);
> -     /*
> -      * If an ancestor attempts to prepare_descendants() on us, it
> -      * xchg()s our ->checkpoint_ctx, and free it. Our @ctx will,
> -      * instead, point to the ctx that said ancestor placed.
> -      */
> -     ctx = xchg(&current->checkpoint_ctx, NULL);
> -     ckpt_ctx_put(ctx);
> 
> +     set_task_ctx(current, NULL);
>       return ret;
>  }
> 
> @@ -1070,6 +1069,7 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, 
> unsigned long flags)
>               }
>       }
> 
> +     ckpt_debug("sys_restart returns %ld\n", ret);
>       return ret;
>  }
> 
> @@ -1082,7 +1082,7 @@ void exit_checkpoint(struct task_struct *tsk)
>       struct ckpt_ctx *ctx;
> 
>       /* no one else will touch this, because @tsk is dead already */
> -     ctx = xchg(&tsk->checkpoint_ctx, NULL);
> +     ctx = tsk->checkpoint_ctx;
> 
>       /* restarting zombies will activate next task in restart */
>       if (tsk->flags & PF_RESTARTING) {
> diff --git a/checkpoint/sys.c b/checkpoint/sys.c
> index 76a3fa9..77613d7 100644
> --- a/checkpoint/sys.c
> +++ b/checkpoint/sys.c
> @@ -263,9 +263,11 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned 
> long uflags,
>       return ERR_PTR(err);
>  }
> 
> -void ckpt_ctx_get(struct ckpt_ctx *ctx)
> +struct ckpt_ctx *ckpt_ctx_get(struct ckpt_ctx *ctx)
>  {
> -     atomic_inc(&ctx->refcount);
> +     if (ctx)
> +             atomic_inc(&ctx->refcount);
> +     return ctx;
>  }
> 
>  void ckpt_ctx_put(struct ckpt_ctx *ctx)
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 5294a96..b7f1796 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -134,7 +134,7 @@ extern int ckpt_obj_insert(struct ckpt_ctx *ctx, void 
> *ptr, int objref,
>                          enum obj_type type);
>  extern int ckpt_obj_reserve(struct ckpt_ctx *ctx);
> 
> -extern void ckpt_ctx_get(struct ckpt_ctx *ctx);
> +extern struct ckpt_ctx *ckpt_ctx_get(struct ckpt_ctx *ctx);
>  extern void ckpt_ctx_put(struct ckpt_ctx *ctx);
> 
>  extern long do_checkpoint(struct ckpt_ctx *ctx, pid_t pid);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 57118e4..0e226f5 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1188,10 +1188,8 @@ static struct task_struct *copy_process(unsigned long 
> clone_flags,
> 
>  #ifdef CONFIG_CHECKPOINT
>       /* If parent is restarting, child should be too */
> -     if (unlikely(current->checkpoint_ctx)) {
> -             p->checkpoint_ctx = current->checkpoint_ctx;
> -             ckpt_ctx_get(p->checkpoint_ctx);
> -     }
> +     if (unlikely(current->checkpoint_ctx))
> +             p->checkpoint_ctx = ckpt_ctx_get(current->checkpoint_ctx);
>  #endif
>       /*
>        * The task hasn't been attached yet, so its cpus_allowed mask will
> -- 
> 1.6.0.4
> 
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to