Oren Laadan [[email protected]] wrote:

| +/* dump the task_struct of a given task */
| +int cr_write_restart_block(struct cr_ctx *ctx, struct task_struct *t)
| +{
| +     struct cr_hdr h;
| +     struct cr_hdr_restart_block *hh;
| +     struct restart_block *restart_block;
| +     long (*fn)(struct restart_block *);
| +     s64 base, expire = 0;
| +     int ret;
| +
| +     h.type = CR_HDR_RESTART_BLOCK;
| +     h.len = sizeof(*hh);
| +
| +     hh = cr_hbuf_get(ctx, sizeof(*hh));
| +     if (!hh)
| +             return -ENOMEM;
| +     memset(hh, 0, sizeof(*hh));
| +
| +     base = ktime_to_ns(ctx->ktime_beg);
| +     restart_block = &task_thread_info(t)->restart_block;
| +     fn = restart_block->fn;

Can we define another operation in 'struct restart_block' and replace
the following 'fn == xyz' checks with:

        if (!restart_block->checkpoint)
                BUG();

        expire = restart_block->checkpoint(restart_block, hh);

It would touch other files and require #ifdef CONFIG_CHECKPOINT in them,
though.

| +
| +     /* FIX: enumerate clockid_t so we're immune to changes */
| +
| +     if (fn == do_no_restart_syscall) {
| +
| +             hh->fn = CR_RESTART_BLOCK_NONE;
| +             cr_debug("restart_block: non\n");
| +
| +     } else if (fn == hrtimer_nanosleep_restart) {
| +
| +             hh->fn = CR_RESTART_BLOCK_HRTIMER_NANOSLEEP;
| +             hh->arg_0 = restart_block->nanosleep.index;
| +             hh->arg_1 = (unsigned long) restart_block->nanosleep.rmtp;
| +             expire = restart_block->nanosleep.expires;
| +             cr_debug("restart_block: hrtimer expire %lld now %lld\n",
| +                      expire, base);
| +
| +     } else if (fn == posix_cpu_nsleep_restart) {
| +             struct timespec ts;
| +
| +             hh->fn = CR_RESTART_BLOCK_POSIX_CPU_NANOSLEEP;
| +             hh->arg_0 = restart_block->arg0;
| +             hh->arg_1 = restart_block->arg1;
| +             ts.tv_sec = restart_block->arg2;
| +             ts.tv_nsec = restart_block->arg3;
| +             expire = timespec_to_ns(&ts);
| +             cr_debug("restart_block: posix_cpu expire %lld now %lld\n",
| +                      expire, base);
| +
| +#ifdef CONFIG_COMPAT
| +     } else if (fn == compat_nanosleep_restart) {
| +
| +             hh->fn = CR_RESTART_BLOCK_NANOSLEEP;
| +             hh->arg_0 = restart_block->nanosleep.index;
| +             hh->arg_1 = (unsigned long) restart_block->nanosleep.rmtp;
| +             hh->arg_2 = (unsigned long)
| +                     restart_block->nanosleep.compat_rmtp;
| +             expire = restart_block->nanosleep.expires;
| +             cr_debug("restart_block: compat expire %lld now %lld\n",
| +                      expire, base);
| +
| +     } else if (fn == compat_clock_nanosleep_restart) {
| +
| +             hh->fn = CR_RESTART_BLOCK_COMPAT_CLOCK_NANOSLEEP;
| +             hh->arg_0 = restart_block->nanosleep.index;
| +             hh->arg_1 = (unsigned long) restart_block->nanosleep.rmtp;
| +             hh->arg_2 = (unsigned long)
| +                     restart_block->nanosleep.compat_rmtp;
| +             expire = restart_block->nanosleep.expires;
| +             cr_debug("restart_block: compat_clock expire %lld now %lld\n",
| +                      expire, base);
| +
| +#endif
| +     } else if (fn == futex_wait_restart) {
| +
| +             hh->fn = CR_RESTART_BLOCK_FUTEX;
| +             hh->arg_0 = (unsigned long) restart_block->futex.uaddr;
| +             hh->arg_1 = restart_block->futex.val;
| +             hh->arg_2 = restart_block->futex.flags;
| +             hh->arg_3 = restart_block->futex.bitset;
| +             expire = restart_block->futex.time;
| +             cr_debug("restart_block: futex expire %lld now %lld\n",
| +                      expire, base);
| +
| +     } else if (fn == do_restart_poll) {
| +             struct timespec ts;
| +
| +             hh->fn = CR_RESTART_BLOCK_POLL;
| +             hh->arg_0 = (unsigned long) restart_block->poll.ufds;
| +             hh->arg_1 = restart_block->poll.nfds;
| +             hh->arg_2 = restart_block->poll.has_timeout;
| +             ts.tv_sec = restart_block->poll.tv_sec;
| +             ts.tv_nsec = restart_block->poll.tv_nsec;
| +             expire = timespec_to_ns(&ts);
| +             cr_debug("restart_block: poll expire %lld now %lld\n",
| +                      expire, base);
| +
| +     } else {
| +
| +             BUG();
| +
| +     }
| +
| +     /* common to all restart blocks: */
| +     if (base < expire)
| +             hh->arg_4 = (expire - base);
| +
| +     cr_debug("restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
| +              hh->arg_0, hh->arg_1, hh->arg_2, hh->arg_3, hh->arg_4);
| +
| +     ret = cr_write_obj(ctx, &h, hh);
| +     cr_hbuf_put(ctx, sizeof(*hh));
| +
| +     return ret;
| +}
| +
|  /* dump the entire state of a given task */
|  int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
|  {
| diff --git a/checkpoint/restart.c b/checkpoint/restart.c
| index 234cc92..daaaeec 100644
| --- a/checkpoint/restart.c
| +++ b/checkpoint/restart.c
| @@ -264,18 +264,16 @@ int do_restart(struct cr_ctx *ctx, pid_t pid)
| 
|       ret = cr_ctx_restart(ctx, pid);
|       if (ret < 0)
| -             goto out;
| +             return ret;
|       ret = cr_read_head(ctx);
|       if (ret < 0)
| -             goto out;
| +             return ret;
|       ret = cr_read_task(ctx);
|       if (ret < 0)
| -             goto out;
| +             return ret;
|       ret = cr_read_tail(ctx);
|       if (ret < 0)
| -             goto out;
| +             return ret;

Nit: would this patch be simpler if we left the 'goto out' lines above
and added the 'return cr_retval_restart(ctx);' here (i.e., just before the
'out:' label)?

| 
| -     /* on success, adjust the return value if needed [TODO] */
| - out:
| -     return ret;
| +     return cr_retval_restart(ctx);
|  }
| diff --git a/checkpoint/rstr_task.c b/checkpoint/rstr_task.c
| index 93c86ab..52206d8 100644
| --- a/checkpoint/rstr_task.c
| +++ b/checkpoint/rstr_task.c
| @@ -9,6 +9,9 @@
|   */
| 
|  #include <linux/sched.h>
| +#include <linux/posix-timers.h>
| +#include <linux/futex.h>
| +#include <linux/poll.h>
|  #include <linux/checkpoint.h>
|  #include <linux/checkpoint_hdr.h>
| 
| @@ -52,6 +55,115 @@ static int cr_read_task_struct(struct cr_ctx *ctx)
|       return ret;
|  }
| 
| +int cr_read_restart_block(struct cr_ctx *ctx)
| +{
| +     struct cr_hdr_restart_block *hh;
| +     struct restart_block restart_block;
| +     struct timespec ts;
| +     clockid_t clockid;
| +     s64 expire;
| +     int ret;
| +
| +     hh = cr_hbuf_get(ctx, sizeof(*hh));
| +     if (!hh)
| +             return -ENOMEM;
| +
| +     ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_RESTART_BLOCK);
| +     if (ret < 0)
| +             goto out;
| +
| +     expire = ktime_to_ns(ctx->ktime_beg) + hh->arg_4;
| +     restart_block.fn = NULL;
| +
| +     cr_debug("restart_block: expire %lld begin %lld\n",
| +              expire, ktime_to_ns(ctx->ktime_beg));
| +     cr_debug("restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
| +              hh->arg_0, hh->arg_1, hh->arg_2, hh->arg_3, hh->arg_4);
| +
| +     switch (hh->fn) {
| +     case CR_RESTART_BLOCK_NONE:
| +             restart_block.fn = do_no_restart_syscall;
| +             break;
| +     case CR_RESTART_BLOCK_HRTIMER_NANOSLEEP:
| +             clockid = hh->arg_0;
| +             if (clockid < 0 || invalid_clockid(clockid))
| +                     break;
| +             restart_block.fn = hrtimer_nanosleep_restart;
| +             restart_block.nanosleep.index = clockid;
| +             restart_block.nanosleep.rmtp =
| +                     (struct timespec __user *) (unsigned long) hh->arg_1;
| +             restart_block.nanosleep.expires = expire;
| +             break;
| +     case CR_RESTART_BLOCK_POSIX_CPU_NANOSLEEP:
| +             clockid = hh->arg_0;
| +             if (clockid < 0 || invalid_clockid(clockid))
| +                     break;
| +             restart_block.fn = posix_cpu_nsleep_restart;
| +             restart_block.arg0 = clockid;
| +             restart_block.arg1 = hh->arg_1;
| +             ts = ns_to_timespec(expire);
| +             restart_block.arg2 = ts.tv_sec;
| +             restart_block.arg3 = ts.tv_nsec;
| +             break;
| +#ifdef CONFIG_COMPAT
| +     case CR_RESTART_BLOCK_COMPAT_NANOSLEEP:
| +             clockid = hh->arg_0;
| +             if (clockid < 0 || invalid_clockid(clockid))
| +                     break;
| +             restart_block.fn = compat_nanosleep_restart;
| +             restart_block.nanosleep.index = clockid;
| +             restart_block.nanosleep.rmtp =
| +                     (struct timespec __user *) (unsigned long) hh->arg_1;
| +             restart_block.nanosleep.compat_rmtp =
| +                     (struct compat_timespec __user *)
| +                             (unsigned long) hh->arg_2;
| +             resatrt_block.nanosleep.expires = expire;
| +             break;
| +     case CR_RESTART_BLOCK_COMPAT_CLOCK_NANOSLEEP:
| +             clockid = hh->arg_0;
| +             if (clockid < 0 || invalid_clockid(clockid))
| +                     break;
| +             restart_block.fn = compat_clock_nanosleep_restart;
| +             restart_block.nanosleep.index = clockid;
| +             restart_block.nanosleep.rmtp =
| +                     (struct timespec __user *) (unsigned long) hh->arg_1;
| +             restart_block.nanosleep.compat_rmtp =
| +                     (struct compat_timespec __user *)
| +                             (unsigned long) hh->arg_2;
| +             resatrt_block.nanosleep.expires = expire;
| +             break;
| +#endif
| +     case CR_RESTART_BLOCK_FUTEX:
| +             restart_block.fn = futex_wait_restart;
| +             restart_block.futex.uaddr = (u32 *) (unsigned long) hh->arg_0;
| +             restart_block.futex.val = hh->arg_1;
| +             restart_block.futex.flags = hh->arg_2;
| +             restart_block.futex.bitset = hh->arg_3;
| +             restart_block.futex.time = expire;
| +             break;
| +     case CR_RESTART_BLOCK_POLL:
| +             restart_block.fn = do_restart_poll;
| +             restart_block.poll.ufds =
| +                     (struct pollfd __user *) (unsigned long) hh->arg_0;
| +             restart_block.poll.nfds = hh->arg_1;
| +             restart_block.poll.has_timeout = hh->arg_2;
| +             ts = ns_to_timespec(expire);
| +             restart_block.poll.tv_sec = ts.tv_sec;
| +             restart_block.poll.tv_nsec = ts.tv_nsec;
| +             break;
| +     default:
| +             break;
| +     }
| +
| +     if (restart_block.fn)
| +             task_thread_info(current)->restart_block = restart_block;
| +     else
| +             ret = -EINVAL;
| + out:
| +     cr_hbuf_put(ctx, sizeof(*hh));
| +     return ret;
| +}
| +
|  /* read the entire state of the current task */
|  int cr_read_task(struct cr_ctx *ctx)
|  {
| @@ -76,6 +188,5 @@ int cr_read_task(struct cr_ctx *ctx)
|       ret = cr_read_cpu(ctx);
|       cr_debug("cpu: ret %d\n", ret);
|   out:
| -
|       return ret;
|  }
| diff --git a/checkpoint/sys.c b/checkpoint/sys.c
| index 8652c5c..863cb63 100644
| --- a/checkpoint/sys.c
| +++ b/checkpoint/sys.c
| @@ -186,6 +186,7 @@ static struct cr_ctx *cr_ctx_alloc(int fd, unsigned long flags)
|               return ERR_PTR(-ENOMEM);
| 
|       ctx->flags = flags;
| +     ctx->ktime_beg = ktime_get();
| 
|       INIT_LIST_HEAD(&ctx->pgarr_list);
|       INIT_LIST_HEAD(&ctx->pgarr_pool);
| @@ -203,6 +204,7 @@ static struct cr_ctx *cr_ctx_alloc(int fd, unsigned long flags)
|       if (cr_objhash_alloc(ctx) < 0)
|               goto err;
| 
| +
|       return ctx;
| 
|   err:
| diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
| index 3a514fc..a94ce98 100644
| --- a/include/linux/checkpoint.h
| +++ b/include/linux/checkpoint.h
| @@ -18,6 +18,8 @@
|  struct cr_ctx {
|       int crid;               /* unique checkpoint id */
| 
| +     ktime_t ktime_beg;      /* checkpoint start time */

Nit: spell out 'begin' fully? :-)

| +
|       pid_t root_pid;         /* container identifier */
|       struct task_struct *root_task;  /* container root task */
|       struct nsproxy *root_nsproxy;   /* container root nsproxy */
| @@ -87,10 +89,12 @@ extern struct file *cr_read_open_fname(struct cr_ctx *ctx,
|                                      int flags, int mode);
| 
|  extern int cr_write_task(struct cr_ctx *ctx, struct task_struct *t);
| +extern int cr_write_restart_block(struct cr_ctx *ctx, struct task_struct *t);
|  extern int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t);
|  extern int cr_write_fd_table(struct cr_ctx *ctx, struct task_struct *t);
| 
|  extern int cr_read_task(struct cr_ctx *ctx);
| +extern int cr_read_restart_block(struct cr_ctx *ctx);
|  extern int cr_read_mm(struct cr_ctx *ctx);
|  extern int cr_read_fd_table(struct cr_ctx *ctx);
| 
| diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
| index 30e649b..8821a30 100644
| --- a/include/linux/checkpoint_hdr.h
| +++ b/include/linux/checkpoint_hdr.h
| @@ -45,6 +45,7 @@ enum {
|       CR_HDR_FNAME,
| 
|       CR_HDR_TASK = 101,
| +     CR_HDR_RESTART_BLOCK,
|       CR_HDR_THREAD,
|       CR_HDR_CPU,
| 
| @@ -97,6 +98,25 @@ struct cr_hdr_task {
|       __u32 task_comm_len;
|  } __attribute__((aligned(8)));
| 
| +struct cr_hdr_restart_block {
| +     __u64 fn;

Nit: Is this a function type rather than the function itself? The name is a
bit misleading, considering restart_block->fn refers to a function.

| +     __u64 arg_0;
| +     __u64 arg_1;
| +     __u64 arg_2;
| +     __u64 arg_3;
| +     __u64 arg_4;
| +} __attribute__((aligned(8)));
| +
| +enum restart_block_type {
| +     CR_RESTART_BLOCK_NONE = 1,
| +     CR_RESTART_BLOCK_HRTIMER_NANOSLEEP,
| +     CR_RESTART_BLOCK_POSIX_CPU_NANOSLEEP,
| +     CR_RESTART_BLOCK_COMPAT_NANOSLEEP,
| +     CR_RESTART_BLOCK_COMPAT_CLOCK_NANOSLEEP,
| +     CR_RESTART_BLOCK_POLL,
| +     CR_RESTART_BLOCK_FUTEX
| +};
| +
|  struct cr_hdr_mm {
|       __s32 objref;           /* identifier for shared objects */
|       __u32 map_count;
| -- 
| 1.5.4.3
| 
| _______________________________________________
| Containers mailing list
| [email protected]
| https://lists.linux-foundation.org/mailman/listinfo/containers
_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to