Matt Helsley wrote:
> Support checkpoint/restart of timers specified via timerfd. Checkpoint
> essentially does the timerfd_gettime() syscall and saves the expired flags
> and tick count. This ensures there will be no lost expirations/ticks
> between checkpoint restart.
> 
> This should largely work as expected since the time returned from gettime
> is always relative.
> 
> However, like any time-sensitive state, timerfds set with TFD_TIMER_ABSTIME
> may expire/tick at the "wrong time" because that time has long since passed.
> Short of introducing time namespaces there's almost nothing that can be done
> to checkpoint these uses of timerfd. Would it make more sense to mark timerfds
> which were (re)set with TFD_TIMER_ABSTIME and refuse to checkpoint them rather
> than deliver a "best-effort" expiration/tick?
> 
> Signed-off-by: Matt Helsley <[email protected]>

[...]

> +
> +#ifdef CONFIG_CHECKPOINT
> +static int timerfd_checkpoint(struct ckpt_ctx *ckpt_ctx, struct file *file)
> +{
> +     struct itimerspec kotmr;
> +     struct timerfd_ctx *ctx;
> +     struct ckpt_hdr_file_timerfd *h;
> +     int ret = -ENOMEM;
> +
> +     h = ckpt_hdr_get_type(ckpt_ctx, sizeof(*h), CKPT_HDR_FILE);
> +     if (!h)
> +             return -ENOMEM;
> +     h->common.f_type = CKPT_FILE_EVENTFD;
> +     ret = checkpoint_file_common(ckpt_ctx, file, &h->common);
> +     if (ret < 0)
> +             goto out;
> +
> +     ctx = file->private_data;
> +     /*
> +      * There is no way to recover the hrtimer mode or flags
> +      * used during create or settime. Rely on the timerfd state
> +      * to reflect those.
> +      */
> +     spin_lock_irq(&ctx->wqh.lock);
> +     h->expired = ctx->expired; /* must precede __timerfd_gettime() */
> +     h->ticks = ctx->ticks; /* must precede __timerfd_gettime() */
> +     h->clockid = ctx->clockid;
> +     __timerfd_gettime(ctx, &kotmr);
> +     spin_unlock_irq(&ctx->wqh.lock);

Can this have a race with the signal c/r code ?  E.g. first we
record a timer which is about to expire, then it expires, and
then we record the pending signals which include a signal from
this timer:  the restarted task will have the signal pending
and soon after the timer will expire and possibly generate a
second signal ?

> +     ckpt_copy_itimerspec(CKPT_CPT, &h->spec, &kotmr);
> +     ret = ckpt_write_obj(ckpt_ctx, &h->common.h);
> +out:
> +     ckpt_hdr_put(ckpt_ctx, h);
> +     return ret;
> +}
> +
> +struct file *timerfd_restore(struct ckpt_ctx *ckpt_ctx,
> +                          struct ckpt_hdr_file *ptr)
> +{
> +     struct itimerspec dotmr; /* dummy */
> +     struct itimerspec kitmr;
> +     struct ckpt_hdr_file_timerfd *h = (struct ckpt_hdr_file_timerfd *)ptr;
> +     struct file *file;
> +     struct timerfd_ctx *ctx;
> +     int ufd, ret;
> +
> +     /* Known: CKPT_HDR_FILE and f_type == CKPT_FILE_TIMERFD */
> +     if ((h->common.h.len != sizeof(*h)) ||
> +         (h->common.f_flags & ~TFD_SHARED_FCNTL_FLAGS))
> +             return ERR_PTR(-EINVAL);
> +
> +     /* Fail early if the timespecs are bad */
> +     ckpt_copy_itimerspec(CKPT_RST, &h->spec, &kitmr);
> +     if (!timespec_valid(&kitmr.it_value) ||
> +         !timespec_valid(&kitmr.it_interval))
> +             return ERR_PTR(-EINVAL);
> +
> +     ufd = sys_timerfd_create(h->clockid,
> +                              h->common.f_flags & TFD_SHARED_FCNTL_FLAGS);
> +     if (ufd < 0)
> +             return ERR_PTR(ufd);
> +     file = fget(ufd);
> +     sys_close(ufd);
> +     if (!file)
> +             return ERR_PTR(-EBUSY);
> +
> +     ret = restore_file_common(ckpt_ctx, file, &h->common);
> +     if (ret < 0) {
> +             fput(file);
> +             return ERR_PTR(ret);
> +     }
> +
> +     ctx = file->private_data;
> +#define TFD_TIMER_RELTIME 0
> +     ret = sys_timerfd_settime(ufd, TFD_TIMER_RELTIME,
> +                               &kitmr, &dotmr);
> +     /* Is there a race here between settime and spin_lock()? */
> +     spin_lock_irq(&ctx->wqh.lock);
> +     ctx->expired = h->expired;
> +     ctx->ticks = h->ticks;
> +     spin_unlock_irq(&ctx->wqh.lock);
> +     return file;
> +}
> +#else
> +#define timerfd_checkpoint NULL
> +#endif
> +
>  static const struct file_operations timerfd_fops = {
>       .release        = timerfd_release,
>       .poll           = timerfd_poll,
>       .read           = timerfd_read,
> +     .checkpoint     = timerfd_checkpoint,
>  };
>  
>  static struct file *timerfd_fget(int fd)
> @@ -275,19 +377,10 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct 
> itimerspec __user *, otmr)
>       if (IS_ERR(file))
>               return PTR_ERR(file);
>       ctx = file->private_data;
> -
>       spin_lock_irq(&ctx->wqh.lock);
> -     if (ctx->expired && ctx->tintv.tv64) {
> -             ctx->expired = 0;
> -             ctx->ticks +=
> -                     hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
> -             hrtimer_restart(&ctx->tmr);
> -     }
> -     kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
> -     kotmr.it_interval = ktime_to_timespec(ctx->tintv);
> +     __timerfd_gettime(ctx, &kotmr);
>       spin_unlock_irq(&ctx->wqh.lock);
>       fput(file);
> -
>       return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
>  }
>  
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index dfcb59b..f429386 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -321,6 +321,17 @@ static inline int ckpt_validate_errno(int errno)
>                       memcpy(LIVE, SAVE, count * sizeof(*SAVE));      \
>       } while (0)
>  
> +struct ckpt_itimerspec;
> +struct itimerspec;
> +static inline void ckpt_copy_itimerspec(int op, struct ckpt_itimerspec *h,
> +                                     struct itimerspec *tmr)
> +{
> +     CKPT_COPY(op, h->interval.tv_sec,  tmr->it_interval.tv_sec);
> +     CKPT_COPY(op, h->interval.tv_nsec, tmr->it_interval.tv_nsec);
> +     CKPT_COPY(op, h->value.tv_sec,     tmr->it_value.tv_sec);
> +     CKPT_COPY(op, h->value.tv_nsec,    tmr->it_value.tv_nsec);
> +}
> +

Is there a reason to place this here and not in timerfd source file ?

>  /* debugging flags */
>  #define CKPT_DBASE   0x1             /* anything */
>  #define CKPT_DSYS    0x2             /* generic (system) */

[...]

Oren.

_______________________________________________
Containers mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/containers

_______________________________________________
Devel mailing list
[email protected]
https://openvz.org/mailman/listinfo/devel

Reply via email to