These two members represent the monotonic and bootbased clocks for a container's uptime. When a container is in the suspended state (or is being moved to another node) we treat the monotonic and bootbased clocks as being stopped, so we need to account for the delta time on restore and adjust the members named in the subject.
Moreover, these timestamps are involved in posix-timers setup, so once an application tries to set up monotonic clocks after the restore (with an absolute time specification) we adjust the values as well. An application which migrates a container must fetch the current settings from /sys/fs/cgroup/ve/$VE/ve.real_start_timespec and /sys/fs/cgroup/ve/$VE/ve.start_timespec, then write them back on restore. https://jira.sw.ru/browse/PSBM-41311 https://jira.sw.ru/browse/PSBM-41406 v2: - use clock_[monotonic|bootbased] for cgroup entry names instead Original-by: Andrew Vagin <[email protected]> Signed-off-by: Cyrill Gorcunov <[email protected]> CC: Vasily Averin <[email protected]> CC: Andrey Vagin <[email protected]> CC: Pavel Emelianov <[email protected]> CC: Vladimir Davydov <[email protected]> CC: Konstantin Khorenko <[email protected]> --- kernel/ve/ve.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) Index: linux-pcs7.git/kernel/ve/ve.c =================================================================== --- linux-pcs7.git.orig/kernel/ve/ve.c +++ linux-pcs7.git/kernel/ve/ve.c @@ -439,6 +439,8 @@ static void ve_drop_context(struct ve_st ve->init_cred = NULL; } +static const struct timespec zero_time = { }; + /* under ve->op_sem write-lock */ int ve_start_container(struct ve_struct *ve) { @@ -454,8 +456,16 @@ int ve_start_container(struct ve_struct if (tsk->task_ve != ve || !is_child_reaper(task_pid(tsk))) return -ECHILD; - ve->start_timespec = tsk->start_time; - ve->real_start_timespec = tsk->real_start_time; + /* + * Setup uptime for new containers only, if restored + * the velue won't be zero here already but setup from + * cgroup write while resuming the container. 
+ */ + if (timespec_equal(&ve->start_timespec, &zero_time)) { + ve->start_timespec = tsk->start_time; + ve->real_start_timespec = tsk->real_start_time; + } + /* The value is wrong, but it is never compared to process * start times */ ve->start_jiffies = get_jiffies_64(); @@ -1141,8 +1151,57 @@ enum { VE_CF_FEATURES, VE_CF_IPTABLES_MASK, VE_CF_PSEUDOSUPER, + VE_CF_CLOCK_MONOTONIC, + VE_CF_CLOCK_BOOTBASED }; +static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m) +{ + struct ve_struct *ve = cgroup_ve(cg); + struct timespec ts, *delta; + + do_posix_clock_monotonic_gettime(&ts); + if (cft->private == VE_CF_CLOCK_MONOTONIC) { + delta = &ve->start_timespec; + } else if (cft->private == VE_CF_CLOCK_BOOTBASED) { + delta = &ve->real_start_timespec; + monotonic_to_bootbased(&ts); + } else { + delta = &ts; + memset(&ts, 0, sizeof(ts)); + WARN_ON_ONCE(1); + } + + set_normalized_timespec(&ts, ts.tv_sec - delta->tv_sec, + ts.tv_nsec - delta->tv_nsec); + seq_printf(m, "%ld %ld", ts.tv_sec, ts.tv_nsec); + return 0; +} + +static int ve_ts_write(struct cgroup *cg, struct cftype *cft, const char *buffer) +{ + struct ve_struct *ve = cgroup_ve(cg); + struct timespec ts, delta, *target; + + if (sscanf(buffer, "%ld %ld", &delta.tv_sec, &delta.tv_nsec) != 2) + return -EINVAL; + + do_posix_clock_monotonic_gettime(&ts); + if (cft->private == VE_CF_CLOCK_MONOTONIC) { + target = &ve->start_timespec; + } else if (cft->private == VE_CF_CLOCK_BOOTBASED) { + target = &ve->real_start_timespec; + monotonic_to_bootbased(&ts); + } else { + WARN_ON_ONCE(1); + return -EINVAL; + } + + set_normalized_timespec(target, ts.tv_sec - delta.tv_sec, + ts.tv_nsec - delta.tv_nsec); + return 0; +} + static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft) { if (cft->private == VE_CF_FEATURES) @@ -1258,6 +1317,20 @@ static struct cftype ve_cftypes[] = { .write_u64 = ve_write_pseudosuper, .private = VE_CF_PSEUDOSUPER, }, + { + .name = "clock_monotonic", + .flags = CFTYPE_NOT_ON_ROOT, + 
.read_seq_string = ve_ts_read, + .write_string = ve_ts_write, + .private = VE_CF_CLOCK_MONOTONIC, + }, + { + .name = "clock_bootbased", + .flags = CFTYPE_NOT_ON_ROOT, + .read_seq_string = ve_ts_read, + .write_string = ve_ts_write, + .private = VE_CF_CLOCK_BOOTBASED, + }, { } }; _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
