These two members represent the monotonic and bootbased clocks for
a container's uptime. When a container is in the suspended state (or
is being moved to another node) we treat the monotonic and bootbased
clocks as being stopped, so we need to account for the delta time
on restore and adjust the members named in the subject.

Moreover, these timestamps are involved in posix-timers
setup, so once an application tries to set up monotonic clocks
after the restore (with an absolute time specification) we
adjust the values as well.

The application which migrates a container must fetch
the current settings from /sys/fs/cgroup/ve/$VE/ve.clock_bootbased
and /sys/fs/cgroup/ve/$VE/ve.clock_monotonic (these entries are backed
by the real_start_timespec and start_timespec members respectively),
then write them back on the restore.

https://jira.sw.ru/browse/PSBM-41311
https://jira.sw.ru/browse/PSBM-41406

v2:
 - use clock_[monotonic|bootbased] for cgroup entry names instead

Original-by: Andrew Vagin <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
CC: Vasily Averin <[email protected]>
CC: Andrey Vagin <[email protected]>
CC: Pavel Emelianov <[email protected]>
CC: Vladimir Davydov <[email protected]>
CC: Konstantin Khorenko <[email protected]>
---
 kernel/ve/ve.c |   77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

Index: linux-pcs7.git/kernel/ve/ve.c
===================================================================
--- linux-pcs7.git.orig/kernel/ve/ve.c
+++ linux-pcs7.git/kernel/ve/ve.c
@@ -439,6 +439,8 @@ static void ve_drop_context(struct ve_st
        ve->init_cred = NULL;
 }
 
+static const struct timespec zero_time = { };
+
 /* under ve->op_sem write-lock */
 int ve_start_container(struct ve_struct *ve)
 {
@@ -454,8 +456,16 @@ int ve_start_container(struct ve_struct
        if (tsk->task_ve != ve || !is_child_reaper(task_pid(tsk)))
                return -ECHILD;
 
-       ve->start_timespec = tsk->start_time;
-       ve->real_start_timespec = tsk->real_start_time;
+       /*
+        * Set up uptime for new containers only: if restored, the
+        * value won't be zero here, having already been set up by
+        * a cgroup write while resuming the container.
+        */
+       if (timespec_equal(&ve->start_timespec, &zero_time)) {
+               ve->start_timespec = tsk->start_time;
+               ve->real_start_timespec = tsk->real_start_time;
+       }
+
        /* The value is wrong, but it is never compared to process
         * start times */
        ve->start_jiffies = get_jiffies_64();
@@ -1141,8 +1151,57 @@ enum {
        VE_CF_FEATURES,
        VE_CF_IPTABLES_MASK,
        VE_CF_PSEUDOSUPER,
+       VE_CF_CLOCK_MONOTONIC,
+       VE_CF_CLOCK_BOOTBASED
 };
 
+static int ve_ts_read(struct cgroup *cg, struct cftype *cft, struct seq_file *m)
+{
+       struct ve_struct *ve = cgroup_ve(cg);
+       struct timespec ts, *delta;
+
+       do_posix_clock_monotonic_gettime(&ts);
+       if (cft->private == VE_CF_CLOCK_MONOTONIC) {
+               delta = &ve->start_timespec;
+       } else if (cft->private == VE_CF_CLOCK_BOOTBASED) {
+               delta = &ve->real_start_timespec;
+               monotonic_to_bootbased(&ts);
+       } else {
+               delta = &ts;
+               memset(&ts, 0, sizeof(ts));
+               WARN_ON_ONCE(1);
+       }
+
+       set_normalized_timespec(&ts, ts.tv_sec - delta->tv_sec,
+                               ts.tv_nsec - delta->tv_nsec);
+       seq_printf(m, "%ld %ld", ts.tv_sec, ts.tv_nsec);
+       return 0;
+}
+
+static int ve_ts_write(struct cgroup *cg, struct cftype *cft, const char *buffer)
+{
+       struct ve_struct *ve = cgroup_ve(cg);
+       struct timespec ts, delta, *target;
+
+       if (sscanf(buffer, "%ld %ld", &delta.tv_sec, &delta.tv_nsec) != 2)
+               return -EINVAL;
+
+       do_posix_clock_monotonic_gettime(&ts);
+       if (cft->private == VE_CF_CLOCK_MONOTONIC) {
+               target = &ve->start_timespec;
+       } else if (cft->private == VE_CF_CLOCK_BOOTBASED) {
+               target = &ve->real_start_timespec;
+               monotonic_to_bootbased(&ts);
+       } else {
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+
+       set_normalized_timespec(target, ts.tv_sec - delta.tv_sec,
+                               ts.tv_nsec - delta.tv_nsec);
+       return 0;
+}
+
 static u64 ve_read_u64(struct cgroup *cg, struct cftype *cft)
 {
        if (cft->private == VE_CF_FEATURES)
@@ -1258,6 +1317,20 @@ static struct cftype ve_cftypes[] = {
                .write_u64              = ve_write_pseudosuper,
                .private                = VE_CF_PSEUDOSUPER,
        },
+       {
+               .name                   = "clock_monotonic",
+               .flags                  = CFTYPE_NOT_ON_ROOT,
+               .read_seq_string        = ve_ts_read,
+               .write_string           = ve_ts_write,
+               .private                = VE_CF_CLOCK_MONOTONIC,
+       },
+       {
+               .name                   = "clock_bootbased",
+               .flags                  = CFTYPE_NOT_ON_ROOT,
+               .read_seq_string        = ve_ts_read,
+               .write_string           = ve_ts_write,
+               .private                = VE_CF_CLOCK_BOOTBASED,
+       },
        { }
 };
 
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to