Two small nits, everything else is OK. On 1/13/20 11:25 AM, Valeriy Vdovin wrote: > https://jira.sw.ru/browse/PSBM-100083 > Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> > --- > criu/cr-dump.c | 49 ++++++++++++++++++++++++++++++++++++ > criu/cr-restore.c | 68 > ++++++++++++++++++++++++++++++-------------------- > criu/include/crtools.h | 31 +++++++++++++++++++++++ > images/core.proto | 2 ++ > 4 files changed, 123 insertions(+), 27 deletions(-) > > diff --git a/criu/cr-dump.c b/criu/cr-dump.c > index 45626e8..751141f 100644 > --- a/criu/cr-dump.c > +++ b/criu/cr-dump.c > @@ -1037,6 +1037,47 @@ int dump_thread_core(int pid, CoreEntry *core, const > struct parasite_dump_thread > return ret; > } > > +struct get_internal_start_time_rq { > + int pid; > + unsigned long long result; > +}; > + > +static int child_get_internal_start_time(void *arg) > +{ > + struct proc_pid_stat p; > + struct get_internal_start_time_rq *r = > + (struct get_internal_start_time_rq *)arg; > + > + /* We need to join ve to access container relative > + * value of task's start_time, otherwize we will see > + * start_time visible to host. > + */ > + join_veX(r->pid); > + > + parse_pid_stat(r->pid, &p); > + r->result = p.start_time; > + return 0; > +} > + > +static int dump_task_internal_start_time(int pid, TaskCoreEntry *tc) > +{ > + int ret; > + struct get_internal_start_time_rq r = { > + .pid = pid, > + .result = 0 > + }; > + > + ret = call_in_child_process(child_get_internal_start_time, &r); > + if (ret) { > + pr_err("Failed to exec in child\n"); > + return ret; > + } > + > + tc->has_start_time = 1; > + tc->start_time = r.result; > + return 0; > +} > + > static int dump_task_core_all(struct parasite_ctl *ctl, > struct pstree_item *item, > const struct proc_pid_stat *stat, > @@ -1063,6 +1104,14 @@ static int dump_task_core_all(struct parasite_ctl *ctl, > core->tc->task_state = item->pid->state; > core->tc->exit_code = 0; > > + ret = dump_task_internal_start_time(pid, core->tc); > + if (ret) { > + pr_err("Failed to dump start_time for task %d\n", pid); > + goto err; > + } > + > + pr_info("Dumped start_time of task %d is %lu\n", pid, > core->tc->start_time); > + > if (stat->tty_nr) { > struct pstree_item *p = item; > > diff --git a/criu/cr-restore.c b/criu/cr-restore.c > index 170beab..c9777ae 100644 > --- a/criu/cr-restore.c > +++ b/criu/cr-restore.c > @@ -947,6 +947,40 @@ static int prepare_proc_misc(pid_t pid, TaskCoreEntry > *tc) > static int prepare_itimers(int pid, struct task_restore_args *args, > CoreEntry *core); > static int prepare_mm(pid_t pid, struct task_restore_args *args); > > +static int restore_start_time(int pid, CoreEntry *core) > +{ > + unsigned long long total_nsec; > + unsigned long flags; > + long tps; > + struct prctl_task_ct_fields ct_fields; > + > + if (!core->tc->has_start_time) { > + pr_warn("Skipping restore_start_time for old image version.\n"); > + return -1; > + } > + > + tps = sysconf(_SC_CLK_TCK); > + if (tps == -1) { > + pr_perror("Failed to get clock ticks via sysconf"); > + return -1; > + } > + > + total_nsec = core->tc->start_time * (NSEC_PER_SEC / tps); > + > + ct_fields.real_start_time = total_nsec; > + flags = PR_TASK_CT_FIELDS_START_TIME; > + > + if (prctl(PR_SET_TASK_CT_FIELDS, (unsigned long)&ct_fields, flags, 0, > 0)) { > + pr_perror("Can't set process start time"); > + return -1; > + } > + > + pr_info("Restored start_time of task %d is %lu\n", > + pid, core->tc->start_time); > + > + return 0; > +} > + > static int restore_one_alive_task(int pid, CoreEntry *core) > { > unsigned args_len; > @@ -955,6 +989,8 @@ static int restore_one_alive_task(int pid, CoreEntry > *core) > > rst_mem_switch_to_private(); > > + restore_start_time(pid, core); > + > args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * > current->nr_threads, page_size()); > ta = mmap(NULL, args_len, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | > MAP_PRIVATE, 0, 0); > @@ -1122,7 +1158,7 @@ static int wait_on_helpers_zombies(void) > > static int wait_exiting_children(char *prefix); > > -static int restore_one_zombie(CoreEntry *core) > +static int restore_one_zombie(int pid, CoreEntry *core) > { > int exit_code = core->tc->exit_code; > > @@ -1134,6 +1170,8 @@ static int restore_one_zombie(CoreEntry *core) > if (lazy_pages_setup_zombie(vpid(current))) > return -1; > > + restore_start_time(pid, core); > + > prctl(PR_SET_NAME, (long)(void *)core->tc->comm, 0, 0, 0); > > if (task_entries != NULL) { > @@ -1142,6 +1180,7 @@ static int restore_one_zombie(CoreEntry *core) > zombie_prepare_signals(); > } > > +
Excess empty line. > if (exit_code & 0x7f) { > int signr; > > @@ -1344,7 +1383,7 @@ static int restore_one_task(int pid, CoreEntry *core) > if (task_alive(current)) > ret = restore_one_alive_task(pid, core); > else if (current->pid->state == TASK_DEAD) > - ret = restore_one_zombie(core); > + ret = restore_one_zombie(pid, core); > else if (current->pid->state == TASK_HELPER) { > ret = restore_one_helper(); > } else { > @@ -2156,31 +2195,6 @@ static int write_restored_pid(void) > > extern char *get_dumpee_veid(pid_t pid_real); > > -#define join_veX(pid) join_ve(pid, true) > - > -/* > - * Use join_ve0 very carefully! We have checks in kernel to prohibit > execution > - * of files on CT mounts for security. All mounts created after join_veX are > - * marked as CT mounts, including all mounts of the root_yard temporary > mntns. > - * So if you do join_ve0 you can be blocked from executing anything. > - * > - * https://jira.sw.ru/browse/PSBM-98702 > - * > - * note: If for some reason we will desperately need to execute binaries from > - * mounts in the root_yard temporary mntns from VE0 we have an option: > - * > - * In restore_root_task before calling join_veX we can clone a helper process > - * which will create CT userns and mntns first (all mounts are marked as host > - * mounts), next after join_veX in restore_root_task we create another helper > - * process which setns'es to these user and mnt namespaces, and from these > - * helper we can clone CT init process obviousely without CLONE_NEWNS and > - * CLONE_NEWUSER. These way userns, mntns, ve will be preserved for all tasks > - * but all mounts cloned from host will be marked as host mounts, and > execution > - * on them will be allowed even from VE0. > - */ > - > -#define join_ve0(pid) join_ve(pid, false) > - > /* > * To eliminate overhead we don't parse VE cgroup mountpoint > * but presume to find it in known place. Otherwise simply > diff --git a/criu/include/crtools.h b/criu/include/crtools.h > index c3f7cb3..acb1faf 100644 > --- a/criu/include/crtools.h > +++ b/criu/include/crtools.h > @@ -44,6 +44,37 @@ extern void pr_check_features(const char *offset, const > char *sep, int width); > .actor = name##_cb, \ > } > > +#define join_veX(pid) join_ve(pid, true) > + > +/* > + * Use join_ve0 very carefully! We have checks in kernel to prohibit > execution > + * of files on CT mounts for security. All mounts created after join_veX are > + * marked as CT mounts, including all mounts of the root_yard temporary > mntns. > + * So if you do join_ve0 you can be blocked from executing anything. > + * > + * https://jira.sw.ru/browse/PSBM-98702 > + * > + * note: If for some reason we will desperately need to execute binaries from > + * mounts in the root_yard temporary mntns from VE0 we have an option: > + * > + * In restore_root_task before calling join_veX we can clone a helper process > + * which will create CT userns and mntns first (all mounts are marked as host > + * mounts), next after join_veX in restore_root_task we create another helper > + * process which setns'es to these user and mnt namespaces, and from these > + * helper we can clone CT init process obviousely without CLONE_NEWNS and > + * CLONE_NEWUSER. These way userns, mntns, ve will be preserved for all tasks > + * but all mounts cloned from host will be marked as host mounts, and > execution > + * on them will be allowed even from VE0. > + */ > + > +#define join_ve0(pid) join_ve(pid, false) > + > +/* > + * To eliminate overhead we don't parse VE cgroup mountpoint > + * but presume to find it in known place. Otherwise simply > + * don't enter into veX with one warning. > + */ Duplicated comment. > + > int join_ve(pid_t pid, bool veX); > > #endif /* __CR_CRTOOLS_H__ */ > diff --git a/images/core.proto b/images/core.proto > index 6ef5f50..c164c7a 100644 > --- a/images/core.proto > +++ b/images/core.proto > @@ -50,6 +50,7 @@ message task_core_entry_VZ730 { > optional int32 tty_nr = 15; > optional int32 tty_pgrp = 16; > repeated sa_entry sigactions = 17; > + optional uint64 start_time = 19; > } > > message task_core_entry { > @@ -79,6 +80,7 @@ message task_core_entry { > repeated sa_entry sigactions = 15; > optional int32 tty_nr = 16; > optional int32 tty_pgrp = 17; > + optional uint64 start_time = 19; > } > > message task_kobj_ids_entry { > -- Best regards, Tikhomirov Pavel Software Developer, Virtuozzo. _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel