This way task_ve pointer's lifetime extends to ve namespace lifetime
(which holds a reference to ve) and we can drop all the tricks about
unsetting task_ve to prevent use after free.

https://virtuozzo.atlassian.net/browse/VSTOR-118289
Signed-off-by: Pavel Tikhomirov <[email protected]>

Feature: ve: ve generic structures
---
 kernel/ve/ve.c           | 51 ----------------------------------------
 kernel/ve/ve_namespace.c |  2 ++
 2 files changed, 2 insertions(+), 51 deletions(-)

diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 75148d25b7b39..1bde9c27f8683 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -32,7 +32,6 @@
 #include <linux/ctype.h>
 #include <linux/tty.h>
 #include <linux/device.h>
-#include <linux/sched.h>
 #include <linux/sched/loadavg.h>
 
 #include <uapi/linux/vzcalluser.h>
@@ -857,14 +856,6 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
        }
        rcu_read_unlock();
 
-       /*
-        * To prevent use-after-free (e.g. reading /proc/pid/stat) in case
-        * zombie container init outlives the ve_struct.
-        *
-        * Note: synchronize_rcu in ve_drop_context makes it rcu safe.
-        */
-       rcu_assign_pointer(current->task_ve, &ve0);
-
        /*
         * At this point all userspace tasks in container are dead.
         */
@@ -1211,50 +1202,9 @@ static void ve_attach(struct cgroup_taskset *tset)
 
                if (cpuid_override_on())
                        set_tsk_thread_flag(task, TIF_CPUID_OVERRIDE);
-
-               rcu_assign_pointer(task->task_ve, ve);
        }
 }
 
-/*
- * When a container task exits it is moved out of its css set, see cgroup_exit.
- * So the task effectively puts its reference to ve_struct of the container,
- * and now nothing prevents ve_struct from being freed while the task is still
- * there.
- *
- * We need to make sure that this task's ->task_ve is not pointing to the
- * container ve before putting the reference, else the use of ->task_ve from
- * the task can lead to use-after-free (e.g. when reading /proc/pid/stat).
- *
- * This is not a problem for a "normal" task (basically any task inside
- * container process tree except init), as such a "normal" task will be reaped
- * in zap_pid_ns_processes by the init, before init releases it's own reference
- * to ve.
- *
- * So we have two cases:
- * - init is handled in ve_exit_ns, unless there is no ve_nsproxy;
- * - "external" task (the task which does not resolve to a valid pid in
- *   container pid namespace, and thus there is no guaranty that it would be
- *   reaped in time) is handled below.
- */
-static void ve_exit(struct task_struct *task)
-{
-       struct ve_struct *ve = task->task_ve;
-
-       /*
-        * Pairs with synchronize_rcu in ve_grab_context and ve_drop_context.
-        */
-       rcu_read_lock();
-       /*
-        * Clear task_ve if ve has no namespaces (ve is starting, stopped or
-        * stopping), or in case of "external" task.
-        */
-       if (!ve->ve_nsproxy ||
-           !task_pid_nr_ns(task, ve->ve_nsproxy->pid_ns_for_children))
-               rcu_assign_pointer(task->task_ve, &ve0);
-       rcu_read_unlock();
-}
-
 static int ve_state_show(struct seq_file *sf, void *v)
 {
        struct cgroup_subsys_state *css = seq_css(sf);
@@ -1962,7 +1912,6 @@ struct cgroup_subsys ve_cgrp_subsys = {
        .css_free       = ve_destroy,
        .can_attach     = ve_can_attach,
        .attach         = ve_attach,
-       .exit           = ve_exit,
        .legacy_cftypes = ve_cftypes,
 };
 
diff --git a/kernel/ve/ve_namespace.c b/kernel/ve/ve_namespace.c
index e5e5f540205ed..6dae8487735f9 100644
--- a/kernel/ve/ve_namespace.c
+++ b/kernel/ve/ve_namespace.c
@@ -83,6 +83,7 @@ int copy_ve_ns(unsigned long flags, struct task_struct *p)
                return PTR_ERR(new_ve_ns);
 
        p->ve_ns = new_ve_ns;
+       p->task_ve = new_ve_ns->ve;
        return 0;
 }
 
@@ -129,6 +130,7 @@ void switch_ve_namespace(struct task_struct *p, struct 
ve_namespace *new)
        task_lock(p);
        old = p->ve_ns;
        p->ve_ns = new;
+       p->task_ve = new ? new->ve : &ve0;
        task_unlock(p);
 
        if (old)
-- 
2.51.1

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to