The commit is pushed to "branch-rh10-6.12.0-55.13.1.2.x.vz10-ovz" and will
appear at [email protected]:openvz/vzkernel.git
after rh10-6.12.0-55.13.1.2.21.vz10
------>
commit a11c98da280547e34b71f12110a08c979ff72d7d
Author: Pavel Tikhomirov <[email protected]>
Date: Mon Nov 24 19:20:42 2025 +0800
ve: Make task_ve pointer ve namespace based
This way task_ve pointer's lifetime extends to ve namespace lifetime
(which holds a reference to ve) and we can drop all the tricks about
unsetting task_ve to prevent use after free.
https://virtuozzo.atlassian.net/browse/VSTOR-118289
Signed-off-by: Pavel Tikhomirov <[email protected]>
Feature: ve: ve generic structures
======
Patchset description:
ve: Add VE namespace
Main ideas behind VE namespace explained in "ve: Introduce VE
namespace".
---
kernel/ve/ve.c | 51 ------------------------------------------------
kernel/ve/ve_namespace.c | 2 ++
2 files changed, 2 insertions(+), 51 deletions(-)
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 75148d25b7b39..1bde9c27f8683 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -32,7 +32,6 @@
#include <linux/ctype.h>
#include <linux/tty.h>
#include <linux/device.h>
-#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <uapi/linux/vzcalluser.h>
@@ -857,14 +856,6 @@ void ve_exit_ns(struct pid_namespace *pid_ns)
}
rcu_read_unlock();
- /*
- * To prevent use-after-free (e.g. reading /proc/pid/stat) in case
- * zombie container init outlives the ve_struct.
- *
- * Note: synchronize_rcu in ve_drop_context makes it rcu safe.
- */
- rcu_assign_pointer(current->task_ve, &ve0);
-
/*
* At this point all userspace tasks in container are dead.
*/
@@ -1211,50 +1202,9 @@ static void ve_attach(struct cgroup_taskset *tset)
if (cpuid_override_on())
set_tsk_thread_flag(task, TIF_CPUID_OVERRIDE);
-
- rcu_assign_pointer(task->task_ve, ve);
}
}
-/*
- * When a container task exits it is moved out of its css set, see cgroup_exit.
- * So the task effectively puts its reference to ve_struct of the container,
- * and now nothing prevents ve_struct from being freed while the task is still
- * there.
- *
- * We need to make sure that this task's ->task_ve is not pointing to the
- * container ve before putting the reference, else the use of ->task_ve from
- * the task can lead to use-after-free (e.g. when reading /proc/pid/stat).
- *
- * This is not a problem for a "normal" task (basically any task inside
- * container process tree except init), as such a "normal" task will be reaped
- * in zap_pid_ns_processes by the init, before init releases it's own reference
- * to ve.
- *
- * So we have two cases:
- * - init is handled in ve_exit_ns, unless there is no ve_nsproxy;
- * - "external" task (the task which does not resolve to a valid pid in
- * container pid namespace, and thus there is no guaranty that it would be
- * reaped in time) is handled below.
- */
-static void ve_exit(struct task_struct *task)
-{
- struct ve_struct *ve = task->task_ve;
-
- /*
- * Pairs with synchronize_rcu in ve_grab_context and ve_drop_context.
- */
- rcu_read_lock();
- /*
- * Clear task_ve if ve has no namespaces (ve is starting, stopped or
- * stopping), or in case of "external" task.
- */
- if (!ve->ve_nsproxy ||
- !task_pid_nr_ns(task, ve->ve_nsproxy->pid_ns_for_children))
- rcu_assign_pointer(task->task_ve, &ve0);
- rcu_read_unlock();
-}
-
static int ve_state_show(struct seq_file *sf, void *v)
{
struct cgroup_subsys_state *css = seq_css(sf);
@@ -1962,7 +1912,6 @@ struct cgroup_subsys ve_cgrp_subsys = {
.css_free = ve_destroy,
.can_attach = ve_can_attach,
.attach = ve_attach,
- .exit = ve_exit,
.legacy_cftypes = ve_cftypes,
};
diff --git a/kernel/ve/ve_namespace.c b/kernel/ve/ve_namespace.c
index e5e5f540205ed..6dae8487735f9 100644
--- a/kernel/ve/ve_namespace.c
+++ b/kernel/ve/ve_namespace.c
@@ -83,6 +83,7 @@ int copy_ve_ns(unsigned long flags, struct task_struct *p)
return PTR_ERR(new_ve_ns);
p->ve_ns = new_ve_ns;
+ p->task_ve = new_ve_ns->ve;
return 0;
}
@@ -129,6 +130,7 @@ void switch_ve_namespace(struct task_struct *p, struct
ve_namespace *new)
task_lock(p);
old = p->ve_ns;
p->ve_ns = new;
+ p->task_ve = new ? new->ve : &ve0;
task_unlock(p);
if (old)
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel