The commit is pushed to "branch-rh10-6.12.0-55.13.1.3.x.vz10-ovz" and will 
appear at [email protected]:openvz/vzkernel.git
after rh10-6.12.0-55.13.1.2.24.vz10
------>
commit 0930dd55a107af09fe5a697aa68d29dd0489effb
Author: Pavel Tikhomirov <[email protected]>
Date:   Wed Dec 10 18:34:32 2025 +0800

    ve_namespace: Reorder VE namespace creation before other namespaces
    
    On clone3() we first create and switch to user namespace, when ve
    namespace owned by this user namespace and switch to it, and only then
    create other namespaces, this way we can make sure that all the
    Container namespaces created together with VE namespace have
    get_exec_env() pointing to new VE namespace.
    
    On unshare() we can't easily enforce it, as this code path requires
    creating all namespaces first and only then joins them (basically
    namespace change either happens for all requested namespaces or it does
    not happen atomically). So while in this patch we reorder it for code
    consistency, it does not really work, so we prohibit using unshare()
    with CLONE_NEWVE together with non CLONE_NEWUSER namespaces.
    
    Fixes: 8a771a3d6bea ("ve: Introduce VE namespace")
    https://virtuozzo.atlassian.net/browse/VSTOR-118289
    Signed-off-by: Pavel Tikhomirov <[email protected]>
    
    Feature: ve: ve generic structures
---
 kernel/fork.c | 48 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 5ce4c2ac91c1c..25644501c8237 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2389,15 +2389,15 @@ __latent_entropy struct task_struct *copy_process(
        retval = copy_mm(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_signal;
-       retval = copy_namespaces(clone_flags, p);
+       retval = copy_ve_ns(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_mm;
-       retval = copy_ve_ns(clone_flags, p);
+       retval = copy_namespaces(clone_flags, p);
        if (retval)
-               goto bad_fork_cleanup_namespaces;
+               goto bad_fork_cleanup_ve_namespace;
        retval = copy_io(clone_flags, p);
        if (retval)
-               goto bad_fork_cleanup_ve_namespace;
+               goto bad_fork_cleanup_namespaces;
        retval = copy_thread(p, args);
        if (retval)
                goto bad_fork_cleanup_io;
@@ -2652,10 +2652,10 @@ __latent_entropy struct task_struct *copy_process(
 bad_fork_cleanup_io:
        if (p->io_context)
                exit_io_context(p);
-bad_fork_cleanup_ve_namespace:
-       exit_ve_namespace(p);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
+bad_fork_cleanup_ve_namespace:
+       exit_ve_namespace(p);
 bad_fork_cleanup_mm:
        if (p->mm) {
                mm_clear_owner(p->mm, p);
@@ -3237,6 +3237,20 @@ static int check_unshare_flags(unsigned long 
unshare_flags)
                        return -EINVAL;
        }
 
+       /*
+        * Unshare creates all namespaces first and only then switches to them.
+        * So get_exec_env() yet returns previous VE while we are creating
+        * other namespaces. That leads to network and mount namespace
+        * initialized incorrectly, having ->owner_ve links set to previous VE.
+        * To avoid confusion, only allow CLONE_NEWVE together with 
CLONE_NEWUSER.
+        * CLONE_NEWUSER is allowed as it should own VE namespace, not vice 
versa.
+        */
+       if (unshare_flags & CLONE_NEWVE) {
+               unsigned long allowed_with_ve = CLONE_NEWVE | CLONE_NEWUSER;
+               if (unshare_flags & ~allowed_with_ve)
+                       return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -3342,25 +3356,20 @@ int ksys_unshare(unsigned long unshare_flags)
        err = unshare_userns(unshare_flags, &new_cred);
        if (err)
                goto bad_unshare_cleanup_fd;
+       err = unshare_ve_namespace(unshare_flags, &new_ve_ns, new_cred);
+       if (err)
+               goto bad_unshare_cleanup_cred;
        err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
                                         new_cred, new_fs);
        if (err)
-               goto bad_unshare_cleanup_cred;
-       err = unshare_ve_namespace(unshare_flags, &new_ve_ns, new_cred);
-       if (err) {
-               if (new_nsproxy)
-                       free_nsproxy(new_nsproxy);
-               goto bad_unshare_cleanup_cred;
-       }
+               goto bad_unshare_cleanup_ve_namespace;
 
        if (new_cred) {
                err = set_cred_ucounts(new_cred);
                if (err) {
                        if (new_nsproxy)
                                free_nsproxy(new_nsproxy);
-                       if (new_ve_ns)
-                               free_ve_ns(new_ve_ns);
-                       goto bad_unshare_cleanup_cred;
+                       goto bad_unshare_cleanup_ve_namespace;
                }
        }
 
@@ -3380,8 +3389,10 @@ int ksys_unshare(unsigned long unshare_flags)
                if (new_nsproxy)
                        switch_task_namespaces(current, new_nsproxy);
 
-               if (new_ve_ns)
+               if (new_ve_ns) {
                        switch_ve_namespace(current, new_ve_ns);
+                       new_ve_ns = NULL;
+               }
 
                task_lock(current);
 
@@ -3410,6 +3421,9 @@ int ksys_unshare(unsigned long unshare_flags)
 
        perf_event_namespaces(current);
 
+bad_unshare_cleanup_ve_namespace:
+       if (new_ve_ns)
+               free_ve_ns(new_ve_ns);
 bad_unshare_cleanup_cred:
        if (new_cred)
                put_cred(new_cred);
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to