The check_unsafe_exec() counting of n_fs would not add up under a heavily
threaded process trying to perform a suid exec, causing the suid portion
to fail. This counting error appears to be unneeded, but to catch any
possible conditions, explicitly unshare fs_struct on exec, if it ends up
needing to happen. This means fs->in_exec must be removed (since it would
never get cleared in the old copy), and is specifically no longer needed.

See also commit 498052bba55e ("New locking/refcounting for fs_struct").

This additionally allows the "in_exec" member to be dropped, showing an
8 bytes savings per fs_struct on 64-bit. Before:

struct fs_struct {
        int                        users;                /*     0     4 */
        spinlock_t                 lock;                 /*     4     4 */
        seqcount_spinlock_t        seq;                  /*     8     4 */
        int                        umask;                /*    12     4 */
        int                        in_exec;              /*    16     4 */

        /* XXX 4 bytes hole, try to pack */

        struct path                root;                 /*    24    16 */
        struct path                pwd;                  /*    40    16 */

        /* size: 56, cachelines: 1, members: 7 */
        /* sum members: 52, holes: 1, sum holes: 4 */
        /* last cacheline: 56 bytes */
};

After:

struct fs_struct {
        int                        users;                /*     0     4 */
        spinlock_t                 lock;                 /*     4     4 */
        seqcount_spinlock_t        seq;                  /*     8     4 */
        int                        umask;                /*    12     4 */
        struct path                root;                 /*    16    16 */
        struct path                pwd;                  /*    32    16 */

        /* size: 48, cachelines: 1, members: 6 */
        /* last cacheline: 48 bytes */
};

Reported-by: Jorge Merlino <[email protected]>
Link: 
https://lore.kernel.org/lkml/[email protected]/
Cc: Eric Biederman <[email protected]>
Cc: Alexander Viro <[email protected]>
Cc: "Christian Brauner (Microsoft)" <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Sebastian Andrzej Siewior <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Kees Cook <[email protected]>
---
 fs/exec.c                 |  9 +++---
 fs/fs_struct.c            |  1 -
 include/linux/fdtable.h   |  1 +
 include/linux/fs_struct.h |  1 -
 kernel/fork.c             | 62 ++++++++++++++++++++++++++-------------
 5 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 902bce45b116..7d5f63f03c58 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1272,6 +1272,11 @@ int begin_new_exec(struct linux_binprm * bprm)
        if (retval)
                goto out;
 
+       /* Ensure the fs_struct is not shared. */
+       retval = unshare_fs();
+       if (retval)
+               goto out;
+
        /*
         * Must be called _before_ exec_mmap() as bprm->mm is
         * not visible until then. This also enables the update
@@ -1583,8 +1588,6 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 
        if (p->fs->users > n_fs)
                bprm->unsafe |= LSM_UNSAFE_SHARE;
-       else
-               p->fs->in_exec = 1;
        spin_unlock(&p->fs->lock);
 }
 
@@ -1843,7 +1846,6 @@ static int bprm_execve(struct linux_binprm *bprm,
                goto out;
 
        /* execve succeeded */
-       current->fs->in_exec = 0;
        current->in_execve = 0;
        rseq_execve(current);
        acct_update_integrals(current);
@@ -1861,7 +1863,6 @@ static int bprm_execve(struct linux_binprm *bprm,
                force_fatal_sig(SIGSEGV);
 
 out_unmark:
-       current->fs->in_exec = 0;
        current->in_execve = 0;
 
        return retval;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 04b3f5b9c629..c27c67023d01 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -115,7 +115,6 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
        /* We don't need to lock fs - think why ;-) */
        if (fs) {
                fs->users = 1;
-               fs->in_exec = 0;
                spin_lock_init(&fs->lock);
                seqcount_spinlock_init(&fs->seq, &fs->lock);
                fs->umask = old->umask;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index e066816f3519..fbfb89ee3bda 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -117,6 +117,7 @@ struct task_struct;
 
 void put_files_struct(struct files_struct *fs);
 int unshare_files(void);
+int unshare_fs(void);
 struct files_struct *dup_fd(struct files_struct *, unsigned, int *) 
__latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 783b48dedb72..08b82a90ceae 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -11,7 +11,6 @@ struct fs_struct {
        spinlock_t lock;
        seqcount_spinlock_t seq;
        int umask;
-       int in_exec;
        struct path root, pwd;
 } __randomize_layout;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b4a799d9c50f..53b7248f7a4b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1589,10 +1589,6 @@ static int copy_fs(unsigned long clone_flags, struct 
task_struct *tsk)
        if (clone_flags & CLONE_FS) {
                /* tsk->fs is already what we want */
                spin_lock(&fs->lock);
-               if (fs->in_exec) {
-                       spin_unlock(&fs->lock);
-                       return -EAGAIN;
-               }
                fs->users++;
                spin_unlock(&fs->lock);
                return 0;
@@ -3070,10 +3066,9 @@ static int check_unshare_flags(unsigned long 
unshare_flags)
        return 0;
 }
 
-/*
- * Unshare the filesystem structure if it is being shared
- */
-static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+/* Allocate an fs_struct if it is currently shared and CLONE_FS requested. */
+static int unshare_fs_alloc(unsigned long unshare_flags,
+                           struct fs_struct **new_fsp)
 {
        struct fs_struct *fs = current->fs;
 
@@ -3091,6 +3086,41 @@ static int unshare_fs(unsigned long unshare_flags, 
struct fs_struct **new_fsp)
        return 0;
 }
 
+/* Swap out fs_struct, returning old fs if it needs freeing. */
+static void unshare_fs_finalize(struct fs_struct **new_fsp)
+{
+       struct task_struct *task = current;
+       struct fs_struct *fs = task->fs;
+
+       spin_lock(&fs->lock);
+       task->fs = *new_fsp;
+       if (--fs->users)
+               *new_fsp = NULL;
+       else
+               *new_fsp = fs;
+       spin_unlock(&fs->lock);
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+int unshare_fs(void)
+{
+       struct fs_struct *new_fs = NULL;
+       int error;
+
+       error = unshare_fs_alloc(CLONE_FS, &new_fs);
+       if (error || !new_fs)
+               return error;
+
+       unshare_fs_finalize(&new_fs);
+
+       if (new_fs)
+               free_fs_struct(new_fs);
+
+       return 0;
+}
+
 /*
  * Unshare file descriptor table if it is being shared
  */
@@ -3120,7 +3150,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int 
max_fds,
  */
 int ksys_unshare(unsigned long unshare_flags)
 {
-       struct fs_struct *fs, *new_fs = NULL;
+       struct fs_struct *new_fs = NULL;
        struct files_struct *new_fd = NULL;
        struct cred *new_cred = NULL;
        struct nsproxy *new_nsproxy = NULL;
@@ -3159,7 +3189,7 @@ int ksys_unshare(unsigned long unshare_flags)
         */
        if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
                do_sysvsem = 1;
-       err = unshare_fs(unshare_flags, &new_fs);
+       err = unshare_fs_alloc(unshare_flags, &new_fs);
        if (err)
                goto bad_unshare_out;
        err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
@@ -3197,16 +3227,8 @@ int ksys_unshare(unsigned long unshare_flags)
 
                task_lock(current);
 
-               if (new_fs) {
-                       fs = current->fs;
-                       spin_lock(&fs->lock);
-                       current->fs = new_fs;
-                       if (--fs->users)
-                               new_fs = NULL;
-                       else
-                               new_fs = fs;
-                       spin_unlock(&fs->lock);
-               }
+               if (new_fs)
+                       unshare_fs_finalize(&new_fs);
 
                if (new_fd)
                        swap(current->files, new_fd);
-- 
2.34.1


Reply via email to