In order to protect against ptrace(2) and similar attacks on container
runtimes when they join namespaces, many runtimes set mm->dumpable to
SUID_DUMP_DISABLE. However, doing this means that attempting to set up
an unprivileged user namespace will fail because an unprivileged process
can no longer access /proc/self/{setgroups,{uid,gid}_map} for the
container process (which is the same uid as the runtime process).

Fix this by changing pid_getattr and pid_revalidate to *also* change the
owner of regular files that have a mode of 0644 (when the process is not
dumpable). This ensures that the important /proc/[pid]/... files
mentioned above are properly accessible by a container runtime in a
rootless container context.

The most blatant issue is that a non-dumpable process in a rootless
container context is unable to open /proc/self/setgroups, because it
doesn't own the file. The following program demonstrates this:

/*
 * Reproducer: a non-dumpable process that enters a new user namespace
 * cannot open its own /proc/self/setgroups for writing.
 *
 * Exit status: 1 if the setup (prctl/unshare) itself failed, so the result
 * says nothing about the bug; abort() if the open fails (the bug is
 * present); 0 if the open succeeds (the bug is fixed).
 */
int main(void)
{
        /* Mark the process non-dumpable, as container runtimes do. */
        if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
                return 1;

        /* Without a new user namespace the open below proves nothing. */
        if (unshare(CLONE_NEWUSER) < 0)
                return 1;

        /* This will fail. */
        int fd = open("/proc/self/setgroups", O_WRONLY);
        if (fd < 0)
                abort();

        return 0;
}

Cc: d...@opencontainers.org
Cc: contain...@lists.linux-foundation.org
Signed-off-by: Aleksa Sarai <asa...@suse.de>
---
 fs/proc/base.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ca651ac00660..ebabb12f4536 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1729,6 +1729,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
                        return -ENOENT;
                }
                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
+                   (inode->i_mode == (S_IFREG|S_IRUGO|S_IWUSR)) ||
                    task_dumpable(task)) {
                        cred = __task_cred(task);
                        stat->uid = cred->euid;
@@ -1770,6 +1771,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
 
        if (task) {
                if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
+                   (inode->i_mode == (S_IFREG|S_IRUGO|S_IWUSR)) ||
                    task_dumpable(task)) {
                        rcu_read_lock();
                        cred = __task_cred(task);
@@ -2394,7 +2396,7 @@ static int proc_pident_instantiate(struct inode *dir,
        return -ENOENT;
 }
 
-static struct dentry *proc_pident_lookup(struct inode *dir, 
+static struct dentry *proc_pident_lookup(struct inode *dir,
                                         struct dentry *dentry,
                                         const struct pid_entry *ents,
                                         unsigned int nents)
@@ -2536,7 +2538,7 @@ static const struct pid_entry attr_dir_stuff[] = {
 
 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
 {
-       return proc_pident_readdir(file, ctx, 
+       return proc_pident_readdir(file, ctx,
                                   attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
 }
 
-- 
2.11.0

Reply via email to