From: "Eric W. Biederman" <ebied...@xmission.com>

[ Upstream commit 68eb94f16227336a5773b83ecfa8290f1d6b78ce ]

Instead of making the files owned by the GLOBAL_ROOT_USER, make
non-dumpable files whose mm has always lived in a user namespace owned
by the user namespace root.  This allows the container root to have
things work as expected in a container.

Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
Cc: <sta...@vger.kernel.org> # 4.9.x
Signed-off-by: Wen Yang <weny...@linux.alibaba.com>
---
 fs/proc/base.c     | 102 ++++++++++++++++++++++++++++++-----------------------
 fs/proc/fd.c       |  12 +------
 fs/proc/internal.h |  16 ++-------
 3 files changed, 61 insertions(+), 69 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ee2e0ec..5bfdb61 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1676,12 +1676,63 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
 
 /* building an inode */
 
+void task_dump_owner(struct task_struct *task, mode_t mode,
+                    kuid_t *ruid, kgid_t *rgid)
+{
+       /* Work out who should own a proc file for task, based on its
+        * dumpable state, and return the result in *ruid and *rgid.
+        */
+       const struct cred *cred;
+       kuid_t uid;
+       kgid_t gid;
+
+       /* Default to the task's effective ownership */
+       rcu_read_lock();
+       cred = __task_cred(task);
+       uid = cred->euid;
+       gid = cred->egid;
+       rcu_read_unlock();
+
+       /*
+        * Before the /proc/pid/status file was created the only way to read
+        * the effective uid of a process was to stat /proc/pid.  Reading
+        * /proc/pid/status is slow enough that procps and other packages
+        * kept stat'ing /proc/pid.  To keep the rules in /proc simple, all
+        * per-process world readable and executable directories always keep
+        * the task's effective uid/gid; only other modes are adjusted below.
+        */
+       if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
+               struct mm_struct *mm;
+               task_lock(task);
+               mm = task->mm;
+               /* Non-dumpable or mm-less tasks are owned by some root */
+               if (mm) {
+                       if (get_dumpable(mm) != SUID_DUMP_USER) {
+                               struct user_namespace *user_ns = mm->user_ns;
+
+                               uid = make_kuid(user_ns, 0);
+                               if (!uid_valid(uid))
+                                       uid = GLOBAL_ROOT_UID;
+
+                               gid = make_kgid(user_ns, 0);
+                               if (!gid_valid(gid))
+                                       gid = GLOBAL_ROOT_GID;
+                       }
+               } else {
+                       uid = GLOBAL_ROOT_UID;
+                       gid = GLOBAL_ROOT_GID;
+               }
+               task_unlock(task);
+       }
+       *ruid = uid;
+       *rgid = gid;
+}
+
 struct inode *proc_pid_make_inode(struct super_block * sb,
                                  struct task_struct *task, umode_t mode)
 {
        struct inode * inode;
        struct proc_inode *ei;
-       const struct cred *cred;
 
        /* We need a new inode */
 
@@ -1703,13 +1754,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb,
        if (!ei->pid)
                goto out_unlock;
 
-       if (task_dumpable(task)) {
-               rcu_read_lock();
-               cred = __task_cred(task);
-               inode->i_uid = cred->euid;
-               inode->i_gid = cred->egid;
-               rcu_read_unlock();
-       }
+       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
        security_task_to_inode(task, inode);
 
 out:
@@ -1724,7 +1769,6 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
        struct inode *inode = d_inode(dentry);
        struct task_struct *task;
-       const struct cred *cred;
        struct pid_namespace *pid = dentry->d_sb->s_fs_info;
 
        generic_fillattr(inode, stat);
@@ -1742,12 +1786,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
                         */
                        return -ENOENT;
                }
-               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
-                   task_dumpable(task)) {
-                       cred = __task_cred(task);
-                       stat->uid = cred->euid;
-                       stat->gid = cred->egid;
-               }
+               task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
        }
        rcu_read_unlock();
        return 0;
@@ -1763,18 +1802,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  * Rewrite the inode's ownerships here because the owning task may have
  * performed a setuid(), etc.
  *
- * Before the /proc/pid/status file was created the only way to read
- * the effective uid of a /process was to stat /proc/pid.  Reading
- * /proc/pid/status is slow enough that procps and other packages
- * kept stating /proc/pid.  To keep the rules in /proc simple I have
- * made this apply to all per process world readable and executable
- * directories.
  */
 int pid_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct inode *inode;
        struct task_struct *task;
-       const struct cred *cred;
 
        if (flags & LOOKUP_RCU)
                return -ECHILD;
@@ -1783,17 +1815,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
        task = get_proc_task(inode);
 
        if (task) {
-               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
-                   task_dumpable(task)) {
-                       rcu_read_lock();
-                       cred = __task_cred(task);
-                       inode->i_uid = cred->euid;
-                       inode->i_gid = cred->egid;
-                       rcu_read_unlock();
-               } else {
-                       inode->i_uid = GLOBAL_ROOT_UID;
-                       inode->i_gid = GLOBAL_ROOT_GID;
-               }
+               task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
                inode->i_mode &= ~(S_ISUID | S_ISGID);
                security_task_to_inode(task, inode);
                put_task_struct(task);
@@ -1915,7 +1938,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
        bool exact_vma_exists = false;
        struct mm_struct *mm = NULL;
        struct task_struct *task;
-       const struct cred *cred;
        struct inode *inode;
        int status = 0;
 
@@ -1940,16 +1962,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
        mmput(mm);
 
        if (exact_vma_exists) {
-               if (task_dumpable(task)) {
-                       rcu_read_lock();
-                       cred = __task_cred(task);
-                       inode->i_uid = cred->euid;
-                       inode->i_gid = cred->egid;
-                       rcu_read_unlock();
-               } else {
-                       inode->i_uid = GLOBAL_ROOT_UID;
-                       inode->i_gid = GLOBAL_ROOT_GID;
-               }
+               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
                security_task_to_inode(task, inode);
                status = 1;
        }
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 4274f83..00ce153 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -84,7 +84,6 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
 {
        struct files_struct *files;
        struct task_struct *task;
-       const struct cred *cred;
        struct inode *inode;
        unsigned int fd;
 
@@ -108,16 +107,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
                                rcu_read_unlock();
                                put_files_struct(files);
 
-                               if (task_dumpable(task)) {
-                                       rcu_read_lock();
-                                       cred = __task_cred(task);
-                                       inode->i_uid = cred->euid;
-                                       inode->i_gid = cred->egid;
-                                       rcu_read_unlock();
-                               } else {
-                                       inode->i_uid = GLOBAL_ROOT_UID;
-                                       inode->i_gid = GLOBAL_ROOT_GID;
-                               }
+                               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
 
                                if (S_ISLNK(inode->i_mode)) {
                                        unsigned i_mode = S_IFLNK;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5bc057b..103435f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -98,20 +98,8 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
        return get_pid_task(proc_pid(inode), PIDTYPE_PID);
 }
 
-static inline int task_dumpable(struct task_struct *task)
-{
-       int dumpable = 0;
-       struct mm_struct *mm;
-
-       task_lock(task);
-       mm = task->mm;
-       if (mm)
-               dumpable = get_dumpable(mm);
-       task_unlock(task);
-       if (dumpable == SUID_DUMP_USER)
-               return 1;
-       return 0;
-}
+void task_dump_owner(struct task_struct *task, mode_t mode,
+                    kuid_t *ruid, kgid_t *rgid);
 
 static inline unsigned name_to_int(const struct qstr *qstr)
 {
-- 
1.8.3.1

Reply via email to