Analogous to the supplementary GID list, the supplementary UID list
provides a set of additional user credentials that a process can act as.
A process with CAP_SETUID can set its UID list arbitrarily; a process
without CAP_SETUID can only reduce its UID list or add its own real,
effective, or saved UID to it.

This allows each user to have a set of UIDs that they can then use to
further sandbox individual child processes without first escalating to
root to change UIDs.  For instance, a PAM module could give each user a
block of UIDs to work with.

Tested via the following test program:

#define _GNU_SOURCE /* for setresuid(), setresgid() and syscall() */
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * System call number for getusers.  Installed kernel headers win when they
 * already define it; otherwise use the numbers this patch assigns: 358 in
 * the i386 table, 322 in the x86-64 table, and 281 in the asm-generic table.
 * NOTE(review): the asm-generic fallback is only correct on architectures
 * that actually use the asm-generic syscall table.
 */
#if defined(__NR_getusers)
#define GETUSERS_NR __NR_getusers
#elif defined(__i386__)
#define GETUSERS_NR 358
#elif defined(__x86_64__)
#define GETUSERS_NR 322
#else
#define GETUSERS_NR 281
#endif

/*
 * Thin wrapper around the raw getusers system call.
 *
 * count: capacity of uids, in entries; the kernel rejects negative values
 *        with EINVAL, and 0 asks only for the number of entries.
 * uids:  buffer that receives the supplementary UID list.
 *
 * Returns the number of supplementary UIDs, or -1 with errno set.
 */
static int getusers(int count, uid_t *uids)
{
    return syscall(GETUSERS_NR, count, uids);
}

/*
 * System call number for setusers.  Installed kernel headers win when they
 * already define it; otherwise use the numbers this patch assigns: 359 in
 * the i386 table, 323 in the x86-64 table, and 282 in the asm-generic table.
 * NOTE(review): the asm-generic fallback is only correct on architectures
 * that actually use the asm-generic syscall table.
 */
#if defined(__NR_setusers)
#define SETUSERS_NR __NR_setusers
#elif defined(__i386__)
#define SETUSERS_NR 359
#elif defined(__x86_64__)
#define SETUSERS_NR 323
#else
#define SETUSERS_NR 282
#endif

/*
 * Thin wrapper around the raw setusers system call.
 *
 * count: number of entries in uids.
 * uids:  the supplementary UID list to install for the calling process.
 *
 * Returns 0 on success, or -1 with errno set (EPERM when an unprivileged
 * caller tries to add a UID it is not entitled to).
 */
static int setusers(int count, const uid_t *uids)
{
    return syscall(SETUSERS_NR, count, uids);
}

/*
 * Print the caller's supplementary UID list on a single line, in the form
 * "UIDs: a b c".  Exits with status 1 if the list cannot be fetched.
 */
static void show_users(void)
{
    /* Same value as NUSERS_MAX, the kernel's cap on the list length. */
    enum { MAX_UIDS = 65536 };
    uid_t list[MAX_UIDS];
    int n;
    int idx;

    n = getusers(MAX_UIDS, list);
    if (n < 0)
        err(1, "getusers");

    printf("UIDs:");
    idx = 0;
    while (idx < n) {
        printf(" %u", (unsigned)list[idx]);
        idx++;
    }
    printf("\n");
}

/* Number of elements in a true array (not a pointer). */
#define COUNT_OF(a) ((int)(sizeof(a) / sizeof((a)[0])))

/*
 * Walk through the supplementary-UID rules.  The process must start with
 * CAP_SETUID (for example as root), because the first step installs a list
 * of UIDs it does not yet hold.
 *
 * Returns 0 when every step behaves as expected; any deviation terminates
 * the program with exit status 1 and a diagnostic.
 */
int main(void)
{
    const uid_t list1[] = { 1, 2, 3, 4, 5 };
    const uid_t list2[] = { 1, 2, 3, 4 };
    const uid_t list3[] = { 2, 3, 4 };

    show_users();

    /* Privileged: any list may be installed. */
    if (setusers(COUNT_OF(list1), list1) < 0)
        err(1, "setusers 1");
    show_users();

    /* Drop to GID 1 / UID 1; every later step runs unprivileged. */
    if (setresgid(1, 1, 1) < 0)
        err(1, "setresgid");
    if (setresuid(1, 1, 1) < 0)
        err(1, "setresuid");

    /* Shrinking the list is always allowed. */
    if (setusers(COUNT_OF(list2), list2) < 0)
        err(1, "setusers 2");
    show_users();
    if (setusers(COUNT_OF(list3), list3) < 0)
        err(1, "setusers 3");
    show_users();

    /* Re-adding UID 1 is allowed because it is the process's own UID. */
    if (setusers(COUNT_OF(list2), list2) < 0)
        err(1, "setusers 4");
    show_users();

    /* UID 2 is in the supplementary list, so switching to it is allowed. */
    if (setresuid(2, 2, 2) < 0)
        err(1, "setresuid 2");

    /*
     * UID 5 is neither in the list nor one of the process's own UIDs, so
     * this call must be refused with EPERM.  Success, or any other errno,
     * is a test failure.
     */
    if (setusers(COUNT_OF(list1), list1) == 0)
        errx(1, "setusers 5 unexpectedly succeeded");
    if (errno != EPERM)
        err(1, "setusers 5");
    show_users();

    return 0;
}

In this test, all but the last call to setusers succeed; the last call
fails with EPERM because the unprivileged process attempts to add UID 5,
which is neither in its current supplementary UID list nor one of its own
real, effective, or saved UIDs.

Signed-off-by: Josh Triplett <j...@joshtriplett.org>
---
 arch/x86/syscalls/syscall_32.tbl  |   2 +
 arch/x86/syscalls/syscall_64.tbl  |   2 +
 include/linux/cred.h              |  66 +++++++++++++++
 include/linux/syscalls.h          |   2 +
 include/uapi/asm-generic/unistd.h |   6 +-
 include/uapi/linux/limits.h       |   1 +
 init/Kconfig                      |   9 ++
 kernel/cred.c                     |   4 +
 kernel/groups.c                   | 173 ++++++++++++++++++++++++++++++++++++++
 kernel/sys.c                      |  21 +++--
 kernel/sys_ni.c                   |   2 +
 11 files changed, 280 insertions(+), 8 deletions(-)

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 9fe1b5d..55717d7 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -364,3 +364,5 @@
 355    i386    getrandom               sys_getrandom
 356    i386    memfd_create            sys_memfd_create
 357    i386    bpf                     sys_bpf
+358    i386    getusers                sys_getusers
+359    i386    setusers                sys_setusers
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 281150b..5572e67 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -328,6 +328,8 @@
 319    common  memfd_create            sys_memfd_create
 320    common  kexec_file_load         sys_kexec_file_load
 321    common  bpf                     sys_bpf
+322    common  getusers                sys_getusers
+323    common  setusers                sys_setusers
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/cred.h b/include/linux/cred.h
index b2d0820..31169fe 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -76,6 +76,8 @@ extern int groups_search(const struct group_info *, kgid_t);
 extern int in_group_p(kgid_t);
 extern int in_egroup_p(kgid_t);
 
+struct user_info;
+
 /*
  * The security context of a task
  *
@@ -135,6 +137,12 @@ struct cred {
        struct user_struct *user;       /* real user ID subscription */
        struct user_namespace *user_ns; /* user_ns the caps and keyrings are 
relative to. */
        struct group_info *group_info;  /* supplementary groups for euid/fsgid 
*/
+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+       struct user_info *user_info;    /* supplementary users */
+#define INIT_USER_INFO .user_info = &init_users,
+#else
+#define INIT_USER_INFO
+#endif
        struct rcu_head rcu;            /* RCU deletion hook */
 };
 
@@ -381,4 +389,62 @@ do {                                               \
        *(_fsgid) = __cred->fsgid;              \
 } while(0)
 
+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+struct user_info {
+       atomic_t        usage;
+       int             nusers;
+       int             nblocks;
+       kuid_t          small_block[NGROUPS_SMALL];
+       kuid_t          *blocks[0];
+};
+
+#define USER_AT(ui, i) GROUP_AT(ui, i)
+extern struct user_info init_users;
+void users_free(struct user_info *);
+bool has_supplementary_uid(kuid_t);
+
+/**
+ * get_user_info - Get a reference to a user_info structure
+ * @ui: The user_info to reference
+ *
+ * This gets a reference to a set of supplementary users.
+ *
+ * If the caller is accessing a task's credentials, they must hold the RCU read
+ * lock when reading.
+ */
+static inline struct user_info *get_user_info(struct user_info *ui)
+{
+       atomic_inc(&ui->usage);
+       return ui;
+}
+
+static inline void get_cred_user_info(struct cred *cred)
+{
+       get_user_info(cred->user_info);
+}
+
+/**
+ * put_user_info - Release a reference to a user_info structure
+ * @ui: The user_info to release
+ */
+static inline void put_user_info(struct user_info *ui)
+{
+       if (atomic_dec_and_test(&ui->usage))
+               users_free(ui);
+}
+
+static inline void put_cred_user_info(struct cred *cred)
+{
+       if (cred->user_info)
+               put_user_info(cred->user_info);
+}
+#else /* CONFIG_SUPPLEMENTARY_UIDS */
+static inline bool has_supplementary_uid(kuid_t uid)
+{
+       return false;
+}
+static inline void get_cred_user_info(struct cred *cred) {}
+static inline void put_cred_user_info(struct cred *cred) {}
+#endif
+
 #endif /* _LINUX_CRED_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bda9b81..3bde665 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -232,6 +232,7 @@ asmlinkage long sys_getpgid(pid_t pid);
 asmlinkage long sys_getpgrp(void);
 asmlinkage long sys_getsid(pid_t pid);
 asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist);
+asmlinkage long sys_getusers(int uidsetsize, uid_t __user *userlist);
 
 asmlinkage long sys_setregid(gid_t rgid, gid_t egid);
 asmlinkage long sys_setgid(gid_t gid);
@@ -244,6 +245,7 @@ asmlinkage long sys_setfsgid(gid_t gid);
 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid);
 asmlinkage long sys_setsid(void);
 asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist);
+asmlinkage long sys_setusers(int uidsetsize, uid_t __user *grouplist);
 
 asmlinkage long sys_acct(const char __user *name);
 asmlinkage long sys_capget(cap_user_header_t header,
diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h
index 22749c1..d6696cf 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -707,9 +707,13 @@ __SYSCALL(__NR_getrandom, sys_getrandom)
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_getusers 281
+__SYSCALL(__NR_getusers, sys_getusers)
+#define __NR_setusers 282
+__SYSCALL(__NR_setusers, sys_setusers)
 
 #undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 283
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h
index 2d0f941..bae1b4c 100644
--- a/include/uapi/linux/limits.h
+++ b/include/uapi/linux/limits.h
@@ -4,6 +4,7 @@
 #define NR_OPEN                1024
 
 #define NGROUPS_MAX    65536   /* supplemental group IDs are available */
+#define NUSERS_MAX     65536   /* supplemental user IDs available */
 #define ARG_MAX       131072   /* # bytes of args + environ for exec() */
 #define LINK_MAX         127   /* # links a file may have */
 #define MAX_CANON        255   /* size of the canonical input queue */
diff --git a/init/Kconfig b/init/Kconfig
index 3ee28ae..d85b159 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1358,6 +1358,15 @@ config UID16
        help
          This enables the legacy 16-bit UID syscall wrappers.
 
+config SUPPLEMENTARY_UIDS
+       bool "Enable supplementary UIDs and system calls" if EXPERT
+       default y
+       help
+         This option adds a list of supplementary UIDs to each process, along
+         with system calls to manage that list.  If building an embedded
+         system where no applications use this functionality, you can disable
+         this option to save space.
+
 config SGETMASK_SYSCALL
        bool "sgetmask/ssetmask syscalls support" if EXPERT
        def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || 
SPARC || CRIS || MICROBLAZE || SUPERH
diff --git a/kernel/cred.c b/kernel/cred.c
index e0573a4..1700a03 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -54,6 +54,7 @@ struct cred init_cred = {
        .user                   = INIT_USER,
        .user_ns                = &init_user_ns,
        .group_info             = &init_groups,
+       INIT_USER_INFO
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -112,6 +113,7 @@ static void put_cred_rcu(struct rcu_head *rcu)
        key_put(cred->request_key_auth);
        if (cred->group_info)
                put_group_info(cred->group_info);
+       put_cred_user_info(cred);
        free_uid(cred->user);
        put_user_ns(cred->user_ns);
        kmem_cache_free(cred_jar, cred);
@@ -252,6 +254,7 @@ struct cred *prepare_creds(void)
        atomic_set(&new->usage, 1);
        set_cred_subscribers(new, 0);
        get_group_info(new->group_info);
+       get_cred_user_info(new);
        get_uid(new->user);
        get_user_ns(new->user_ns);
 
@@ -607,6 +610,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
        get_uid(new->user);
        get_user_ns(new->user_ns);
        get_group_info(new->group_info);
+       get_cred_user_info(new);
 
 #ifdef CONFIG_KEYS
        new->session_keyring = NULL;
diff --git a/kernel/groups.c b/kernel/groups.c
index 451698f..d5de27d 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -269,3 +269,176 @@ int in_egroup_p(kgid_t grp)
 }
 
 EXPORT_SYMBOL(in_egroup_p);
+
+#ifdef CONFIG_SUPPLEMENTARY_UIDS
+/* init to 2 - one for init_task, one to ensure it is never freed */
+struct user_info init_users = { .usage = ATOMIC_INIT(2) };
+
+static struct user_info *users_alloc(int uidsetsize)
+{
+       return (struct user_info *)groups_alloc(uidsetsize);
+}
+
+void users_free(struct user_info *user_info)
+{
+       groups_free((struct group_info *)user_info);
+}
+
+/* export the user_info to a user-space array */
+static int users_to_user(uid_t __user *userlist,
+                        const struct user_info *user_info)
+{
+       struct user_namespace *user_ns = current_user_ns();
+       int i;
+       unsigned int count = user_info->nusers;
+
+       for (i = 0; i < count; i++) {
+               uid_t uid;
+               uid = from_kuid_munged(user_ns, USER_AT(user_info, i));
+               if (put_user(uid, userlist+i))
+                       return -EFAULT;
+       }
+       return 0;
+}
+
+/* fill a user_info from a user-space array - it must be allocated already */
+static int users_from_user(struct user_info *user_info, uid_t __user *userlist)
+{
+       struct user_namespace *user_ns = current_user_ns();
+       int i;
+       unsigned int count = user_info->nusers;
+
+       for (i = 0; i < count; i++) {
+               uid_t uid;
+               kuid_t kuid;
+               if (get_user(uid, userlist+i))
+                       return -EFAULT;
+
+               kuid = make_kuid(user_ns, uid);
+               if (!uid_valid(kuid))
+                       return -EINVAL;
+
+               USER_AT(user_info, i) = kuid;
+       }
+       return 0;
+}
+
+static void users_sort(struct user_info *user_info)
+{
+       groups_sort((struct group_info *)user_info);
+}
+
+static bool users_search(const struct user_info *user_info, kuid_t uid)
+{
+       return groups_search((const struct group_info *)user_info, *(kgid_t 
*)&uid);
+}
+
+/* Return true if the user_info is a subset of the user_info of the specified
+ * credentials.  Also allow the first user_info to contain the uid, euid, or
+ * suid of the credentials.
+ */
+static bool user_subset(const struct user_info *u1, const struct cred *cred2)
+{
+       const struct user_info *u2 = cred2->user_info;
+       unsigned int i, j;
+
+       for (i = 0, j = 0; i < u1->nusers; i++) {
+               kuid_t uid1 = USER_AT(u1, i);
+               kuid_t uid2;
+               for (; j < u2->nusers; j++) {
+                       uid2 = USER_AT(u2, j);
+                       if (uid_lte(uid1, uid2))
+                               break;
+               }
+               if (j >= u2->nusers || !uid_eq(uid1, uid2)) {
+                       if (!uid_eq(uid1, cred2->uid)
+                           && !uid_eq(uid1, cred2->euid)
+                           && !uid_eq(uid1, cred2->suid))
+                               return false;
+               } else {
+                       j++;
+               }
+       }
+
+       return true;
+}
+
+/**
+ * set_current_users - Change current's supplementary user list
+ * @user_info: The user list to impose
+ *
+ * Validate a user list and, if valid, impose it upon current's task
+ * security record.
+ */
+int set_current_users(struct user_info *user_info)
+{
+       struct cred *new;
+
+       users_sort(user_info);
+       new = prepare_creds();
+       if (!new)
+               return -ENOMEM;
+       if (!(ns_capable(current_user_ns(), CAP_SETUID)
+             || user_subset(user_info, new))) {
+               abort_creds(new);
+               return -EPERM;
+       }
+
+       put_user_info(new->user_info);
+       get_user_info(user_info);
+       new->user_info = user_info;
+       return commit_creds(new);
+}
+
+SYSCALL_DEFINE2(getusers, int, uidsetsize, uid_t __user *, userlist)
+{
+       const struct cred *cred = current_cred();
+       int i;
+
+       if (uidsetsize < 0)
+               return -EINVAL;
+
+       /* no need to grab task_lock here; it cannot change */
+       i = cred->user_info->nusers;
+       if (uidsetsize) {
+               if (i > uidsetsize) {
+                       i = -EINVAL;
+                       goto out;
+               }
+               if (users_to_user(userlist, cred->user_info)) {
+                       i = -EFAULT;
+                       goto out;
+               }
+       }
+out:
+       return i;
+}
+
+SYSCALL_DEFINE2(setusers, int, uidsetsize, uid_t __user *, userlist)
+{
+       struct user_info *user_info;
+       int retval;
+
+       if ((unsigned)uidsetsize > NUSERS_MAX)
+               return -EINVAL;
+
+       user_info = users_alloc(uidsetsize);
+       if (!user_info)
+               return -ENOMEM;
+       retval = users_from_user(user_info, userlist);
+       if (retval) {
+               put_user_info(user_info);
+               return retval;
+       }
+
+       retval = set_current_users(user_info);
+       put_user_info(user_info);
+
+       return retval;
+}
+
+bool has_supplementary_uid(kuid_t uid)
+{
+       return users_search(current_cred()->user_info, uid);
+}
+#endif /* CONFIG_SUPPLEMENTARY_UIDS */
diff --git a/kernel/sys.c b/kernel/sys.c
index 1eaa2f0..412dda9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -472,7 +472,8 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
                new->uid = kruid;
                if (!uid_eq(old->uid, kruid) &&
                    !uid_eq(old->euid, kruid) &&
-                   !ns_capable(old->user_ns, CAP_SETUID))
+                   !ns_capable(old->user_ns, CAP_SETUID) &&
+                   !has_supplementary_uid(kruid))
                        goto error;
        }
 
@@ -481,7 +482,8 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
                if (!uid_eq(old->uid, keuid) &&
                    !uid_eq(old->euid, keuid) &&
                    !uid_eq(old->suid, keuid) &&
-                   !ns_capable(old->user_ns, CAP_SETUID))
+                   !ns_capable(old->user_ns, CAP_SETUID) &&
+                   !has_supplementary_uid(keuid))
                        goto error;
        }
 
@@ -542,7 +544,8 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
                        if (retval < 0)
                                goto error;
                }
-       } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
+       } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid) &&
+                  !has_supplementary_uid(kuid)) {
                goto error;
        }
 
@@ -594,13 +597,16 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, 
uid_t, suid)
        retval = -EPERM;
        if (!ns_capable(old->user_ns, CAP_SETUID)) {
                if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
-                   !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
+                   !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid) &&
+                   !has_supplementary_uid(kruid))
                        goto error;
                if (euid != (uid_t) -1        && !uid_eq(keuid, old->uid) &&
-                   !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
+                   !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid) &&
+                   !has_supplementary_uid(keuid))
                        goto error;
                if (suid != (uid_t) -1        && !uid_eq(ksuid, old->uid) &&
-                   !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
+                   !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid) &&
+                   !has_supplementary_uid(ksuid))
                        goto error;
        }
 
@@ -750,7 +756,8 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 
        if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
            uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
-           ns_capable(old->user_ns, CAP_SETUID)) {
+           ns_capable(old->user_ns, CAP_SETUID) ||
+           has_supplementary_uid(kuid)) {
                if (!uid_eq(kuid, old->fsuid)) {
                        new->fsuid = kuid;
                        if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 
0)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 02aa418..a8a8f02 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -159,6 +159,8 @@ cond_syscall(sys_uselib);
 cond_syscall(sys_fadvise64);
 cond_syscall(sys_fadvise64_64);
 cond_syscall(sys_madvise);
+cond_syscall(sys_getusers);
+cond_syscall(sys_setusers);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
-- 
2.1.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to