To count the number of user processes use the counter bound to the user
in the user namespace.

Signed-off-by: Alexey Gladkov <[email protected]>
---
 fs/exec.c                      |  7 ++++---
 fs/io-wq.c                     | 14 +++++++++-----
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h |  8 ++++++++
 kernel/cred.c                  |  8 ++++----
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  9 +++++----
 kernel/sys.c                   |  6 ++++--
 kernel/ucount.c                | 34 ++++++++++++++++++++++++++++++++++
 kernel/user.c                  |  3 ++-
 10 files changed, 71 insertions(+), 21 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a70327..3f2071f7b9c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1826,19 +1826,20 @@ static int __do_execve_file(int fd, struct filename 
*filename,
        char *pathbuf = NULL;
        struct linux_binprm *bprm;
        struct files_struct *displaced;
-       int retval;
+       int retval, processes;
 
        if (IS_ERR(filename))
                return PTR_ERR(filename);
 
+       processes = get_rlimit_counter(&init_user_ns, current_euid(), 
UCOUNT_RLIMIT_NPROC);
+
        /*
         * We move the actual failure in case of RLIMIT_NPROC excess from
         * set*uid() to execve() because too many poorly written programs
         * don't check setuid() return code.  Here we additionally recheck
         * whether NPROC limit is still exceeded.
         */
-       if ((current->flags & PF_NPROC_EXCEEDED) &&
-           atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+       if ((current->flags & PF_NPROC_EXCEEDED) && processes > 
rlimit(RLIMIT_NPROC)) {
                retval = -EAGAIN;
                goto out_ret;
        }
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 47c5f3aeb460..6170aee986db 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -17,6 +17,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/fs_struct.h>
 #include <linux/task_work.h>
+#include <linux/user_namespace.h>
 
 #include "io-wq.h"
 
@@ -216,7 +217,7 @@ static void io_worker_exit(struct io_worker *worker)
        if (worker->flags & IO_WORKER_F_RUNNING)
                atomic_dec(&acct->nr_running);
        if (!(worker->flags & IO_WORKER_F_BOUND))
-               atomic_dec(&wqe->wq->user->processes);
+               dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC);
        worker->flags = 0;
        preempt_enable();
 
@@ -349,12 +350,12 @@ static void __io_worker_busy(struct io_wqe *wqe, struct 
io_worker *worker,
                        worker->flags |= IO_WORKER_F_BOUND;
                        wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
                        wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
-                       atomic_dec(&wqe->wq->user->processes);
+                       dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC);
                } else {
                        worker->flags &= ~IO_WORKER_F_BOUND;
                        wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
                        wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
-                       atomic_inc(&wqe->wq->user->processes);
+                       inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC);
                }
                io_wqe_inc_running(wqe, worker);
         }
@@ -671,7 +672,7 @@ static bool create_io_worker(struct io_wq *wq, struct 
io_wqe *wqe, int index)
        spin_unlock_irq(&wqe->lock);
 
        if (index == IO_WQ_ACCT_UNBOUND)
-               atomic_inc(&wq->user->processes);
+               inc_rlimit_counter(&init_user_ns, wq->user->uid, 
UCOUNT_RLIMIT_NPROC);
 
        wake_up_process(worker->task);
        return true;
@@ -754,6 +755,7 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct 
io_wqe_acct *acct,
                            struct io_wq_work *work)
 {
        bool free_worker;
+       int processes;
 
        if (!(work->flags & IO_WQ_WORK_UNBOUND))
                return true;
@@ -766,7 +768,9 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct 
io_wqe_acct *acct,
        if (free_worker)
                return true;
 
-       if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
+       processes = get_rlimit_counter(&init_user_ns, wqe->wq->user->uid, 
UCOUNT_RLIMIT_NPROC);
+
+       if (processes >= acct->max_workers &&
            !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
                return false;
 
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 917d88edb7b9..38e122bc3d07 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
        refcount_t __count;     /* reference count */
-       atomic_t processes;     /* How many processes does this user have? */
        atomic_t sigpending;    /* How many pending signals does this user 
have? */
 #ifdef CONFIG_FANOTIFY
        atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index fc75af812d73..6d9d180b2c9d 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,13 @@ enum ucount_type {
        UCOUNT_INOTIFY_INSTANCES,
        UCOUNT_INOTIFY_WATCHES,
 #endif
+       UCOUNT_RLIMIT_NPROC,
        UCOUNT_COUNTS,
 };
 
+#define UCOUNT_MIN_RLIMIT UCOUNT_RLIMIT_NPROC
+#define UCOUNT_MAX_RLIMIT UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
        struct uid_gid_map      uid_map;
        struct uid_gid_map      gid_map;
@@ -104,6 +108,10 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type);
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type);
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..b6694700e760 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -351,7 +351,7 @@ int copy_creds(struct task_struct *p, unsigned long 
clone_flags)
                kdebug("share_creds(%p{%d,%d})",
                       p->cred, atomic_read(&p->cred->usage),
                       read_cred_subscribers(p->cred));
-               atomic_inc(&p->cred->user->processes);
+               inc_rlimit_counter(&init_user_ns, task_euid(p), 
UCOUNT_RLIMIT_NPROC);
                return 0;
        }
 
@@ -384,7 +384,7 @@ int copy_creds(struct task_struct *p, unsigned long 
clone_flags)
        }
 #endif
 
-       atomic_inc(&new->user->processes);
+       inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
        p->cred = p->real_cred = get_cred(new);
        alter_cred_subscribers(new, 2);
        validate_creds(new);
@@ -486,11 +486,11 @@ int commit_creds(struct cred *new)
         */
        alter_cred_subscribers(new, 2);
        if (new->user != old->user)
-               atomic_inc(&new->user->processes);
+               inc_rlimit_counter(&init_user_ns, new->euid, 
UCOUNT_RLIMIT_NPROC);
        rcu_assign_pointer(task->real_cred, new);
        rcu_assign_pointer(task->cred, new);
        if (new->user != old->user)
-               atomic_dec(&old->user->processes);
+               dec_rlimit_counter(&init_user_ns, old->euid, 
UCOUNT_RLIMIT_NPROC);
        alter_cred_subscribers(old, -2);
 
        /* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..5a0d7dd1ad64 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -187,7 +187,7 @@ void release_task(struct task_struct *p)
        /* don't need to get the RCU readlock here - the process is dead and
         * can't be modifying its own credentials. But shut RCU-lockdep up */
        rcu_read_lock();
-       atomic_dec(&__task_cred(p)->user->processes);
+       dec_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
        rcu_read_unlock();
 
        cgroup_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index efc5493203ae..2bc8bd45179f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1844,7 +1844,7 @@ static __latent_entropy struct task_struct *copy_process(
                                        int node,
                                        struct kernel_clone_args *args)
 {
-       int pidfd = -1, retval;
+       int pidfd = -1, retval, processes;
        struct task_struct *p;
        struct multiprocess_signals delayed;
        struct file *pidfile = NULL;
@@ -1958,9 +1958,10 @@ static __latent_entropy struct task_struct *copy_process(
        DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
        DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
+       processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid,
+                       UCOUNT_RLIMIT_NPROC);
        retval = -EAGAIN;
-       if (atomic_read(&p->real_cred->user->processes) >=
-                       task_rlimit(p, RLIMIT_NPROC)) {
+       if (processes >= task_rlimit(p, RLIMIT_NPROC)) {
                if (p->real_cred->user != INIT_USER &&
                    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
                        goto bad_fork_free;
@@ -2361,7 +2362,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
        delayacct_tsk_free(p);
 bad_fork_cleanup_count:
-       atomic_dec(&p->cred->user->processes);
+       dec_rlimit_counter(&init_user_ns, p->cred->euid, UCOUNT_RLIMIT_NPROC);
        exit_creds(p);
 bad_fork_free:
        p->state = TASK_DEAD;
diff --git a/kernel/sys.c b/kernel/sys.c
index 00a96746e28a..db780ec32d86 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -461,11 +461,14 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
 static int set_user(struct cred *new)
 {
        struct user_struct *new_user;
+       int processes;
 
        new_user = alloc_uid(new->uid);
        if (!new_user)
                return -EAGAIN;
 
+       processes = get_rlimit_counter(&init_user_ns, new_user->uid, 
UCOUNT_RLIMIT_NPROC);
+
        /*
         * We don't fail in case of NPROC limit excess here because too many
         * poorly written programs don't check set*uid() return code, assuming
@@ -473,8 +476,7 @@ static int set_user(struct cred *new)
         * for programs doing set*uid()+execve() by harmlessly deferring the
         * failure to the execve() stage.
         */
-       if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
-                       new_user != INIT_USER)
+       if (processes >= rlimit(RLIMIT_NPROC) && new_user != INIT_USER)
                current->flags |= PF_NPROC_EXCEEDED;
        else
                current->flags &= ~PF_NPROC_EXCEEDED;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 7b2bca8582ef..e00d644e4ca5 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -74,6 +74,7 @@ static struct ctl_table user_table[] = {
        UCOUNT_ENTRY("max_inotify_instances"),
        UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+       { },
        { }
 };
 #endif /* CONFIG_SYSCTL */
@@ -222,6 +223,39 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type 
type)
        put_ucounts(ucounts);
 }
 
+long get_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type)
+{
+       long v;
+       struct ucounts *ucounts = get_ucounts(ns, uid);
+       if (!ucounts)
+               return LONG_MAX;
+       v = atomic_long_read(&ucounts->ucount[type]);
+       put_ucounts(ucounts);
+       return v;
+}
+
+struct ucounts *inc_rlimit_counter(struct user_namespace *ns, kuid_t uid,
+               enum ucount_type type)
+{
+       if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+               return NULL;
+
+       return inc_ucount(ns, uid, type);
+}
+
+void dec_rlimit_counter(struct user_namespace *ns, kuid_t uid, enum 
ucount_type type)
+{
+       struct ucounts *ucounts;
+
+       if (type < UCOUNT_MIN_RLIMIT || type > UCOUNT_MAX_RLIMIT)
+               return;
+
+       ucounts = get_ucounts(ns, uid);
+
+       if (ucounts)
+               dec_ucount(ucounts, type);
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
diff --git a/kernel/user.c b/kernel/user.c
index b1635d94a1f2..5bb75ebdef4f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
        .__count        = REFCOUNT_INIT(1),
-       .processes      = ATOMIC_INIT(1),
        .sigpending     = ATOMIC_INIT(0),
        .locked_shm     = 0,
        .uid            = GLOBAL_ROOT_UID,
@@ -224,6 +223,8 @@ static int __init uid_cache_init(void)
        uid_hash_insert(&root_user, uidhashentry(GLOBAL_ROOT_UID));
        spin_unlock_irq(&uidhash_lock);
 
+       inc_rlimit_counter(&init_user_ns, GLOBAL_ROOT_UID, UCOUNT_RLIMIT_NPROC);
+
        return 0;
 }
 subsys_initcall(uid_cache_init);
-- 
2.25.4

Reply via email to