Signed-off-by: Yun Zhou <[email protected]>
---
 include/linux/pid_namespace.h |  1 +
 kernel/pid.c                  | 12 ++++++------
 kernel/pid_namespace.c        | 33 ++++++++++++++++++++++++++++-----
 kernel/sysctl.c               |  9 ---------
 kernel/trace/pid_list.c       |  2 +-
 kernel/trace/trace.c          |  2 +-
 kernel/trace/trace.h          |  2 --
 7 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f9f9931e02d6..0e3c18f3cac5 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -27,6 +27,7 @@ struct pid_namespace {
        struct idr idr;
        struct rcu_head rcu;
        unsigned int pid_allocated;
+       int pid_max;
        struct task_struct *child_reaper;
        struct kmem_cache *pid_cachep;
        unsigned int level;
diff --git a/kernel/pid.c b/kernel/pid.c
index 6500ef956f2f..14da3f68ceed 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -59,8 +59,6 @@ struct pid init_struct_pid = {
        }, }
 };
 
-int pid_max = PID_MAX_DEFAULT;
-
 #define RESERVED_PIDS          300
 
 int pid_max_min = RESERVED_PIDS + 1;
@@ -74,6 +72,7 @@ int pid_max_max = PID_MAX_LIMIT;
  */
 struct pid_namespace init_pid_ns = {
        .ns.count = REFCOUNT_INIT(2),
+       .pid_max = PID_MAX_DEFAULT,
        .idr = IDR_INIT(init_pid_ns.idr),
        .pid_allocated = PIDNS_ADDING,
        .level = 0,
@@ -194,7 +193,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
                        tid = set_tid[ns->level - i];
 
                        retval = -EINVAL;
-                       if (tid < 1 || tid >= pid_max)
+                       if (tid < 1 || tid >= tmp->pid_max)
                                goto out_free;
                        /*
                         * Also fail if a PID != 1 is requested and
@@ -234,7 +233,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
                         * a partially initialized PID (see below).
                         */
                        nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
-                                             pid_max, GFP_ATOMIC);
+                                             tmp->pid_max, GFP_ATOMIC);
                }
                spin_unlock_irq(&pidmap_lock);
                idr_preload_end();
@@ -651,11 +650,12 @@ void __init pid_idr_init(void)
        BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
-       pid_max = min(pid_max_max, max_t(int, pid_max,
+       init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max,
                                PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
        pid_max_min = max_t(int, pid_max_min,
                                PIDS_PER_CPU_MIN * num_possible_cpus());
-       pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
+       pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max,
+                       pid_max_min);
 
        idr_init(&init_pid_ns.idr);
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 3028b2218aa4..d6b3f34ecb25 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -110,6 +110,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
        ns->user_ns = get_user_ns(user_ns);
        ns->ucounts = ucounts;
        ns->pid_allocated = PIDNS_ADDING;
+       ns->pid_max = parent_pid_ns->pid_max;
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
        ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
 #endif
@@ -295,20 +296,44 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 
        return ret;
 }
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+
+/* Per-namespace "pid_max" sysctl: reads/writes the caller's pid namespace. */
+static int pid_max_ns_ctl_handler(struct ctl_table *table, int write,
+               void *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct pid_namespace *pid_ns = task_active_pid_ns(current);
+       struct ctl_table tmp = *table;  /* shared entry: edit a copy only */
+
+       if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns))
+               return -EPERM;
+       tmp.data = &pid_ns->pid_max;
+       if (pid_ns->parent)     /* a child is capped at its parent's limit */
+               tmp.extra2 = &pid_ns->parent->pid_max;
+       return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+}
 
-extern int pid_max;
 static struct ctl_table pid_ns_ctl_table[] = {
+#ifdef CONFIG_CHECKPOINT_RESTORE
        {
                .procname = "ns_last_pid",
                .maxlen = sizeof(int),
                .mode = 0666, /* permissions are checked in the handler */
                .proc_handler = pid_ns_ctl_handler,
                .extra1 = SYSCTL_ZERO,
-               .extra2 = &pid_max,
+               .extra2 = &init_pid_ns.pid_max, /* NOTE(review): init ns limit, not the caller's ns - confirm intended */
+       },
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+       {
+               .procname       = "pid_max",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = pid_max_ns_ctl_handler, /* supplies .data per call */
+               .extra1         = &pid_max_min,
+               .extra2         = &pid_max_max, /* handler substitutes the parent's pid_max for child namespaces */
        },
        { }
 };
-#endif /* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -465,9 +490,7 @@ static __init int pid_namespaces_init(void)
 {
        pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC | SLAB_ACCOUNT);
 
-#ifdef CONFIG_CHECKPOINT_RESTORE
        register_sysctl_init("kernel", pid_ns_ctl_table);
-#endif
 
        register_pid_ns_sysctl_table_vm();
        return 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 157f7ce2942d..857bfdb39b15 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1809,15 +1809,6 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = proc_dointvec,
        },
 #endif
-       {
-               .procname       = "pid_max",
-               .data           = &pid_max,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &pid_max_min,
-               .extra2         = &pid_max_max,
-       },
        {
                .procname       = "panic_on_oops",
                .data           = &panic_on_oops,
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 95106d02b32d..ef52820e6719 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
        int i;
 
        /* According to linux/thread.h, pids can be no bigger that 30 bits */
-       WARN_ON_ONCE(pid_max > (1 << 30));
+       WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));
 
        pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
        if (!pid_list)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fbcd3bafb93e..6295679ce16c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5415,7 +5415,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 
        if (mask == TRACE_ITER_RECORD_TGID) {
                if (!tgid_map) {
-                       tgid_map_max = pid_max;
+                       tgid_map_max = init_pid_ns.pid_max;
                        map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
                                       GFP_KERNEL);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b7f4ea25a194..df61b1db86a2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -700,8 +700,6 @@ extern unsigned long tracing_thresh;
 
 /* PID filtering */
 
-extern int pid_max;
-
 bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
                             pid_t search_pid);
 bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
-- 
2.27.0


Reply via email to