On Mon, 2 Sep 2024 19:49:20 +0800
Yun Zhou <[email protected]> wrote:


-ENOCHANGELOG

What? Why? Why should I care about this?

A change log *must* have all the information to say why this change is
necessary. It's OK for the subject to state what it is doing, but there
most definitely needs to be a "why?" in the change log.

-- Steve


> Signed-off-by: Yun Zhou <[email protected]>
> ---
>  include/linux/pid_namespace.h |  1 +
>  kernel/pid.c                  | 12 ++++++------
>  kernel/pid_namespace.c        | 33 ++++++++++++++++++++++++++++-----
>  kernel/sysctl.c               |  9 ---------
>  kernel/trace/pid_list.c       |  2 +-
>  kernel/trace/trace.c          |  2 +-
>  kernel/trace/trace.h          |  2 --
>  7 files changed, 37 insertions(+), 24 deletions(-)
> 
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index f9f9931e02d6..0e3c18f3cac5 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -27,6 +27,7 @@ struct pid_namespace {
>       struct idr idr;
>       struct rcu_head rcu;
>       unsigned int pid_allocated;
> +     int pid_max;
>       struct task_struct *child_reaper;
>       struct kmem_cache *pid_cachep;
>       unsigned int level;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 6500ef956f2f..14da3f68ceed 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -59,8 +59,6 @@ struct pid init_struct_pid = {
>       }, }
>  };
>  
> -int pid_max = PID_MAX_DEFAULT;
> -
>  #define RESERVED_PIDS                300
>  
>  int pid_max_min = RESERVED_PIDS + 1;
> @@ -74,6 +72,7 @@ int pid_max_max = PID_MAX_LIMIT;
>   */
>  struct pid_namespace init_pid_ns = {
>       .ns.count = REFCOUNT_INIT(2),
> +     .pid_max = PID_MAX_DEFAULT,
>       .idr = IDR_INIT(init_pid_ns.idr),
>       .pid_allocated = PIDNS_ADDING,
>       .level = 0,
> @@ -194,7 +193,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
>                       tid = set_tid[ns->level - i];
>  
>                       retval = -EINVAL;
> -                     if (tid < 1 || tid >= pid_max)
> +                     if (tid < 1 || tid >= tmp->pid_max)
>                               goto out_free;
>                       /*
>                        * Also fail if a PID != 1 is requested and
> @@ -234,7 +233,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
>                        * a partially initialized PID (see below).
>                        */
>                       nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
> -                                           pid_max, GFP_ATOMIC);
> +                                           tmp->pid_max, GFP_ATOMIC);
>               }
>               spin_unlock_irq(&pidmap_lock);
>               idr_preload_end();
> @@ -651,11 +650,12 @@ void __init pid_idr_init(void)
>       BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
>  
>       /* bump default and minimum pid_max based on number of cpus */
> -     pid_max = min(pid_max_max, max_t(int, pid_max,
> +     init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max,
>                               PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
>       pid_max_min = max_t(int, pid_max_min,
>                               PIDS_PER_CPU_MIN * num_possible_cpus());
> -     pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
> +     pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max,
> +                     pid_max_min);
>  
>       idr_init(&init_pid_ns.idr);
>  
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index 3028b2218aa4..d6b3f34ecb25 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -110,6 +110,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
>       ns->user_ns = get_user_ns(user_ns);
>       ns->ucounts = ucounts;
>       ns->pid_allocated = PIDNS_ADDING;
> +     ns->pid_max = parent_pid_ns->pid_max;
>  #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
>       ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
>  #endif
> @@ -295,20 +296,44 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
>  
>       return ret;
>  }
> +#endif       /* CONFIG_CHECKPOINT_RESTORE */
> +
> +static int pid_max_ns_ctl_handler(struct ctl_table *table, int write,
> +             void *buffer, size_t *lenp, loff_t *ppos)
> +{
> +     struct pid_namespace *pid_ns = task_active_pid_ns(current);
> +
> +     if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns))
> +             return -EPERM;
> +
> +     table->data = &pid_ns->pid_max;
> +     if (pid_ns->parent)
> +             table->extra2 = &pid_ns->parent->pid_max;
> +
> +     return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> +}
>  
> -extern int pid_max;
>  static struct ctl_table pid_ns_ctl_table[] = {
> +#ifdef CONFIG_CHECKPOINT_RESTORE
>       {
>               .procname = "ns_last_pid",
>               .maxlen = sizeof(int),
>               .mode = 0666, /* permissions are checked in the handler */
>               .proc_handler = pid_ns_ctl_handler,
>               .extra1 = SYSCTL_ZERO,
> -             .extra2 = &pid_max,
> +             .extra2 = &init_pid_ns.pid_max,
> +     },
> +#endif       /* CONFIG_CHECKPOINT_RESTORE */
> +     {
> +             .procname       = "pid_max",
> +             .maxlen         = sizeof(int),
> +             .mode           = 0644,
> +             .proc_handler   = pid_max_ns_ctl_handler,
> +             .extra1         = &pid_max_min,
> +             .extra2         = &pid_max_max,
>       },
>       { }
>  };
> -#endif       /* CONFIG_CHECKPOINT_RESTORE */
>  
>  int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
>  {
> @@ -465,9 +490,7 @@ static __init int pid_namespaces_init(void)
>  {
>       pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC | SLAB_ACCOUNT);
>  
> -#ifdef CONFIG_CHECKPOINT_RESTORE
>       register_sysctl_init("kernel", pid_ns_ctl_table);
> -#endif
>  
>       register_pid_ns_sysctl_table_vm();
>       return 0;
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 157f7ce2942d..857bfdb39b15 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1809,15 +1809,6 @@ static struct ctl_table kern_table[] = {
>               .proc_handler   = proc_dointvec,
>       },
>  #endif
> -     {
> -             .procname       = "pid_max",
> -             .data           = &pid_max,
> -             .maxlen         = sizeof (int),
> -             .mode           = 0644,
> -             .proc_handler   = proc_dointvec_minmax,
> -             .extra1         = &pid_max_min,
> -             .extra2         = &pid_max_max,
> -     },
>       {
>               .procname       = "panic_on_oops",
>               .data           = &panic_on_oops,
> diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
> index 95106d02b32d..ef52820e6719 100644
> --- a/kernel/trace/pid_list.c
> +++ b/kernel/trace/pid_list.c
> @@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
>       int i;
>  
>       /* According to linux/thread.h, pids can be no bigger that 30 bits */
> -     WARN_ON_ONCE(pid_max > (1 << 30));
> +     WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));
>  
>       pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
>       if (!pid_list)
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index fbcd3bafb93e..6295679ce16c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -5415,7 +5415,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
>  
>       if (mask == TRACE_ITER_RECORD_TGID) {
>               if (!tgid_map) {
> -                     tgid_map_max = pid_max;
> +                     tgid_map_max = init_pid_ns.pid_max;
>                       map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
>                                      GFP_KERNEL);
>  
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index b7f4ea25a194..df61b1db86a2 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -700,8 +700,6 @@ extern unsigned long tracing_thresh;
>  
>  /* PID filtering */
>  
> -extern int pid_max;
> -
>  bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
>                            pid_t search_pid);
>  bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,


Reply via email to