Ping.

> On 21 Apr 2023, at 17:17, Vitaliy Makkoveev <m...@openbsd.org> wrote:
> 
> At least network subset of sysctl(8) MIBs relies on netlock or another
> locks and doesn't require kernel lock. Also some integers in other
> subsets can be read without kernel lock held.
> 
> Diff below actually pushes kernel lock down to net_sysctl(). It is
> required for MPLS and PFLOW cases. The protocol *_sysctls() are left
> under kernel lock to make the review of this diff easier. I want to
> sequentially unlock them later. The rest of cases are easy to review.
> net_lisnk_sysctl() only calls net_ifq_sysctl() which only returns error.
> unp_sysctl() does access to integer variables, which are already
> accessed in uipc_attach() without kernel lock held. pipex_sysctl() also
> accesses integer variable which is lockless accessed in the
> pppx(4)/pipex(4) layer.
> 
> ok?
> 
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.411
> diff -u -p -r1.411 kern_sysctl.c
> --- sys/kern/kern_sysctl.c    22 Jan 2023 12:05:44 -0000      1.411
> +++ sys/kern/kern_sysctl.c    21 Apr 2023 13:48:13 -0000
> @@ -168,7 +168,7 @@ sys_sysctl(struct proc *p, void *v, regi
>               syscallarg(void *) new;
>               syscallarg(size_t) newlen;
>       } */ *uap = v;
> -     int error, dolock = 1;
> +     int error, dokernellock = 1, dolock = 1;
>       size_t savelen = 0, oldlen = 0;
>       sysctlfn *fn;
>       int name[CTL_MAXNAME];
> @@ -203,6 +203,7 @@ sys_sysctl(struct proc *p, void *v, regi
>               break;
>       case CTL_NET:
>               fn = net_sysctl;
> +             dokernellock = 0;
>               break;
>       case CTL_FS:
>               fn = fs_sysctl;
> @@ -230,19 +231,22 @@ sys_sysctl(struct proc *p, void *v, regi
>       if (SCARG(uap, oldlenp) &&
>           (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
>               return (error);
> +     if (dokernellock)
> +             KERNEL_LOCK();
>       if (SCARG(uap, old) != NULL) {
>               if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
> -                     return (error);
> +                     goto unlock;
>               if (dolock) {
>                       if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
>                               rw_exit_write(&sysctl_lock);
> -                             return (ENOMEM);
> +                             error = ENOMEM;
> +                             goto unlock;
>                       }
>                       error = uvm_vslock(p, SCARG(uap, old), oldlen,
>                           PROT_READ | PROT_WRITE);
>                       if (error) {
>                               rw_exit_write(&sysctl_lock);
> -                             return (error);
> +                             goto unlock;
>                       }
>               }
>               savelen = oldlen;
> @@ -254,6 +258,9 @@ sys_sysctl(struct proc *p, void *v, regi
>                       uvm_vsunlock(p, SCARG(uap, old), savelen);
>               rw_exit_write(&sysctl_lock);
>       }
> +unlock:
> +     if (dokernellock)
> +             KERNEL_UNLOCK();
>       if (error)
>               return (error);
>       if (SCARG(uap, oldlenp))
> Index: sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.246
> diff -u -p -r1.246 syscalls.master
> --- sys/kern/syscalls.master  25 Feb 2023 09:55:46 -0000      1.246
> +++ sys/kern/syscalls.master  21 Apr 2023 13:48:13 -0000
> @@ -361,7 +361,7 @@
> 199   OBSOL           pad_lseek
> 200   OBSOL           pad_truncate
> 201   OBSOL           pad_ftruncate
> -202  STD             { int sys_sysctl(const int *name, u_int namelen, \
> +202  STD NOLOCK      { int sys_sysctl(const int *name, u_int namelen, \
>                           void *old, size_t *oldlenp, void *new, \
>                           size_t newlen); }
> 203   STD             { int sys_mlock(const void *addr, size_t len); }
> Index: sys/kern/uipc_domain.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_domain.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 uipc_domain.c
> --- sys/kern/uipc_domain.c    14 Aug 2022 01:58:28 -0000      1.60
> +++ sys/kern/uipc_domain.c    21 Apr 2023 13:48:13 -0000
> @@ -183,8 +183,8 @@ net_link_sysctl(int *name, u_int namelen
> }
> 
> int
> -net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> -    size_t newlen, struct proc *p)
> +net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> +    void *newp, size_t newlen, struct proc *p)
> {
>       const struct domain *dp;
>       const struct protosw *pr;
> @@ -213,9 +213,13 @@ net_sysctl(int *name, u_int namelen, voi
>                   newp, newlen));
> #endif
> #if NPFLOW > 0
> -     if (family == PF_PFLOW)
> -             return (pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> -                 newp, newlen));
> +     if (family == PF_PFLOW) {
> +             KERNEL_LOCK();
> +             error = pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> +                 newp, newlen);
> +             KERNEL_UNLOCK();
> +             return (error);
> +     }
> #endif
> #ifdef PIPEX
>       if (family == PF_PIPEX)
> @@ -223,9 +227,13 @@ net_sysctl(int *name, u_int namelen, voi
>                   newp, newlen));
> #endif
> #ifdef MPLS
> -     if (family == PF_MPLS)
> -             return (mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> -                 newp, newlen));
> +     if (family == PF_MPLS) {
> +             KERNEL_LOCK();
> +             error = mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> +                 newp, newlen);
> +             KERNEL_UNLOCK();
> +             return (error);
> +     }
> #endif
>       dp = pffinddomain(family);
>       if (dp == NULL)
> @@ -236,8 +244,10 @@ net_sysctl(int *name, u_int namelen, voi
>       protocol = name[1];
>       for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
>               if (pr->pr_protocol == protocol && pr->pr_sysctl) {
> +                     KERNEL_LOCK();
>                       error = (*pr->pr_sysctl)(name + 2, namelen - 2,
>                           oldp, oldlenp, newp, newlen);
> +                     KERNEL_UNLOCK();
>                       return (error);
>               }
>       return (ENOPROTOOPT);

Reply via email to