Ping.
> On 21 Apr 2023, at 17:17, Vitaliy Makkoveev <m...@openbsd.org> wrote:
>
> At least network subset of sysctl(8) MIBs relies on netlock or another
> locks and doesn't require kernel lock. Also some integers in other
> subsets can be read without kernel lock held.
>
> Diff below actually pushes kernel lock down to net_sysctl(). It is
> required for MPLS and PFLOW cases. The protocol *_sysctls() are left
> under kernel lock to make the review of this diff easier. I want to
> sequentially unlock them later. The rest of cases are easy to review.
> net_lisnk_sysctl() only calls net_ifq_sysctl() which only returns error.
> unp_sysctl() does access to integer variables, which are already
> accessed in uipc_attach() without kernel lock held. pipex_sysctl() also
> accesses integer variable which is lockless accessed in the
> pppx(4)/pipex(4) layer.
>
> ok?
>
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.411
> diff -u -p -r1.411 kern_sysctl.c
> --- sys/kern/kern_sysctl.c 22 Jan 2023 12:05:44 -0000 1.411
> +++ sys/kern/kern_sysctl.c 21 Apr 2023 13:48:13 -0000
> @@ -168,7 +168,7 @@ sys_sysctl(struct proc *p, void *v, regi
> syscallarg(void *) new;
> syscallarg(size_t) newlen;
> } */ *uap = v;
> - int error, dolock = 1;
> + int error, dokernellock = 1, dolock = 1;
> size_t savelen = 0, oldlen = 0;
> sysctlfn *fn;
> int name[CTL_MAXNAME];
> @@ -203,6 +203,7 @@ sys_sysctl(struct proc *p, void *v, regi
> break;
> case CTL_NET:
> fn = net_sysctl;
> + dokernellock = 0;
> break;
> case CTL_FS:
> fn = fs_sysctl;
> @@ -230,19 +231,22 @@ sys_sysctl(struct proc *p, void *v, regi
> if (SCARG(uap, oldlenp) &&
> (error = copyin(SCARG(uap, oldlenp), &oldlen, sizeof(oldlen))))
> return (error);
> + if (dokernellock)
> + KERNEL_LOCK();
> if (SCARG(uap, old) != NULL) {
> if ((error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR)) != 0)
> - return (error);
> + goto unlock;
> if (dolock) {
> if (atop(oldlen) > uvmexp.wiredmax - uvmexp.wired) {
> rw_exit_write(&sysctl_lock);
> - return (ENOMEM);
> + error = ENOMEM;
> + goto unlock;
> }
> error = uvm_vslock(p, SCARG(uap, old), oldlen,
> PROT_READ | PROT_WRITE);
> if (error) {
> rw_exit_write(&sysctl_lock);
> - return (error);
> + goto unlock;
> }
> }
> savelen = oldlen;
> @@ -254,6 +258,9 @@ sys_sysctl(struct proc *p, void *v, regi
> uvm_vsunlock(p, SCARG(uap, old), savelen);
> rw_exit_write(&sysctl_lock);
> }
> +unlock:
> + if (dokernellock)
> + KERNEL_UNLOCK();
> if (error)
> return (error);
> if (SCARG(uap, oldlenp))
> Index: sys/kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.246
> diff -u -p -r1.246 syscalls.master
> --- sys/kern/syscalls.master 25 Feb 2023 09:55:46 -0000 1.246
> +++ sys/kern/syscalls.master 21 Apr 2023 13:48:13 -0000
> @@ -361,7 +361,7 @@
> 199 OBSOL pad_lseek
> 200 OBSOL pad_truncate
> 201 OBSOL pad_ftruncate
> -202 STD { int sys_sysctl(const int *name, u_int namelen, \
> +202 STD NOLOCK { int sys_sysctl(const int *name, u_int namelen, \
> void *old, size_t *oldlenp, void *new, \
> size_t newlen); }
> 203 STD { int sys_mlock(const void *addr, size_t len); }
> Index: sys/kern/uipc_domain.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_domain.c,v
> retrieving revision 1.60
> diff -u -p -r1.60 uipc_domain.c
> --- sys/kern/uipc_domain.c 14 Aug 2022 01:58:28 -0000 1.60
> +++ sys/kern/uipc_domain.c 21 Apr 2023 13:48:13 -0000
> @@ -183,8 +183,8 @@ net_link_sysctl(int *name, u_int namelen
> }
>
> int
> -net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> - size_t newlen, struct proc *p)
> +net_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> + void *newp, size_t newlen, struct proc *p)
> {
> const struct domain *dp;
> const struct protosw *pr;
> @@ -213,9 +213,13 @@ net_sysctl(int *name, u_int namelen, voi
> newp, newlen));
> #endif
> #if NPFLOW > 0
> - if (family == PF_PFLOW)
> - return (pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> - newp, newlen));
> + if (family == PF_PFLOW) {
> + KERNEL_LOCK();
> + error = pflow_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> + newp, newlen);
> + KERNEL_UNLOCK();
> + return (error);
> + }
> #endif
> #ifdef PIPEX
> if (family == PF_PIPEX)
> @@ -223,9 +227,13 @@ net_sysctl(int *name, u_int namelen, voi
> newp, newlen));
> #endif
> #ifdef MPLS
> - if (family == PF_MPLS)
> - return (mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> - newp, newlen));
> + if (family == PF_MPLS) {
> + KERNEL_LOCK();
> + error = mpls_sysctl(name + 1, namelen - 1, oldp, oldlenp,
> + newp, newlen);
> + KERNEL_UNLOCK();
> + return (error);
> + }
> #endif
> dp = pffinddomain(family);
> if (dp == NULL)
> @@ -236,8 +244,10 @@ net_sysctl(int *name, u_int namelen, voi
> protocol = name[1];
> for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
> if (pr->pr_protocol == protocol && pr->pr_sysctl) {
> + KERNEL_LOCK();
> error = (*pr->pr_sysctl)(name + 2, namelen - 2,
> oldp, oldlenp, newp, newlen);
> + KERNEL_UNLOCK();
> return (error);
> }
> return (ENOPROTOOPT);