> Date: Wed, 14 Oct 2020 12:01:10 +0200 > From: Martin Pieuchot <m...@openbsd.org> > > Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to > reduce the contention due to page faults. A single part of the handler > spinning on the lock is enough to hide bugs and increase latency. One > recent example is the uvm_map_inentry() check. > > uvm_grow() is another small function called in trap that currently needs > the KERNEL_LOCK(). Diff below changes this requirement without removing > the KERNEL_LOCK() yet. > > It uses the underlying vm_space lock to serialize writes to the fields > of "truct vmspace". > > While here I also documented that the reference counting is currently > protected by the KERNEL_LOCK() and introduced a wrapper to help with > future changes and reduce the differences with NetBSD. > > Once uvm_grow() is safe to be called without the KERNEL_LOCK() MD trap > functions can be adapted on a case-per-case basis. > > Comments, Oks?
I considered the same approach of using the lock of the underlying vm_map. I have seen some evidence of contention on that lock, but I don't think it is too bad (yet). I looked at a lock-free approach as well, but it got a bit messy. So I think the approach is fine. However... > > Index: kern/kern_sysctl.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sysctl.c,v > retrieving revision 1.379 > diff -u -p -r1.379 kern_sysctl.c > --- kern/kern_sysctl.c 1 Sep 2020 01:53:50 -0000 1.379 > +++ kern/kern_sysctl.c 14 Oct 2020 09:35:00 -0000 > @@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele > /* Execing - danger. */ > if ((vpr->ps_flags & PS_INEXEC)) > return (EBUSY); > - > + > /* Only owner or root can get env */ > if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) && > (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid && > @@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele > > ps_strings = vpr->ps_strings; > vm = vpr->ps_vmspace; > - vm->vm_refcnt++; > + uvmspace_addref(vm); > vpr = NULL; > > buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK); > Index: kern/sys_process.c > =================================================================== > RCS file: /cvs/src/sys/kern/sys_process.c,v > retrieving revision 1.83 > diff -u -p -r1.83 sys_process.c > --- kern/sys_process.c 16 Mar 2020 11:58:46 -0000 1.83 > +++ kern/sys_process.c 14 Oct 2020 09:35:00 -0000 > @@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct > if ((error = process_checkioperm(curp, tr)) != 0) > return error; > > - /* XXXCDC: how should locking work here? */ > vm = tr->ps_vmspace; > if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1)) > return EFAULT; > addr = uio->uio_offset; > > - vm->vm_refcnt++; > + uvmspace_addref(vm); > > error = uvm_io(&vm->vm_map, uio, > (uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0); > @@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s > if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1)) > return EFAULT; > > - vm->vm_refcnt++; > + uvmspace_addref(vm); > error = uvm_io(&vm->vm_map, &uio, 0); > uvmspace_free(vm); > > Index: uvm/uvm_extern.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_extern.h,v > retrieving revision 1.153 > diff -u -p -r1.153 uvm_extern.h > --- uvm/uvm_extern.h 13 Sep 2020 10:05:25 -0000 1.153 > +++ uvm/uvm_extern.h 14 Oct 2020 09:35:00 -0000 > @@ -192,11 +192,13 @@ struct pmap; > * Several fields are temporary (text, data stuff). > * > * Locks used to protect struct members in this file: > + * K kernel lock > * I immutable after creation > + * v vm_map's lock > */ > struct vmspace { > struct vm_map vm_map; /* VM address map */ > - int vm_refcnt; /* number of references */ > + int vm_refcnt; /* [K] number of references */ > caddr_t vm_shm; /* SYS5 shared memory private data XXX */ > /* we copy from vm_startcopy to the end of the structure on fork */ > #define vm_startcopy vm_rssize > @@ -205,9 +207,9 @@ struct vmspace { > segsz_t vm_tsize; /* text size (pages) XXX */ > segsz_t vm_dsize; /* data size (pages) XXX */ > segsz_t vm_dused; /* data segment length (pages) XXX */ > - segsz_t vm_ssize; /* stack size (pages) */ > - caddr_t vm_taddr; /* user virtual address of text XXX */ > - caddr_t vm_daddr; /* user virtual address of data XXX */ > + segsz_t vm_ssize; /* [v] stack size (pages) */ > + caddr_t vm_taddr; /* [I] user virtual address of text */ > + caddr_t vm_daddr; /* [I] user virtual address of data */ > caddr_t vm_maxsaddr; /* [I] user VA at max stack growth */ > caddr_t vm_minsaddr; /* [I] user VA at top of stack */ > }; > @@ -413,6 +415,7 @@ void uvmspace_init(struct vmspace *, > s > vaddr_t, vaddr_t, boolean_t, boolean_t); > void uvmspace_exec(struct proc *, vaddr_t, vaddr_t); > struct vmspace *uvmspace_fork(struct process *); > +void uvmspace_addref(struct vmspace *); > void uvmspace_free(struct vmspace *); > struct vmspace *uvmspace_share(struct process *); > int uvm_share(vm_map_t, vaddr_t, vm_prot_t, > Index: uvm/uvm_map.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_map.c,v > retrieving revision 1.268 > diff -u -p -r1.268 uvm_map.c > --- uvm/uvm_map.c 22 Sep 2020 14:31:08 -0000 1.268 > +++ uvm/uvm_map.c 14 Oct 2020 09:35:00 -0000 > @@ -3517,7 +3517,6 @@ uvmspace_init(struct vmspace *vm, struct > /* > * uvmspace_share: share a vmspace between two processes > * > - * - XXX: no locking on vmspace > * - used for vfork > */ > > @@ -3526,7 +3525,7 @@ uvmspace_share(struct process *pr) > { > struct vmspace *vm = pr->ps_vmspace; > > - vm->vm_refcnt++; > + uvmspace_addref(vm); > return vm; > } > > @@ -3630,13 +3629,25 @@ uvmspace_exec(struct proc *p, vaddr_t st > } > > /* > + * uvmspace_addref: add a reference to a vmspace. > + */ > +void > +uvmspace_addref(struct vmspace *vm) > +{ > + KERNEL_ASSERT_LOCKED(); > + KASSERT(vm->vm_refcnt > 0); > + > + vm->vm_refcnt++; > +} > + > +/* > * uvmspace_free: free a vmspace data structure > - * > - * - XXX: no locking on vmspace > */ > void > uvmspace_free(struct vmspace *vm) > { > + KERNEL_ASSERT_LOCKED(); > + > if (--vm->vm_refcnt == 0) { > /* > * lock the map, to wait out all other references to it. delete > Index: uvm/uvm_unix.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_unix.c,v > retrieving revision 1.68 > diff -u -p -r1.68 uvm_unix.c > --- uvm/uvm_unix.c 6 Jul 2020 13:33:09 -0000 1.68 > +++ uvm/uvm_unix.c 14 Oct 2020 09:35:00 -0000 > @@ -108,11 +108,14 @@ void > uvm_grow(struct proc *p, vaddr_t sp) > { > struct vmspace *vm = p->p_vmspace; > + vm_map_t map = &vm->vm_map; > int si; > > + vm_map_lock(map); > + > /* For user defined stacks (from sendsig). */ > if (sp < (vaddr_t)vm->vm_maxsaddr) > - return; > + goto out; Since vm_maxsaddr is ummutable, this check can be done without holding the lock. I think that's worth it as it will prevent contention in multi-threaded processes as this check will almost always be true for anything but the first thread since those will use a user-defined stack. > /* For common case of already allocated (from trap). */ > #ifdef MACHINE_STACK_GROWS_UP > @@ -120,7 +123,7 @@ uvm_grow(struct proc *p, vaddr_t sp) > #else > if (sp >= (vaddr_t)vm->vm_minsaddr - ptoa(vm->vm_ssize)) > #endif > - return; > + goto out; > > /* Really need to check vs limit and increment stack size if ok. */ > #ifdef MACHINE_STACK_GROWS_UP > @@ -130,6 +133,8 @@ uvm_grow(struct proc *p, vaddr_t sp) > #endif > if (vm->vm_ssize + si <= atop(lim_cur(RLIMIT_STACK))) > vm->vm_ssize += si; > +out: > + vm_map_unlock(map); > } > > #ifndef SMALL_KERNEL > >