Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to
reduce the contention due to page faults. A single part of the handler
spinning on the lock is enough to hide bugs and increase latency. One
recent example is the uvm_map_inentry() check.
uvm_grow() is another small function called in trap that currently needs
the KERNEL_LOCK(). Diff below changes this requirement without removing
the KERNEL_LOCK() yet.
It uses the underlying vm_map lock to serialize writes to the fields
of "struct vmspace".
While here I also documented that the reference counting is currently
protected by the KERNEL_LOCK() and introduced a wrapper to help with
future changes and reduce the differences with NetBSD.
Once uvm_grow() is safe to be called without the KERNEL_LOCK() MD trap
functions can be adapted on a case-by-case basis.
Comments, Oks?
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.379
diff -u -p -r1.379 kern_sysctl.c
--- kern/kern_sysctl.c 1 Sep 2020 01:53:50 -0000 1.379
+++ kern/kern_sysctl.c 14 Oct 2020 09:35:00 -0000
@@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele
/* Execing - danger. */
if ((vpr->ps_flags & PS_INEXEC))
return (EBUSY);
-
+
/* Only owner or root can get env */
if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
(vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
@@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele
ps_strings = vpr->ps_strings;
vm = vpr->ps_vmspace;
- vm->vm_refcnt++;
+ uvmspace_addref(vm);
vpr = NULL;
buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
retrieving revision 1.83
diff -u -p -r1.83 sys_process.c
--- kern/sys_process.c 16 Mar 2020 11:58:46 -0000 1.83
+++ kern/sys_process.c 14 Oct 2020 09:35:00 -0000
@@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct
if ((error = process_checkioperm(curp, tr)) != 0)
return error;
- /* XXXCDC: how should locking work here? */
vm = tr->ps_vmspace;
if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
return EFAULT;
addr = uio->uio_offset;
- vm->vm_refcnt++;
+ uvmspace_addref(vm);
error = uvm_io(&vm->vm_map, uio,
(uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0);
@@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s
if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
return EFAULT;
- vm->vm_refcnt++;
+ uvmspace_addref(vm);
error = uvm_io(&vm->vm_map, &uio, 0);
uvmspace_free(vm);
Index: uvm/uvm_extern.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
retrieving revision 1.153
diff -u -p -r1.153 uvm_extern.h
--- uvm/uvm_extern.h 13 Sep 2020 10:05:25 -0000 1.153
+++ uvm/uvm_extern.h 14 Oct 2020 09:35:00 -0000
@@ -192,11 +192,13 @@ struct pmap;
* Several fields are temporary (text, data stuff).
*
* Locks used to protect struct members in this file:
+ * K kernel lock
* I immutable after creation
+ * v vm_map's lock
*/
struct vmspace {
struct vm_map vm_map; /* VM address map */
- int vm_refcnt; /* number of references */
+ int vm_refcnt; /* [K] number of references */
caddr_t vm_shm; /* SYS5 shared memory private data XXX */
/* we copy from vm_startcopy to the end of the structure on fork */
#define vm_startcopy vm_rssize
@@ -205,9 +207,9 @@ struct vmspace {
segsz_t vm_tsize; /* text size (pages) XXX */
segsz_t vm_dsize; /* data size (pages) XXX */
segsz_t vm_dused; /* data segment length (pages) XXX */
- segsz_t vm_ssize; /* stack size (pages) */
- caddr_t vm_taddr; /* user virtual address of text XXX */
- caddr_t vm_daddr; /* user virtual address of data XXX */
+ segsz_t vm_ssize; /* [v] stack size (pages) */
+ caddr_t vm_taddr; /* [I] user virtual address of text */
+ caddr_t vm_daddr; /* [I] user virtual address of data */
caddr_t vm_maxsaddr; /* [I] user VA at max stack growth */
caddr_t vm_minsaddr; /* [I] user VA at top of stack */
};
@@ -413,6 +415,7 @@ void uvmspace_init(struct vmspace *, struct pmap *,
 vaddr_t, vaddr_t, boolean_t, boolean_t);
void uvmspace_exec(struct proc *, vaddr_t, vaddr_t);
struct vmspace *uvmspace_fork(struct process *);
+void uvmspace_addref(struct vmspace *);
void uvmspace_free(struct vmspace *);
struct vmspace *uvmspace_share(struct process *);
int uvm_share(vm_map_t, vaddr_t, vm_prot_t,
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.268
diff -u -p -r1.268 uvm_map.c
--- uvm/uvm_map.c 22 Sep 2020 14:31:08 -0000 1.268
+++ uvm/uvm_map.c 14 Oct 2020 09:35:00 -0000
@@ -3517,7 +3517,6 @@ uvmspace_init(struct vmspace *vm, struct
/*
* uvmspace_share: share a vmspace between two processes
*
- * - XXX: no locking on vmspace
* - used for vfork
*/
@@ -3526,7 +3525,7 @@ uvmspace_share(struct process *pr)
{
struct vmspace *vm = pr->ps_vmspace;
- vm->vm_refcnt++;
+ uvmspace_addref(vm);
return vm;
}
@@ -3630,13 +3629,25 @@ uvmspace_exec(struct proc *p, vaddr_t st
}
/*
+ * uvmspace_addref: add a reference to a vmspace.
+ */
+void
+uvmspace_addref(struct vmspace *vm)
+{
+ KERNEL_ASSERT_LOCKED();
+ KASSERT(vm->vm_refcnt > 0);
+
+ vm->vm_refcnt++;
+}
+
+/*
* uvmspace_free: free a vmspace data structure
- *
- * - XXX: no locking on vmspace
*/
void
uvmspace_free(struct vmspace *vm)
{
+ KERNEL_ASSERT_LOCKED();
+
if (--vm->vm_refcnt == 0) {
/*
* lock the map, to wait out all other references to it. delete
Index: uvm/uvm_unix.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_unix.c,v
retrieving revision 1.68
diff -u -p -r1.68 uvm_unix.c
--- uvm/uvm_unix.c 6 Jul 2020 13:33:09 -0000 1.68
+++ uvm/uvm_unix.c 14 Oct 2020 09:35:00 -0000
@@ -108,11 +108,14 @@ void
uvm_grow(struct proc *p, vaddr_t sp)
{
struct vmspace *vm = p->p_vmspace;
+ vm_map_t map = &vm->vm_map;
int si;
+ vm_map_lock(map);
+
/* For user defined stacks (from sendsig). */
if (sp < (vaddr_t)vm->vm_maxsaddr)
- return;
+ goto out;
/* For common case of already allocated (from trap). */
#ifdef MACHINE_STACK_GROWS_UP
@@ -120,7 +123,7 @@ uvm_grow(struct proc *p, vaddr_t sp)
#else
if (sp >= (vaddr_t)vm->vm_minsaddr - ptoa(vm->vm_ssize))
#endif
- return;
+ goto out;
/* Really need to check vs limit and increment stack size if ok. */
#ifdef MACHINE_STACK_GROWS_UP
@@ -130,6 +133,8 @@ uvm_grow(struct proc *p, vaddr_t sp)
#endif
if (vm->vm_ssize + si <= atop(lim_cur(RLIMIT_STACK)))
vm->vm_ssize += si;
+out:
+ vm_map_unlock(map);
}
#ifndef SMALL_KERNEL