> Date: Wed, 14 Oct 2020 12:01:10 +0200
> From: Martin Pieuchot <m...@openbsd.org>
> 
> Getting uvm_fault() out of the KERNEL_LOCK() alone is not enough to
> reduce the contention due to page faults.  A single part of the handler
> spinning on the lock is enough to hide bugs and increase latency.  One
> recent example is the uvm_map_inentry() check.
> 
> uvm_grow() is another small function called in trap that currently needs
> the KERNEL_LOCK().  Diff below changes this requirement without removing
> the KERNEL_LOCK() yet. 
> 
> It uses the underlying vm_map's lock to serialize writes to the fields
> of "struct vmspace".
> 
> While here I also documented that the reference counting is currently
> protected by the KERNEL_LOCK() and introduced a wrapper to help with
> future changes and reduce the differences with NetBSD.
> 
> Once uvm_grow() is safe to call without the KERNEL_LOCK(), MD trap
> functions can be adapted on a case-by-case basis.
> 
> Comments, Oks?

I considered the same approach of using the lock of the underlying
vm_map.  I have seen some evidence of contention on that lock, but I
don't think it is too bad (yet).  I looked at a lock-free approach as
well, but it got a bit messy.  So I think the approach is fine.
However...

> 
> Index: kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.379
> diff -u -p -r1.379 kern_sysctl.c
> --- kern/kern_sysctl.c        1 Sep 2020 01:53:50 -0000       1.379
> +++ kern/kern_sysctl.c        14 Oct 2020 09:35:00 -0000
> @@ -1783,7 +1783,7 @@ sysctl_proc_args(int *name, u_int namele
>       /* Execing - danger. */
>       if ((vpr->ps_flags & PS_INEXEC))
>               return (EBUSY);
> -     
> +
>       /* Only owner or root can get env */
>       if ((op == KERN_PROC_NENV || op == KERN_PROC_ENV) &&
>           (vpr->ps_ucred->cr_uid != cp->p_ucred->cr_uid &&
> @@ -1792,7 +1792,7 @@ sysctl_proc_args(int *name, u_int namele
>  
>       ps_strings = vpr->ps_strings;
>       vm = vpr->ps_vmspace;
> -     vm->vm_refcnt++;
> +     uvmspace_addref(vm);
>       vpr = NULL;
>  
>       buf = malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
> Index: kern/sys_process.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sys_process.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 sys_process.c
> --- kern/sys_process.c        16 Mar 2020 11:58:46 -0000      1.83
> +++ kern/sys_process.c        14 Oct 2020 09:35:00 -0000
> @@ -850,13 +850,12 @@ process_domem(struct proc *curp, struct 
>       if ((error = process_checkioperm(curp, tr)) != 0)
>               return error;
>  
> -     /* XXXCDC: how should locking work here? */
>       vm = tr->ps_vmspace;
>       if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
>               return EFAULT;
>       addr = uio->uio_offset;
>  
> -     vm->vm_refcnt++;
> +     uvmspace_addref(vm);
>  
>       error = uvm_io(&vm->vm_map, uio,
>           (uio->uio_rw == UIO_WRITE) ? UVM_IO_FIXPROT : 0);
> @@ -892,7 +891,7 @@ process_auxv_offset(struct proc *curp, s
>       if ((tr->ps_flags & PS_EXITING) || (vm->vm_refcnt < 1))
>               return EFAULT;
>  
> -     vm->vm_refcnt++;
> +     uvmspace_addref(vm);
>       error = uvm_io(&vm->vm_map, &uio, 0);
>       uvmspace_free(vm);
>  
> Index: uvm/uvm_extern.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.153
> diff -u -p -r1.153 uvm_extern.h
> --- uvm/uvm_extern.h  13 Sep 2020 10:05:25 -0000      1.153
> +++ uvm/uvm_extern.h  14 Oct 2020 09:35:00 -0000
> @@ -192,11 +192,13 @@ struct pmap;
>   * Several fields are temporary (text, data stuff).
>   *
>   *  Locks used to protect struct members in this file:
> + *   K       kernel lock
>   *   I       immutable after creation
> + *   v       vm_map's lock
>   */
>  struct vmspace {
>       struct  vm_map vm_map;  /* VM address map */
> -     int     vm_refcnt;      /* number of references */
> +     int     vm_refcnt;      /* [K] number of references */
>       caddr_t vm_shm;         /* SYS5 shared memory private data XXX */
>  /* we copy from vm_startcopy to the end of the structure on fork */
>  #define vm_startcopy vm_rssize
> @@ -205,9 +207,9 @@ struct vmspace {
>       segsz_t vm_tsize;       /* text size (pages) XXX */
>       segsz_t vm_dsize;       /* data size (pages) XXX */
>       segsz_t vm_dused;       /* data segment length (pages) XXX */
> -     segsz_t vm_ssize;       /* stack size (pages) */
> -     caddr_t vm_taddr;       /* user virtual address of text XXX */
> -     caddr_t vm_daddr;       /* user virtual address of data XXX */
> +     segsz_t vm_ssize;       /* [v] stack size (pages) */
> +     caddr_t vm_taddr;       /* [I] user virtual address of text */
> +     caddr_t vm_daddr;       /* [I] user virtual address of data */
>       caddr_t vm_maxsaddr;    /* [I] user VA at max stack growth */
>       caddr_t vm_minsaddr;    /* [I] user VA at top of stack */
>  };
> @@ -413,6 +415,7 @@ void                      uvmspace_init(struct vmspace *, s
>                           vaddr_t, vaddr_t, boolean_t, boolean_t);
>  void                 uvmspace_exec(struct proc *, vaddr_t, vaddr_t);
>  struct vmspace               *uvmspace_fork(struct process *);
> +void                 uvmspace_addref(struct vmspace *);
>  void                 uvmspace_free(struct vmspace *);
>  struct vmspace               *uvmspace_share(struct process *);
>  int                  uvm_share(vm_map_t, vaddr_t, vm_prot_t,
> Index: uvm/uvm_map.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.268
> diff -u -p -r1.268 uvm_map.c
> --- uvm/uvm_map.c     22 Sep 2020 14:31:08 -0000      1.268
> +++ uvm/uvm_map.c     14 Oct 2020 09:35:00 -0000
> @@ -3517,7 +3517,6 @@ uvmspace_init(struct vmspace *vm, struct
>  /*
>   * uvmspace_share: share a vmspace between two processes
>   *
> - * - XXX: no locking on vmspace
>   * - used for vfork
>   */
>  
> @@ -3526,7 +3525,7 @@ uvmspace_share(struct process *pr)
>  {
>       struct vmspace *vm = pr->ps_vmspace;
>  
> -     vm->vm_refcnt++;
> +     uvmspace_addref(vm);
>       return vm;
>  }
>  
> @@ -3630,13 +3629,25 @@ uvmspace_exec(struct proc *p, vaddr_t st
>  }
>  
>  /*
> + * uvmspace_addref: add a reference to a vmspace.
> + */
> +void
> +uvmspace_addref(struct vmspace *vm)
> +{
> +     KERNEL_ASSERT_LOCKED();
> +     KASSERT(vm->vm_refcnt > 0);
> +
> +     vm->vm_refcnt++;
> +}
> +
> +/*
>   * uvmspace_free: free a vmspace data structure
> - *
> - * - XXX: no locking on vmspace
>   */
>  void
>  uvmspace_free(struct vmspace *vm)
>  {
> +     KERNEL_ASSERT_LOCKED();
> +
>       if (--vm->vm_refcnt == 0) {
>               /*
>                * lock the map, to wait out all other references to it.  delete
> Index: uvm/uvm_unix.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_unix.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 uvm_unix.c
> --- uvm/uvm_unix.c    6 Jul 2020 13:33:09 -0000       1.68
> +++ uvm/uvm_unix.c    14 Oct 2020 09:35:00 -0000
> @@ -108,11 +108,14 @@ void
>  uvm_grow(struct proc *p, vaddr_t sp)
>  {
>       struct vmspace *vm = p->p_vmspace;
> +     vm_map_t map = &vm->vm_map;
>       int si;
>  
> +     vm_map_lock(map);
> +
>       /* For user defined stacks (from sendsig). */
>       if (sp < (vaddr_t)vm->vm_maxsaddr)
> -             return;
> +             goto out;

Since vm_maxsaddr is immutable, this check can be done without holding
the lock.  I think that's worth it, as it will prevent contention in
multi-threaded processes: the check will almost always be true for any
thread but the first, since those use user-defined stacks.
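
Roughly what I have in mind, as an untested sketch on top of your diff
(the elided part is unchanged from what you posted):

void
uvm_grow(struct proc *p, vaddr_t sp)
{
	struct vmspace *vm = p->p_vmspace;
	vm_map_t map = &vm->vm_map;
	int si;

	/*
	 * For user defined stacks (from sendsig).  vm_maxsaddr is
	 * immutable, so this check is safe without the map lock and
	 * lets every thread but the first one return before taking it.
	 */
	if (sp < (vaddr_t)vm->vm_maxsaddr)
		return;

	vm_map_lock(map);

	/*
	 * Rest as in your diff: the already-allocated check, the
	 * RLIMIT_STACK check and the vm_ssize update, ending in
	 * out:/vm_map_unlock(map).
	 */
}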

>       /* For common case of already allocated (from trap). */
>  #ifdef MACHINE_STACK_GROWS_UP
> @@ -120,7 +123,7 @@ uvm_grow(struct proc *p, vaddr_t sp)
>  #else
>       if (sp >= (vaddr_t)vm->vm_minsaddr - ptoa(vm->vm_ssize))
>  #endif
> -             return;
> +             goto out;
>  
>       /* Really need to check vs limit and increment stack size if ok. */
>  #ifdef MACHINE_STACK_GROWS_UP
> @@ -130,6 +133,8 @@ uvm_grow(struct proc *p, vaddr_t sp)
>  #endif
>       if (vm->vm_ssize + si <= atop(lim_cur(RLIMIT_STACK)))
>               vm->vm_ssize += si;
> +out:
> +     vm_map_unlock(map);
>  }
>  
>  #ifndef SMALL_KERNEL
> 
> 
