> Date: Wed, 6 Apr 2016 20:58:16 +0200
> From: Stefan Kempf <[email protected]>
>
> Stefan Kempf wrote:
> > Hi,
> >
> > here comes a diff for vmm, and I'd like to ask people that are
> > interested in our hypervisor to test this. If you are experimenting
> > with vmm already, just do what you always do with vmm when running
> > with this diff :-)
> >
> > [...]
> >
> > This diff will not go in at once. The first thing that should be
> > committed is an addition to uvm. I'll post that one separately in this
> > thread and ask for reviews.
>
> Here are just the uvm parts:
>
> This diff has just the uvm parts with a new main function uvm_share()
> and a helper function uvm_mapent_share(). Nothing uses it yet, but
> vmm(4) will call it later. So this diff should have no impact on the
> rest of uvm or the kernel.
>
> What uvm_share() does is that it takes two virtual address ranges [A,B]
> and [C,D], and makes sure that [A,B] and [C,D] both get mapped to
> the same physical pages.
>
> uvm already has the possibility to establish such shared mappings
> (uvm_mapent_forkshared). I pulled out the common functionality
> that uvm_share() needs as well into uvm_mapent_share() and made
> uvm_mapent_clone a little more generic.
>
> The only thing that uvm_share() checks is that the source address
> range [A,B] exists in the source address space, and that it is
> backed by memory (whether it's anon memory or whether it comes
> from a file does not matter). And the destination address range
> [C,D] must still be available in the destination address space.
>
> Comments, oks?
Looks good to me.
> Background:
>
> vmm(4) creates a separate (virtual) address space
> for the guest VM. These guest physical addresses are then mapped to
> "real" physical RAM on the host. But the memory for the guest is
> currently allocated within the kernel and not directly visible to vmd.
>
> With this diff, we can later have vmd(8) allocate a large chunk of
> memory via mmap(), and have this memory correspond to a guest physical
> range in the guest VM.
>
> The protection bits for the two address ranges can be different. That
> way, the allocated memory in vmd(8) will be non-executable. In the
> guest itself however, the memory is executable.
>
> Index: uvm/uvm_extern.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.138
> diff -u -p -r1.138 uvm_extern.h
> --- uvm/uvm_extern.h 4 Apr 2016 16:34:16 -0000 1.138
> +++ uvm/uvm_extern.h 6 Apr 2016 17:57:06 -0000
> @@ -428,6 +428,8 @@ void uvmspace_exec(struct proc *,
> vadd
> struct vmspace *uvmspace_fork(struct process *);
> void uvmspace_free(struct vmspace *);
> struct vmspace *uvmspace_share(struct process *);
> +int uvm_share(vm_map_t, vaddr_t, vm_prot_t,
> + vm_map_t, vaddr_t, vsize_t);
> void uvm_meter(void);
> int uvm_sysctl(int *, u_int, void *, size_t *,
> void *, size_t, struct proc *);
> Index: uvm/uvm_map.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.211
> diff -u -p -r1.211 uvm_map.c
> --- uvm/uvm_map.c 4 Apr 2016 16:34:16 -0000 1.211
> +++ uvm/uvm_map.c 6 Apr 2016 17:57:06 -0000
> @@ -182,8 +182,12 @@ int uvm_mapent_bias(struct
> vm_map*, s
> * uvm_vmspace_fork helper functions.
> */
> struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
> - vsize_t, struct vm_map_entry*,
> - struct uvm_map_deadq*, int, int);
> + vsize_t, vm_prot_t, vm_prot_t,
> + struct vm_map_entry*, struct uvm_map_deadq*, int,
> + int);
> +struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
> + vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
> + struct vm_map_entry*, struct uvm_map_deadq*);
> struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
> struct vm_map*, struct vm_map_entry*,
> struct uvm_map_deadq*);
> @@ -3364,6 +3368,98 @@ uvmspace_free(struct vmspace *vm)
> }
>
> /*
> + * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
> + * srcmap to the address range [dstaddr, dstaddr + sz) in
> + * dstmap.
> + *
> + * The whole address range in srcmap must be backed by an object
> + * (no holes).
> + *
> + * If successful, the address ranges share memory and the destination
> + * address range uses the protection flags in prot.
> + *
> + * This routine assumes that sz is a multiple of PAGE_SIZE and
> + * that dstaddr and srcaddr are page-aligned.
> + */
> +int
> +uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
> + struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
> +{
> + int ret = 0;
> + vaddr_t unmap_end;
> + vaddr_t dstva;
> + vsize_t off, len, n = sz;
> + struct vm_map_entry *first = NULL, *last = NULL;
> + struct vm_map_entry *src_entry, *psrc_entry = NULL;
> + struct uvm_map_deadq dead;
> +
> + if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
> + return EINVAL;
> +
> + TAILQ_INIT(&dead);
> + vm_map_lock(dstmap);
> + vm_map_lock_read(srcmap);
> +
> + if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
> + ret = ENOMEM;
> + goto exit_unlock;
> + }
> + if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
> + ret = EINVAL;
> + goto exit_unlock;
> + }
> +
> + unmap_end = dstaddr;
> + for (; src_entry != NULL;
> + psrc_entry = src_entry,
> + src_entry = RB_NEXT(uvm_map_addr, &srcmap->addr, src_entry)) {
> + /* hole in address space, bail out */
> + if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
> + break;
> + if (src_entry->start >= srcaddr + sz)
> + break;
> +
> + if (UVM_ET_ISSUBMAP(src_entry))
> + panic("uvm_share: encountered a submap (illegal)");
> + if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
> + UVM_ET_ISNEEDSCOPY(src_entry))
> + panic("uvm_share: non-copy_on_write map entries "
> + "marked needs_copy (illegal)");
> +
> + dstva = dstaddr;
> + if (src_entry->start > srcaddr) {
> + dstva += src_entry->start - srcaddr;
> + off = 0;
> + } else
> + off = srcaddr - src_entry->start;
> +
> + if (n < src_entry->end - src_entry->start)
> + len = n;
> + else
> + len = src_entry->end - src_entry->start;
> + n -= len;
> +
> + if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot,
> + srcmap, src_entry, &dead) == NULL)
> + break;
> +
> + unmap_end = dstva + len;
> + if (n == 0)
> + goto exit_unlock;
> + }
> +
> + ret = EINVAL;
> + uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
> +
> +exit_unlock:
> + vm_map_unlock_read(srcmap);
> + vm_map_unlock(dstmap);
> + uvm_unmap_detach(&dead, 0);
> +
> + return ret;
> +}
> +
> +/*
> * Clone map entry into other map.
> *
> * Mapping will be placed at dstaddr, for the same length.
> @@ -3372,7 +3468,8 @@ uvmspace_free(struct vmspace *vm)
> */
> struct vm_map_entry *
> uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
> - vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
> + vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
> + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
> int mapent_flags, int amap_share_flags)
> {
> struct vm_map_entry *new_entry, *first, *last;
> @@ -3394,8 +3491,8 @@ uvm_mapent_clone(struct vm_map *dstmap,
> new_entry->offset = old_entry->offset;
> new_entry->aref = old_entry->aref;
> new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
> - new_entry->protection = old_entry->protection;
> - new_entry->max_protection = old_entry->max_protection;
> + new_entry->protection = prot;
> + new_entry->max_protection = maxprot;
> new_entry->inheritance = old_entry->inheritance;
> new_entry->advice = old_entry->advice;
>
> @@ -3417,34 +3514,48 @@ uvm_mapent_clone(struct vm_map *dstmap,
> return new_entry;
> }
>
> -/*
> - * share the mapping: this means we want the old and
> - * new entries to share amaps and backing objects.
> - */
> struct vm_map_entry *
> -uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
> - struct vm_map *old_map,
> +uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
> + vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
> struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
> {
> - struct vm_map_entry *new_entry;
> -
> /*
> - * if the old_entry needs a new amap (due to prev fork)
> - * then we need to allocate it now so that we have
> - * something we own to share with the new_entry. [in
> - * other words, we need to clear needs_copy]
> + * If old_entry refers to a copy-on-write region that has not yet been
> + * written to (needs_copy flag is set), then we need to allocate a new
> + * amap for old_entry.
> + *
> + * If we do not do this, and the process owning old_entry later does a
> + * copy-on-write, old_entry and new_entry will refer to different
> + * memory regions, and the processes no longer share the memory.
> + *
> + * [in other words, we need to clear needs_copy]
> */
>
> if (UVM_ET_ISNEEDSCOPY(old_entry)) {
> /* get our own amap, clears needs_copy */
> amap_copy(old_map, old_entry, M_WAITOK, FALSE,
> - 0, 0);
> + 0, 0);
> /* XXXCDC: WAITOK??? */
> }
>
> - new_entry = uvm_mapent_clone(new_map, old_entry->start,
> - old_entry->end - old_entry->start, 0, old_entry,
> - dead, 0, AMAP_SHARED);
> + return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
> + prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
> +}
> +
> +/*
> + * share the mapping: this means we want the old and
> + * new entries to share amaps and backing objects.
> + */
> +struct vm_map_entry *
> +uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
> + struct vm_map *old_map,
> + struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
> +{
> + struct vm_map_entry *new_entry;
> +
> + new_entry = uvm_mapent_share(new_map, old_entry->start,
> + old_entry->end - old_entry->start, 0, old_entry->protection,
> + old_entry->max_protection, old_map, old_entry, dead);
>
> /*
> * pmap_copy the mappings: this routine is optional
> @@ -3474,8 +3585,8 @@ uvm_mapent_forkcopy(struct vmspace *new_
> boolean_t protect_child;
>
> new_entry = uvm_mapent_clone(new_map, old_entry->start,
> - old_entry->end - old_entry->start, 0, old_entry,
> - dead, 0, 0);
> + old_entry->end - old_entry->start, 0, old_entry->protection,
> + old_entry->max_protection, old_entry, dead, 0, 0);
>
> new_entry->etype |=
> (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
> @@ -3615,8 +3726,8 @@ uvm_mapent_forkzero(struct vmspace *new_
> struct vm_map_entry *new_entry;
>
> new_entry = uvm_mapent_clone(new_map, old_entry->start,
> - old_entry->end - old_entry->start, 0, old_entry,
> - dead, 0, 0);
> + old_entry->end - old_entry->start, 0, old_entry->protection,
> + old_entry->max_protection, old_entry, dead, 0, 0);
>
> new_entry->etype |=
> (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
> @@ -4116,6 +4227,7 @@ uvm_map_extract(struct vm_map *srcmap, v
>
> newentry = uvm_mapent_clone(kernel_map,
> cp_start - start + dstaddr, cp_len, cp_off,
> + entry->protection, entry->max_protection,
> entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
> if (newentry == NULL) {
> error = ENOMEM;
>
>