Module Name: src Committed By: maxv Date: Sat Dec 15 13:39:43 UTC 2018
Modified Files: src/lib/libnvmm: libnvmm.3 libnvmm.c nvmm.h src/sys/dev/nvmm: nvmm.c nvmm_internal.h nvmm_ioctl.h Log Message: Invert the mapping logic. Until now, the "owner" of the memory was the guest, and by calling nvmm_gpa_map(), the virtualizer was creating a view towards the guest memory. Qemu expects the contrary: it wants the owner to be the virtualizer, and nvmm_gpa_map should just create a view from the guest towards the virtualizer's address space. Under this scheme, it is legal to have two GPAs that point to the same HVA. Introduce nvmm_hva_map() and nvmm_hva_unmap(), that map/unmap the HVA into a dedicated UOBJ. Change nvmm_gpa_map() and nvmm_gpa_unmap() to just perform an enter into the desired UOBJ. With this change in place, all the mapping-related problems in Qemu+NVMM are fixed. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/lib/libnvmm/libnvmm.3 src/lib/libnvmm/libnvmm.c cvs rdiff -u -r1.2 -r1.3 src/lib/libnvmm/nvmm.h cvs rdiff -u -r1.3 -r1.4 src/sys/dev/nvmm/nvmm.c cvs rdiff -u -r1.1 -r1.2 src/sys/dev/nvmm/nvmm_internal.h \ src/sys/dev/nvmm/nvmm_ioctl.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/lib/libnvmm/libnvmm.3 diff -u src/lib/libnvmm/libnvmm.3:1.4 src/lib/libnvmm/libnvmm.3:1.5 --- src/lib/libnvmm/libnvmm.3:1.4 Wed Dec 12 11:40:08 2018 +++ src/lib/libnvmm/libnvmm.3 Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -.\" $NetBSD: libnvmm.3,v 1.4 2018/12/12 11:40:08 wiz Exp $ +.\" $NetBSD: libnvmm.3,v 1.5 2018/12/15 13:39:43 maxv Exp $ .\" .\" Copyright (c) 2018 The NetBSD Foundation, Inc. .\" All rights reserved. @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd December 12, 2018 +.Dd December 14, 2018 .Dt LIBNVMM 3 .Os .Sh NAME @@ -63,6 +63,10 @@ .Fn nvmm_vcpu_run "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \ "struct nvmm_exit *exit" .Ft int +.Fn nvmm_hva_map "struct nvmm_machine *mach" "uintptr_t hva" "size_t size" +.Ft int +.Fn nvmm_hva_unmap "struct nvmm_machine *mach" "uintptr_t hva" "size_t size" +.Ft int .Fn nvmm_gpa_map "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \ "size_t size" "int flags" .Ft int @@ -164,15 +168,33 @@ The structure is filled to indicate the exit reason, and the associated parameters if any. .Pp +.Fn nvmm_hva_map +maps at address +.Fa hva +a buffer of size +.Fa size +in the calling process' virtual address space. +This buffer is allowed to be subsequently mapped in a virtual machine. +.Pp +.Fn nvmm_hva_unmap +unmaps the buffer of size +.Fa size +at address +.Fa hva +from the calling process' virtual address space. +.Pp .Fn nvmm_gpa_map -makes the guest physical memory area beginning on address +maps into the guest physical memory beginning on address .Fa gpa -and of size +the buffer of size .Fa size -available in the machine -.Fa mach . -The area is mapped in the calling process' virtual address space, at address -.Fa hva . +located at address +.Fa hva +of the calling process' virtual address space. +The +.Fa hva +parameter must point to a buffer that was previously mapped with +.Fn nvmm_hva_map . 
.Pp .Fn nvmm_gpa_unmap removes the guest physical memory area beginning on address @@ -181,9 +203,6 @@ and of size .Fa size from the machine .Fa mach . -It also unmaps the area beginning on -.Fa hva -from the calling process' virtual address space. .Pp .Fn nvmm_gva_to_gpa translates, on the CPU Index: src/lib/libnvmm/libnvmm.c diff -u src/lib/libnvmm/libnvmm.c:1.4 src/lib/libnvmm/libnvmm.c:1.5 --- src/lib/libnvmm/libnvmm.c:1.4 Wed Dec 12 10:42:34 2018 +++ src/lib/libnvmm/libnvmm.c Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: libnvmm.c,v 1.4 2018/12/12 10:42:34 maxv Exp $ */ +/* $NetBSD: libnvmm.c,v 1.5 2018/12/15 13:39:43 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -65,24 +65,12 @@ __area_isvalid(struct nvmm_machine *mach area_t *ent; LIST_FOREACH(ent, areas, list) { - /* Collision on HVA */ - if (hva >= ent->hva && hva < ent->hva + ent->size) { - return false; - } - if (hva + size >= ent->hva && - hva + size < ent->hva + ent->size) { - return false; - } - if (hva <= ent->hva && hva + size >= ent->hva + ent->size) { - return false; - } - /* Collision on GPA */ if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) { return false; } - if (gpa + size >= ent->gpa && - gpa + size < ent->gpa + ent->size) { + if (gpa + size > ent->gpa && + gpa + size <= ent->gpa + ent->size) { return false; } if (gpa <= ent->gpa && gpa + size >= ent->gpa + ent->size) { @@ -434,12 +422,54 @@ nvmm_gpa_unmap(struct nvmm_machine *mach args.size = size; ret = ioctl(nvmm_fd, NVMM_IOC_GPA_UNMAP, &args); + if (ret == -1) { + /* Can't recover. 
*/ + abort(); + } + + return 0; +} + +int +nvmm_hva_map(struct nvmm_machine *mach, uintptr_t hva, size_t size) +{ + struct nvmm_ioc_hva_map args; + int ret; + + if (nvmm_init() == -1) { + return -1; + } + + args.machid = mach->machid; + args.hva = hva; + args.size = size; + + ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args); if (ret == -1) return -1; - ret = munmap((void *)hva, size); + return 0; +} - return ret; +int +nvmm_hva_unmap(struct nvmm_machine *mach, uintptr_t hva, size_t size) +{ + struct nvmm_ioc_hva_map args; + int ret; + + if (nvmm_init() == -1) { + return -1; + } + + args.machid = mach->machid; + args.hva = hva; + args.size = size; + + ret = ioctl(nvmm_fd, NVMM_IOC_HVA_MAP, &args); + if (ret == -1) + return -1; + + return 0; } /* @@ -458,15 +488,10 @@ nvmm_gpa_to_hva(struct nvmm_machine *mac } LIST_FOREACH(ent, areas, list) { - if (gpa < ent->gpa) { - continue; - } - if (gpa >= ent->gpa + ent->size) { - continue; + if (gpa >= ent->gpa && gpa < ent->gpa + ent->size) { + *hva = ent->hva + (gpa - ent->gpa); + return 0; } - - *hva = ent->hva + (gpa - ent->gpa); - return 0; } errno = ENOENT; Index: src/lib/libnvmm/nvmm.h diff -u src/lib/libnvmm/nvmm.h:1.2 src/lib/libnvmm/nvmm.h:1.3 --- src/lib/libnvmm/nvmm.h:1.2 Thu Nov 29 19:55:20 2018 +++ src/lib/libnvmm/nvmm.h Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm.h,v 1.2 2018/11/29 19:55:20 maxv Exp $ */ +/* $NetBSD: nvmm.h,v 1.3 2018/12/15 13:39:43 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. 
@@ -83,6 +83,8 @@ int nvmm_vcpu_run(struct nvmm_machine *, int nvmm_gpa_map(struct nvmm_machine *, uintptr_t, gpaddr_t, size_t, int); int nvmm_gpa_unmap(struct nvmm_machine *, uintptr_t, gpaddr_t, size_t); +int nvmm_hva_map(struct nvmm_machine *, uintptr_t, size_t); +int nvmm_hva_unmap(struct nvmm_machine *, uintptr_t, size_t); int nvmm_gva_to_gpa(struct nvmm_machine *, nvmm_cpuid_t, gvaddr_t, gpaddr_t *, nvmm_prot_t *); Index: src/sys/dev/nvmm/nvmm.c diff -u src/sys/dev/nvmm/nvmm.c:1.3 src/sys/dev/nvmm/nvmm.c:1.4 --- src/sys/dev/nvmm/nvmm.c:1.3 Sun Nov 25 14:11:24 2018 +++ src/sys/dev/nvmm/nvmm.c Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm.c,v 1.3 2018/11/25 14:11:24 maxv Exp $ */ +/* $NetBSD: nvmm.c,v 1.4 2018/12/15 13:39:43 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.3 2018/11/25 14:11:24 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nvmm.c,v 1.4 2018/12/15 13:39:43 maxv Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -213,7 +213,14 @@ nvmm_kill_machines(pid_t pid) nvmm_vcpu_put(vcpu); } uvmspace_free(mach->vm); - uao_detach(mach->uobj); + + /* Drop the kernel UOBJ refs. */ + for (j = 0; j < NVMM_MAX_SEGS; j++) { + if (!mach->segs[j].present) + continue; + uao_detach(mach->segs[j].uobj); + } + nvmm_machine_free(mach); rw_exit(&mach->lock); @@ -249,14 +256,13 @@ nvmm_machine_create(struct nvmm_ioc_mach /* Curproc owns the machine. */ mach->procid = curproc->p_pid; + /* Zero out the segments. */ + memset(&mach->segs, 0, sizeof(mach->segs)); + /* Create the machine vmspace. */ mach->gpa_begin = 0; mach->gpa_end = NVMM_MAX_RAM; mach->vm = uvmspace_alloc(0, mach->gpa_end - mach->gpa_begin, false); - mach->uobj = uao_create(mach->gpa_end - mach->gpa_begin, 0); - - /* Grab a reference for the machine. 
*/ - uao_reference(mach->uobj); (*nvmm_impl->machine_create)(mach); @@ -292,7 +298,13 @@ nvmm_machine_destroy(struct nvmm_ioc_mac /* Free the machine vmspace. */ uvmspace_free(mach->vm); - uao_detach(mach->uobj); + + /* Drop the kernel UOBJ refs. */ + for (i = 0; i < NVMM_MAX_SEGS; i++) { + if (!mach->segs[i].present) + continue; + uao_detach(mach->segs[i].uobj); + } nvmm_machine_free(mach); nvmm_machine_put(mach); @@ -500,22 +512,193 @@ out: /* -------------------------------------------------------------------------- */ +static struct uvm_object * +nvmm_seg_getuobj(struct nvmm_machine *mach, uintptr_t hva, size_t size, + size_t *off) +{ + struct nvmm_seg *seg; + size_t i; + + for (i = 0; i < NVMM_MAX_SEGS; i++) { + seg = &mach->segs[i]; + if (!seg->present) { + continue; + } + if (hva >= seg->hva && hva + size <= seg->hva + seg->size) { + *off = hva - seg->hva; + return seg->uobj; + } + } + + return NULL; +} + +static struct nvmm_seg * +nvmm_seg_find(struct nvmm_machine *mach, uintptr_t hva, size_t size) +{ + struct nvmm_seg *seg; + size_t i; + + for (i = 0; i < NVMM_MAX_SEGS; i++) { + seg = &mach->segs[i]; + if (seg->present && seg->hva == hva && seg->size == size) { + return seg; + } + } + + return NULL; +} + +static int +nvmm_seg_validate(struct nvmm_machine *mach, uintptr_t hva, size_t size) +{ + struct nvmm_seg *seg; + size_t i; + + if ((hva % PAGE_SIZE) != 0 || (size % PAGE_SIZE) != 0) { + return EINVAL; + } + if (hva == 0) { + return EINVAL; + } + + for (i = 0; i < NVMM_MAX_SEGS; i++) { + seg = &mach->segs[i]; + if (!seg->present) { + continue; + } + + if (hva >= seg->hva && hva + size <= seg->hva + seg->size) { + break; + } + + if (hva >= seg->hva && hva < seg->hva + seg->size) { + return EEXIST; + } + if (hva + size > seg->hva && + hva + size <= seg->hva + seg->size) { + return EEXIST; + } + if (hva <= seg->hva && hva + size >= seg->hva + seg->size) { + return EEXIST; + } + } + + return 0; +} + +static struct nvmm_seg * +nvmm_seg_alloc(struct 
nvmm_machine *mach) +{ + struct nvmm_seg *seg; + size_t i; + + for (i = 0; i < NVMM_MAX_SEGS; i++) { + seg = &mach->segs[i]; + if (!seg->present) { + seg->present = true; + return seg; + } + } + + return NULL; +} + +static void +nvmm_seg_free(struct nvmm_seg *seg) +{ + struct vmspace *vmspace = curproc->p_vmspace; + + uvm_unmap(&vmspace->vm_map, seg->hva, seg->hva + seg->size); + uao_detach(seg->uobj); + + seg->uobj = NULL; + seg->present = false; +} + +static int +nvmm_hva_map(struct nvmm_ioc_hva_map *args) +{ + struct vmspace *vmspace = curproc->p_vmspace; + struct nvmm_machine *mach; + struct nvmm_seg *seg; + vaddr_t uva; + int error; + + error = nvmm_machine_get(args->machid, &mach, true); + if (error) + return error; + + error = nvmm_seg_validate(mach, args->hva, args->size); + if (error) + goto out; + + seg = nvmm_seg_alloc(mach); + if (seg == NULL) { + error = ENOBUFS; + goto out; + } + + seg->hva = args->hva; + seg->size = args->size; + seg->uobj = uao_create(seg->size, 0); + uva = seg->hva; + + /* Take a reference for the kernel. */ + uao_reference(seg->uobj); + + /* Take a reference for the user. */ + uao_reference(seg->uobj); + + /* Map the uobj into the user address space, as pageable. 
*/ + error = uvm_map(&vmspace->vm_map, &uva, seg->size, seg->uobj, 0, 0, + UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_SHARE, + UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP)); + if (error) { + uao_detach(seg->uobj); + } + +out: + nvmm_machine_put(mach); + return error; +} + +static int +nvmm_hva_unmap(struct nvmm_ioc_hva_unmap *args) +{ + struct nvmm_machine *mach; + struct nvmm_seg *seg; + int error; + + error = nvmm_machine_get(args->machid, &mach, true); + if (error) + return error; + + seg = nvmm_seg_find(mach, args->hva, args->size); + if (seg == NULL) + return ENOENT; + + nvmm_seg_free(seg); + + nvmm_machine_put(mach); + return 0; +} + +/* -------------------------------------------------------------------------- */ + static int nvmm_gpa_map(struct nvmm_ioc_gpa_map *args) { - struct proc *p = curproc; struct nvmm_machine *mach; - struct vmspace *vmspace; + struct uvm_object *uobj; gpaddr_t gpa; - vaddr_t uva; + size_t off; int error; error = nvmm_machine_get(args->machid, &mach, false); if (error) return error; - vmspace = p->p_vmspace; - if ((args->gpa % PAGE_SIZE) != 0 || (args->size % PAGE_SIZE) != 0 || (args->hva % PAGE_SIZE) != 0) { error = EINVAL; @@ -539,38 +722,30 @@ nvmm_gpa_map(struct nvmm_ioc_gpa_map *ar } gpa = args->gpa; - /* Take a reference for the kernel. */ - uao_reference(mach->uobj); + uobj = nvmm_seg_getuobj(mach, args->hva, args->size, &off); + if (uobj == NULL) { + error = EINVAL; + goto out; + } + + /* Take a reference for the machine. */ + uao_reference(uobj); /* Map the uobj into the machine address space, as pageable. 
*/ - error = uvm_map(&mach->vm->vm_map, &gpa, args->size, mach->uobj, - args->gpa, 0, UVM_MAPFLAG(UVM_PROT_RWX, UVM_PROT_RWX, - UVM_INH_NONE, UVM_ADV_RANDOM, UVM_FLAG_FIXED)); + error = uvm_map(&mach->vm->vm_map, &gpa, args->size, uobj, off, 0, + UVM_MAPFLAG(UVM_PROT_RWX, UVM_PROT_RWX, UVM_INH_NONE, + UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP)); if (error) { - uao_detach(mach->uobj); + uao_detach(uobj); goto out; } if (gpa != args->gpa) { - uao_detach(mach->uobj); + uao_detach(uobj); printf("[!] uvm_map problem\n"); error = EINVAL; goto out; } - uva = (vaddr_t)args->hva; - - /* Take a reference for the user. */ - uao_reference(mach->uobj); - - /* Map the uobj into the user address space, as pageable. */ - error = uvm_map(&vmspace->vm_map, &uva, args->size, mach->uobj, - args->gpa, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, - UVM_INH_SHARE, UVM_ADV_RANDOM, UVM_FLAG_FIXED|UVM_FLAG_UNMAP)); - if (error) { - uao_detach(mach->uobj); - goto out; - } - out: nvmm_machine_put(mach); return error; @@ -715,6 +890,10 @@ nvmm_ioctl(dev_t dev, u_long cmd, void * return nvmm_gpa_map(data); case NVMM_IOC_GPA_UNMAP: return nvmm_gpa_unmap(data); + case NVMM_IOC_HVA_MAP: + return nvmm_hva_map(data); + case NVMM_IOC_HVA_UNMAP: + return nvmm_hva_unmap(data); default: return EINVAL; } Index: src/sys/dev/nvmm/nvmm_internal.h diff -u src/sys/dev/nvmm/nvmm_internal.h:1.1 src/sys/dev/nvmm/nvmm_internal.h:1.2 --- src/sys/dev/nvmm/nvmm_internal.h:1.1 Wed Nov 7 07:43:08 2018 +++ src/sys/dev/nvmm/nvmm_internal.h Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm_internal.h,v 1.1 2018/11/07 07:43:08 maxv Exp $ */ +/* $NetBSD: nvmm_internal.h,v 1.2 2018/12/15 13:39:43 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. 
@@ -34,6 +34,7 @@ #define NVMM_MAX_MACHINES 128 #define NVMM_MAX_VCPUS 256 +#define NVMM_MAX_SEGS 32 #define NVMM_MAX_RAM (4UL * (1 << 30)) struct nvmm_cpu { @@ -49,6 +50,13 @@ struct nvmm_cpu { void *cpudata; }; +struct nvmm_seg { + bool present; + uintptr_t hva; + size_t size; + struct uvm_object *uobj; +}; + struct nvmm_machine { bool present; nvmm_machid_t machid; @@ -57,10 +65,12 @@ struct nvmm_machine { /* Kernel */ struct vmspace *vm; - struct uvm_object *uobj; gpaddr_t gpa_begin; gpaddr_t gpa_end; + /* Segments */ + struct nvmm_seg segs[NVMM_MAX_SEGS]; + /* CPU */ struct nvmm_cpu cpus[NVMM_MAX_VCPUS]; Index: src/sys/dev/nvmm/nvmm_ioctl.h diff -u src/sys/dev/nvmm/nvmm_ioctl.h:1.1 src/sys/dev/nvmm/nvmm_ioctl.h:1.2 --- src/sys/dev/nvmm/nvmm_ioctl.h:1.1 Wed Nov 7 07:43:08 2018 +++ src/sys/dev/nvmm/nvmm_ioctl.h Sat Dec 15 13:39:43 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmm_ioctl.h,v 1.1 2018/11/07 07:43:08 maxv Exp $ */ +/* $NetBSD: nvmm_ioctl.h,v 1.2 2018/12/15 13:39:43 maxv Exp $ */ /* * Copyright (c) 2018 The NetBSD Foundation, Inc. @@ -90,6 +90,20 @@ struct nvmm_ioc_vcpu_run { struct nvmm_exit exit; }; +struct nvmm_ioc_hva_map { + nvmm_machid_t machid; + uintptr_t hva; + size_t size; + int flags; +}; + +struct nvmm_ioc_hva_unmap { + nvmm_machid_t machid; + uintptr_t hva; + size_t size; + int flags; +}; + struct nvmm_ioc_gpa_map { nvmm_machid_t machid; uintptr_t hva; @@ -116,5 +130,7 @@ struct nvmm_ioc_gpa_unmap { #define NVMM_IOC_VCPU_RUN _IOWR('N', 9, struct nvmm_ioc_vcpu_run) #define NVMM_IOC_GPA_MAP _IOW ('N', 10, struct nvmm_ioc_gpa_map) #define NVMM_IOC_GPA_UNMAP _IOW ('N', 11, struct nvmm_ioc_gpa_unmap) +#define NVMM_IOC_HVA_MAP _IOW ('N', 12, struct nvmm_ioc_hva_map) +#define NVMM_IOC_HVA_UNMAP _IOW ('N', 13, struct nvmm_ioc_hva_unmap) #endif /* _NVMM_IOCTL_H_ */