Module Name: src Committed By: martin Date: Sun May 31 10:39:35 UTC 2020
Modified Files: src/sys/arch/x86/include [netbsd-9]: pmap.h src/sys/arch/x86/x86 [netbsd-9]: pmap.c src/sys/arch/xen/include [netbsd-9]: xenio.h xenpmap.h src/sys/arch/xen/x86 [netbsd-9]: x86_xpmap.c src/sys/arch/xen/xen [netbsd-9]: privcmd.c src/sys/external/mit/xen-include-public/dist/xen/include/public [netbsd-9]: memory.h Log Message: Pull up following revision(s) (requested by bouyer in ticket #935): sys/arch/xen/x86/x86_xpmap.c: revision 1.89 sys/arch/x86/include/pmap.h: revision 1.121 sys/arch/xen/xen/privcmd.c: revision 1.58 sys/external/mit/xen-include-public/dist/xen/include/public/memory.h: revision 1.2 sys/arch/xen/include/xenpmap.h: revision 1.44 sys/arch/xen/include/xenio.h: revision 1.12 sys/arch/x86/x86/pmap.c: revision 1.394 (all via patch) Adjust pmap_enter_ma() for upcoming new Xen privcmd ioctl: pass flags to xpq_update_foreign() Introduce a pmap MD flag: PMAP_MD_XEN_NOTR, which causes xpq_update_foreign() to use the MMU_PT_UPDATE_NO_TRANSLATE flag. Make xpq_update_foreign() return the raw Xen error. This will cause pmap_enter_ma() to return a negative error number in this case, but the only user of this code path is privcmd.c and it can deal with it. Add pmap_enter_gnt(), which maps a set of Xen grant entries at the specified va in the specified pmap. Use the hooks implemented for EPT to keep track of mapped grant entries in the pmap, and unmap them when pmap_remove() is called. This requires pmap_remove() to be split into a pmap_remove_locked(), to be called from pmap_remove_gnt(). 
Implement new ioctl, needed by Xen 4.13: IOCTL_PRIVCMD_MMAPBATCH_V2 IOCTL_PRIVCMD_MMAP_RESOURCE IOCTL_GNTDEV_MMAP_GRANT_REF IOCTL_GNTDEV_ALLOC_GRANT_REF Always enable declarations needed by privcmd.c To generate a diff of this commit: cvs rdiff -u -r1.101 -r1.101.2.1 src/sys/arch/x86/include/pmap.h cvs rdiff -u -r1.334.2.1 -r1.334.2.2 src/sys/arch/x86/x86/pmap.c cvs rdiff -u -r1.11 -r1.11.22.1 src/sys/arch/xen/include/xenio.h cvs rdiff -u -r1.41 -r1.41.4.1 src/sys/arch/xen/include/xenpmap.h cvs rdiff -u -r1.84 -r1.84.4.1 src/sys/arch/xen/x86/x86_xpmap.c cvs rdiff -u -r1.51 -r1.51.10.1 src/sys/arch/xen/xen/privcmd.c cvs rdiff -u -r1.1.1.1 -r1.1.1.1.6.1 \ src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/x86/include/pmap.h diff -u src/sys/arch/x86/include/pmap.h:1.101 src/sys/arch/x86/include/pmap.h:1.101.2.1 --- src/sys/arch/x86/include/pmap.h:1.101 Wed May 29 16:54:41 2019 +++ src/sys/arch/x86/include/pmap.h Sun May 31 10:39:34 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.h,v 1.101 2019/05/29 16:54:41 maxv Exp $ */ +/* $NetBSD: pmap.h,v 1.101.2.1 2020/05/31 10:39:34 martin Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -272,7 +272,7 @@ struct pmap { uint64_t pm_ncsw; /* for assertions */ struct vm_page *pm_gc_ptp; /* pages from pmap g/c */ - /* Used by NVMM. */ + /* Used by NVMM and Xen */ int (*pm_enter)(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int); bool (*pm_extract)(struct pmap *, vaddr_t, paddr_t *); void (*pm_remove)(struct pmap *, vaddr_t, vaddr_t); Index: src/sys/arch/x86/x86/pmap.c diff -u src/sys/arch/x86/x86/pmap.c:1.334.2.1 src/sys/arch/x86/x86/pmap.c:1.334.2.2 --- src/sys/arch/x86/x86/pmap.c:1.334.2.1 Wed Apr 29 13:39:23 2020 +++ src/sys/arch/x86/x86/pmap.c Sun May 31 10:39:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: pmap.c,v 1.334.2.1 2020/04/29 13:39:23 martin Exp $ */ +/* $NetBSD: pmap.c,v 1.334.2.2 2020/05/31 10:39:35 martin Exp $ */ /* * Copyright (c) 2008, 2010, 2016, 2017 The NetBSD Foundation, Inc. @@ -130,7 +130,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.334.2.1 2020/04/29 13:39:23 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.334.2.2 2020/05/31 10:39:35 martin Exp $"); #include "opt_user_ldt.h" #include "opt_lockdebug.h" @@ -150,6 +150,7 @@ __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.3 #include <sys/intr.h> #include <sys/xcall.h> #include <sys/kcore.h> +#include <sys/kmem.h> #include <sys/asan.h> #include <uvm/uvm.h> @@ -2393,7 +2394,7 @@ pmap_create(void) pmap->pm_flags = 0; pmap->pm_gc_ptp = NULL; - /* Used by NVMM. 
*/ + /* Used by NVMM and Xen */ pmap->pm_enter = NULL; pmap->pm_extract = NULL; pmap->pm_remove = NULL; @@ -3527,32 +3528,19 @@ pmap_remove_pte(struct pmap *pmap, struc return true; } -/* - * pmap_remove: mapping removal function. - * - * => caller should not be holding any pmap locks - */ -void -pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +static void +pmap_remove_locked(struct pmap *pmap, vaddr_t sva, vaddr_t eva, + pt_entry_t *ptes, pd_entry_t * const *pdes) { - pt_entry_t *ptes; pd_entry_t pde; - pd_entry_t * const *pdes; struct pv_entry *pv_tofree = NULL; bool result; paddr_t ptppa; vaddr_t blkendva, va = sva; struct vm_page *ptp; - struct pmap *pmap2; int lvl; - if (__predict_false(pmap->pm_remove != NULL)) { - (*pmap->pm_remove)(pmap, sva, eva); - return; - } - - kpreempt_disable(); - pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + KASSERT(kpreempt_disabled()); /* * removing one page? take shortcut function. @@ -3620,14 +3608,31 @@ pmap_remove(struct pmap *pmap, vaddr_t s pmap_free_ptp(pmap, ptp, va, ptes, pdes); } } - pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */ - kpreempt_enable(); /* Now we free unused PVs */ if (pv_tofree) pmap_free_pvs(pv_tofree); } +void +pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + struct pmap *pmap2; + pt_entry_t *ptes; + pd_entry_t * const *pdes; + + if (__predict_false(pmap->pm_remove != NULL)) { + (*pmap->pm_remove)(pmap, sva, eva); + return; + } + + kpreempt_disable(); + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + pmap_remove_locked(pmap, sva, eva, ptes, pdes); + pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */ + kpreempt_enable(); +} + /* * pmap_sync_pv: clear pte bits and return the old value of the pp_attrs. 
* @@ -4296,7 +4301,7 @@ pmap_enter_ma(struct pmap *pmap, vaddr_t continue; } error = xpq_update_foreign( - vtomach((vaddr_t)ptep), npte, domid); + vtomach((vaddr_t)ptep), npte, domid, flags); splx(s); if (error) { if (ptp != NULL && ptp->wire_count <= 1) { @@ -4380,6 +4385,315 @@ out: return error; } +#if defined(XEN) && defined(DOM0OPS) + +struct pmap_data_gnt { + SLIST_ENTRY(pmap_data_gnt) pd_gnt_list; + vaddr_t pd_gnt_sva; + vaddr_t pd_gnt_eva; /* range covered by this gnt */ + int pd_gnt_refs; /* ref counter */ + struct gnttab_map_grant_ref pd_gnt_ops[1]; /* variable length */ +}; +SLIST_HEAD(pmap_data_gnt_head, pmap_data_gnt); + +static void pmap_remove_gnt(struct pmap *, vaddr_t, vaddr_t); + +static struct pmap_data_gnt * +pmap_find_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + struct pmap_data_gnt_head *headp; + struct pmap_data_gnt *pgnt; + + KASSERT(mutex_owned(pmap->pm_lock)); + headp = pmap->pm_data; + KASSERT(headp != NULL); + SLIST_FOREACH(pgnt, headp, pd_gnt_list) { + if (pgnt->pd_gnt_sva >= sva && pgnt->pd_gnt_sva <= eva) + return pgnt; + /* check that we're not overlapping part of a region */ + KASSERT(pgnt->pd_gnt_sva >= eva || pgnt->pd_gnt_eva <= sva); + } + return NULL; +} + +static void +pmap_alloc_gnt(struct pmap *pmap, vaddr_t sva, int nentries, + const struct gnttab_map_grant_ref *ops) +{ + struct pmap_data_gnt_head *headp; + struct pmap_data_gnt *pgnt; + vaddr_t eva = sva + nentries * PAGE_SIZE; + KASSERT(mutex_owned(pmap->pm_lock)); + KASSERT(nentries >= 1); + if (pmap->pm_remove == NULL) { + pmap->pm_remove = pmap_remove_gnt; + KASSERT(pmap->pm_data == NULL); + headp = kmem_alloc(sizeof(*headp), KM_SLEEP); + SLIST_INIT(headp); + pmap->pm_data = headp; + } else { + KASSERT(pmap->pm_remove == pmap_remove_gnt); + KASSERT(pmap->pm_data != NULL); + headp = pmap->pm_data; + } + + pgnt = pmap_find_gnt(pmap, sva, eva); + if (pgnt != NULL) { + KASSERT(pgnt->pd_gnt_sva == sva); + KASSERT(pgnt->pd_gnt_eva == eva); + return; + } + + /* new 
entry */ + pgnt = kmem_alloc(sizeof(*pgnt) + + (nentries - 1) * sizeof(struct gnttab_map_grant_ref), KM_SLEEP); + pgnt->pd_gnt_sva = sva; + pgnt->pd_gnt_eva = eva; + pgnt->pd_gnt_refs = 0; + memcpy(pgnt->pd_gnt_ops, ops, + sizeof(struct gnttab_map_grant_ref) * nentries); + SLIST_INSERT_HEAD(headp, pgnt, pd_gnt_list); +} + +static void +pmap_free_gnt(struct pmap *pmap, struct pmap_data_gnt *pgnt) +{ + struct pmap_data_gnt_head *headp = pmap->pm_data; + int nentries = (pgnt->pd_gnt_eva - pgnt->pd_gnt_sva) / PAGE_SIZE; + KASSERT(nentries >= 1); + KASSERT(mutex_owned(&pmap->pm_lock)); + KASSERT(pgnt->pd_gnt_refs == 0); + SLIST_REMOVE(headp, pgnt, pmap_data_gnt, pd_gnt_list); + kmem_free(pgnt, sizeof(*pgnt) + + (nentries - 1) * sizeof(struct gnttab_map_grant_ref)); + if (SLIST_EMPTY(headp)) { + kmem_free(headp, sizeof(*headp)); + pmap->pm_data = NULL; + pmap->pm_remove = NULL; + } +} + +/* + * pmap_enter_gnt: enter a grant entry into a pmap + * + * => must be done "now" ... no lazy-evaluation + */ +int +pmap_enter_gnt(struct pmap *pmap, vaddr_t va, vaddr_t sva, int nentries, + const struct gnttab_map_grant_ref *oops) +{ + struct pmap_data_gnt *pgnt; + pt_entry_t *ptes, opte; + pt_entry_t *ptep; + pd_entry_t * const *pdes; + struct vm_page *ptp; + struct vm_page *old_pg; + struct pmap_page *old_pp; + struct pv_entry *old_pve = NULL; + int error; + struct pmap *pmap2; + struct gnttab_map_grant_ref *op; + int ret; + int idx; + + KASSERT(pmap_initialized); + KASSERT(curlwp->l_md.md_gc_pmap != pmap); + KASSERT(va < VM_MAX_KERNEL_ADDRESS); + KASSERTMSG(va != (vaddr_t)PDP_BASE, "%s: trying to map va=%#" + PRIxVADDR " over PDP!", __func__, va); + KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || + pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), + "%s: missing kernel PTP for va=%#" PRIxVADDR, __func__, va); + + kpreempt_disable(); + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ + + pmap_alloc_gnt(pmap, sva, nentries, oops); + + pgnt = pmap_find_gnt(pmap, va, va + 
PAGE_SIZE); + KASSERT(pgnt != NULL); + + ptp = pmap_get_ptp(pmap, va, pdes, PMAP_CANFAIL); + if (ptp == NULL) { + pmap_unmap_ptes(pmap, pmap2); + error = ENOMEM; + goto out; + } + + /* + * Check if there is an existing mapping. If we are now sure that + * we need pves and we failed to allocate them earlier, handle that. + * Caching the value of oldpa here is safe because only the mod/ref + * bits can change while the pmap is locked. + */ + ptep = &ptes[pl1_i(va)]; + opte = *ptep; + bool have_oldpa = pmap_valid_entry(opte); + paddr_t oldpa = pmap_pte2pa(opte); + + /* + * Update the pte. + */ + + idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; + op = &pgnt->pd_gnt_ops[idx]; + + op->host_addr = xpmap_ptetomach(ptep); + op->dev_bus_addr = 0; + op->status = GNTST_general_error; + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); + if (__predict_false(ret)) { + printf("%s: GNTTABOP_map_grant_ref failed: %d\n", + __func__, ret); + op->status = GNTST_general_error; + } + for (int d = 0; d < 256 && op->status == GNTST_eagain; d++) { + kpause("gntmap", false, mstohz(1), NULL); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, 1); + if (__predict_false(ret)) { + printf("%s: GNTTABOP_map_grant_ref failed: %d\n", + __func__, ret); + op->status = GNTST_general_error; + } + } + if (__predict_false(op->status != GNTST_okay)) { + printf("%s: GNTTABOP_map_grant_ref status: %d\n", + __func__, op->status); + if (ptp != NULL) { + if (have_oldpa) { + ptp->wire_count--; + } + } + } else { + pgnt->pd_gnt_refs++; + if (ptp != NULL) { + if (!have_oldpa) { + ptp->wire_count++; + } + } + } + + /* + * Update statistics and PTP's reference count. + */ + pmap_stats_update_bypte(pmap, 0, opte); + if (ptp != NULL && !have_oldpa) { + ptp->wire_count++; + } + KASSERT(ptp == NULL || ptp->wire_count > 1); + + /* + * If old page is pv-tracked, remove pv_entry from its list. 
+ */ + if ((~opte & (PTE_P | PG_PVLIST)) == 0) { + if ((old_pg = PHYS_TO_VM_PAGE(oldpa)) != NULL) { + KASSERT(uvm_page_locked_p(old_pg)); + old_pp = VM_PAGE_TO_PP(old_pg); + } else if ((old_pp = pmap_pv_tracked(oldpa)) == NULL) { + panic("%s: PG_PVLIST with pv-untracked page" + " va = %#"PRIxVADDR + " pa = %#" PRIxPADDR , + __func__, va, oldpa); + } + + old_pve = pmap_remove_pv(old_pp, ptp, va); + old_pp->pp_attrs |= pmap_pte_to_pp_attrs(opte); + } + + pmap_unmap_ptes(pmap, pmap2); + + error = 0; +out: + kpreempt_enable(); + if (old_pve != NULL) { + pool_cache_put(&pmap_pv_cache, old_pve); + } + + return error; +} + +/* + * pmap_remove_gnt: grant mapping removal function. + * + * => caller should not be holding any pmap locks + */ +static void +pmap_remove_gnt(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +{ + struct pmap_data_gnt *pgnt; + pt_entry_t *ptes; + pd_entry_t pde; + pd_entry_t * const *pdes; + paddr_t ptppa; + struct vm_page *ptp; + struct pmap *pmap2; + vaddr_t va; + int lvl; + int idx; + struct gnttab_map_grant_ref *op; + struct gnttab_unmap_grant_ref unmap_op; + int ret; + + KASSERT(pmap != pmap_kernel()); + KASSERT(pmap->pm_remove == pmap_remove_gnt); + + pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* lock pmap */ + for (va = sva; va < eva; va += PAGE_SIZE) { + pgnt = pmap_find_gnt(pmap, va, va + PAGE_SIZE); + if (pgnt == NULL) { + kpreempt_disable(); + pmap_remove_locked(pmap, sva, eva, ptes, pdes); + kpreempt_enable(); + continue; + } + + if (!pmap_pdes_valid(va, pdes, &pde, &lvl)) { + panic("pmap_remove_gnt pdes not valid"); + } + + idx = (va - pgnt->pd_gnt_sva) / PAGE_SIZE; + op = &pgnt->pd_gnt_ops[idx]; + KASSERT(lvl == 1); + KASSERT(op->status == GNTST_okay); + + /* PA of the PTP */ + ptppa = pmap_pte2pa(pde); + + /* Get PTP if non-kernel mapping. 
*/ + ptp = pmap_find_ptp(pmap, va, ptppa, 1); + KASSERTMSG(ptp != NULL, + "%s: unmanaged PTP detected", __func__); + + if (op->status == GNTST_okay) { + KASSERT(pmap_valid_entry(ptes[pl1_i(va)])); + unmap_op.handle = op->handle; + unmap_op.dev_bus_addr = 0; + unmap_op.host_addr = xpmap_ptetomach(&ptes[pl1_i(va)]); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, &unmap_op, 1); + if (ret) { + printf("%s: GNTTABOP_unmap_grant_ref " + "failed: %d\n", __func__, ret); + } + + ptp->wire_count--; + pgnt->pd_gnt_refs--; + if (pgnt->pd_gnt_refs == 0) { + pmap_free_gnt(pmap, pgnt); + } + } + /* + * if mapping removed and the PTP is no longer + * being used, free it! + */ + + if (ptp && ptp->wire_count <= 1) + pmap_free_ptp(pmap, ptp, va, ptes, pdes); + } + pmap_unmap_ptes(pmap, pmap2); /* unlock pmap */ +} +#endif /* XEN && DOM0OPS */ + paddr_t pmap_get_physpage(void) { Index: src/sys/arch/xen/include/xenio.h diff -u src/sys/arch/xen/include/xenio.h:1.11 src/sys/arch/xen/include/xenio.h:1.11.22.1 --- src/sys/arch/xen/include/xenio.h:1.11 Thu Jul 7 06:55:40 2016 +++ src/sys/arch/xen/include/xenio.h Sun May 31 10:39:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xenio.h,v 1.11 2016/07/07 06:55:40 msaitoh Exp $ */ +/* $NetBSD: xenio.h,v 1.11.22.1 2020/05/31 10:39:35 martin Exp $ */ /****************************************************************************** * privcmd.h @@ -114,9 +114,92 @@ typedef struct oprivcmd_hypercall */ #define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ _IOR('P', 5, int) + #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ _IOW('P', 6, privcmd_mmapbatch_v2_t) +/* + * @cmd: IOCTL_PRIVCMD_MMAP_RESOURCE + * @arg &privcmd_mmap_resource_t + * Return: + * map the specified resource at the provided virtual address + */ + +typedef struct privcmd_mmap_resource { + domid_t dom; + uint32_t type; + uint32_t id; + uint32_t idx; + uint64_t num; + uint64_t addr; +} privcmd_mmap_resource_t; + +#define IOCTL_PRIVCMD_MMAP_RESOURCE \ + _IOW('P', 7, privcmd_mmap_resource_t) + +/* + * @cmd: 
IOCTL_GNTDEV_MMAP_GRANT_REF + * @arg &ioctl_gntdev_mmap_grant_ref + * Return: + * map the grant references at the virtual address provided by caller + * The grant ref already exists (e.g. comes from a remote domain) + */ +struct ioctl_gntdev_grant_ref { + /* The domain ID of the grant to be mapped. */ + uint32_t domid; + /* The grant reference of the grant to be mapped. */ + uint32_t ref; +}; + +struct ioctl_gntdev_grant_notify { + ssize_t offset; + uint32_t action; + uint32_t event_channel_port; +}; +#define UNMAP_NOTIFY_CLEAR_BYTE 0x1 +#define UNMAP_NOTIFY_SEND_EVENT 0x2 + +struct ioctl_gntdev_mmap_grant_ref { + /* The number of grants to be mapped. */ + uint32_t count; + uint32_t pad; + /* The virtual address where they should be mapped */ + void *va; + /* notify action */ + struct ioctl_gntdev_grant_notify notify; + /* Array of grant references, of size @count. */ + struct ioctl_gntdev_grant_ref *refs; +}; + +#define IOCTL_GNTDEV_MMAP_GRANT_REF \ + _IOW('P', 8, struct ioctl_gntdev_mmap_grant_ref) + +/* + * @cmd: IOCTL_GNTDEV_ALLOC_GRANT_REF + * @arg &ioctl_gntdev_alloc_grant_ref + * Return: + * Allocate local memory and grant it to a remote domain. + * local memory is mmaped at the virtual address provided by caller + */ + +struct ioctl_gntdev_alloc_grant_ref { + /* IN parameters */ + uint16_t domid; + uint16_t flags; + uint32_t count; + void *va; + /* notify action */ + struct ioctl_gntdev_grant_notify notify; + /* Variable OUT parameter */ + uint32_t *gref_ids; +}; + +#define IOCTL_GNTDEV_ALLOC_GRANT_REF \ + _IOW('P', 9, struct ioctl_gntdev_alloc_grant_ref) + +#define GNTDEV_ALLOC_FLAG_WRITABLE 0x01 + + /* Interface to /dev/xenevt */ /* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. 
*/ #define EVTCHN_RESET _IO('E', 1) Index: src/sys/arch/xen/include/xenpmap.h diff -u src/sys/arch/xen/include/xenpmap.h:1.41 src/sys/arch/xen/include/xenpmap.h:1.41.4.1 --- src/sys/arch/xen/include/xenpmap.h:1.41 Wed Feb 13 06:52:43 2019 +++ src/sys/arch/xen/include/xenpmap.h Sun May 31 10:39:35 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xenpmap.h,v 1.41 2019/02/13 06:52:43 cherry Exp $ */ +/* $NetBSD: xenpmap.h,v 1.41.4.1 2020/05/31 10:39:35 martin Exp $ */ /* * @@ -48,7 +48,7 @@ void xpq_queue_set_ldt(vaddr_t, uint32_t void xpq_queue_tlb_flush(void); void xpq_queue_pin_table(paddr_t, int); void xpq_queue_unpin_table(paddr_t); -int xpq_update_foreign(paddr_t, pt_entry_t, int); +int xpq_update_foreign(paddr_t, pt_entry_t, int, u_int); void xen_mcast_tlbflush(kcpuset_t *); void xen_bcast_tlbflush(void); void xen_mcast_invlpg(vaddr_t, kcpuset_t *); @@ -75,6 +75,10 @@ void xen_kpm_sync(struct pmap *, int); #ifdef XENPV extern unsigned long *xpmap_phys_to_machine_mapping; +/* MD PMAP flags */ +/* mmu_update with MMU_PT_UPDATE_NO_TRANSLATE */ +#define PMAP_MD_XEN_NOTR 0x01000000 + static __inline paddr_t xpmap_mtop_masked(paddr_t mpa) { @@ -171,4 +175,7 @@ MULTI_update_va_mapping_otherdomain( void xen_set_user_pgd(paddr_t); #endif +int pmap_enter_gnt(struct pmap *, vaddr_t, vaddr_t, int, + const struct gnttab_map_grant_ref *); + #endif /* _XEN_XENPMAP_H_ */ Index: src/sys/arch/xen/x86/x86_xpmap.c diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.84 src/sys/arch/xen/x86/x86_xpmap.c:1.84.4.1 --- src/sys/arch/xen/x86/x86_xpmap.c:1.84 Sat Mar 9 08:42:25 2019 +++ src/sys/arch/xen/x86/x86_xpmap.c Sun May 31 10:39:34 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: x86_xpmap.c,v 1.84 2019/03/09 08:42:25 maxv Exp $ */ +/* $NetBSD: x86_xpmap.c,v 1.84.4.1 2020/05/31 10:39:34 martin Exp $ */ /* * Copyright (c) 2017 The NetBSD Foundation, Inc. 
@@ -95,7 +95,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.84 2019/03/09 08:42:25 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.84.4.1 2020/05/31 10:39:34 martin Exp $"); #include "opt_xen.h" #include "opt_ddb.h" @@ -427,17 +427,26 @@ xen_pagezero(paddr_t pa) } int -xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom) +xpq_update_foreign(paddr_t ptr, pt_entry_t val, int dom, u_int flags) { mmu_update_t op; int ok; + int err; xpq_flush_queue(); op.ptr = ptr; + if (flags & PMAP_MD_XEN_NOTR) + op.ptr |= MMU_PT_UPDATE_NO_TRANSLATE; op.val = val; - if (HYPERVISOR_mmu_update(&op, 1, &ok, dom) < 0) - return EFAULT; + /* + * here we return a negative error number as Xen error to + * pmap_enter_ma. only calls from privcmd.c should end here, and + * it can deal with it. + */ + if ((err = HYPERVISOR_mmu_update(&op, 1, &ok, dom)) < 0) { + return err; + } return 0; } Index: src/sys/arch/xen/xen/privcmd.c diff -u src/sys/arch/xen/xen/privcmd.c:1.51 src/sys/arch/xen/xen/privcmd.c:1.51.10.1 --- src/sys/arch/xen/xen/privcmd.c:1.51 Thu Jun 22 22:36:50 2017 +++ src/sys/arch/xen/xen/privcmd.c Sun May 31 10:39:34 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: privcmd.c,v 1.51 2017/06/22 22:36:50 chs Exp $ */ +/* $NetBSD: privcmd.c,v 1.51.10.1 2020/05/31 10:39:34 martin Exp $ */ /*- * Copyright (c) 2004 Christian Limpach. 
@@ -27,7 +27,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.51 2017/06/22 22:36:50 chs Exp $"); +__KERNEL_RCSID(0, "$NetBSD: privcmd.c,v 1.51.10.1 2020/05/31 10:39:34 martin Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -36,6 +36,7 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v #include <sys/dirent.h> #include <sys/stat.h> #include <sys/proc.h> +#include <sys/kernel.h> #include <miscfs/specfs/specdev.h> #include <miscfs/kernfs/kernfs.h> @@ -46,29 +47,68 @@ __KERNEL_RCSID(0, "$NetBSD: privcmd.c,v #include <xen/kernfs_machdep.h> #include <xen/xenio.h> +#include <xen/granttables.h> #define PRIVCMD_MODE (S_IRUSR) /* Magic value is used to mark invalid pages. * This must be a value within the page-offset. * Page-aligned values including 0x0 are used by the guest. - */ + */ #define INVALID_PAGE 0xfff +typedef enum _privcmd_type { + PTYPE_PRIVCMD, + PTYPE_GNTDEV_REF, + PTYPE_GNTDEV_ALLOC +} privcmd_type; + +struct privcmd_object_privcmd { + paddr_t *maddr; /* array of machine address to map */ + int domid; + bool no_translate; +}; + +struct privcmd_object_gntref { + struct ioctl_gntdev_grant_notify notify; + struct gnttab_map_grant_ref ops[1]; /* variable length */ +}; + +struct privcmd_object_gntalloc { + vaddr_t gntva; /* granted area mapped in kernel */ + uint16_t domid; + uint16_t flags; + struct ioctl_gntdev_grant_notify notify; + uint32_t gref_ids[1]; /* variable length */ +}; + struct privcmd_object { struct uvm_object uobj; - paddr_t *maddr; /* array of machine address to map */ + privcmd_type type; int npages; - int domid; + union { + struct privcmd_object_privcmd pc; + struct privcmd_object_gntref gr; + struct privcmd_object_gntalloc ga; + } u; }; +#define PGO_GNTREF_LEN(count) \ + (sizeof(struct privcmd_object) + \ + sizeof(struct gnttab_map_grant_ref) * ((count) - 1)) + +#define PGO_GNTA_LEN(count) \ + (sizeof(struct privcmd_object) + \ + sizeof(uint32_t) * ((count) - 1)) + int privcmd_nobjects = 0; static void 
privpgop_reference(struct uvm_object *); static void privpgop_detach(struct uvm_object *); static int privpgop_fault(struct uvm_faultinfo *, vaddr_t , struct vm_page **, - int, int, vm_prot_t, int); -static int privcmd_map_obj(struct vm_map *, vaddr_t, paddr_t *, int, int); + int, int, vm_prot_t, int); +static int privcmd_map_obj(struct vm_map *, vaddr_t, + struct privcmd_object *, vm_prot_t); static int @@ -251,6 +291,414 @@ privcmd_xen2bsd_errno(int error) } } +static vm_prot_t +privcmd_get_map_prot(struct vm_map *map, vaddr_t start, off_t size) +{ + vm_prot_t prot; + + vm_map_lock_read(map); + /* get protections. This also check for validity of mapping */ + if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE)) + prot = VM_PROT_READ | VM_PROT_WRITE; + else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ)) + prot = VM_PROT_READ; + else { + printf("privcmd_get_map_prot 0x%lx -> 0x%lx " + "failed\n", + start, (unsigned long)(start + size - 1)); + prot = UVM_PROT_NONE; + } + vm_map_unlock_read(map); + return prot; +} + +static int +privcmd_mmap(struct vop_ioctl_args *ap) +{ + int i, j; + privcmd_mmap_t *mcmd = ap->a_data; + privcmd_mmap_entry_t mentry; + vaddr_t va; + paddr_t ma; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + for (i = 0; i < mcmd->num; i++) { + error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry)); + if (error) + return EINVAL; + if (mentry.npages == 0) + return EINVAL; + if (mentry.va > VM_MAXUSER_ADDRESS) + return EINVAL; + va = mentry.va & ~PAGE_MASK; + prot = privcmd_get_map_prot(vmm, va, mentry.npages * PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages, + KM_SLEEP); + ma = ((paddr_t)mentry.mfn) << PGSHIFT; + for (j = 0; j < mentry.npages; j++) { + maddr[j] = ma; + ma += PAGE_SIZE; + } + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + 
obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = mentry.npages; + obj->u.pc.domid = mcmd->dom; + error = privcmd_map_obj(vmm, va, obj, prot); + if (error) + return error; + } + return 0; +} + +static int +privcmd_mmapbatch(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmapbatch_t* pmb = ap->a_data; + vaddr_t va0; + u_long mfn; + paddr_t ma; + struct vm_map *vmm; + vaddr_t trymap; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmb->addr & ~PAGE_MASK; + + if (pmb->num == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); + /* get a page of KVA to check mappins */ + trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, + UVM_KMF_VAONLY); + if (trymap == 0) { + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + return ENOMEM; + } + + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = pmb->num; + obj->u.pc.domid = pmb->dom; + + for(i = 0; i < pmb->num; ++i) { + error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); + if (error != 0) { + /* XXX: mappings */ + pmap_update(pmap_kernel()); + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + uvm_km_free(kernel_map, trymap, PAGE_SIZE, + UVM_KMF_VAONLY); + return error; + } + ma = ((paddr_t)mfn) << PGSHIFT; + if ((error = pmap_enter_ma(pmap_kernel(), trymap, ma, 0, + prot, PMAP_CANFAIL | prot, pmb->dom))) { + mfn |= 0xF0000000; + copyout(&mfn, &pmb->arr[i], sizeof(mfn)); + maddr[i] = INVALID_PAGE; + } else { + pmap_remove(pmap_kernel(), trymap, + trymap + PAGE_SIZE); + maddr[i] = ma; + } + } + pmap_update(pmap_kernel()); + uvm_km_free(kernel_map, trymap, PAGE_SIZE, 
UVM_KMF_VAONLY); + + error = privcmd_map_obj(vmm, va0, obj, prot); + + return error; +} + +static int +privcmd_mmapbatch_v2(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmapbatch_v2_t* pmb = ap->a_data; + vaddr_t va0; + u_long mfn; + struct vm_map *vmm; + paddr_t *maddr; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmb->addr & ~PAGE_MASK; + + if (pmb->num == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = false; + obj->npages = pmb->num; + obj->u.pc.domid = pmb->dom; + + for(i = 0; i < pmb->num; ++i) { + error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); + if (error != 0) { + kmem_free(maddr, sizeof(paddr_t) * pmb->num); + return error; + } + maddr[i] = ((paddr_t)mfn) << PGSHIFT; + } + error = privcmd_map_obj(vmm, va0, obj, prot); + if (error) + return error; + + /* + * map the range in user process now. 
+ * If Xenr return -ENOENT, retry (paging in progress) + */ + for(i = 0; i < pmb->num; i++, va0 += PAGE_SIZE) { + int err, cerr; + for (int j = 0 ; j < 10; j++) { + err = pmap_enter_ma(vmm->pmap, va0, maddr[i], 0, + prot, PMAP_CANFAIL | prot, + pmb->dom); + if (err != -2) /* Xen ENOENT */ + break; + if (kpause("xnoent", 1, mstohz(100), NULL)) + break; + } + if (err) { + maddr[i] = INVALID_PAGE; + } + cerr = copyout(&err, &pmb->err[i], sizeof(pmb->err[i])); + if (cerr) { + privpgop_detach(&obj->uobj); + return cerr; + } + } + return 0; +} + +static int +privcmd_mmap_resource(struct vop_ioctl_args *ap) +{ + int i; + privcmd_mmap_resource_t* pmr = ap->a_data; + vaddr_t va0; + struct vm_map *vmm; + struct privcmd_object *obj; + vm_prot_t prot; + int error; + struct xen_mem_acquire_resource op; + xen_pfn_t *pfns; + paddr_t *maddr; + + KASSERT(!xen_feature(XENFEAT_auto_translated_physmap)); + + vmm = &curlwp->l_proc->p_vmspace->vm_map; + va0 = pmr->addr & ~PAGE_MASK; + + if (pmr->num == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmr->num) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + pfns = kmem_alloc(sizeof(xen_pfn_t) * pmr->num, KM_SLEEP); + memset(&op, 0, sizeof(op)); + op.domid = pmr->dom; + op.type = pmr->type; + op.id = pmr->id; + op.frame = pmr->idx; + op.nr_frames = pmr->num; + set_xen_guest_handle(op.frame_list, pfns); + + error = HYPERVISOR_memory_op(XENMEM_acquire_resource, &op); + if (error) { + printf("%s: XENMEM_acquire_resource failed: %d\n", + __func__, error); + return privcmd_xen2bsd_errno(error); + } + maddr = kmem_alloc(sizeof(paddr_t) * pmr->num, KM_SLEEP); + for (i = 0; i < pmr->num; i++) { + maddr[i] = pfns[i] << PGSHIFT; + } + kmem_free(pfns, sizeof(xen_pfn_t) * pmr->num); + + obj = kmem_alloc(sizeof(*obj), KM_SLEEP); + obj->type = PTYPE_PRIVCMD; + obj->u.pc.maddr = maddr; + obj->u.pc.no_translate = 
true; + obj->npages = pmr->num; + obj->u.pc.domid = (op.flags & XENMEM_rsrc_acq_caller_owned) ? + DOMID_SELF : pmr->dom; + + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; +} + +static int +privcmd_map_gref(struct vop_ioctl_args *ap) +{ + struct ioctl_gntdev_mmap_grant_ref *mgr = ap->a_data; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + struct privcmd_object *obj; + vaddr_t va0 = (vaddr_t)mgr->va & ~PAGE_MASK; + vm_prot_t prot; + int error; + + if (mgr->count == 0) + return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mgr->count) + return EINVAL; + if (mgr->notify.offset < 0 || mgr->notify.offset > mgr->count) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + obj = kmem_alloc(PGO_GNTREF_LEN(mgr->count), KM_SLEEP); + + obj->type = PTYPE_GNTDEV_REF; + obj->npages = mgr->count; + memcpy(&obj->u.gr.notify, &mgr->notify, + sizeof(obj->u.gr.notify)); + + for (int i = 0; i < obj->npages; ++i) { + struct ioctl_gntdev_grant_ref gref; + error = copyin(&mgr->refs[i], &gref, sizeof(gref)); + if (error != 0) { + goto err1; + } + obj->u.gr.ops[i].host_addr = 0; + obj->u.gr.ops[i].dev_bus_addr = 0; + obj->u.gr.ops[i].ref = gref.ref; + obj->u.gr.ops[i].dom = gref.domid; + obj->u.gr.ops[i].handle = -1; + obj->u.gr.ops[i].flags = GNTMAP_host_map | + GNTMAP_application_map | GNTMAP_contains_pte; + if (prot == UVM_PROT_READ) + obj->u.gr.ops[i].flags |= GNTMAP_readonly; + } + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; + +err1: + kmem_free(obj, PGO_GNTREF_LEN(obj->npages)); + return error; +} + +static int +privcmd_alloc_gref(struct vop_ioctl_args *ap) +{ + struct ioctl_gntdev_alloc_grant_ref *mga = ap->a_data; + struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + struct privcmd_object *obj; + vaddr_t va0 = (vaddr_t)mga->va & ~PAGE_MASK; + vm_prot_t prot; + int error, ret; + + if (mga->count == 0) + 
return EINVAL; + if (va0 > VM_MAXUSER_ADDRESS) + return EINVAL; + if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < mga->count) + return EINVAL; + if (mga->notify.offset < 0 || mga->notify.offset > mga->count) + return EINVAL; + + prot = privcmd_get_map_prot(vmm, va0, PAGE_SIZE); + if (prot == UVM_PROT_NONE) + return EINVAL; + + obj = kmem_alloc(PGO_GNTA_LEN(mga->count), KM_SLEEP); + + obj->type = PTYPE_GNTDEV_ALLOC; + obj->npages = mga->count; + obj->u.ga.domid = mga->domid; + memcpy(&obj->u.ga.notify, &mga->notify, + sizeof(obj->u.ga.notify)); + obj->u.ga.gntva = uvm_km_alloc(kernel_map, + PAGE_SIZE * obj->npages, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO); + if (obj->u.ga.gntva == 0) { + error = ENOMEM; + goto err1; + } + + for (int i = 0; i < obj->npages; ++i) { + paddr_t ma; + vaddr_t va = obj->u.ga.gntva + i * PAGE_SIZE; + grant_ref_t id; + bool ro = ((mga->flags & GNTDEV_ALLOC_FLAG_WRITABLE) == 0); + (void)pmap_extract_ma(pmap_kernel(), va, &ma); + if ((ret = xengnt_grant_access(mga->domid, ma, ro, &id)) != 0) { + printf("%s: xengnt_grant_access failed: %d\n", + __func__, ret); + for (int j = 0; j < i; j++) { + xengnt_revoke_access(obj->u.ga.gref_ids[j]); + error = ret; + goto err2; + } + } + obj->u.ga.gref_ids[i] = id; + } + + error = copyout(&obj->u.ga.gref_ids[0], mga->gref_ids, + sizeof(uint32_t) * obj->npages); + if (error) { + for (int i = 0; i < obj->npages; ++i) { + xengnt_revoke_access(obj->u.ga.gref_ids[i]); + } + goto err2; + } + + error = privcmd_map_obj(vmm, va0, obj, prot); + return error; + +err2: + uvm_km_free(kernel_map, obj->u.ga.gntva, + PAGE_SIZE * obj->npages, UVM_KMF_WIRED); +err1: + kmem_free(obj, PGO_GNTA_LEN(obj->npages)); + return error; +} + static int privcmd_ioctl(void *v) { @@ -263,7 +711,6 @@ privcmd_ioctl(void *v) kauth_cred_t a_cred; } */ *ap = v; int error = 0; - paddr_t *maddr; switch (ap->a_command) { case IOCTL_PRIVCMD_HYPERCALL: @@ -327,113 +774,23 @@ privcmd_ioctl(void *v) break; } case IOCTL_PRIVCMD_MMAP: - { - int i, j; - 
privcmd_mmap_t *mcmd = ap->a_data; - privcmd_mmap_entry_t mentry; - vaddr_t va; - paddr_t ma; - struct vm_map *vmm = &curlwp->l_proc->p_vmspace->vm_map; + return privcmd_mmap(ap); - for (i = 0; i < mcmd->num; i++) { - error = copyin(&mcmd->entry[i], &mentry, sizeof(mentry)); - if (error) - return error; - if (mentry.npages == 0) - return EINVAL; - if (mentry.va > VM_MAXUSER_ADDRESS) - return EINVAL; -#if 0 - if (mentry.va + (mentry.npages << PGSHIFT) > - mrentry->vm_end) - return EINVAL; -#endif - maddr = kmem_alloc(sizeof(paddr_t) * mentry.npages, - KM_SLEEP); - va = mentry.va & ~PAGE_MASK; - ma = ((paddr_t)mentry.mfn) << PGSHIFT; /* XXX ??? */ - for (j = 0; j < mentry.npages; j++) { - maddr[j] = ma; - ma += PAGE_SIZE; - } - error = privcmd_map_obj(vmm, va, maddr, - mentry.npages, mcmd->dom); - if (error) - return error; - } - break; - } case IOCTL_PRIVCMD_MMAPBATCH: - { - int i; - privcmd_mmapbatch_t* pmb = ap->a_data; - vaddr_t va0; - u_long mfn; - paddr_t ma; - struct vm_map *vmm; - struct vm_map_entry *entry; - vm_prot_t prot; - vaddr_t trymap; - - vmm = &curlwp->l_proc->p_vmspace->vm_map; - va0 = pmb->addr & ~PAGE_MASK; + return privcmd_mmapbatch(ap); - if (pmb->num == 0) - return EINVAL; - if (va0 > VM_MAXUSER_ADDRESS) - return EINVAL; - if (((VM_MAXUSER_ADDRESS - va0) >> PGSHIFT) < pmb->num) - return EINVAL; + case IOCTL_PRIVCMD_MMAPBATCH_V2: + return privcmd_mmapbatch_v2(ap); - vm_map_lock_read(vmm); - if (!uvm_map_lookup_entry(vmm, va0, &entry)) { - vm_map_unlock_read(vmm); - return EINVAL; - } - prot = entry->protection; - vm_map_unlock_read(vmm); - - maddr = kmem_alloc(sizeof(paddr_t) * pmb->num, KM_SLEEP); - /* get a page of KVA to check mappins */ - trymap = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, - UVM_KMF_VAONLY); - if (trymap == 0) { - kmem_free(maddr, sizeof(paddr_t) * pmb->num); - return ENOMEM; - } - for(i = 0; i < pmb->num; ++i) { - error = copyin(&pmb->arr[i], &mfn, sizeof(mfn)); - if (error != 0) { - /* XXX: mappings */ - 
pmap_update(pmap_kernel()); - kmem_free(maddr, sizeof(paddr_t) * pmb->num); - uvm_km_free(kernel_map, trymap, PAGE_SIZE, - UVM_KMF_VAONLY); - return error; - } - ma = ((paddr_t)mfn) << PGSHIFT; - if (pmap_enter_ma(pmap_kernel(), trymap, ma, 0, - prot, PMAP_CANFAIL, pmb->dom)) { - mfn |= 0xF0000000; - copyout(&mfn, &pmb->arr[i], sizeof(mfn)); - maddr[i] = INVALID_PAGE; - } else { - pmap_remove(pmap_kernel(), trymap, - trymap + PAGE_SIZE); - maddr[i] = ma; - } - } - pmap_update(pmap_kernel()); + case IOCTL_PRIVCMD_MMAP_RESOURCE: + return privcmd_mmap_resource(ap); - error = privcmd_map_obj(vmm, va0, maddr, pmb->num, pmb->dom); - uvm_km_free(kernel_map, trymap, PAGE_SIZE, UVM_KMF_VAONLY); + case IOCTL_GNTDEV_MMAP_GRANT_REF: + return privcmd_map_gref(ap); - if (error != 0) - return error; - - break; - } + case IOCTL_GNTDEV_ALLOC_GRANT_REF: + return privcmd_alloc_gref(ap); default: error = EINVAL; } @@ -456,20 +813,86 @@ privpgop_reference(struct uvm_object *uo } static void +privcmd_notify(struct ioctl_gntdev_grant_notify *notify, vaddr_t va, + struct gnttab_map_grant_ref *gmops) +{ + if (notify->action & UNMAP_NOTIFY_SEND_EVENT) { + hypervisor_notify_via_evtchn(notify->event_channel_port); + } + if ((notify->action & UNMAP_NOTIFY_CLEAR_BYTE) == 0) { + notify->action = 0; + return; + } + if (va == 0) { + struct gnttab_map_grant_ref op; + struct gnttab_unmap_grant_ref uop; + int i = notify->offset / PAGE_SIZE; + int o = notify->offset % PAGE_SIZE; + int err; + + KASSERT(gmops != NULL); + va = uvm_km_alloc(kernel_map, PAGE_SIZE, PAGE_SIZE, + UVM_KMF_VAONLY | UVM_KMF_WAITVA); + op.host_addr = va; + op.dev_bus_addr = 0; + op.ref = gmops[i].ref; + op.dom = gmops[i].dom; + op.handle = -1; + op.flags = GNTMAP_host_map; + err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (err == 0 && op.status == GNTST_okay) { + char *n = (void *)(va + o); + *n = 0; + uop.host_addr = va; + uop.handle = op.handle; + uop.dev_bus_addr = 0; + (void)HYPERVISOR_grant_table_op( + 
GNTTABOP_unmap_grant_ref, &uop, 1); + } + uvm_km_free(kernel_map, va, PAGE_SIZE, UVM_KMF_VAONLY); + } else { + KASSERT(gmops == NULL); + char *n = (void *)(va + notify->offset); + *n = 0; + } + notify->action = 0; +} + +static void privpgop_detach(struct uvm_object *uobj) { struct privcmd_object *pobj = (struct privcmd_object *)uobj; mutex_enter(uobj->vmobjlock); + KASSERT(uobj->uo_refs > 0); if (uobj->uo_refs > 1) { uobj->uo_refs--; mutex_exit(uobj->vmobjlock); return; } mutex_exit(uobj->vmobjlock); - kmem_free(pobj->maddr, sizeof(paddr_t) * pobj->npages); - uvm_obj_destroy(uobj, true); - kmem_free(pobj, sizeof(struct privcmd_object)); + switch (pobj->type) { + case PTYPE_PRIVCMD: + kmem_free(pobj->u.pc.maddr, sizeof(paddr_t) * pobj->npages); + uvm_obj_destroy(uobj, true); + kmem_free(pobj, sizeof(struct privcmd_object)); + break; + case PTYPE_GNTDEV_REF: + { + privcmd_notify(&pobj->u.gr.notify, 0, pobj->u.gr.ops); + kmem_free(pobj, PGO_GNTREF_LEN(pobj->npages)); + break; + } + case PTYPE_GNTDEV_ALLOC: + privcmd_notify(&pobj->u.ga.notify, pobj->u.ga.gntva, NULL); + for (int i = 0; i < pobj->npages; ++i) { + xengnt_revoke_access(pobj->u.ga.gref_ids[i]); + } + uvm_km_free(kernel_map, pobj->u.ga.gntva, + PAGE_SIZE * pobj->npages, UVM_KMF_WIRED); + kmem_free(pobj, PGO_GNTA_LEN(pobj->npages)); + } + privcmd_nobjects--; } @@ -492,25 +915,61 @@ privpgop_fault(struct uvm_faultinfo *ufi continue; if (pps[i] == PGO_DONTCARE) continue; - if (pobj->maddr[maddr_i] == INVALID_PAGE) { - /* This has already been flagged as error. */ - error = EFAULT; + switch(pobj->type) { + case PTYPE_PRIVCMD: + if (pobj->u.pc.maddr[maddr_i] == INVALID_PAGE) { + /* This has already been flagged as error. */ + error = EFAULT; + goto out; + } + error = pmap_enter_ma(ufi->orig_map->pmap, vaddr, + pobj->u.pc.maddr[maddr_i], 0, + ufi->entry->protection, + PMAP_CANFAIL | ufi->entry->protection | + (pobj->u.pc.no_translate ? 
PMAP_MD_XEN_NOTR : 0), + pobj->u.pc.domid); + if (error == ENOMEM) { + goto out; + } + if (error) { + pobj->u.pc.maddr[maddr_i] = INVALID_PAGE; + error = EFAULT; + } + break; + case PTYPE_GNTDEV_REF: + { + struct pmap *pmap = ufi->orig_map->pmap; + if (pmap_enter_gnt(pmap, vaddr, entry->start, pobj->npages, &pobj->u.gr.ops[0]) != GNTST_okay) { + error = EFAULT; + goto out; + } break; } - error = pmap_enter_ma(ufi->orig_map->pmap, vaddr, - pobj->maddr[maddr_i], 0, ufi->entry->protection, - PMAP_CANFAIL | ufi->entry->protection, - pobj->domid); - if (error == ENOMEM) { - error = ERESTART; + case PTYPE_GNTDEV_ALLOC: + { + paddr_t pa; + if (!pmap_extract(pmap_kernel(), + pobj->u.ga.gntva + maddr_i * PAGE_SIZE, &pa)) { + error = EFAULT; + goto out; + } + error = pmap_enter(ufi->orig_map->pmap, vaddr, pa, + ufi->entry->protection, + PMAP_CANFAIL | ufi->entry->protection); + if (error == ENOMEM) { + goto out; + } break; } + } if (error) { /* XXX for proper ptp accountings */ pmap_remove(ufi->orig_map->pmap, vaddr, vaddr + PAGE_SIZE); } } + +out: pmap_update(ufi->orig_map->pmap); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj); @@ -521,38 +980,16 @@ privpgop_fault(struct uvm_faultinfo *ufi } static int -privcmd_map_obj(struct vm_map *map, vaddr_t start, paddr_t *maddr, - int npages, int domid) +privcmd_map_obj(struct vm_map *map, vaddr_t start, struct privcmd_object *obj, + vm_prot_t prot) { - struct privcmd_object *obj; - int error; uvm_flag_t uvmflag; + int error; vaddr_t newstart = start; - vm_prot_t prot; - off_t size = ((off_t)npages << PGSHIFT); + off_t size = ((off_t)obj->npages << PGSHIFT); - vm_map_lock_read(map); - /* get protections. 
This also check for validity of mapping */ - if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_WRITE)) - prot = VM_PROT_READ | VM_PROT_WRITE; - else if (uvm_map_checkprot(map, start, start + size - 1, VM_PROT_READ)) - prot = VM_PROT_READ; - else { - printf("uvm_map_checkprot 0x%lx -> 0x%lx " - "failed\n", - start, (unsigned long)(start + size - 1)); - vm_map_unlock_read(map); - kmem_free(maddr, sizeof(paddr_t) * npages); - return EINVAL; - } - vm_map_unlock_read(map); - - obj = kmem_alloc(sizeof(*obj), KM_SLEEP); privcmd_nobjects++; uvm_obj_init(&obj->uobj, &privpgops, true, 1); - obj->maddr = maddr; - obj->npages = npages; - obj->domid = domid; uvmflag = UVM_MAPFLAG(prot, prot, UVM_INH_NONE, UVM_ADV_NORMAL, UVM_FLAG_FIXED | UVM_FLAG_UNMAP | UVM_FLAG_NOMERGE); error = uvm_map(map, &newstart, size, &obj->uobj, 0, 0, uvmflag); Index: src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h diff -u src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h:1.1.1.1 src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h:1.1.1.1.6.1 --- src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h:1.1.1.1 Sat Feb 2 08:03:48 2019 +++ src/sys/external/mit/xen-include-public/dist/xen/include/public/memory.h Sun May 31 10:39:34 2020 @@ -374,7 +374,7 @@ struct xen_pod_target { }; typedef struct xen_pod_target xen_pod_target_t; -#if defined(__XEN__) || defined(__XEN_TOOLS__) +#if 1 /* defined(__XEN__) || defined(__XEN_TOOLS__) XXX needed for privcmd.c */ #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t