Module Name: src Committed By: jdolecek Date: Thu Apr 9 19:26:38 UTC 2020
Modified Files: src/sys/arch/xen/conf: std.xenversion src/sys/arch/xen/include: xen.h src/sys/arch/xen/x86: xen_bus_dma.c src/sys/arch/xen/xen: balloon.c xengnt.c Log Message: Update to __XEN_INTERFACE_VERSION__ 0x0003020a, aka Xen 3.2.10. This brings grant memory v2 support: - status separated from flags - revoking access needs just a memory barrier, no need for the expensive cmpxchg16 any more - sub-page hypervisor copy-only grants, to be used by xennet(4) - 64-bit frame, i.e. support for DomU RAM >16TB. The grant table is now always allocated on boot to its maximum size and is never grown at runtime; switch back to regular kmem_alloc()/kmem_free(). The code now requires v2 support, no compatibility for grant version 1 is retained - grant table v2 support predates all currently supported Xen versions. Also, the interface for the balloon changed slightly; code updated. To generate a diff of this commit: cvs rdiff -u -r1.1 -r1.2 src/sys/arch/xen/conf/std.xenversion cvs rdiff -u -r1.44 -r1.45 src/sys/arch/xen/include/xen.h cvs rdiff -u -r1.28 -r1.29 src/sys/arch/xen/x86/xen_bus_dma.c cvs rdiff -u -r1.19 -r1.20 src/sys/arch/xen/xen/balloon.c cvs rdiff -u -r1.30 -r1.31 src/sys/arch/xen/xen/xengnt.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/xen/conf/std.xenversion diff -u src/sys/arch/xen/conf/std.xenversion:1.1 src/sys/arch/xen/conf/std.xenversion:1.2 --- src/sys/arch/xen/conf/std.xenversion:1.1 Thu Apr 9 14:39:10 2020 +++ src/sys/arch/xen/conf/std.xenversion Thu Apr 9 19:26:37 2020 @@ -1,6 +1,6 @@ -# $NetBSD: std.xenversion,v 1.1 2020/04/09 14:39:10 jdolecek Exp $ +# $NetBSD: std.xenversion,v 1.2 2020/04/09 19:26:37 jdolecek Exp $ # # Xen options shared for all archs (i386, amd64) -options __XEN_INTERFACE_VERSION__=0x00030208 # Xen 3.1 interface +options __XEN_INTERFACE_VERSION__=0x0003020a # Xen 3.2.10 interface Index: src/sys/arch/xen/include/xen.h diff -u src/sys/arch/xen/include/xen.h:1.44 src/sys/arch/xen/include/xen.h:1.45 --- src/sys/arch/xen/include/xen.h:1.44 Thu May 9 17:09:50 2019 +++ src/sys/arch/xen/include/xen.h Thu Apr 9 19:26:37 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xen.h,v 1.44 2019/05/09 17:09:50 bouyer Exp $ */ +/* $NetBSD: xen.h,v 1.45 2020/04/09 19:26:37 jdolecek Exp $ */ /* * @@ -170,20 +170,6 @@ xen_atomic_xchg(volatile XATOMIC_T *ptr, return result; } -static inline uint16_t -xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t val, uint16_t newval) -{ - unsigned long result; - - __asm volatile(__LOCK_PREFIX - "cmpxchgw %w1,%2" - :"=a" (result) - :"q"(newval), "m" (*ptr), "0" (val) - :"memory"); - - return result; -} - static __inline void xen_atomic_setbits_l (volatile XATOMIC_T *ptr, unsigned long bits) { #ifdef __x86_64__ Index: src/sys/arch/xen/x86/xen_bus_dma.c diff -u src/sys/arch/xen/x86/xen_bus_dma.c:1.28 src/sys/arch/xen/x86/xen_bus_dma.c:1.29 --- src/sys/arch/xen/x86/xen_bus_dma.c:1.28 Mon Sep 3 16:29:29 2018 +++ src/sys/arch/xen/x86/xen_bus_dma.c Thu Apr 9 19:26:37 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xen_bus_dma.c,v 1.28 2018/09/03 16:29:29 riastradh Exp $ */ +/* $NetBSD: xen_bus_dma.c,v 1.29 2020/04/09 19:26:37 jdolecek Exp $ */ /* NetBSD bus_dma.c,v 1.21 2005/04/16 07:53:35 yamt Exp */ /*- @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> 
-__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.28 2018/09/03 16:29:29 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.29 2020/04/09 19:26:37 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -94,7 +94,7 @@ _xen_alloc_contig(bus_size_t size, bus_s set_xen_guest_handle(res.extent_start, &mfn); res.nr_extents = 1; res.extent_order = 0; - res.address_bits = 0; + res.mem_flags = 0; res.domid = DOMID_SELF; error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &res); if (error != 1) { @@ -113,14 +113,14 @@ _xen_alloc_contig(bus_size_t size, bus_s set_xen_guest_handle(res.extent_start, &mfn); res.nr_extents = 1; res.extent_order = order; - res.address_bits = get_order(high) + PAGE_SHIFT; + res.mem_flags = XENMEMF_address_bits(get_order(high) + PAGE_SHIFT); res.domid = DOMID_SELF; error = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res); if (error != 1) { #ifdef DEBUG printf("xen_alloc_contig: XENMEM_increase_reservation " - "failed: %d (order %d address_bits %d)\n", - error, order, res.address_bits); + "failed: %d (order %d mem_flags %d)\n", + error, order, res.mem_flags); #endif error = ENOMEM; pg = NULL; @@ -166,7 +166,7 @@ failed: set_xen_guest_handle(res.extent_start, &mfn); res.nr_extents = 1; res.extent_order = 0; - res.address_bits = 32; + res.mem_flags = XENMEMF_address_bits(32); res.domid = DOMID_SELF; if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &res) < 0) { Index: src/sys/arch/xen/xen/balloon.c diff -u src/sys/arch/xen/xen/balloon.c:1.19 src/sys/arch/xen/xen/balloon.c:1.20 --- src/sys/arch/xen/xen/balloon.c:1.19 Sun Jun 24 20:28:58 2018 +++ src/sys/arch/xen/xen/balloon.c Thu Apr 9 19:26:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: balloon.c,v 1.19 2018/06/24 20:28:58 jdolecek Exp $ */ +/* $NetBSD: balloon.c,v 1.20 2020/04/09 19:26:38 jdolecek Exp $ */ /*- * Copyright (c) 2010 The NetBSD Foundation, Inc. 
@@ -75,7 +75,7 @@ #endif #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.19 2018/06/24 20:28:58 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.20 2020/04/09 19:26:38 jdolecek Exp $"); #include <sys/inttypes.h> #include <sys/device.h> @@ -378,7 +378,7 @@ balloon_inflate(struct balloon_xenbus_so xen_pfn_t *mfn_list = sc->sc_mfn_list; struct xen_memory_reservation reservation = { - .address_bits = 0, + .mem_flags = 0, .extent_order = 0, .domid = DOMID_SELF }; @@ -449,7 +449,7 @@ balloon_deflate(struct balloon_xenbus_so xen_pfn_t *mfn_list = sc->sc_mfn_list; struct xen_memory_reservation reservation = { - .address_bits = 0, + .mem_flags = 0, .extent_order = 0, .domid = DOMID_SELF }; Index: src/sys/arch/xen/xen/xengnt.c diff -u src/sys/arch/xen/xen/xengnt.c:1.30 src/sys/arch/xen/xen/xengnt.c:1.31 --- src/sys/arch/xen/xen/xengnt.c:1.30 Thu Apr 9 15:54:41 2020 +++ src/sys/arch/xen/xen/xengnt.c Thu Apr 9 19:26:38 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: xengnt.c,v 1.30 2020/04/09 15:54:41 bouyer Exp $ */ +/* $NetBSD: xengnt.c,v 1.31 2020/04/09 19:26:38 jdolecek Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.30 2020/04/09 15:54:41 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.31 2020/04/09 19:26:38 jdolecek Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -49,7 +49,8 @@ __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1 #define DPRINTF(x) #endif -#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t)) +#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t)) +#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t)) /* External tools reserve first few grant table entries. 
*/ #define NR_RESERVED_ENTRIES 8 @@ -58,6 +59,8 @@ __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1 int gnt_nr_grant_frames; /* Maximum number of frames that can make up the grant table */ int gnt_max_grant_frames; +/* Number of grant status frames */ +int gnt_status_frames; /* table of free grant entries */ grant_ref_t *gnt_entries; @@ -67,17 +70,20 @@ int last_gnt_entry; #define XENGNT_NO_ENTRY 0xffffffff /* VM address of the grant table */ -grant_entry_t *grant_table; +grant_entry_v2_t *grant_table; +grant_status_t *grant_status; kmutex_t grant_lock; static grant_ref_t xengnt_get_entry(void); static void xengnt_free_entry(grant_ref_t); static int xengnt_more_entries(void); +static int xengnt_map_status(void); void xengnt_init(void) { struct gnttab_query_size query; + struct gnttab_set_version gntversion; int rc; int nr_grant_entries; int i; @@ -88,7 +94,16 @@ xengnt_init(void) gnt_max_grant_frames = 4; /* Legacy max number of frames */ else gnt_max_grant_frames = query.max_nr_frames; - gnt_nr_grant_frames = 0; + + /* + * Always allocate max number of grant frames, never expand in runtime + */ + gnt_nr_grant_frames = gnt_max_grant_frames; + + gntversion.version = 2; + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1); + if (rc < 0 || gntversion.version != 2) + panic("GNTTABOP_set_version 2 failed %d", rc); nr_grant_entries = gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE; @@ -96,12 +111,20 @@ xengnt_init(void) grant_table = (void *)uvm_km_alloc(kernel_map, gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY); if (grant_table == NULL) - panic("xengnt_init() no VM space"); + panic("xengnt_init() table no VM space"); + gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t), KM_SLEEP); for (i = 0; i <= nr_grant_entries; i++) gnt_entries[i] = XENGNT_NO_ENTRY; + gnt_status_frames = + round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE; + grant_status = (void *)uvm_km_alloc(kernel_map, + gnt_status_frames * PAGE_SIZE, 0, 
UVM_KMF_VAONLY); + if (grant_status == NULL) + panic("xengnt_init() status no VM space"); + mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM); xengnt_resume(); @@ -124,6 +147,7 @@ xengnt_resume(void) if (xengnt_more_entries() != 0) panic("xengnt_resume: can't restore grant frames"); } + xengnt_map_status(); mutex_exit(&grant_lock); return true; } @@ -144,13 +168,67 @@ xengnt_suspend(void) { gnt_entries[i] = XENGNT_NO_ENTRY; } - /* Remove virtual => machine mapping */ + /* Remove virtual => machine mapping for grant table */ pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE); + + /* Remove virtual => machine mapping for status table */ + pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE); + pmap_update(pmap_kernel()); mutex_exit(&grant_lock); return true; } +/* + * Get status frames and enter them into the VA space. + */ +static int +xengnt_map_status(void) +{ + gnttab_get_status_frames_t getstatus; + u_long *pages; + size_t sz; + + KASSERT(mutex_owned(&grant_lock)); + + sz = gnt_status_frames * sizeof(u_long); + pages = kmem_alloc(sz, KM_NOSLEEP); + if (pages == NULL) + return ENOMEM; + + getstatus.dom = DOMID_SELF; + getstatus.nr_frames = gnt_status_frames; + set_xen_guest_handle(getstatus.frame_list, pages); + + /* + * get the status frames, and return the list of their virtual + * addresses in 'pages' + */ + if (HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, + &getstatus, 1) != 0) + panic("%s: get_status_frames failed", __func__); + if (getstatus.status != GNTST_okay) { + aprint_error("%s: get_status_frames returned %d\n", + __func__, getstatus.status); + kmem_free(pages, sz); + return ENOMEM; + } + + /* + * map between status_table addresses and the machine addresses of + * the status table frames + */ + for (int i = 0; i < gnt_status_frames; i++) { + pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE, + ((paddr_t)pages[i]) << PAGE_SHIFT, + VM_PROT_WRITE, 0); + } + pmap_update(pmap_kernel()); + + kmem_free(pages, sz); + 
+ return 0; +} /* * Add another page to the grant table @@ -170,7 +248,7 @@ xengnt_more_entries(void) return ENOMEM; sz = nframes_new * sizeof(u_long); - pages = kmem_intr_alloc(sz, KM_NOSLEEP); + pages = kmem_alloc(sz, KM_NOSLEEP); if (pages == NULL) return ENOMEM; @@ -209,7 +287,7 @@ xengnt_more_entries(void) if (setup.status != GNTST_okay) { aprint_error("%s: setup table returned %d\n", __func__, setup.status); - kmem_intr_free(pages, sz); + kmem_free(pages, sz); return ENOMEM; } } @@ -244,7 +322,7 @@ xengnt_more_entries(void) last_gnt_entry++; } gnt_nr_grant_frames = nframes_new; - kmem_intr_free(pages, sz); + kmem_free(pages, sz); return 0; } @@ -258,13 +336,11 @@ xengnt_get_entry(void) static struct timeval xengnt_nonmemtime; static const struct timeval xengnt_nonmemintvl = {5,0}; - if (last_gnt_entry == 0) { - if (xengnt_more_entries()) { - if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl)) - printf("xengnt_get_entry: out of grant " - "table entries\n"); - return XENGNT_NO_ENTRY; - } + if (__predict_false(last_gnt_entry == 0)) { + if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl)) + printf("xengnt_get_entry: out of grant " + "table entries\n"); + return XENGNT_NO_ENTRY; } KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY); last_gnt_entry--; @@ -303,14 +379,14 @@ xengnt_grant_access(domid_t dom, paddr_t return ENOMEM; } - grant_table[*entryp].frame = ma >> PAGE_SHIFT; - grant_table[*entryp].domid = dom; + grant_table[*entryp].full_page.frame = ma >> PAGE_SHIFT; + grant_table[*entryp].hdr.domid = dom; /* * ensure that the above values reach global visibility * before permitting frame's access (done when we set flags) */ xen_rmb(); - grant_table[*entryp].flags = + grant_table[*entryp].hdr.flags = GTF_permit_access | (ro ? 
GTF_readonly : 0); mutex_exit(&grant_lock); return 0; @@ -319,21 +395,29 @@ xengnt_grant_access(domid_t dom, paddr_t void xengnt_revoke_access(grant_ref_t entry) { - uint16_t flags, nflags; + grant_table[entry].hdr.flags = 0; + xen_mb(); /* Concurrent access by hypervisor */ + + if (__predict_false((grant_status[entry] & (GTF_reading|GTF_writing)) + != 0)) + printf("xengnt_revoke_access(%u): still in use\n", + entry); + else { - nflags = grant_table[entry].flags; + /* + * The read of grant_status needs to have acquire semantics. + * Reads already have that on x86, so need only protect + * against compiler reordering. May need full barrier + * on other architectures. + */ + __insn_barrier(); + } - do { - if ((flags = nflags) & (GTF_reading|GTF_writing)) - panic("xengnt_revoke_access: still in use"); - nflags = xen_atomic_cmpxchg16(&grant_table[entry].flags, - flags, 0); - } while (nflags != flags); xengnt_free_entry(entry); } int xengnt_status(grant_ref_t entry) { - return (grant_table[entry].flags & (GTF_reading|GTF_writing)); + return grant_status[entry] & (GTF_reading|GTF_writing); }