Module Name:    src
Committed By:   jdolecek
Date:           Thu Apr  9 19:26:38 UTC 2020

Modified Files:
        src/sys/arch/xen/conf: std.xenversion
        src/sys/arch/xen/include: xen.h
        src/sys/arch/xen/x86: xen_bus_dma.c
        src/sys/arch/xen/xen: balloon.c xengnt.c

Log Message:
update to __XEN_INTERFACE_VERSION__ 0x0003020a aka Xen 3.2.10

this brings grant memory v2 support:
- status separated from flags - revoking access needs just a memory barrier,
  no need for the expensive cmpxchg16 any more
- sub-page hypervisor copy-only grants, to be used by xennet(4)
- 64-bit frame, i.e. support for DomU RAM >16TB

the grant table is now always allocated at boot at its maximum size and is
never grown at runtime; switch back to regular kmem_alloc()/kmem_free()

the code now requires grant v2 support; no compatibility with grant version 1
is retained - grant v2 support in Xen predates all currently supported Xen versions

also the interface for balloon changed slightly, code updated


To generate a diff of this commit:
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/xen/conf/std.xenversion
cvs rdiff -u -r1.44 -r1.45 src/sys/arch/xen/include/xen.h
cvs rdiff -u -r1.28 -r1.29 src/sys/arch/xen/x86/xen_bus_dma.c
cvs rdiff -u -r1.19 -r1.20 src/sys/arch/xen/xen/balloon.c
cvs rdiff -u -r1.30 -r1.31 src/sys/arch/xen/xen/xengnt.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/xen/conf/std.xenversion
diff -u src/sys/arch/xen/conf/std.xenversion:1.1 src/sys/arch/xen/conf/std.xenversion:1.2
--- src/sys/arch/xen/conf/std.xenversion:1.1	Thu Apr  9 14:39:10 2020
+++ src/sys/arch/xen/conf/std.xenversion	Thu Apr  9 19:26:37 2020
@@ -1,6 +1,6 @@
-# $NetBSD: std.xenversion,v 1.1 2020/04/09 14:39:10 jdolecek Exp $
+# $NetBSD: std.xenversion,v 1.2 2020/04/09 19:26:37 jdolecek Exp $
 #
 # Xen options shared for all archs (i386, amd64)
 
-options 	__XEN_INTERFACE_VERSION__=0x00030208 # Xen 3.1 interface
+options 	__XEN_INTERFACE_VERSION__=0x0003020a # Xen 3.2.10 interface
 

Index: src/sys/arch/xen/include/xen.h
diff -u src/sys/arch/xen/include/xen.h:1.44 src/sys/arch/xen/include/xen.h:1.45
--- src/sys/arch/xen/include/xen.h:1.44	Thu May  9 17:09:50 2019
+++ src/sys/arch/xen/include/xen.h	Thu Apr  9 19:26:37 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen.h,v 1.44 2019/05/09 17:09:50 bouyer Exp $	*/
+/*	$NetBSD: xen.h,v 1.45 2020/04/09 19:26:37 jdolecek Exp $	*/
 
 /*
  *
@@ -170,20 +170,6 @@ xen_atomic_xchg(volatile XATOMIC_T *ptr,
 	return result;
 }
 
-static inline uint16_t
-xen_atomic_cmpxchg16(volatile uint16_t *ptr, uint16_t  val, uint16_t newval)
-{
-	unsigned long result;
-
-        __asm volatile(__LOCK_PREFIX
-	    "cmpxchgw %w1,%2"
-	    :"=a" (result)
-	    :"q"(newval), "m" (*ptr), "0" (val)
-	    :"memory");
-
-	return result;
-}
-
 static __inline void
 xen_atomic_setbits_l (volatile XATOMIC_T *ptr, unsigned long bits) {  
 #ifdef __x86_64__

Index: src/sys/arch/xen/x86/xen_bus_dma.c
diff -u src/sys/arch/xen/x86/xen_bus_dma.c:1.28 src/sys/arch/xen/x86/xen_bus_dma.c:1.29
--- src/sys/arch/xen/x86/xen_bus_dma.c:1.28	Mon Sep  3 16:29:29 2018
+++ src/sys/arch/xen/x86/xen_bus_dma.c	Thu Apr  9 19:26:37 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_bus_dma.c,v 1.28 2018/09/03 16:29:29 riastradh Exp $	*/
+/*	$NetBSD: xen_bus_dma.c,v 1.29 2020/04/09 19:26:37 jdolecek Exp $	*/
 /*	NetBSD bus_dma.c,v 1.21 2005/04/16 07:53:35 yamt Exp */
 
 /*-
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.28 2018/09/03 16:29:29 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.29 2020/04/09 19:26:37 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -94,7 +94,7 @@ _xen_alloc_contig(bus_size_t size, bus_s
 		set_xen_guest_handle(res.extent_start, &mfn);
 		res.nr_extents = 1;
 		res.extent_order = 0;
-		res.address_bits = 0;
+		res.mem_flags = 0;
 		res.domid = DOMID_SELF;
 		error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &res);
 		if (error != 1) {
@@ -113,14 +113,14 @@ _xen_alloc_contig(bus_size_t size, bus_s
 	set_xen_guest_handle(res.extent_start, &mfn);
 	res.nr_extents = 1;
 	res.extent_order = order;
-	res.address_bits = get_order(high) + PAGE_SHIFT;
+	res.mem_flags = XENMEMF_address_bits(get_order(high) + PAGE_SHIFT);
 	res.domid = DOMID_SELF;
 	error = HYPERVISOR_memory_op(XENMEM_increase_reservation, &res);
 	if (error != 1) {
 #ifdef DEBUG
 		printf("xen_alloc_contig: XENMEM_increase_reservation "
-		    "failed: %d (order %d address_bits %d)\n",
-		    error, order, res.address_bits);
+		    "failed: %d (order %d mem_flags %d)\n",
+		    error, order, res.mem_flags);
 #endif
 		error = ENOMEM;
 		pg = NULL;
@@ -166,7 +166,7 @@ failed:
 		set_xen_guest_handle(res.extent_start, &mfn);
 		res.nr_extents = 1;
 		res.extent_order = 0;
-		res.address_bits = 32;
+		res.mem_flags = XENMEMF_address_bits(32);
 		res.domid = DOMID_SELF;
 		if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &res)
 		    < 0) {

Index: src/sys/arch/xen/xen/balloon.c
diff -u src/sys/arch/xen/xen/balloon.c:1.19 src/sys/arch/xen/xen/balloon.c:1.20
--- src/sys/arch/xen/xen/balloon.c:1.19	Sun Jun 24 20:28:58 2018
+++ src/sys/arch/xen/xen/balloon.c	Thu Apr  9 19:26:38 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: balloon.c,v 1.19 2018/06/24 20:28:58 jdolecek Exp $ */
+/* $NetBSD: balloon.c,v 1.20 2020/04/09 19:26:38 jdolecek Exp $ */
 
 /*-
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -75,7 +75,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.19 2018/06/24 20:28:58 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: balloon.c,v 1.20 2020/04/09 19:26:38 jdolecek Exp $");
 
 #include <sys/inttypes.h>
 #include <sys/device.h>
@@ -378,7 +378,7 @@ balloon_inflate(struct balloon_xenbus_so
 	xen_pfn_t *mfn_list = sc->sc_mfn_list;
 
 	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
+		.mem_flags = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
@@ -449,7 +449,7 @@ balloon_deflate(struct balloon_xenbus_so
 	xen_pfn_t *mfn_list = sc->sc_mfn_list;
 
 	struct xen_memory_reservation reservation = {
-		.address_bits = 0,
+		.mem_flags = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};

Index: src/sys/arch/xen/xen/xengnt.c
diff -u src/sys/arch/xen/xen/xengnt.c:1.30 src/sys/arch/xen/xen/xengnt.c:1.31
--- src/sys/arch/xen/xen/xengnt.c:1.30	Thu Apr  9 15:54:41 2020
+++ src/sys/arch/xen/xen/xengnt.c	Thu Apr  9 19:26:38 2020
@@ -1,4 +1,4 @@
-/*      $NetBSD: xengnt.c,v 1.30 2020/04/09 15:54:41 bouyer Exp $      */
+/*      $NetBSD: xengnt.c,v 1.31 2020/04/09 19:26:38 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.30 2020/04/09 15:54:41 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1.31 2020/04/09 19:26:38 jdolecek Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -49,7 +49,8 @@ __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1
 #define DPRINTF(x)
 #endif
 
-#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t))
+#define NR_GRANT_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t))
+#define NR_GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
 
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
@@ -58,6 +59,8 @@ __KERNEL_RCSID(0, "$NetBSD: xengnt.c,v 1
 int gnt_nr_grant_frames;
 /* Maximum number of frames that can make up the grant table */
 int gnt_max_grant_frames;
+/* Number of grant status frames */
+int gnt_status_frames;
 
 /* table of free grant entries */
 grant_ref_t *gnt_entries;
@@ -67,17 +70,20 @@ int last_gnt_entry;
 #define XENGNT_NO_ENTRY 0xffffffff
 
 /* VM address of the grant table */
-grant_entry_t *grant_table;
+grant_entry_v2_t *grant_table;
+grant_status_t *grant_status;
 kmutex_t grant_lock;
 
 static grant_ref_t xengnt_get_entry(void);
 static void xengnt_free_entry(grant_ref_t);
 static int xengnt_more_entries(void);
+static int xengnt_map_status(void);
 
 void
 xengnt_init(void)
 {
 	struct gnttab_query_size query;
+	struct gnttab_set_version gntversion;
 	int rc;
 	int nr_grant_entries;
 	int i;
@@ -88,7 +94,16 @@ xengnt_init(void)
 		gnt_max_grant_frames = 4; /* Legacy max number of frames */
 	else
 		gnt_max_grant_frames = query.max_nr_frames;
-	gnt_nr_grant_frames = 0;
+
+	/*
+	 * Always allocate max number of grant frames, never expand in runtime
+	 */
+	gnt_nr_grant_frames = gnt_max_grant_frames;
+
+	gntversion.version = 2;
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gntversion, 1);
+	if (rc < 0 || gntversion.version != 2)
+		panic("GNTTABOP_set_version 2 failed %d", rc);
 
 	nr_grant_entries =
 	    gnt_max_grant_frames * NR_GRANT_ENTRIES_PER_PAGE;
@@ -96,12 +111,20 @@ xengnt_init(void)
 	grant_table = (void *)uvm_km_alloc(kernel_map,
 	    gnt_max_grant_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
 	if (grant_table == NULL)
-		panic("xengnt_init() no VM space");
+		panic("xengnt_init() table no VM space");
+
 	gnt_entries = kmem_alloc((nr_grant_entries + 1) * sizeof(grant_ref_t),
 	    KM_SLEEP);
 	for (i = 0; i <= nr_grant_entries; i++)
 		gnt_entries[i] = XENGNT_NO_ENTRY;
 
+	gnt_status_frames =
+	    round_page(nr_grant_entries * sizeof(grant_status_t)) / PAGE_SIZE;
+	grant_status = (void *)uvm_km_alloc(kernel_map,
+	    gnt_status_frames * PAGE_SIZE, 0, UVM_KMF_VAONLY);
+	if (grant_status == NULL)
+		panic("xengnt_init() status no VM space");
+
 	mutex_init(&grant_lock, MUTEX_DEFAULT, IPL_VM);
 
 	xengnt_resume();
@@ -124,6 +147,7 @@ xengnt_resume(void)
 		if (xengnt_more_entries() != 0)
 			panic("xengnt_resume: can't restore grant frames");
 	}
+	xengnt_map_status();
 	mutex_exit(&grant_lock);
 	return true;
 }
@@ -144,13 +168,67 @@ xengnt_suspend(void) {
 		gnt_entries[i] = XENGNT_NO_ENTRY;
 	}
 	
-	/* Remove virtual => machine mapping */
+	/* Remove virtual => machine mapping for grant table */
 	pmap_kremove((vaddr_t)grant_table, gnt_nr_grant_frames * PAGE_SIZE);
+
+	/* Remove virtual => machine mapping for status table */
+	pmap_kremove((vaddr_t)grant_status, gnt_status_frames * PAGE_SIZE);
+
 	pmap_update(pmap_kernel());
 	mutex_exit(&grant_lock);
 	return true;
 }
 
+/*
+ * Get status frames and enter them into the VA space.
+ */
+static int
+xengnt_map_status(void)
+{
+	gnttab_get_status_frames_t getstatus;
+	u_long *pages;
+	size_t sz;
+
+	KASSERT(mutex_owned(&grant_lock));
+
+	sz = gnt_status_frames * sizeof(u_long);
+	pages = kmem_alloc(sz, KM_NOSLEEP);
+	if (pages == NULL)
+		return ENOMEM;
+
+	getstatus.dom = DOMID_SELF;
+	getstatus.nr_frames = gnt_status_frames;
+	set_xen_guest_handle(getstatus.frame_list, pages);
+
+	/*
+	 * get the status frames, and return the list of their virtual
+	 * addresses in 'pages'
+	 */
+	if (HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+	    &getstatus, 1) != 0)
+		panic("%s: get_status_frames failed", __func__);
+	if (getstatus.status != GNTST_okay) {
+		aprint_error("%s: get_status_frames returned %d\n",
+		    __func__, getstatus.status);
+		kmem_free(pages, sz);
+		return ENOMEM;
+	}
+
+	/*
+	 * map between status_table addresses and the machine addresses of
+	 * the status table frames
+	 */
+	for (int i = 0; i < gnt_status_frames; i++) {
+		pmap_kenter_ma(((vaddr_t)grant_status) + i * PAGE_SIZE,
+		    ((paddr_t)pages[i]) << PAGE_SHIFT,
+		    VM_PROT_WRITE, 0);
+	}
+	pmap_update(pmap_kernel());
+
+	kmem_free(pages, sz);
+
+	return 0;
+}
 
 /*
  * Add another page to the grant table
@@ -170,7 +248,7 @@ xengnt_more_entries(void)
 		return ENOMEM;
 
 	sz = nframes_new * sizeof(u_long);
-	pages = kmem_intr_alloc(sz, KM_NOSLEEP);
+	pages = kmem_alloc(sz, KM_NOSLEEP);
 	if (pages == NULL)
 		return ENOMEM;
 
@@ -209,7 +287,7 @@ xengnt_more_entries(void)
 		if (setup.status != GNTST_okay) {
 			aprint_error("%s: setup table returned %d\n",
 			    __func__, setup.status);
-			kmem_intr_free(pages, sz);
+			kmem_free(pages, sz);
 			return ENOMEM;
 		}
 	}
@@ -244,7 +322,7 @@ xengnt_more_entries(void)
 		last_gnt_entry++;
 	}
 	gnt_nr_grant_frames = nframes_new;
-	kmem_intr_free(pages, sz);
+	kmem_free(pages, sz);
 	return 0;
 }
 
@@ -258,13 +336,11 @@ xengnt_get_entry(void)
 	static struct timeval xengnt_nonmemtime;
 	static const struct timeval xengnt_nonmemintvl = {5,0};
 
-	if (last_gnt_entry == 0) {
-		if (xengnt_more_entries()) {
-			if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
-				printf("xengnt_get_entry: out of grant "
-				    "table entries\n");
-			return XENGNT_NO_ENTRY;
-		}
+	if (__predict_false(last_gnt_entry == 0)) {
+		if (ratecheck(&xengnt_nonmemtime, &xengnt_nonmemintvl))
+			printf("xengnt_get_entry: out of grant "
+			    "table entries\n");
+		return XENGNT_NO_ENTRY;
 	}
 	KASSERT(gnt_entries[last_gnt_entry] == XENGNT_NO_ENTRY);
 	last_gnt_entry--;
@@ -303,14 +379,14 @@ xengnt_grant_access(domid_t dom, paddr_t
 		return ENOMEM;
 	}
 
-	grant_table[*entryp].frame = ma >> PAGE_SHIFT;
-	grant_table[*entryp].domid = dom;
+	grant_table[*entryp].full_page.frame = ma >> PAGE_SHIFT;
+	grant_table[*entryp].hdr.domid = dom;
 	/*
 	 * ensure that the above values reach global visibility 
 	 * before permitting frame's access (done when we set flags)
 	 */
 	xen_rmb();
-	grant_table[*entryp].flags =
+	grant_table[*entryp].hdr.flags =
 	    GTF_permit_access | (ro ? GTF_readonly : 0);
 	mutex_exit(&grant_lock);
 	return 0;
@@ -319,21 +395,29 @@ xengnt_grant_access(domid_t dom, paddr_t
 void
 xengnt_revoke_access(grant_ref_t entry)
 {
-	uint16_t flags, nflags;
+	grant_table[entry].hdr.flags = 0;
+	xen_mb();	/* Concurrent access by hypervisor */
+
+	if (__predict_false((grant_status[entry] & (GTF_reading|GTF_writing))
+	    != 0))
+		printf("xengnt_revoke_access(%u): still in use\n",
+		    entry);
+	else {
 
-	nflags = grant_table[entry].flags;
+		/*
+		 * The read of grant_status needs to have acquire semantics.
+		 * Reads already have that on x86, so need only protect
+		 * against compiler reordering. May need full barrier
+		 * on other architectures.
+		 */
+		__insn_barrier();
+	}
 
-	do {
-		if ((flags = nflags) & (GTF_reading|GTF_writing))
-			panic("xengnt_revoke_access: still in use");
-		nflags = xen_atomic_cmpxchg16(&grant_table[entry].flags,
-		    flags, 0);
-	} while (nflags != flags);
 	xengnt_free_entry(entry);
 }
 
 int
 xengnt_status(grant_ref_t entry)
 {
-	return (grant_table[entry].flags & (GTF_reading|GTF_writing));
+	return grant_status[entry] & (GTF_reading|GTF_writing);
 }

Reply via email to