Module Name:    src
Committed By:   jym
Date:           Sun May 31 20:15:37 UTC 2009

Modified Files:
        src/sys/arch/x86/include [jym-xensuspend]: pmap.h
        src/sys/arch/x86/x86 [jym-xensuspend]: pmap.c
        src/sys/arch/xen/include [jym-xensuspend]: xenbus.h
        src/sys/arch/xen/include/amd64 [jym-xensuspend]: hypercalls.h
        src/sys/arch/xen/include/i386 [jym-xensuspend]: hypercalls.h
        src/sys/arch/xen/x86 [jym-xensuspend]: x86_xpmap.c xen_bus_dma.c
        src/sys/arch/xen/xen [jym-xensuspend]: evtchn.c if_xennet_xenbus.c
            xbd_xenbus.c xen_machdep.c xencons.c xennetback_xenbus.c
        src/sys/arch/xen/xenbus [jym-xensuspend]: xenbus_probe.c

Log Message:
Modifications for the Xen suspend/migrate/resume branch:

- introduce xenbus_device_{suspend,resume}() functions. These are routines
used to suspend/resume MI parts of the Xenbus device interfaces, like updating
frontend/backend devices' paths found in XenStore.

- introduce HYPERVISOR_sysctl(), a hypercall used only by Xentools to obtain
information from the hypervisor (listing VMs, printing console, etc.). I use
it to query xenconsole from ddb(), as a last resort in case of a panic() in
dom0 (xm not being available). Currently unused in the branch; it could be
used, if requested.

- disable the rwlock(9) used to protect code that could use transient MFNs.
It could trigger nasty context switches in places where it should not.

- fix some bugs in the xennet/xbd suspend/resume pmf(9) handlers.

- following XenSource's design, talk_to_otherend() is now called
watch_otherend(), and free_otherend_details() is used by Xenbus device
suspend/resume routines.

- some slight modifications in pmap regarding APDP. Introduce an inline
function (pmap_unmap_apdp_pde()) that clears APDP entry for the current pmap.

- similarly, implement pmap_unmap_all_apdp_pdes() that iterates through all
pmaps and tears down APDP, as Xen does not handle them properly.

TODO/XXX:

- pmap_unmap_all_apdp_pdes() does not handle the APDP shadow entries of PAE
yet. It will, once I figure out how PAE uses them.

- revisit the pmap locking issue regarding transient MFNs. As NetBSD does not
use kernel preemption or MP for Xen, this can be skipped for the time being.
See http://mail-index.netbsd.org/port-xen/2009/04/27/msg004903.html for
details.

- fix a bug regarding grant tables which could technically DoS a dom0 if
ridiculously high consumer/producer indexes are passed down in the ring during
a resume.

All in all, once the grant table index issue and APDP PAE are fixed, the next
step is to torture-test this branch.

Tested under i386 PAE and non-PAE, Xen3 dom0 and domU. amd64 is only
compile-tested.


To generate a diff of this commit:
cvs rdiff -u -r1.21.2.1 -r1.21.2.2 src/sys/arch/x86/include/pmap.h
cvs rdiff -u -r1.77.2.2 -r1.77.2.3 src/sys/arch/x86/x86/pmap.c
cvs rdiff -u -r1.11 -r1.11.6.1 src/sys/arch/xen/include/xenbus.h
cvs rdiff -u -r1.5 -r1.5.4.1 src/sys/arch/xen/include/amd64/hypercalls.h
cvs rdiff -u -r1.8 -r1.8.4.1 src/sys/arch/xen/include/i386/hypercalls.h
cvs rdiff -u -r1.12.4.2 -r1.12.4.3 src/sys/arch/xen/x86/x86_xpmap.c
cvs rdiff -u -r1.14.2.1 -r1.14.2.2 src/sys/arch/xen/x86/xen_bus_dma.c
cvs rdiff -u -r1.42.2.2 -r1.42.2.3 src/sys/arch/xen/xen/evtchn.c
cvs rdiff -u -r1.33.2.2 -r1.33.2.3 src/sys/arch/xen/xen/if_xennet_xenbus.c
cvs rdiff -u -r1.38.2.2 -r1.38.2.3 src/sys/arch/xen/xen/xbd_xenbus.c
cvs rdiff -u -r1.4.12.2 -r1.4.12.3 src/sys/arch/xen/xen/xen_machdep.c
cvs rdiff -u -r1.31.2.1 -r1.31.2.2 src/sys/arch/xen/xen/xencons.c
cvs rdiff -u -r1.27.2.2 -r1.27.2.3 src/sys/arch/xen/xen/xennetback_xenbus.c
cvs rdiff -u -r1.27.2.1 -r1.27.2.2 src/sys/arch/xen/xenbus/xenbus_probe.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/include/pmap.h
diff -u src/sys/arch/x86/include/pmap.h:1.21.2.1 src/sys/arch/x86/include/pmap.h:1.21.2.2
--- src/sys/arch/x86/include/pmap.h:1.21.2.1	Wed May 13 17:18:44 2009
+++ src/sys/arch/x86/include/pmap.h	Sun May 31 20:15:36 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.h,v 1.21.2.1 2009/05/13 17:18:44 jym Exp $	*/
+/*	$NetBSD: pmap.h,v 1.21.2.2 2009/05/31 20:15:36 jym Exp $	*/
 
 /*
  *
@@ -400,6 +400,7 @@
 int	pmap_enter_ma(struct pmap *, vaddr_t, paddr_t, paddr_t,
 	    vm_prot_t, u_int, int);
 bool	pmap_extract_ma(pmap_t, vaddr_t, paddr_t *);
+void	pmap_unmap_all_apdp_pdes(void);
 
 paddr_t	vtomach(vaddr_t);
 #define vtomfn(va) (vtomach(va) >> PAGE_SHIFT)

Index: src/sys/arch/x86/x86/pmap.c
diff -u src/sys/arch/x86/x86/pmap.c:1.77.2.2 src/sys/arch/x86/x86/pmap.c:1.77.2.3
--- src/sys/arch/x86/x86/pmap.c:1.77.2.2	Wed May 13 17:18:45 2009
+++ src/sys/arch/x86/x86/pmap.c	Sun May 31 20:15:36 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.77.2.2 2009/05/13 17:18:45 jym Exp $	*/
+/*	$NetBSD: pmap.c,v 1.77.2.3 2009/05/31 20:15:36 jym Exp $	*/
 
 /*
  * Copyright (c) 2007 Manuel Bouyer.
@@ -154,7 +154,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.77.2.2 2009/05/13 17:18:45 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.77.2.3 2009/05/31 20:15:36 jym Exp $");
 
 #include "opt_user_ldt.h"
 #include "opt_lockdebug.h"
@@ -741,6 +741,54 @@
 	    (kernel && (pmap->pm_kernel_cpus & ci->ci_cpumask) != 0));
 }
 
+/*
+ * Flush the content of APDP_PDE
+ */
+static inline
+void pmap_unmap_apdp_pde(void) {
+
+	int i;
+
+	for (i = 0; i < PDP_SIZE; i++) {
+		pmap_pte_set(&APDP_PDE[i], 0);
+#ifdef PAE
+		/* clear shadow entry too */
+    		pmap_pte_set(&APDP_PDE_SHADOW[i], 0);
+#endif
+	}
+
+}
+
+/*
+ * Flush all APDP entries found in pmaps
+ * Required during Xen save/restore operations, as it does not
+ * handle alternative recursive mappings properly
+ */
+void
+pmap_unmap_all_apdp_pdes(void) {
+
+	// XXX JYM PAE
+
+	int s;
+	struct pmap *pm;
+
+	s = splvm();
+
+	mutex_enter(&pmaps_lock);
+	LIST_FOREACH(pm, &pmaps, pm_list) {
+		xpq_queue_pte_update(
+		    xpmap_ptom(pmap_pdirpa(pm, PDIR_SLOT_APTE)),
+		    0);
+	}
+	xpq_flush_queue();
+
+	mutex_exit(&pmaps_lock);
+
+	splx(s);
+
+}
+
+
 static void
 pmap_apte_flush(struct pmap *pmap)
 {
@@ -932,7 +980,7 @@
 		KASSERT(curcpu()->ci_pmap == pmap2);
 #endif
 #if defined(MULTIPROCESSOR)
-		pmap_pte_set(APDP_PDE, 0);
+		pmap_unmap_apdp_pde();
 		pmap_pte_flush();
 		pmap_apte_flush(pmap2);
 #endif
@@ -2308,13 +2356,7 @@
 	xen_acquire_reader_ptom_lock();
 	if (xpmap_ptom_masked(pmap_pdirpa(pmap, 0)) == (*APDP_PDE & PG_FRAME)) {
 		kpreempt_disable();
-		for (i = 0; i < PDP_SIZE; i++) {
-	        	pmap_pte_set(&APDP_PDE[i], 0);
-#ifdef PAE
-			/* clear shadow entry too */
-	    		pmap_pte_set(&APDP_PDE_SHADOW[i], 0);
-#endif
-		}
+		pmap_unmap_apdp_pde();
 		pmap_pte_flush();
 	        pmap_apte_flush(pmap_kernel());
 	        kpreempt_enable();
@@ -2780,14 +2822,7 @@
 	 * been freed
 	 */
 	if (*APDP_PDE) {
-		int i;
-		for (i = 0; i < PDP_SIZE; i++) {
-			pmap_pte_set(&APDP_PDE[i], 0);
-#ifdef PAE
-			/* clear shadow entry too */
-			pmap_pte_set(&APDP_PDE_SHADOW[i], 0);
-#endif
-		}
+		pmap_unmap_apdp_pde();
 	}
 	/* lldt() does pmap_pte_flush() */
 #else /* XEN */
@@ -3026,11 +3061,10 @@
 #ifdef XEN
 /*
  * pmap_extract_ma: extract a MA for the given VA
- */
-
-/*
- * XXX JYM replace functions calling pmap_extract_ma by wrappers, as they are
- * NOT safe in regards to ptom locking
+ * When used directly in a Xen domain, caller must ensure that this function
+ * should not be used in a context where a reference to a MFN could be kept
+ * between a suspend, resume, or migrate.
+ * XXX JYM revisit for locking (MFNs may be invalid after a migration)
  */
 bool
 pmap_extract_ma(struct pmap *pmap, vaddr_t va, paddr_t *pap)

Index: src/sys/arch/xen/include/xenbus.h
diff -u src/sys/arch/xen/include/xenbus.h:1.11 src/sys/arch/xen/include/xenbus.h:1.11.6.1
--- src/sys/arch/xen/include/xenbus.h:1.11	Wed Oct 29 13:35:35 2008
+++ src/sys/arch/xen/include/xenbus.h	Sun May 31 20:15:36 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: xenbus.h,v 1.11 2008/10/29 13:35:35 cegger Exp $ */
+/* $NetBSD: xenbus.h,v 1.11.6.1 2009/05/31 20:15:36 jym Exp $ */
 /******************************************************************************
  * xenbus.h
  *
@@ -257,6 +257,8 @@
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
 		      ...);
 
+bool xenbus_device_suspend(struct xenbus_device *);
+bool xenbus_device_resume(struct xenbus_device *);
 
 #endif /* _ASM_XEN_XENBUS_H */
 

Index: src/sys/arch/xen/include/amd64/hypercalls.h
diff -u src/sys/arch/xen/include/amd64/hypercalls.h:1.5 src/sys/arch/xen/include/amd64/hypercalls.h:1.5.4.1
--- src/sys/arch/xen/include/amd64/hypercalls.h:1.5	Thu Nov 13 01:45:48 2008
+++ src/sys/arch/xen/include/amd64/hypercalls.h	Sun May 31 20:15:36 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: hypercalls.h,v 1.5 2008/11/13 01:45:48 cegger Exp $ */
+/* $NetBSD: hypercalls.h,v 1.5.4.1 2009/05/31 20:15:36 jym Exp $ */
 /******************************************************************************
  * hypercall.h
  * 
@@ -406,4 +406,9 @@
 	return _hypercall1(int, mca, mc);
 }
 
+static inline int
+HYPERVISOR_sysctl(void *sysctl)
+{
+	return _hypercall1(int, sysctl, sysctl);
+}
 #endif /* __HYPERCALL_H__ */

Index: src/sys/arch/xen/include/i386/hypercalls.h
diff -u src/sys/arch/xen/include/i386/hypercalls.h:1.8 src/sys/arch/xen/include/i386/hypercalls.h:1.8.4.1
--- src/sys/arch/xen/include/i386/hypercalls.h:1.8	Thu Nov 13 18:35:20 2008
+++ src/sys/arch/xen/include/i386/hypercalls.h	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: hypercalls.h,v 1.8 2008/11/13 18:35:20 cegger Exp $	*/
+/*	$NetBSD: hypercalls.h,v 1.8.4.1 2009/05/31 20:15:37 jym Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -796,4 +796,16 @@
 
     return ret;
 }
+
+static __inline int
+HYPERVISOR_sysctl(void *sysctl)
+{
+    int ret;
+    unsigned long ign1;
+
+    _hypercall(__HYPERVISOR_sysctl, _harg("1" (sysctl)),
+	_harg("=a" (ret), "=b" (ign1)));
+
+    return ret;
+}
 #endif /* _XENI386_HYPERVISOR_H_ */

Index: src/sys/arch/xen/x86/x86_xpmap.c
diff -u src/sys/arch/xen/x86/x86_xpmap.c:1.12.4.2 src/sys/arch/xen/x86/x86_xpmap.c:1.12.4.3
--- src/sys/arch/xen/x86/x86_xpmap.c:1.12.4.2	Wed May 13 17:18:50 2009
+++ src/sys/arch/xen/x86/x86_xpmap.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: x86_xpmap.c,v 1.12.4.2 2009/05/13 17:18:50 jym Exp $	*/
+/*	$NetBSD: x86_xpmap.c,v 1.12.4.3 2009/05/31 20:15:37 jym Exp $	*/
 
 /*
  * Copyright (c) 2006 Mathieu Ropert <[email protected]>
@@ -79,7 +79,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.12.4.2 2009/05/13 17:18:50 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: x86_xpmap.c,v 1.12.4.3 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 #include "opt_ddb.h"
@@ -139,6 +139,8 @@
  *
  * The thread responsible for the domU suspension will acquire an exclusive
  * (writer) lock.
+ *
+ * XXX JYM the locking will need revisit - rwlock(9) is currently inadequate
  */
 static krwlock_t xen_ptom_lock;
 
@@ -149,17 +151,17 @@
 
 void
 xen_release_ptom_lock(void) {
-	rw_exit(&xen_ptom_lock);
+	/* rw_exit(&xen_ptom_lock); */
 }
 
 void
 xen_acquire_reader_ptom_lock(void) {
-	rw_enter(&xen_ptom_lock, RW_READER);
+	/* rw_enter(&xen_ptom_lock, RW_READER); */
 }
 
 void
 xen_acquire_writer_ptom_lock(void) {
-	rw_enter(&xen_ptom_lock, RW_WRITER);
+	/* rw_enter(&xen_ptom_lock, RW_WRITER); */
 }
 
 void xen_failsafe_handler(void);

Index: src/sys/arch/xen/x86/xen_bus_dma.c
diff -u src/sys/arch/xen/x86/xen_bus_dma.c:1.14.2.1 src/sys/arch/xen/x86/xen_bus_dma.c:1.14.2.2
--- src/sys/arch/xen/x86/xen_bus_dma.c:1.14.2.1	Mon Feb  9 00:03:55 2009
+++ src/sys/arch/xen/x86/xen_bus_dma.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_bus_dma.c,v 1.14.2.1 2009/02/09 00:03:55 jym Exp $	*/
+/*	$NetBSD: xen_bus_dma.c,v 1.14.2.2 2009/05/31 20:15:37 jym Exp $	*/
 /*	NetBSD bus_dma.c,v 1.21 2005/04/16 07:53:35 yamt Exp */
 
 /*-
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.14.2.1 2009/02/09 00:03:55 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_bus_dma.c,v 1.14.2.2 2009/05/31 20:15:37 jym Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -89,6 +89,7 @@
 	if (error)
 		return (error);
 
+	xen_acquire_reader_ptom_lock();
 	for (pg = mlistp->tqh_first; pg != NULL; pg = pg->pageq.queue.tqe_next) {
 		pa = VM_PAGE_TO_PHYS(pg);
 		mfn = xpmap_ptom(pa) >> PAGE_SHIFT;
@@ -125,6 +126,7 @@
 		}
 #endif
 	}
+
 	/* Get the new contiguous memory extent */
 #ifdef XEN3
 	xenguest_handle(res.extent_start) = &mfn;
@@ -169,10 +171,13 @@
 			uvm_pagefree(pg);
 		}
 	}
+
 	/* Flush updates through and flush the TLB */
 	xpq_queue_tlb_flush();
 	xpq_flush_queue();
 	splx(s);
+
+	xen_release_ptom_lock();
 	return 0;
 
 failed:

Index: src/sys/arch/xen/xen/evtchn.c
diff -u src/sys/arch/xen/xen/evtchn.c:1.42.2.2 src/sys/arch/xen/xen/evtchn.c:1.42.2.3
--- src/sys/arch/xen/xen/evtchn.c:1.42.2.2	Wed May 13 17:18:50 2009
+++ src/sys/arch/xen/xen/evtchn.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: evtchn.c,v 1.42.2.2 2009/05/13 17:18:50 jym Exp $	*/
+/*	$NetBSD: evtchn.c,v 1.42.2.3 2009/05/31 20:15:37 jym Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -64,7 +64,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: evtchn.c,v 1.42.2.2 2009/05/13 17:18:50 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: evtchn.c,v 1.42.2.3 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 #include "isa.h"
@@ -192,7 +192,7 @@
 	ctrl_if_init();
 #endif
 	debug_port = bind_virq_to_evtch(VIRQ_DEBUG);
-	aprint_verbose("debug virtual interrupt using event channel %d\n",
+	aprint_verbose("VIRQ_DEBUG interrupt using event channel %d\n",
 	    debug_port);
 	/*
 	 * Don't call event_set_handler(), we'll use a shortcut. Just set

Index: src/sys/arch/xen/xen/if_xennet_xenbus.c
diff -u src/sys/arch/xen/xen/if_xennet_xenbus.c:1.33.2.2 src/sys/arch/xen/xen/if_xennet_xenbus.c:1.33.2.3
--- src/sys/arch/xen/xen/if_xennet_xenbus.c:1.33.2.2	Wed May 13 17:18:50 2009
+++ src/sys/arch/xen/xen/if_xennet_xenbus.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_xennet_xenbus.c,v 1.33.2.2 2009/05/13 17:18:50 jym Exp $      */
+/*      $NetBSD: if_xennet_xenbus.c,v 1.33.2.3 2009/05/31 20:15:37 jym Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -95,7 +95,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.33.2.2 2009/05/13 17:18:50 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.33.2.3 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -393,20 +393,21 @@
 	sc->sc_tx_ring.sring = tx_ring;
 	sc->sc_rx_ring.sring = rx_ring;
 
+	/* resume shared structures and tell backend that we are ready */
+	xennet_xenbus_resume(self, PMF_F_NONE);
+
 #if NRND > 0
 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
 	    RND_TYPE_NET, 0);
 #endif
 
-	/* initialize shared structures and tell backend that we are ready */
-	xennet_xenbus_resume(self, PMF_F_NONE);
-
 	if (!pmf_device_register(self,
-			         xennet_xenbus_suspend,
+				 xennet_xenbus_suspend,
 				 xennet_xenbus_resume))
 		aprint_error_dev(self, "couldn't establish power handler\n");
 	else
 		pmf_class_network_register(self, ifp);
+
 }
 
 static int
@@ -477,6 +478,16 @@
 	paddr_t ma;
 	const char *errmsg;
 
+	/* invalidate the RX and TX rings */
+	if (sc->sc_backend_status == BEST_SUSPENDED) {
+		/*
+		 * Device was suspended, so ensure that access associated to
+		 * the previous RX and TX rings are revoked.
+		 */
+		xengnt_revoke_access(sc->sc_tx_ring_gntref);
+		xengnt_revoke_access(sc->sc_rx_ring_gntref);
+	}
+
 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
 
@@ -546,6 +557,11 @@
 		return false;
 	}
 	xennet_alloc_rx_buffer(sc);
+
+	if (sc->sc_backend_status == BEST_SUSPENDED) {
+		xenbus_device_resume(sc->sc_xbusd);
+	}
+
 	sc->sc_backend_status = BEST_CONNECTED;
 
 	return true;
@@ -562,22 +578,34 @@
 	int s;
 	struct xennet_xenbus_softc *sc = device_private(dev);
 
-	/* xennet_stop() is called by pmf(9) before xennet_xenbus_suspend() */
+	/*
+	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
+	 * so we do not mask event channel here
+	 */
 
 	s = splnet();
-
 	/* process any outstanding TX responses, then collect RX packets */
 	xennet_handler(sc);
 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
 		xennet_handler(sc);
 	}
-	xennet_free_rx_buffer(sc);
+	
+	/*
+	 * dom0 may still use references to the grants we gave away
+	 * earlier during RX buffers allocation. So we do not free RX buffers
+	 * here, as dom0 does not expect the guest domain to suddenly revoke
+	 * access to these grants.
+	 */
 
 	sc->sc_backend_status = BEST_SUSPENDED;
+	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
+
 	splx(s);
 
-	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
+	xenbus_device_suspend(sc->sc_xbusd);
+	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
+
 	return true;
 }
 
@@ -786,11 +814,6 @@
 	 * RX buffers to catch-up with backend's consumption
 	 */
 	req->rxreq_gntref = GRANT_INVALID_REF;
-
-	/*
-	 * ring needs more requests to be pushed in, allocate some
-	 * RX buffers to catch-up with backend's consumption
-	 */
 	if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
 		xennet_alloc_rx_buffer(sc);
@@ -836,6 +859,7 @@
 		else
 			ifp->if_opackets++;
 		xengnt_revoke_access(req->txreq_gntref);
+
 		m_freem(req->txreq_m);
 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
 	}

Index: src/sys/arch/xen/xen/xbd_xenbus.c
diff -u src/sys/arch/xen/xen/xbd_xenbus.c:1.38.2.2 src/sys/arch/xen/xen/xbd_xenbus.c:1.38.2.3
--- src/sys/arch/xen/xen/xbd_xenbus.c:1.38.2.2	Wed May 13 17:18:50 2009
+++ src/sys/arch/xen/xen/xbd_xenbus.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbd_xenbus.c,v 1.38.2.2 2009/05/13 17:18:50 jym Exp $      */
+/*      $NetBSD: xbd_xenbus.c,v 1.38.2.3 2009/05/31 20:15:37 jym Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.38.2.2 2009/05/13 17:18:50 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.38.2.3 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 #include "rnd.h"
@@ -260,7 +260,7 @@
 		panic("%s: can't alloc ring", device_xname(self));
 	sc->sc_ring.sring = ring;
 
-	/* initialize shared structures and tell backend that we are ready */
+	/* resume shared structures and tell backend that we are ready */
 	xbd_xenbus_resume(self, PMF_F_NONE);
 
 #if NRND > 0
@@ -346,14 +346,13 @@
 		    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
 			tsleep(xbd_xenbus_suspend, PRIBIO, "xbdsuspend", hz/2);
 
+	hypervisor_mask_event(sc->sc_evtchn);
 	sc->sc_backend_status = BLKIF_STATE_SUSPENDED;
-	splx(s);
+	event_remove_handler(sc->sc_evtchn, xbd_handler, sc);
 
-	hypervisor_mask_event(sc->sc_evtchn);
-	if (event_remove_handler(sc->sc_evtchn, xbd_handler, sc) != 0)
-		aprint_error_dev(dev,
-				 "can't remove handler: xbd_handler\n");
+	splx(s);
 
+	xenbus_device_suspend(sc->sc_xbusd);
 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
 
 	return true;
@@ -370,6 +369,14 @@
 	const char *errmsg;
 
 	sc = device_private(dev);
+
+	if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
+		/*
+		 * Device was suspended, so ensure that access associated to
+		 * the block I/O ring is revoked.
+		 */
+		xengnt_revoke_access(sc->sc_ring_gntref);
+	}
 	sc->sc_ring_gntref = GRANT_INVALID_REF;
 	ring = sc->sc_ring.sring;
 
@@ -381,7 +388,6 @@
 	 * for the block device
 	 */
 	xen_acquire_reader_ptom_lock();
-
 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
 	xen_release_ptom_lock();
@@ -437,13 +443,11 @@
 		 * already initialized - we use a shortcut
 		 */
 		sc->sc_backend_status = BLKIF_STATE_CONNECTED;
+		xenbus_device_resume(sc->sc_xbusd);
 		hypervisor_enable_event(sc->sc_evtchn);
 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
 	}
 
-// XXX JYM
-	printf("read_otherend_details '%s' (%d)\n", sc->sc_xbusd->xbusd_otherend, sc->sc_xbusd->xbusd_otherend_id);
-
 	return true;
 
 abort_transaction:

Index: src/sys/arch/xen/xen/xen_machdep.c
diff -u src/sys/arch/xen/xen/xen_machdep.c:1.4.12.2 src/sys/arch/xen/xen/xen_machdep.c:1.4.12.3
--- src/sys/arch/xen/xen/xen_machdep.c:1.4.12.2	Wed May 13 17:18:50 2009
+++ src/sys/arch/xen/xen/xen_machdep.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: xen_machdep.c,v 1.4.12.2 2009/05/13 17:18:50 jym Exp $	*/
+/*	$NetBSD: xen_machdep.c,v 1.4.12.3 2009/05/31 20:15:37 jym Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -63,7 +63,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.4.12.2 2009/05/13 17:18:50 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xen_machdep.c,v 1.4.12.3 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 
@@ -246,7 +246,7 @@
 	 * it should not call this function to register
 	 * machdep.sleep_state sysctl
 	 */
-	KASSERT( !(xen_start_info.flags & SIF_INITDOMAIN) );
+	KASSERT( !(xendomain_is_dom0()) );
 
 	ret = sysctl_createv(NULL, 0, NULL, NULL, CTLFLAG_READWRITE,
 	     CTLTYPE_INT, "sleep_state", NULL, sysctl_xen_sleepstate, 0,
@@ -286,6 +286,13 @@
 	xen_suspendclocks();
 
 	xen_acquire_writer_ptom_lock();
+
+	/*
+	 * Xen lazy evaluation of recursive mappings requires
+	 * to flush the APDP entries
+	 */
+	pmap_unmap_all_apdp_pdes();
+
 	/*
 	 * save/restore code does not translate these MFNs to their
 	 * associated PFNs, so we must do it

Index: src/sys/arch/xen/xen/xencons.c
diff -u src/sys/arch/xen/xen/xencons.c:1.31.2.1 src/sys/arch/xen/xen/xencons.c:1.31.2.2
--- src/sys/arch/xen/xen/xencons.c:1.31.2.1	Mon Feb  9 00:03:55 2009
+++ src/sys/arch/xen/xen/xencons.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: xencons.c,v 1.31.2.1 2009/02/09 00:03:55 jym Exp $	*/
+/*	$NetBSD: xencons.c,v 1.31.2.2 2009/05/31 20:15:37 jym Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -63,7 +63,7 @@
 
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xencons.c,v 1.31.2.1 2009/02/09 00:03:55 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xencons.c,v 1.31.2.2 2009/05/31 20:15:37 jym Exp $");
 
 #include "opt_xen.h"
 
@@ -228,13 +228,13 @@
 
 	int evtch;
 
-	if (xen_start_info.flags & SIF_INITDOMAIN) {
+	if (xendomain_is_dom0()) {
 		evtch = unbind_virq_from_evtch(VIRQ_CONSOLE);
 		hypervisor_mask_event(evtch);
 		if (event_remove_handler(evtch, xencons_intr,
 		    xencons_console_device) != 0)
 			aprint_error_dev(dev,
-				       	 "can't remove handler: xencons_intr\n");
+			    "can't remove handler: xencons_intr\n");
 	} else {
 #ifdef XEN3
 		evtch = xen_start_info.console_evtchn;
@@ -242,7 +242,7 @@
 		if (event_remove_handler(evtch, xencons_handler,
 		    xencons_console_device) != 0)
 			aprint_error_dev(dev,
-				       	 "can't remove handler: xencons_handler\n");
+			    "can't remove handler: xencons_handler\n");
 #endif
 	}
 

Index: src/sys/arch/xen/xen/xennetback_xenbus.c
diff -u src/sys/arch/xen/xen/xennetback_xenbus.c:1.27.2.2 src/sys/arch/xen/xen/xennetback_xenbus.c:1.27.2.3
--- src/sys/arch/xen/xen/xennetback_xenbus.c:1.27.2.2	Wed May 13 17:18:51 2009
+++ src/sys/arch/xen/xen/xennetback_xenbus.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/*      $NetBSD: xennetback_xenbus.c,v 1.27.2.2 2009/05/13 17:18:51 jym Exp $      */
+/*      $NetBSD: xennetback_xenbus.c,v 1.27.2.3 2009/05/31 20:15:37 jym Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -214,11 +214,9 @@
 	    0, 0, &mlist, NB_XMIT_PAGES_BATCH, 0) != 0)
 		panic("xennetback_init: uvm_pglistalloc");
 
-	xen_acquire_reader_ptom_lock();
 	for (i = 0, pg = mlist.tqh_first; pg != NULL;
 	    pg = pg->pageq.queue.tqe_next, i++)
 		mcl_pages[i] = xpmap_ptom(VM_PAGE_TO_PHYS(pg)) >> PAGE_SHIFT;
-	xen_release_ptom_lock();
 
 
 	if (i != NB_XMIT_PAGES_BATCH)
@@ -862,7 +860,6 @@
 		return;
 	}
 
-	xen_acquire_reader_ptom_lock();
 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
 		XENPRINTF(("pkt\n"));
 		req_prod = xneti->xni_rxring.sring->req_prod;
@@ -1087,7 +1084,6 @@
 			break;
 		}
 	}
-	xen_release_ptom_lock();
 	splx(s);
 }
 

Index: src/sys/arch/xen/xenbus/xenbus_probe.c
diff -u src/sys/arch/xen/xenbus/xenbus_probe.c:1.27.2.1 src/sys/arch/xen/xenbus/xenbus_probe.c:1.27.2.2
--- src/sys/arch/xen/xenbus/xenbus_probe.c:1.27.2.1	Mon Feb  9 00:03:55 2009
+++ src/sys/arch/xen/xenbus/xenbus_probe.c	Sun May 31 20:15:37 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: xenbus_probe.c,v 1.27.2.1 2009/02/09 00:03:55 jym Exp $ */
+/* $NetBSD: xenbus_probe.c,v 1.27.2.2 2009/05/31 20:15:37 jym Exp $ */
 /******************************************************************************
  * Talks to Xen Store to figure out what devices we have.
  *
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xenbus_probe.c,v 1.27.2.1 2009/02/09 00:03:55 jym Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xenbus_probe.c,v 1.27.2.2 2009/05/31 20:15:37 jym Exp $");
 
 #if 0
 #define DPRINTK(fmt, args...) \
@@ -73,6 +73,13 @@
 static bool xenbus_suspend(device_t PMF_FN_PROTO);
 static bool xenbus_resume(device_t PMF_FN_PROTO);
 
+/* routines gathering device information from XenStore */
+static int  read_otherend_details(struct xenbus_device *,
+				  const char *, const char *);
+static int  read_backend_details (struct xenbus_device *);
+static int  read_frontend_details(struct xenbus_device *);
+static void free_otherend_details(struct xenbus_device *);
+
 CFATTACH_DECL_NEW(xenbus, 0, xenbus_match, xenbus_attach,
     NULL, NULL);
 
@@ -130,6 +137,29 @@
 	return true;
 }
 
+/*
+ * Suspend a xenbus device
+ */
+bool
+xenbus_device_suspend(struct xenbus_device *dev) {
+	free_otherend_details(dev);
+
+	return true;
+}
+
+/*
+ * Resume a xenbus device
+ */
+bool
+xenbus_device_resume(struct xenbus_device *dev) {
+
+	if (dev->xbusd_type == XENBUS_FRONTEND_DEVICE) {
+		read_backend_details(dev);
+	}
+
+	return true;
+}
+
 void
 xenbus_backend_register(struct xenbus_backend_driver *xbakd)
 {
@@ -180,8 +210,7 @@
 		printf("missing other end from %s\n", xendev->xbusd_path);
 		xenbus_dev_fatal(xendev, -ENOENT, "missing other end from %s",
 				 xendev->xbusd_path);
-		free(xendev->xbusd_otherend, M_DEVBUF);
-		xendev->xbusd_otherend = NULL;
+		free_otherend_details(xendev);
 		return ENOENT;
 	}
 
@@ -201,15 +230,12 @@
 	return read_otherend_details(xendev, "frontend-id", "frontend");
 }
 
-#if unused
 static void
 free_otherend_details(struct xenbus_device *dev)
 {
 	free(dev->xbusd_otherend, M_DEVBUF);
 	dev->xbusd_otherend = NULL;
 }
-#endif
-
 
 static void
 free_otherend_watch(struct xenbus_device *dev)
@@ -269,9 +295,19 @@
 		    xdev->xbusd_u.b.b_cookie : xdev->xbusd_u.f.f_dev, state);
 }
 
+#ifdef unused
 static int
 talk_to_otherend(struct xenbus_device *dev)
 {
+	return xenbus_watch_path2(dev, dev->xbusd_otherend, "state",
+				  &dev->xbusd_otherend_watch,
+				  otherend_changed);
+}
+#endif
+
+static int
+watch_otherend(struct xenbus_device *dev)
+{
 	free_otherend_watch(dev);
 
 	return xenbus_watch_path2(dev, dev->xbusd_otherend, "state",
@@ -382,7 +418,7 @@
 		}
 		SLIST_INSERT_HEAD(&xenbus_device_list,
 		    xbusd, xbusd_entries);
-		talk_to_otherend(xbusd);
+		watch_otherend(xbusd);
 	}
 	free(dir, M_DEVBUF);
 	return err;
@@ -489,7 +525,7 @@
 	KASSERT(xenbus_lookup_device_path(xbusd->xbusd_path) == xbusd);
 	SLIST_REMOVE(&xenbus_device_list, xbusd, xenbus_device, xbusd_entries);
 	free_otherend_watch(xbusd);
-	free(xbusd->xbusd_otherend, M_DEVBUF);
+	free_otherend_details(xbusd);
 	xenbus_switch_state(xbusd, NULL, XenbusStateClosed);
 	free(xbusd, M_DEVBUF);
 	return 0;

Reply via email to