Module Name:    src
Committed By:   bouyer
Date:           Wed Feb 22 18:54:51 UTC 2012

Modified Files:
        src/sys/arch/xen/xen: if_xennet_xenbus.c

Log Message:
Allocating a fixed, limited number of receive buffers and handing them
to the network stack is a bad idea, because if all receive buffers sit
in socket queues, receive on the interface stalls for lack of buffers.
Instead, get receive pages from a pool_cache(9). Copying into an mbuf
cluster would work too, but testing shows this has a significant
performance hit. This also simplifies locking.
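
For context, a minimal sketch of the pool_cache(9) pattern this change
adopts (illustrative only: the driver's cache is actually named
if_xennetrxbuf_cache, and the rxbuf_* names here are hypothetical):

    #include <sys/param.h>
    #include <sys/pool.h>

    static pool_cache_t rxbuf_cache;

    static void
    rxbuf_cache_init(void)
    {
    	/* Page-sized objects, no ctor/dtor, usable from IPL_VM. */
    	rxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
    	    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
    }

    static vaddr_t
    rxbuf_get(paddr_t *pap)
    {
    	/* Returns the kernel VA; *pap gets the PA needed for the grant. */
    	return (vaddr_t)pool_cache_get_paddr(rxbuf_cache, PR_WAITOK, pap);
    }

    static void
    rxbuf_put(vaddr_t va, paddr_t pa)
    {
    	/* Called e.g. from the mbuf external-storage free callback. */
    	pool_cache_put_paddr(rxbuf_cache, (void *)va, pa);
    }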

While there, notify the dom0 when we add receive buffers (older
Linux dom0s didn't care, but newer ones do).
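
The notification follows the standard Xen shared-ring pattern, roughly
as below (mirroring the hunk in the diff that follows):

    int notify;

    sc->sc_rx_ring.req_prod_pvt = req_prod + i;
    /* Publish the new requests and check if the backend wants an event. */
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
    if (notify)
    	hypervisor_notify_via_evtchn(sc->sc_evtchn);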

Problem reported and fix tested by Brian Marcotte on port-xen


To generate a diff of this commit:
cvs rdiff -u -r1.58 -r1.59 src/sys/arch/xen/xen/if_xennet_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/xen/xen/if_xennet_xenbus.c
diff -u src/sys/arch/xen/xen/if_xennet_xenbus.c:1.58 src/sys/arch/xen/xen/if_xennet_xenbus.c:1.59
--- src/sys/arch/xen/xen/if_xennet_xenbus.c:1.58	Thu Feb  2 19:43:01 2012
+++ src/sys/arch/xen/xen/if_xennet_xenbus.c	Wed Feb 22 18:54:51 2012
@@ -1,4 +1,4 @@
-/*      $NetBSD: if_xennet_xenbus.c,v 1.58 2012/02/02 19:43:01 tls Exp $      */
+/*      $NetBSD: if_xennet_xenbus.c,v 1.59 2012/02/22 18:54:51 bouyer Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -85,7 +85,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.58 2012/02/02 19:43:01 tls Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.59 2012/02/22 18:54:51 bouyer Exp $");
 
 #include "opt_xen.h"
 #include "opt_nfs_boot.h"
@@ -148,7 +148,6 @@ int xennet_debug = 0xff;
 #endif
 
 #define GRANT_INVALID_REF -1 /* entry is free */
-#define GRANT_STACK_REF   -2 /* entry owned by the network stack */
 
 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
@@ -210,6 +209,9 @@ struct xennet_xenbus_softc {
 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
 static u_long xennet_pages[NET_RX_RING_SIZE];
 
+static pool_cache_t if_xennetrxbuf_cache;
+static int if_xennetrxbuf_cache_inited=0;
+
 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
 static void xennet_xenbus_attach(device_t, device_t, void *);
 static int  xennet_xenbus_detach(device_t, int);
@@ -219,6 +221,7 @@ static void xennet_alloc_rx_buffer(struc
 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
 static void xennet_tx_complete(struct xennet_xenbus_softc *);
 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
+static void xennet_rx_free_req(struct xennet_rxreq *);
 static int  xennet_handler(void *);
 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
 #ifdef XENNET_DEBUG_DUMP
@@ -301,6 +304,14 @@ xennet_xenbus_attach(device_t parent, de
 	sc->sc_xbusd = xa->xa_xbusd;
 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
 
+	/* xenbus ensure 2 devices can't be probed at the same time */
+	if (if_xennetrxbuf_cache_inited == 0) {
+		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
+		    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
+		if_xennetrxbuf_cache_inited = 1;
+	}
+		
+
 	/* initialize free RX and RX request lists */
 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
 	SLIST_INIT(&sc->sc_txreq_head);
@@ -316,13 +327,10 @@ xennet_xenbus_attach(device_t parent, de
 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
 		rxreq->rxreq_id = i;
 		rxreq->rxreq_sc = sc;
-		rxreq->rxreq_va = uvm_km_alloc(kernel_map,
-		    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
+		rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+		    if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
 		if (rxreq->rxreq_va == 0)
 			break;
-		if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
-		    &rxreq->rxreq_pa))
-			panic("%s: no pa for mapped va ?", device_xname(self));
 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
 	}
@@ -581,7 +589,9 @@ again:
 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
 		return false;
 	}
+	mutex_enter(&sc->sc_rx_lock);
 	xennet_alloc_rx_buffer(sc);
+	mutex_exit(&sc->sc_rx_lock);
 
 	if (sc->sc_backend_status == BEST_SUSPENDED) {
 		xenbus_device_resume(sc->sc_xbusd);
@@ -675,12 +685,12 @@ xennet_alloc_rx_buffer(struct xennet_xen
 	RING_IDX i;
 	struct xennet_rxreq *req;
 	struct xen_memory_reservation reservation;
-	int s, otherend_id;
+	int s, otherend_id, notify;
 	paddr_t pfn;
 
 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
 
-	mutex_enter(&sc->sc_rx_lock);
+	KASSERT(mutex_owned(&sc->sc_rx_lock));
 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
 		KASSERT(req != NULL);
@@ -729,7 +739,6 @@ xennet_alloc_rx_buffer(struct xennet_xen
 
 out_loop:
 	if (i == 0) {
-		mutex_exit(&sc->sc_rx_lock);
 		return;
 	}
 
@@ -762,9 +771,9 @@ out_loop:
 	}
 
 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
-	RING_PUSH_REQUESTS(&sc->sc_rx_ring);
-
-	mutex_exit(&sc->sc_rx_lock);
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
+	if (notify)
+		hypervisor_notify_via_evtchn(sc->sc_evtchn);
 	return;
 }
 
@@ -780,7 +789,6 @@ xennet_free_rx_buffer(struct xennet_xenb
 	mmu_update_t mmu[1];
 	multicall_entry_t mcl[2];
 
-	int s = splbio();
 	mutex_enter(&sc->sc_rx_lock);
 	
 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
@@ -788,16 +796,6 @@ xennet_free_rx_buffer(struct xennet_xenb
 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
 
-		/*
-		 * if the buffer is in transit in the network stack, wait for
-		 * the network stack to free it.
-		 */
-		while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
-		    GRANT_STACK_REF)
-			mutex_exit(&sc->sc_rx_lock);
-			tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
-			mutex_enter(&sc->sc_rx_lock);
-
 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
 			/*
 			 * this req is still granted. Get back the page or
@@ -861,7 +859,6 @@ xennet_free_rx_buffer(struct xennet_xenb
 
 	}
 	mutex_exit(&sc->sc_rx_lock);
-	splx(s);
 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
 }
 
@@ -871,10 +868,23 @@ xennet_free_rx_buffer(struct xennet_xenb
 static void
 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
 {
-	struct xennet_rxreq *req = arg;
+	int s = splnet();
+	KASSERT(buf == m->m_ext.ext_buf);
+	KASSERT(arg == NULL);
+	KASSERT(m != NULL);
+	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
+	pool_cache_put_paddr(if_xennetrxbuf_cache,
+	    (void *)va, m->m_ext.ext_paddr);
+	pool_cache_put(mb_cache, m);
+	splx(s);
+};
+
+static void
+xennet_rx_free_req(struct xennet_rxreq *req)
+{
 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
 	
-	mutex_enter(&sc->sc_rx_lock);
+	KASSERT(mutex_owned(&sc->sc_rx_lock));
 
 	/* puts back the RX request in the list of free RX requests */
 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
@@ -886,18 +896,10 @@ xennet_rx_mbuf_free(struct mbuf *m, void
 	 */
 	req->rxreq_gntref = GRANT_INVALID_REF;
 	
-	if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
+	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
-		mutex_exit(&sc->sc_rx_lock);
 		xennet_alloc_rx_buffer(sc);
 	}
-	else {
-		mutex_exit(&sc->sc_rx_lock);
-	}
-	
-	if (m)
-		pool_cache_put(mb_cache, m);
-
 }
 
 /*
@@ -988,16 +990,10 @@ again:
 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
 
+	mutex_enter(&sc->sc_rx_lock);
 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
 	xen_rmb(); /* ensure we see replies up to resp_prod */
 
-	/*
-	 * The locking in this loop needs to be done carefully, as
-	 * m_free() will take the sc_rx_lock() via
-	 * xennet_rx_mbuf_free()
-	 */
-	mutex_enter(&sc->sc_rx_lock);
-	
 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
 		req = &sc->sc_rxreqs[rx->id];
@@ -1034,8 +1030,6 @@ again:
 			    __func__, sc->sc_rx_feature);
 		}
 
-		req->rxreq_gntref = GRANT_INVALID_REF;
-
 		pa = req->rxreq_pa;
 		va = req->rxreq_va;
 		
@@ -1067,10 +1061,7 @@ again:
 				DPRINTFN(XEDB_EVENT,
 				    ("xennet_handler bad dest\n"));
 				/* packet not for us */
-				mutex_exit(&sc->sc_rx_lock);
-				xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
-				    req);
-				mutex_enter(&sc->sc_rx_lock);
+				xennet_rx_free_req(req);
 				continue;
 			}
 		}
@@ -1078,50 +1069,37 @@ again:
 		if (__predict_false(m == NULL)) {
 			printf("%s: rx no mbuf\n", ifp->if_xname);
 			ifp->if_ierrors++;
-			mutex_exit(&sc->sc_rx_lock);
-			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-			mutex_enter(&sc->sc_rx_lock);
+			xennet_rx_free_req(req);
 			continue;
 		}
 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
 
 		m->m_pkthdr.rcvif = ifp;
-		if (__predict_true(sc->sc_rx_ring.req_prod_pvt != 
-		    sc->sc_rx_ring.sring->rsp_prod)) {
-			m->m_len = m->m_pkthdr.len = rx->status;
-			MEXTADD(m, pktp, rx->status,
-			    M_DEVBUF, xennet_rx_mbuf_free, req);
-			m->m_flags |= M_EXT_RW; /* we own the buffer */
-			req->rxreq_gntref = GRANT_STACK_REF;
-			mutex_exit(&sc->sc_rx_lock);
-		} else {
-			/*
-			 * This was our last receive buffer, allocate
-			 * memory, copy data and push the receive
-			 * buffer back to the hypervisor.
-			 */
-			m->m_len = min(MHLEN, rx->status);
-			m->m_pkthdr.len = 0;
-			mutex_exit(&sc->sc_rx_lock);
-			m_copyback(m, 0, rx->status, pktp);
-			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
-			if (m->m_pkthdr.len < rx->status) {
-				/* out of memory, just drop packets */
-				ifp->if_ierrors++;
-				m_freem(m);
-				mutex_enter(&sc->sc_rx_lock);
-				continue;
-			}
+		req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
+		    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
+		if (__predict_false(req->rxreq_va == 0)) {
+			printf("%s: rx no buf\n", ifp->if_xname);
+			ifp->if_ierrors++;
+			req->rxreq_va = va;
+			req->rxreq_pa = pa;
+			xennet_rx_free_req(req);
+			m_freem(m);
+			continue;
 		}
-		
+		m->m_len = m->m_pkthdr.len = rx->status;
+		MEXTADD(m, pktp, rx->status,
+		    M_DEVBUF, xennet_rx_mbuf_free, NULL);
+		m->m_flags |= M_EXT_RW; /* we own the buffer */
+		m->m_ext.ext_paddr = pa;
 		if ((rx->flags & NETRXF_csum_blank) != 0) {
 			xennet_checksum_fill(&m);
 			if (m == NULL) {
 				ifp->if_ierrors++;
-				mutex_enter(&sc->sc_rx_lock);
 				continue;
 			}
 		}
+		/* free req may overwrite *rx, better doing it late */
+		xennet_rx_free_req(req);
 		/*
 		 * Pass packet to bpf if there is a listener.
 		 */
@@ -1131,7 +1109,6 @@ again:
 
 		/* Pass the packet up. */
 		(*ifp->if_input)(ifp, m);
-		mutex_enter(&sc->sc_rx_lock);
 	}
 	xen_rmb();
 	sc->sc_rx_ring.rsp_cons = i;
