Module Name:	src
Committed By:	jym
Date:		Sun Jul 24 23:56:34 UTC 2011
Modified Files:
	src/sys/arch/xen/xen: xbdback_xenbus.c

Log Message:
Add more comments to the xbdback(4) code. These make the continuations a
bit easier to follow (and understand), and helped track down a regression
between save/restore of xbdback(4) states.

A few minor fixes, which are merely cosmetic:
- the call graph is (somewhat) more readable
- rework the xbdback_do_io() routine with a switch statement, so that a
  panic() is triggered when an invalid operation gets past the sanity
  checks. panic() may be overkill here, but it guarantees such errors are
  caught if they ever happen.

To generate a diff of this commit:
cvs rdiff -u -r1.40 -r1.41 src/sys/arch/xen/xen/xbdback_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
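For readers who want the shape of the xbdback_do_io() change without reading
the whole diff below, here is a minimal stand-alone sketch of the switch-style
dispatch described in the log message. It is only an illustration: the enum
values, the fake_panic() helper and the printf() bodies are invented for user
space; only the overall structure (one case per valid operation, a loud
failure in default) mirrors what the reworked routine does.

	/*
	 * Sketch of a switch-based dispatch with a "should never happen"
	 * default, in the spirit of the reworked xbdback_do_io().
	 * Not the kernel code; names and bodies are placeholders.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	enum op { OP_FLUSH_DISKCACHE, OP_READ, OP_WRITE };

	static void
	fake_panic(const char *msg, int op)
	{
		/* stand-in for the kernel's panic() */
		fprintf(stderr, "panic: %s %d\n", msg, op);
		abort();
	}

	static void
	do_io(int op)
	{
		switch (op) {
		case OP_FLUSH_DISKCACHE:
			printf("flush the disk cache\n");
			break;
		case OP_READ:
		case OP_WRITE:
			printf("map the guest pages and call the block driver\n");
			break;
		default:
			/* Should never happen: earlier sanity checks reject it. */
			fake_panic("do_io: unsupported operation", op);
		}
	}

	int
	main(void)
	{
		do_io(OP_READ);
		do_io(OP_FLUSH_DISKCACHE);
		return 0;
	}

The point of the default case is exactly what the log message says: an
operation that slips past the earlier checks is a bug, and stopping
immediately is preferable to issuing a bogus block I/O.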
Modified files:

Index: src/sys/arch/xen/xen/xbdback_xenbus.c
diff -u src/sys/arch/xen/xen/xbdback_xenbus.c:1.40 src/sys/arch/xen/xen/xbdback_xenbus.c:1.41
--- src/sys/arch/xen/xen/xbdback_xenbus.c:1.40	Sun Jun 12 03:35:50 2011
+++ src/sys/arch/xen/xen/xbdback_xenbus.c	Sun Jul 24 23:56:34 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.40 2011/06/12 03:35:50 rmind Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.41 2011/07/24 23:56:34 jym Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.40 2011/06/12 03:35:50 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.41 2011/07/24 23:56:34 jym Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -87,31 +87,41 @@
  * it's finished, set xbdi->xbdi_cont (see below) to NULL and the return
  * doesn't matter. Otherwise it's passed as the second parameter to
  * the new value of xbdi->xbdi_cont.
+ *
  * Here's how the call graph is supposed to be for a single I/O:
- * xbdback_co_main()
- * | |-> xbdback_co_cache_doflush() -> stall
- * | xbdback_co_cache_flush2() <- xbdback_co_flush_done() <-
- * | | |
- * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
- * xbdback_co_main_loop() -> xbdback_co_main_done() -> xbdback_co_flush()
- * | | |
- * | xbdback_co_main_done2() <- xbdback_co_flush_done()
- * | |
- * | xbdback_co_main() or NULL
- * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
+ * xbdback_co_main()
+ * |
+ * | --> xbdback_co_cache_doflush() or NULL
+ * | |
+ * | -- xbdback_co_cache_flush2() <- xbdback_co_flush_done() <--
+ * | | |
+ * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
+ * xbdback_co_main_loop()-|
+ * | |-> xbdback_co_main_done() -> xbdback_co_flush() --
+ * | | |
+ * | -- xbdback_co_main_done2() <- xbdback_co_flush_done() <--
+ * | |
+ * | --> xbdback_co_main() or NULL
+ * |
+ * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
  * |
- * xbdback_co_io_gotreq() -> xbdback_co_flush() -> xbdback_co_flush()
- * | | |
- * xbdback_co_io_loop() --- <---------------- xbdback_co_flush_done()
- * | |
- * xbdback_co_io_gotio() |
- * | |
- * xbdback_co_io_gotio2()<-
- * | |--------> xbdback_co_io_gotfrag
- * | |
- * xbdback_co_io_gotfrag2() <----------|
- * | |--> xbdback_co_io_loop()
- * xbdback_co_main_incr()
+ * xbdback_co_io_gotreq()--+---------> xbdback_co_flush() --
+ * | | |
+ * -> xbdback_co_io_loop()----| <- xbdback_co_flush_done() <--
+ * | | | |
+ * | | | |----------> xbdback_co_io_gotio()
+ * | | | |
+ * | | xbdback_co_main_incr() |
+ * | | | |
+ * | | xbdback_co_main_loop() |
+ * | | |
+ * | xbdback_co_io_gotio2() <-----------|
+ * | | |
+ * | | |----------> xbdback_co_io_gotfrag()
+ * | | |
+ * -- xbdback_co_io_gotfrag2() <---------|
+ * |
+ * xbdback_co_main_incr() -> xbdback_co_main_loop()
  */
 
 typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
@@ -153,11 +163,11 @@
 	RING_IDX xbdi_req_prod; /* limit on request indices */
 	xbdback_cont_t xbdi_cont, xbdi_cont_aux;
 	SIMPLEQ_ENTRY(xbdback_instance) xbdi_on_hold; /* waiting on resources */
-	/* _request state */
+	/* _request state: track requests fetched from ring */
 	struct xbdback_request *xbdi_req; /* if NULL, ignore following */
 	blkif_request_t xbdi_xen_req;
 	int xbdi_segno;
-	/* _io state */
+	/* _io state: I/O associated to this instance */
 	struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
 	daddr_t xbdi_next_sector;
 	uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
@@ -214,7 +224,7 @@
 	/* grants release */
 	grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
 	uint16_t xio_nrma; /* number of guest pages */
-	uint16_t xio_mapped;
+	uint16_t xio_mapped; /* == 1: grants are mapped */
 	} xio_rw;
 	uint64_t xio_flush_id;
 	} u;
@@ -230,7 +240,7 @@
 #define xio_flush_id u.xio_flush_id
 
 /*
- * Rather than have the xbdback_io keep an array of the
+ * Rather than having the xbdback_io keep an array of the
  * xbdback_requests involved, since the actual number will probably be
 * small but might be as large as BLKIF_RING_SIZE, use a list. This
 * would be threaded through xbdback_request, but one of them might be
@@ -850,6 +860,7 @@
 		xbdi->xbdi_cont = xbdback_co_main;
 		xbdback_trampoline(xbdi, xbdi);
 	}
+
 	return 1;
 }
 
@@ -867,6 +878,10 @@
 	return xbdi;
 }
 
+/*
+ * Fetch a blkif request from the ring, and pass control to the appropriate
+ * continuation.
+ */
 static void *
 xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)
 {
@@ -931,6 +946,9 @@
 	return xbdi;
 }
 
+/*
+ * Increment consumer index and move on to the next request.
+ */
 static void *
 xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj)
 {
@@ -940,6 +958,10 @@
 	return xbdi;
 }
 
+/*
+ * Ring processing is over. If there are any I/O still present for this
+ * instance, handle them first.
+ */
 static void *
 xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
 {
@@ -953,6 +975,10 @@
 	return xbdi;
 }
 
+/*
+ * Check for requests in the instance's ring. In case there are, start again
+ * from the beginning. If not, stall.
+ */
 static void *
 xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
 {
@@ -966,12 +992,16 @@
 	return xbdi;
 }
 
+/*
+ * Frontend requested a cache flush operation.
+ */
 static void *
 xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
 {
 	(void)obj;
 	XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
 	if (xbdi->xbdi_io != NULL) {
+		/* Some I/Os are required for this instance. Process them. */
 		xbdi->xbdi_cont = xbdback_co_flush;
 		xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
 	} else {
@@ -986,7 +1016,8 @@
 	(void)obj;
 	XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
 	if (xbdi->xbdi_pendingreqs > 0) {
-		/* event or iodone will restart processing */
+		/* There are pending requests.
+		 * Event or iodone() will restart processing */
 		xbdi->xbdi_cont = NULL;
 		xbdi_put(xbdi);
 		return NULL;
@@ -995,6 +1026,7 @@
 	return xbdback_pool_get(&xbdback_io_pool, xbdi);
 }
 
+/* Enqueue the flush work */
static void *
 xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1007,10 +1039,14 @@
 	xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
 	workqueue_enqueue(xbdback_workqueue, &xbdi->xbdi_io->xio_work, NULL);
 	/* xbdback_do_io() will advance req pointer and restart processing */
-	xbdi->xbdi_cont = xbdback_co_cache_doflush;
+	xbdi->xbdi_cont = NULL;
 	return NULL;
 }
 
+/*
+ * A read or write I/O request must be processed. Do some checks first,
+ * then get the segment information directly from the ring request.
+ */
 static void *
 xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1061,6 +1097,7 @@
 	xbdi->xbdi_cont = xbdback_co_io_gotreq;
 	return xbdback_pool_get(&xbdback_request_pool, xbdi);
+
  end:
 	xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
 	    xbdi->xbdi_xen_req.operation, error);
@@ -1068,6 +1105,11 @@
 	return xbdi;
 }
 
+/*
+ * We have fetched segment requests from the ring. In case there are already
+ * I/Os prepared for this instance, we can try coalescing the requests
+ * with these I/Os.
+ */
 static void *
 xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1110,7 +1152,7 @@
 	return xbdi;
 }
 
-
+/* Handle coalescing of multiple segment requests into one I/O work */
 static void *
 xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1189,10 +1231,9 @@
 	return xbdi;
 }
 
-
+/* Prepare an I/O buffer for a xbdback instance */
 static void *
 xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
-
 {
 	struct xbdback_io *xbd_io;
 	vaddr_t start_offset; /* start offset in vm area */
@@ -1234,7 +1275,7 @@
 	return xbdi;
 }
 
-
+/* Manage fragments */
 static void *
 xbdback_co_io_gotio2(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1249,7 +1290,7 @@
 	return xbdi;
 }
 
-
+/* Prepare the instance for its first fragment */
 static void *
 xbdback_co_io_gotfrag(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1264,6 +1305,7 @@
 	return xbdi;
 }
 
+/* Last routine to manage segments fragments for one I/O */
 static void *
 xbdback_co_io_gotfrag2(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1302,7 +1344,10 @@
 	return xbdi;
 }
 
-
+/*
+ * Map the different I/O requests in backend's VA space, then schedule
+ * the I/O work.
+ */
 static void *
 xbdback_co_flush(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1314,6 +1359,7 @@
 	return xbdback_map_shm(xbdi->xbdi_io);
 }
 
+/* Transfer all I/O work to the workqueue */
 static void *
 xbdback_co_flush_done(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1331,13 +1377,19 @@
 	xbdback_iodone(&xbd_io->xio_buf);
 }
 
+/*
+ * Main xbdback workqueue routine: performs I/O on behalf of backend. Has
+ * thread context.
+ */
 static void
 xbdback_do_io(struct work *wk, void *dummy)
 {
 	struct xbdback_io *xbd_io = (void *)wk;
 	KASSERT(&xbd_io->xio_work == wk);
 
-	if (xbd_io->xio_operation == BLKIF_OP_FLUSH_DISKCACHE) {
+	switch (xbd_io->xio_operation) {
+	case BLKIF_OP_FLUSH_DISKCACHE:
+	{
 		int error;
 		int force = 1;
 		struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
@@ -1361,37 +1413,44 @@
 		xbdi->xbdi_io = NULL;
 		xbdi->xbdi_cont = xbdback_co_main_incr;
 		xbdback_trampoline(xbdi, xbdi);
-		return;
+		break;
 	}
-
-	/* should be read or write */
-	xbd_io->xio_buf.b_data =
-	    (void *)((vaddr_t)xbd_io->xio_buf.b_data + xbd_io->xio_vaddr);
+	case BLKIF_OP_READ:
+	case BLKIF_OP_WRITE:
+		xbd_io->xio_buf.b_data = (void *)
+		    ((vaddr_t)xbd_io->xio_buf.b_data + xbd_io->xio_vaddr);
 #ifdef DIAGNOSTIC
-	{
-	vaddr_t bdata = (vaddr_t)xbd_io->xio_buf.b_data;
-	int nsegs =
-	    ((((bdata + xbd_io->xio_buf.b_bcount - 1) & ~PAGE_MASK) -
-	    (bdata & ~PAGE_MASK)) >> PAGE_SHIFT) + 1;
-	if ((bdata & ~PAGE_MASK) != (xbd_io->xio_vaddr & ~PAGE_MASK)) {
-		printf("xbdback_do_io vaddr 0x%lx bdata 0x%lx\n",
-		    xbd_io->xio_vaddr, bdata);
-		panic("xbdback_do_io: bdata page change");
-	}
-	if (nsegs > xbd_io->xio_nrma) {
-		printf("xbdback_do_io vaddr 0x%lx bcount 0x%x doesn't fit in "
-		    " %d pages\n", bdata, xbd_io->xio_buf.b_bcount,
-		    xbd_io->xio_nrma);
-		panic("xbdback_do_io: not enough pages");
-	}
-	}
+	{
+		vaddr_t bdata = (vaddr_t)xbd_io->xio_buf.b_data;
+		int nsegs =
+		    ((((bdata + xbd_io->xio_buf.b_bcount - 1) & ~PAGE_MASK) -
+		    (bdata & ~PAGE_MASK)) >> PAGE_SHIFT) + 1;
+		if ((bdata & ~PAGE_MASK) != (xbd_io->xio_vaddr & ~PAGE_MASK)) {
+			printf("xbdback_do_io: vaddr %#" PRIxVADDR
+			    " bdata %#" PRIxVADDR "\n",
+			    xbd_io->xio_vaddr, bdata);
+			panic("xbdback_do_io: bdata page change");
+		}
+		if (nsegs > xbd_io->xio_nrma) {
+			printf("xbdback_do_io: vaddr %#" PRIxVADDR
+			    " bcount %#x doesn't fit in %d pages\n",
+			    bdata, xbd_io->xio_buf.b_bcount, xbd_io->xio_nrma);
+			panic("xbdback_do_io: not enough pages");
+		}
+	}
#endif
-	if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
-		mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
-		xbd_io->xio_buf.b_vp->v_numoutput++;
-		mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
+		if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
+			mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
+			xbd_io->xio_buf.b_vp->v_numoutput++;
+			mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
+		}
+		bdev_strategy(&xbd_io->xio_buf);
+		break;
+	default:
+		/* Should never happen */
+		panic("xbdback_do_io: unsupported operation %d",
+		    xbd_io->xio_operation);
 	}
-	bdev_strategy(&xbd_io->xio_buf);
 }
 
 /* This gets reused by xbdback_io_error to report errors from other sources. */
@@ -1409,7 +1468,7 @@
 	XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
 	    xbdi->xbdi_domid, (long)xbd_io));
 
-	if (xbd_io->xio_mapped)
+	if (xbd_io->xio_mapped == 1)
 		xbdback_unmap_shm(xbd_io);
 
 	if (bp->b_error != 0) {
@@ -1418,7 +1477,6 @@
 		errp = 1;
 	} else
 		errp = 0;
-
 
 	/* for each constituent xbd request */
 	while(!SLIST_EMPTY(&xbd_io->xio_rq)) {
@@ -1510,8 +1568,8 @@
 }
 
 /*
- * Map a request into our virtual address space. The xbd_req->rq_ma
- * array is to be filled out by the caller.
+ * Map multiple entries of an I/O request into backend's VA space.
+ * The xbd_io->xio_gref array has to be filled out by the caller.
 */
 static void *
 xbdback_map_shm(struct xbdback_io *xbd_io)
@@ -1679,6 +1737,10 @@
 	}
 }
 
+/*
+ * Trampoline routine. Calls continuations in a loop and only exits when
+ * either the returned object or the next callback is NULL.
+ */
 static void
 xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
 {
@@ -1693,7 +1755,7 @@
 #ifdef DIAGNOSTIC
 	if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
 		printf("xbdback_trampoline: 0x%lx didn't set "
-		    "xbdi->xbdi_cont!\n2", (long)cont);
+		    "xbdi->xbdi_cont!\n", (long)cont);
 		panic("xbdback_trampoline: bad continuation");
 	}
 #endif
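The trampoline comment added in one of the last hunks is the key to reading
this file. As a rough user-space sketch of that continuation pattern (the
struct instance, step_work() and step_done() names are invented here; only
the loop's exit conditions follow the comment), it looks like this:

	/*
	 * Sketch of a continuation trampoline: each step stores the next
	 * callback in the instance and returns the object to hand to it;
	 * the trampoline loops until the callback or the object is NULL.
	 * Illustration only, not the xbdback code itself.
	 */
	#include <stdio.h>
	#include <stddef.h>

	struct instance;
	typedef void *(*cont_t)(struct instance *, void *);

	struct instance {
		cont_t cont;	/* next continuation to run */
		int steps;
	};

	static void *step_done(struct instance *, void *);

	static void *
	step_work(struct instance *xi, void *obj)
	{
		printf("work step %d\n", xi->steps++);
		/* keep working a few times, then move to the final step */
		xi->cont = (xi->steps < 3) ? step_work : step_done;
		return obj;
	}

	static void *
	step_done(struct instance *xi, void *obj)
	{
		printf("done after %d steps\n", xi->steps);
		xi->cont = NULL;	/* stop the trampoline */
		return obj;
	}

	static void
	trampoline(struct instance *xi, void *obj)
	{
		/* Exit when a continuation stalls (NULL obj) or finishes (NULL cont). */
		while (obj != NULL && xi->cont != NULL) {
			cont_t cont = xi->cont;
			obj = cont(xi, obj);
		}
	}

	int
	main(void)
	{
		struct instance xi = { step_work, 0 };
		trampoline(&xi, &xi);
		return 0;
	}

Each continuation decides what runs next by updating the instance before it
returns; returning NULL instead of an object stalls the loop, which in the
real code is how processing waits until an event or iodone() restarts it.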