Module Name:	src
Committed By:	jdolecek
Date:		Mon Apr 20 16:12:28 UTC 2020
Modified Files:
	src/sys/arch/xen/xen: xbdback_xenbus.c

Log Message:
remove I/O defragmentation logic, pass requests straight to the underlying
block device without trying to coalesce them

it seems rarely useful, and it makes the handling logic unnecessarily
complex - ultimately it's the DomU operating system's responsibility to
issue optimal I/O

might also help with the ZFS problem reported on port-xen, and will
surely simplify eventual indirect segment support

To generate a diff of this commit:
cvs rdiff -u -r1.82 -r1.83 src/sys/arch/xen/xen/xbdback_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
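The effect of the change is easiest to see in the rewritten
xbdback_co_io_gotio() in the diff below: the segments of a single ring
request are now summed into one byte count and submitted as a single
struct buf, instead of being coalesced with neighbouring requests. The
following user-space sketch (simplified stand-in types and constants,
not the kernel code itself) illustrates that per-request computation:

/*
 * Sketch of the new one-request/one-buf size computation done by
 * xbdback_co_io_gotio(); struct seg is an illustrative stand-in for
 * the blkif segment descriptors.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define VBD_BSIZE	512		/* Xen virtual block device sector size */
#define MAXPHYS		(64 * 1024)	/* largest transfer the backend issues */

struct seg {
	uint8_t first_sect, last_sect;	/* sector range within one 4k page */
};

/* Sum the segment lengths of one request into a single transfer size. */
static size_t
request_bcount(const struct seg *seg, int nsegs, size_t *start_offset)
{
	size_t bcount = 0;

	for (int i = 0; i < nsegs; i++)
		bcount += (size_t)(seg[i].last_sect - seg[i].first_sect + 1)
		    * VBD_BSIZE;

	/* The transfer starts at the first segment's offset in its page. */
	*start_offset = (size_t)seg[0].first_sect * VBD_BSIZE;
	assert(bcount <= MAXPHYS);
	return bcount;
}

int
main(void)
{
	/* Two full 4k pages (sectors 0-7) plus a final half page. */
	struct seg segs[] = { { 0, 7 }, { 0, 7 }, { 0, 3 } };
	size_t off, bcount;

	bcount = request_bcount(segs, 3, &off);
	printf("one buf: b_bcount=%zu, start offset=%zu\n", bcount, off);
	return 0;
}

A blkif request carries at most BLKIF_MAX_SEGMENTS_PER_REQUEST (11)
segments of at most one 4k page each, so the total always fits within
MAXPHYS; the new KASSERT in xbdback_co_io_gotio() checks exactly that.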
Modified files:

Index: src/sys/arch/xen/xen/xbdback_xenbus.c
diff -u src/sys/arch/xen/xen/xbdback_xenbus.c:1.82 src/sys/arch/xen/xen/xbdback_xenbus.c:1.83
--- src/sys/arch/xen/xen/xbdback_xenbus.c:1.82	Mon Apr 20 14:11:04 2020
+++ src/sys/arch/xen/xen/xbdback_xenbus.c	Mon Apr 20 16:12:28 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.83 2020/04/20 16:12:28 jdolecek Exp $");
 
 #include <sys/atomic.h>
 #include <sys/buf.h>
@@ -74,9 +74,7 @@ __KERNEL_RCSID(0, "$NetBSD: xbdback_xenb
 /* Need to alloc one extra page to account for possible mapping offset */
 #define VBD_VA_SIZE	(MAXPHYS + PAGE_SIZE)
 
-struct xbdback_request;
 struct xbdback_io;
-struct xbdback_fragment;
 struct xbdback_instance;
 
 /*
@@ -98,8 +96,8 @@ typedef enum {WAITING, RUN, DISCONNECTIN
  * callback), the return value of a continuation can be set to NULL. In that
  * case, the thread will go back to sleeping and wait for the proper
  * condition before it starts processing requests again from where it left.
- * Continuation state is "stored" in the xbdback instance (xbdi_cont and
- * xbdi_cont_aux), and should only be manipulated by the instance thread.
+ * Continuation state is "stored" in the xbdback instance (xbdi_cont),
+ * and should only be manipulated by the instance thread.
  *
  * As xbdback(4) has to handle different sort of asynchronous events (Xen
  * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
@@ -109,36 +107,23 @@ typedef enum {WAITING, RUN, DISCONNECTIN
  * Here's how the call graph is supposed to be for a single I/O:
  *
  * xbdback_co_main()
- *        |
- *        |        --> xbdback_co_cache_doflush() or NULL
- *        |        |
- *        |        - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
- *        |        |                             |                    |
- *        |        |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
+ *        |        --> xbdback_co_cache_flush()
+ *        |        |    |
+ *        |        |    -> xbdback_co_cache_doflush() or NULL
+ *        |        |        |
+ *        |        |        -> xbdback_co_do_io()
 * xbdback_co_main_loop()-|
- *        |        |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
- *        |        |                          |                        |
- *        |        -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
+ *        |        |-> xbdback_co_main_done2() or NULL
 *        |        |
- *        |        --> xbdback_co_main() or NULL
+ *        |        --> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
- * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
- *        |                |                           |
- *  -> xbdback_co_io_loop()----|      <- xbdback_co_do_io() <--
- *  |      |       |           |                              |
- *  |      |       |           |----------> xbdback_co_io_gotio()
- *  |      |       |                              |
- *  |      |  xbdback_co_main_incr()              |
- *  |      |       |                              |
- *  |      |  xbdback_co_main_loop()              |
- *  |      |                                      |
- *  |  xbdback_co_io_gotio2() <-----------|
- *  |      |       |
- *  |      |       |----------> xbdback_co_io_gotfrag()
- *  |      |                          |
- *  -- xbdback_co_io_gotfrag2() <---------|
+ * xbdback_co_io_gotio() -> xbdback_map_shm()
+ *        |                   |
+ *        |                   xbdback_co_main_incr() -> xbdback_co_main_loop()
+ *        |
+ * xbdback_co_do_io()
 *        |
 * xbdback_co_main_incr() -> xbdback_co_main_loop()
 */
@@ -192,17 +177,12 @@ struct xbdback_instance {
 	 * continuation-ness (see above).
 	 */
 	RING_IDX xbdi_req_prod; /* limit on request indices */
-	xbdback_cont_t xbdi_cont, xbdi_cont_aux;
+	xbdback_cont_t xbdi_cont;
 	/* _request state: track requests fetched from ring */
 	struct xbdback_request *xbdi_req; /* if NULL, ignore following */
 	blkif_request_t xbdi_xen_req;
-	int xbdi_segno;
 	/* _io state: I/O associated to this instance */
-	struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
-	daddr_t xbdi_next_sector;
-	uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
-	uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */
-	grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
+	struct xbdback_io *xbdi_io;
 	/* other state */
 	int xbdi_same_page; /* are we merging two segments on the same page? */
 	uint xbdi_pendingreqs; /* number of I/O in fly */
@@ -223,20 +203,6 @@ static SLIST_HEAD(, xbdback_instance) xb
 static kmutex_t xbdback_lock;
 
 /*
- * For each request from a guest, a xbdback_request is allocated from
- * a pool. This will describe the request until completion. The
- * request may require multiple IO operations to perform, so the
- * per-IO information is not stored here.
- */
-struct xbdback_request {
-	struct xbdback_instance *rq_xbdi; /* our xbd instance */
-	uint64_t rq_id;
-	int rq_iocount; /* reference count; or, number of outstanding I/O's */
-	int rq_ioerrs;
-	uint8_t rq_operation;
-};
-
-/*
  * For each I/O operation associated with one of those requests, an
  * xbdback_io is allocated from a pool. It may correspond to multiple
  * Xen disk requests, or parts of them, if several arrive at once that
@@ -246,56 +212,35 @@ struct xbdback_io {
 	/* The instance pointer is duplicated for convenience. */
 	struct xbdback_instance *xio_xbdi; /* our xbd instance */
 	uint8_t xio_operation;
+	uint64_t xio_id;
 	union {
 		struct {
 			struct buf xio_buf; /* our I/O */
-			/* xbd requests involved */
-			SLIST_HEAD(, xbdback_fragment) xio_rq;
 			/* the virtual address to map the request at */
 			vaddr_t xio_vaddr;
 			struct xbdback_va *xio_xv;
+			vaddr_t xio_start_offset; /* I/O start offset */
 			/* grants to map */
 			grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
 			/* grants release */
 			grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
 			uint16_t xio_nrma; /* number of guest pages */
-			uint16_t xio_mapped; /* == 1: grants are mapped */
 		} xio_rw;
-		uint64_t xio_flush_id;
 	} u;
 };
 #define xio_buf		u.xio_rw.xio_buf
-#define xio_rq		u.xio_rw.xio_rq
 #define xio_vaddr	u.xio_rw.xio_vaddr
+#define xio_start_offset	u.xio_rw.xio_start_offset
 #define xio_xv		u.xio_rw.xio_xv
 #define xio_gref	u.xio_rw.xio_gref
 #define xio_gh		u.xio_rw.xio_gh
 #define xio_nrma	u.xio_rw.xio_nrma
-#define xio_mapped	u.xio_rw.xio_mapped
-
-#define xio_flush_id	u.xio_flush_id
-
-/*
- * Rather than having the xbdback_io keep an array of the
- * xbdback_requests involved, since the actual number will probably be
- * small but might be as large as BLKIF_RING_SIZE, use a list. This
- * would be threaded through xbdback_request, but one of them might be
- * part of multiple I/O's, alas.
- */
-struct xbdback_fragment {
-	struct xbdback_request *car;
-	SLIST_ENTRY(xbdback_fragment) cdr;
-};
-
 /*
  * Pools to manage the chain of block requests and I/Os fragments
  * submitted by frontend.
  */
-/* XXXSMP */
-static struct xbdback_pool {
-	struct pool_cache pc;
-	struct timeval last_warning;
-} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
+static struct pool_cache xbdback_io_pool;
 
 /* Interval between reports of I/O errors from frontend */
 static const struct timeval xbdback_err_intvl = { 1, 0 };
@@ -320,22 +265,14 @@ static bool xbdif_lookup(domid_t, uint32
 static void *xbdback_co_main(struct xbdback_instance *, void *);
 static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
 static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
-static void *xbdback_co_main_done(struct xbdback_instance *, void *);
 static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
 
 static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
-static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
 static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
 
 static void *xbdback_co_io(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
-static void *xbdback_co_io_loop(struct xbdback_instance *, void *);
 static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
-static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
 
-static void *xbdback_co_map_io(struct xbdback_instance *, void *);
 static void *xbdback_co_do_io(struct xbdback_instance *, void *);
 
 static void xbdback_io_error(struct xbdback_io *, int);
@@ -345,9 +282,9 @@ static void xbdback_send_reply(struct xb
 static void *xbdback_map_shm(struct xbdback_io *);
 static void xbdback_unmap_shm(struct xbdback_io *);
 
-static void *xbdback_pool_get(struct xbdback_pool *,
+static void *xbdback_pool_get(struct pool_cache *,
 	struct xbdback_instance *);
-static void xbdback_pool_put(struct xbdback_pool *, void *);
+static void xbdback_pool_put(struct pool_cache *, void *);
 static void xbdback_thread(void *);
 static void xbdback_wakeup_thread(struct xbdback_instance *);
 static void xbdback_trampoline(struct xbdback_instance *, void *);
@@ -369,21 +306,12 @@ xbdbackattach(int n)
 	SLIST_INIT(&xbdback_instances);
 	mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
 
-	pool_cache_bootstrap(&xbdback_request_pool.pc,
-	    sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
-	    IPL_SOFTBIO, NULL, NULL, NULL);
-	pool_cache_bootstrap(&xbdback_io_pool.pc,
+	pool_cache_bootstrap(&xbdback_io_pool,
 	    sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
 	    IPL_SOFTBIO, NULL, NULL, NULL);
-	pool_cache_bootstrap(&xbdback_fragment_pool.pc,
-	    sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
-	    IPL_SOFTBIO, NULL, NULL, NULL);
 
 	/* we allocate enough to handle a whole ring at once */
-	pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE);
-	pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE);
-	pool_prime(&xbdback_fragment_pool.pc.pc_pool,
-	    BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE);
+	pool_prime(&xbdback_io_pool.pc_pool, BLKIF_RING_SIZE);
 
 	xenbus_backend_register(&xbd_backend_driver);
 }
@@ -1074,7 +1002,8 @@ xbdback_co_main_loop(struct xbdback_inst
 			break;
 		}
 	} else {
-		xbdi->xbdi_cont = xbdback_co_main_done;
+		KASSERT(xbdi->xbdi_io == NULL);
+		xbdi->xbdi_cont = xbdback_co_main_done2;
 	}
 	return xbdi;
 }
@@ -1112,25 +1041,6 @@ xbdback_co_main_incr(struct xbdback_inst
 }
 
 /*
- * Ring processing is over. If there are any I/O still present for this
- * instance, handle them first.
- */
-static void *
-xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
-{
-	(void)obj;
-	if (xbdi->xbdi_io != NULL) {
-		KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
-		    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
-		xbdi->xbdi_cont = xbdback_co_map_io;
-		xbdi->xbdi_cont_aux = xbdback_co_main_done2;
-	} else {
-		xbdi->xbdi_cont = xbdback_co_main_done2;
-	}
-	return xbdi;
-}
-
-/*
  * Check for requests in the instance's ring. In case there are, start again
 * from the beginning. If not, stall.
 */
@@ -1139,6 +1049,7 @@ xbdback_co_main_done2(struct xbdback_ins
 {
 	int work_to_do;
 
+	KASSERT(xbdi->xbdi_io == NULL);
 	RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
 	if (work_to_do)
 		xbdi->xbdi_cont = xbdback_co_main;
@@ -1152,29 +1063,8 @@ xbdback_co_main_done2(struct xbdback_ins
  * Frontend requested a cache flush operation.
  */
 static void *
-xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
-{
-	(void)obj;
-
-	XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
-	if (xbdi->xbdi_io != NULL) {
-		/* Some I/Os are required for this instance. Process them. */
-		KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
-		    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
-		KASSERT(xbdi->xbdi_pendingreqs > 0);
-		xbdi->xbdi_cont = xbdback_co_map_io;
-		xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
-	} else {
-		xbdi->xbdi_cont = xbdback_co_cache_flush2;
-	}
-	return xbdi;
-}
-
-static void *
-xbdback_co_cache_flush2(struct xbdback_instance *xbdi, void *obj)
+xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused)
 {
-	(void)obj;
-	XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
 	if (xbdi->xbdi_pendingreqs > 0) {
 		/*
 		 * There are pending requests.
@@ -1198,7 +1088,7 @@ xbdback_co_cache_doflush(struct xbdback_
 	xbd_io = xbdi->xbdi_io = obj;
 	xbd_io->xio_xbdi = xbdi;
 	xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
-	xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
+	xbd_io->xio_id = xbdi->xbdi_xen_req.id;
 	xbdi->xbdi_cont = xbdback_co_do_io;
 	return xbdi;
 }
@@ -1240,8 +1130,6 @@ xbdback_co_io(struct xbdback_instance *x
 		}
 	}
 
-	xbdi->xbdi_segno = 0;
-
 	/* copy request segments */
 	switch(xbdi->xbdi_proto) {
 	case XBDIP_NATIVE:
@@ -1261,169 +1149,51 @@ xbdback_co_io(struct xbdback_instance *x
 		break;
 	}
 
-	xbdi->xbdi_cont = xbdback_co_io_gotreq;
-	return xbdback_pool_get(&xbdback_request_pool, xbdi);
+	KASSERT(xbdi->xbdi_io == NULL);
+	xbdi->xbdi_cont = xbdback_co_io_gotio;
+	return xbdback_pool_get(&xbdback_io_pool, xbdi);
 
  end:
 	xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
-	    xbdi->xbdi_xen_req.operation, error);
+	    xbdi->xbdi_xen_req.operation,
+	    (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
 	xbdi->xbdi_cont = xbdback_co_main_incr;
 	return xbdi;
 }
 
-/*
- * We have fetched segment requests from the ring. In case there are already
- * I/Os prepared for this instance, we can try coalescing the requests
- * with these I/Os.
- */
-static void *
-xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj)
-{
-	struct xbdback_request *xrq;
-
-	xrq = xbdi->xbdi_req = obj;
-
-	xrq->rq_xbdi = xbdi;
-	xrq->rq_iocount = 0;
-	xrq->rq_ioerrs = 0;
-	xrq->rq_id = xbdi->xbdi_xen_req.id;
-	xrq->rq_operation = xbdi->xbdi_xen_req.operation;
-	KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
-	    xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
-
-	/*
-	 * Request-level reasons not to coalesce: different device,
-	 * different op, or noncontiguous disk sectors (vs. previous
-	 * request handed to us).
-	 */
-	xbdi->xbdi_cont = xbdback_co_io_loop;
-	if (xbdi->xbdi_io != NULL) {
-		struct xbdback_request *last_req;
-		last_req = SLIST_FIRST(&xbdi->xbdi_io->xio_rq)->car;
-		XENPRINTF(("xbdback_io domain %d: hoping for sector %" PRIu64
-		    "; got %" PRIu64 "\n", xbdi->xbdi_domid,
-		    xbdi->xbdi_next_sector,
-		    xbdi->xbdi_xen_req.sector_number));
-		if ((xrq->rq_operation != last_req->rq_operation)
-		    || (xbdi->xbdi_xen_req.sector_number !=
-		    xbdi->xbdi_next_sector)) {
-			XENPRINTF(("xbdback_io domain %d: segment break\n",
-			    xbdi->xbdi_domid));
-			xbdi->xbdi_next_sector =
-			    xbdi->xbdi_xen_req.sector_number;
-			KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
-			    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
-			xbdi->xbdi_cont_aux = xbdback_co_io_loop;
-			xbdi->xbdi_cont = xbdback_co_map_io;
-		}
-	} else {
-		xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number;
-	}
-	return xbdi;
-}
-
-/* Handle coalescing of multiple segment requests into one I/O work */
-static void *
-xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj)
-{
-	(void)obj;
-	KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
-	    xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
-	if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) {
-		uint8_t this_fs, this_ls, last_ls;
-		grant_ref_t thisgrt;
-		/*
-		 * Segment-level reason to coalesce: handling full
-		 * pages, or adjacent sector ranges from the same page
-		 * (and yes, this latter does happen). But not if the
-		 * array of client pseudo-physical pages is full.
-		 */
-		this_fs = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].first_sect;
-		this_ls = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].last_sect;
-		thisgrt = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].gref;
-		XENPRINTF(("xbdback_io domain %d: "
-		    "first,last_sect[%d]=0%o,0%o\n",
-		    xbdi->xbdi_domid, xbdi->xbdi_segno,
-		    this_fs, this_ls));
-		last_ls = xbdi->xbdi_last_ls = xbdi->xbdi_this_ls;
-		xbdi->xbdi_this_fs = this_fs;
-		xbdi->xbdi_this_ls = this_ls;
-		xbdi->xbdi_thisgrt = thisgrt;
-		if (xbdi->xbdi_io != NULL) {
-			if (last_ls == VBD_MAXSECT
-			    && this_fs == 0
-			    && xbdi->xbdi_io->xio_nrma
-			    < XENSHM_MAX_PAGES_PER_REQUEST) {
-				xbdi->xbdi_same_page = 0;
-			} else if (last_ls + 1
-				       == this_fs
-#ifdef notyet
-			    && (last_fas & ~PAGE_MASK)
-			    == (this_fas & ~PAGE_MASK)
-#else
-			    && 0 /* can't know frame number yet */
-#endif
-			    ) {
-#ifdef DEBUG
-				if (ratecheck(&xbdi->xbdi_lastfragio_time,
-				    &xbdback_fragio_intvl))
-					printf("%s: domain is sending"
					    " excessively fragmented I/O\n",
-					    xbdi->xbdi_name);
-#endif
-				printf("xbdback_io: would maybe glue "
-				    "same page sec %d (%d->%d)\n",
-				    xbdi->xbdi_segno, this_fs, this_ls);
-				XENPRINTF(("xbdback_io domain %d: glue same "
-				    "page", xbdi->xbdi_domid));
-				panic("notyet!");
-				xbdi->xbdi_same_page = 1;
-			} else {
-				KASSERT(xbdi->xbdi_io->xio_operation ==
-				     BLKIF_OP_READ ||
-				     xbdi->xbdi_io->xio_operation ==
-				     BLKIF_OP_WRITE);
-				xbdi->xbdi_cont_aux = xbdback_co_io_loop;
-				xbdi->xbdi_cont = xbdback_co_map_io;
-				return xbdi;
-			}
-		} else
-			xbdi->xbdi_same_page = 0;
-
-		if (xbdi->xbdi_io == NULL) {
-			xbdi->xbdi_cont = xbdback_co_io_gotio;
-			return xbdback_pool_get(&xbdback_io_pool, xbdi);
-		} else {
-			xbdi->xbdi_cont = xbdback_co_io_gotio2;
-		}
-	} else {
-		/* done with the loop over segments; get next request */
-		xbdi->xbdi_cont = xbdback_co_main_incr;
-	}
-	return xbdi;
-}
-
 /* Prepare an I/O buffer for a xbdback instance */
 static void *
 xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
 {
 	struct xbdback_io *xbd_io;
-	vaddr_t start_offset; /* start offset in vm area */
 	int buf_flags;
+	size_t bcount;
+	blkif_request_t *req;
 
 	xbdi_get(xbdi);
 	atomic_inc_uint(&xbdi->xbdi_pendingreqs);
 
+	req = &xbdi->xbdi_xen_req;
 	xbd_io = xbdi->xbdi_io = obj;
+	memset(xbd_io, 0, sizeof(*xbd_io));
 	buf_init(&xbd_io->xio_buf);
 	xbd_io->xio_xbdi = xbdi;
-	SLIST_INIT(&xbd_io->xio_rq);
-	xbd_io->xio_nrma = 0;
-	xbd_io->xio_mapped = 0;
-	xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
+	xbd_io->xio_operation = req->operation;
+	xbd_io->xio_id = req->id;
 
-	start_offset = xbdi->xbdi_this_fs * VBD_BSIZE;
-	KASSERT(start_offset < PAGE_SIZE);
+	/* Process segments */
+	bcount = 0;
+	for (int i = 0; i < req->nr_segments; i++) {
+		xbd_io->xio_gref[i] = req->seg[i].gref;
+		bcount += (req->seg[i].last_sect - req->seg[i].first_sect + 1)
+			* VBD_BSIZE;
+	}
+	KASSERT(bcount <= MAXPHYS);
+	xbd_io->xio_nrma = req->nr_segments;
+
+	xbd_io->xio_start_offset = req->seg[0].first_sect * VBD_BSIZE;
+	KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
+	KASSERT(bcount + xbd_io->xio_start_offset < VBD_VA_SIZE);
 
 	if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
 		buf_flags = B_WRITE;
@@ -1439,97 +1209,11 @@ xbdback_co_io_gotio(struct xbdback_insta
 	xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
 	xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
 	xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
-	xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector;
-	xbd_io->xio_buf.b_bcount = 0;
-	xbd_io->xio_buf.b_data = (void *)start_offset;
+	xbd_io->xio_buf.b_blkno = req->sector_number;
+	xbd_io->xio_buf.b_bcount = bcount;
+	xbd_io->xio_buf.b_data = NULL;
 	xbd_io->xio_buf.b_private = xbd_io;
 
-	xbdi->xbdi_cont = xbdback_co_io_gotio2;
-	return xbdi;
-}
-
-/* Manage fragments */
-static void *
-xbdback_co_io_gotio2(struct xbdback_instance *xbdi, void *obj)
-{
-	(void)obj;
-	if (xbdi->xbdi_segno == 0 || SLIST_EMPTY(&xbdi->xbdi_io->xio_rq)) {
-		/* if this is the first segment of a new request */
-		/* or if it's the first segment of the io */
-		xbdi->xbdi_cont = xbdback_co_io_gotfrag;
-		return xbdback_pool_get(&xbdback_fragment_pool, xbdi);
-	}
-	xbdi->xbdi_cont = xbdback_co_io_gotfrag2;
-	return xbdi;
-}
-
-/* Prepare the instance for its first fragment */
-static void *
-xbdback_co_io_gotfrag(struct xbdback_instance *xbdi, void *obj)
-{
-	struct xbdback_fragment *xbd_fr;
-
-	xbd_fr = obj;
-	xbd_fr->car = xbdi->xbdi_req;
-	SLIST_INSERT_HEAD(&xbdi->xbdi_io->xio_rq, xbd_fr, cdr);
-	++xbdi->xbdi_req->rq_iocount;
-
-	xbdi->xbdi_cont = xbdback_co_io_gotfrag2;
-	return xbdi;
-}
-
-/* Last routine to manage segments fragments for one I/O */
-static void *
-xbdback_co_io_gotfrag2(struct xbdback_instance *xbdi, void *obj)
-{
-	struct xbdback_io *xbd_io;
-	int seg_size;
-	uint8_t this_fs, this_ls;
-
-	this_fs = xbdi->xbdi_this_fs;
-	this_ls = xbdi->xbdi_this_ls;
-	xbd_io = xbdi->xbdi_io;
-	seg_size = this_ls - this_fs + 1;
-
-	if (seg_size < 0) {
-		if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
-			printf("xbdback_io domain %d: negative-size request "
-			    "(%d %d)\n",
-			    xbdi->xbdi_domid, this_ls, this_fs);
-		}
-		xbdback_io_error(xbdi->xbdi_io, EINVAL);
-		xbdi->xbdi_io = NULL;
-		xbdi->xbdi_cont = xbdback_co_main_incr;
-		return xbdi;
-	}
-
-	if (!xbdi->xbdi_same_page) {
-		XENPRINTF(("xbdback_io domain %d: appending grant %u\n",
-		    xbdi->xbdi_domid, (u_int)xbdi->xbdi_thisgrt));
-		xbd_io->xio_gref[xbd_io->xio_nrma++] = xbdi->xbdi_thisgrt;
-	}
-
-	xbd_io->xio_buf.b_bcount += (daddr_t)(seg_size * VBD_BSIZE);
-	XENPRINTF(("xbdback_io domain %d: start sect %ld size %d\n",
-	    xbdi->xbdi_domid, (long)xbdi->xbdi_next_sector, seg_size));
-
-	/* Finally, the end of the segment loop! */
-	xbdi->xbdi_next_sector += seg_size;
-	++xbdi->xbdi_segno;
-	xbdi->xbdi_cont = xbdback_co_io_loop;
-	return xbdi;
-}
-
-/*
- * Map the different I/O requests in backend's VA space.
- */
-static void *
-xbdback_co_map_io(struct xbdback_instance *xbdi, void *obj)
-{
-	(void)obj;
-	XENPRINTF(("xbdback_io domain %d: flush sect %ld size %d ptr 0x%lx\n",
-	    xbdi->xbdi_domid, (long)xbdi->xbdi_io->xio_buf.b_blkno,
-	    (int)xbdi->xbdi_io->xio_buf.b_bcount, (long)xbdi->xbdi_io));
 	xbdi->xbdi_cont = xbdback_co_do_io;
 	return xbdback_map_shm(xbdi->xbdi_io);
 }
@@ -1549,7 +1233,6 @@ static void *
 xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
 {
 	struct xbdback_io *xbd_io = xbdi->xbdi_io;
-	vaddr_t start_offset;
 	int nsegs __diagused;
 
 	switch (xbd_io->xio_operation) {
@@ -1569,7 +1252,7 @@ xbdback_co_do_io(struct xbdback_instance
 			error = BLKIF_RSP_ERROR;
 		} else
 			error = BLKIF_RSP_OKAY;
-		xbdback_send_reply(xbdi, xbd_io->xio_flush_id,
+		xbdback_send_reply(xbdi, xbd_io->xio_id,
 		    xbd_io->xio_operation, error);
 		xbdback_pool_put(&xbdback_io_pool, xbd_io);
 		xbdi_put(xbdi);
@@ -1579,30 +1262,18 @@ xbdback_co_do_io(struct xbdback_instance
 	}
 	case BLKIF_OP_READ:
 	case BLKIF_OP_WRITE:
-		start_offset = (vaddr_t)xbd_io->xio_buf.b_data;
-		KASSERT(xbd_io->xio_buf.b_bcount + start_offset < VBD_VA_SIZE);
 		xbd_io->xio_buf.b_data = (void *)
-		    (start_offset + xbd_io->xio_vaddr);
-#ifdef DIAGNOSTIC
-		nsegs = round_page(start_offset + xbd_io->xio_buf.b_bcount)
-		    >> PAGE_SHIFT;
-		if (nsegs > xbd_io->xio_nrma) {
-			printf("xbdback_co_do_io: vaddr %#" PRIxVADDR
-			    " bcount %#x doesn't fit in %d pages\n",
-			    start_offset, xbd_io->xio_buf.b_bcount,
-			    xbd_io->xio_nrma);
-			panic("xbdback_co_do_io: not enough pages");
-		}
-#endif
+		    (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
+
 		if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
 			mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
 			xbd_io->xio_buf.b_vp->v_numoutput++;
 			mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
 		}
-		bdev_strategy(&xbd_io->xio_buf);
 		/* will call xbdback_iodone() asynchronously when done */
+		bdev_strategy(&xbd_io->xio_buf);
 		xbdi->xbdi_io = NULL;
-		xbdi->xbdi_cont = xbdi->xbdi_cont_aux;
+		xbdi->xbdi_cont = xbdback_co_main_incr;
 		return xbdi;
 	default:
 		/* Should never happen */
@@ -1622,7 +1293,7 @@ xbdback_iodone(struct buf *bp)
 {
 	struct xbdback_io *xbd_io;
 	struct xbdback_instance *xbdi;
-	int errp;
+	int status;
 
 	KERNEL_LOCK(1, NULL);		/* XXXSMP */
 
@@ -1632,49 +1303,19 @@ xbdback_iodone(struct buf *bp)
 	XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
 	    xbdi->xbdi_domid, (long)xbd_io));
 
-	if (xbd_io->xio_mapped == 1)
+	KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
+	if (xbd_io->xio_xv != NULL)
 		xbdback_unmap_shm(xbd_io);
 
 	if (bp->b_error != 0) {
 		printf("xbd IO domain %d: error %d\n",
 		    xbdi->xbdi_domid, bp->b_error);
-		errp = 1;
+		status = BLKIF_RSP_ERROR;
 	} else
-		errp = 0;
+		status = BLKIF_RSP_OKAY;
 
-	/* for each constituent xbd request */
-	while(!SLIST_EMPTY(&xbd_io->xio_rq)) {
-		struct xbdback_fragment *xbd_fr;
-		struct xbdback_request *xbd_req;
-		struct xbdback_instance *rxbdi __diagused;
-		int error;
-
-		xbd_fr = SLIST_FIRST(&xbd_io->xio_rq);
-		xbd_req = xbd_fr->car;
-		SLIST_REMOVE_HEAD(&xbd_io->xio_rq, cdr);
-		xbdback_pool_put(&xbdback_fragment_pool, xbd_fr);
-
-		if (errp)
-			++xbd_req->rq_ioerrs;
-
-		/* finalize it only if this was its last I/O */
-		if (--xbd_req->rq_iocount > 0)
-			continue;
-
-		rxbdi = xbd_req->rq_xbdi;
-		KASSERT(xbdi == rxbdi);
-
-		error = xbd_req->rq_ioerrs > 0
-		    ? BLKIF_RSP_ERROR
-		    : BLKIF_RSP_OKAY;
-
-		XENPRINTF(("xbdback_io domain %d: end request %"PRIu64
-		    " error=%d\n",
-		    xbdi->xbdi_domid, xbd_req->rq_id, error));
-		xbdback_send_reply(xbdi, xbd_req->rq_id,
-		    xbd_req->rq_operation, error);
-		xbdback_pool_put(&xbdback_request_pool, xbd_req);
-	}
+	xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status);
+
 	xbdi_put(xbdi);
 	atomic_dec_uint(&xbdi->xbdi_pendingreqs);
 	buf_destroy(&xbd_io->xio_buf);
@@ -1759,7 +1400,6 @@ static void *
 xbdback_map_shm(struct xbdback_io *xbd_io)
 {
 	struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
-	struct xbdback_request *xbd_rq;
 	int error, s;
 
 #ifdef XENDEBUG_VBD
@@ -1770,11 +1410,7 @@ xbdback_map_shm(struct xbdback_io *xbd_i
 	}
 #endif
 
-	KASSERT(xbd_io->xio_mapped == 0);
-
 	s = splvm(); /* XXXSMP */
-	xbd_rq = SLIST_FIRST(&xbd_io->xio_rq)->car;
-
 	xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
 	KASSERT(xbd_io->xio_xv != NULL);
 	SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
@@ -1783,7 +1419,7 @@ xbdback_map_shm(struct xbdback_io *xbd_i
 
 	error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid,
 	    xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
-	    (xbd_rq->rq_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
+	    (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
 
 	switch(error) {
 	case 0:
@@ -1794,7 +1430,6 @@ xbdback_map_shm(struct xbdback_io *xbd_i
 		}
 		printf("\n");
 #endif
-		xbd_io->xio_mapped = 1;
 		return xbdi;
 	default:
 		if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
@@ -1825,8 +1460,7 @@ xbdback_unmap_shm(struct xbdback_io *xbd
 	printf("\n");
 #endif
 
-	KASSERT(xbd_io->xio_mapped == 1);
-	xbd_io->xio_mapped = 0;
+	KASSERT(xbd_io->xio_xv != NULL);
 	xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma,
 	    xbd_io->xio_gh);
 	SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
@@ -1836,17 +1470,17 @@ xbdback_unmap_shm(struct xbdback_io *xbd
 
 /* Obtain memory from a pool */
 static void *
-xbdback_pool_get(struct xbdback_pool *pp,
+xbdback_pool_get(struct pool_cache *pc,
     struct xbdback_instance *xbdi)
 {
-	return pool_cache_get(&pp->pc, PR_WAITOK);
+	return pool_cache_get(pc, PR_WAITOK);
 }
 
 /* Restore memory to a pool */
 static void
-xbdback_pool_put(struct xbdback_pool *pp, void *item)
+xbdback_pool_put(struct pool_cache *pc, void *item)
 {
-	pool_cache_put(&pp->pc, item);
+	pool_cache_put(pc, item);
 }
 
 /*
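For readers tracing the simplified call graph in the diff above: each
xbdback_co_*() routine is a continuation that stores its successor in
xbdi_cont and returns the object to feed to it, and a NULL return parks
the instance thread until resources become available again. A minimal
sketch of that dispatch loop follows (illustrative types and names only;
the file's own equivalent is xbdback_trampoline()):

#include <stddef.h>
#include <stdio.h>

struct inst;
typedef void *(*cont_t)(struct inst *, void *);

struct inst {
	cont_t cont;			/* next continuation (xbdi_cont) */
};

static void *
step_two(struct inst *xi, void *obj)
{
	xi->cont = NULL;		/* end of the chain */
	printf("step_two(%s)\n", (char *)obj);
	return obj;
}

static void *
step_one(struct inst *xi, void *obj)
{
	xi->cont = step_two;		/* each step names its successor */
	printf("step_one(%s)\n", (char *)obj);
	return obj;
}

/* Run continuations until one returns NULL or the chain ends. */
static void
trampoline(struct inst *xi, void *obj)
{
	while (obj != NULL && xi->cont != NULL) {
		cont_t cont = xi->cont;

		obj = (*cont)(xi, obj);
	}
}

int
main(void)
{
	struct inst xi = { .cont = step_one };

	trampoline(&xi, "req");
	return 0;
}

With xbdi_cont_aux gone, each continuation in the real file now names
exactly one successor, which is what lets the call graph collapse to the
straight-line form shown in the updated comment.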