Module Name: src Committed By: bouyer Date: Sun May 25 19:23:49 UTC 2014
Modified Files: src/sys/arch/xen/xen: xbd_xenbus.c src/sys/dev: cgd.c dksubr.c dkvar.h Log Message: As proposed in https://mail-index.netbsd.org/tech-kern/2014/05/21/msg017098.html remove dk_start() and dk_iodone() from dksubr.c and move the related code to the underlying driver. This increase complexity only marginally: the underlying drivers have to do the while() loop themselves, but this can now be done properly with bufq_peek()/bufq_get(), removing the buffer from the queue at the right time. This handle both the recursion and reordering issues (the reordering issue is described here: https://mail-index.netbsd.org/tech-kern/2014/05/19/msg017089.html the recursion isssue is PR #25240). Difference with the patch posted to tech-kern@: KASSERT() that the buffer we remove with bufq_get() is the same as the one we bufq_peek()'d just before. Hopefully this will allow more disk drivers to use dksubr.c To generate a diff of this commit: cvs rdiff -u -r1.62 -r1.63 src/sys/arch/xen/xen/xbd_xenbus.c cvs rdiff -u -r1.86 -r1.87 src/sys/dev/cgd.c cvs rdiff -u -r1.49 -r1.50 src/sys/dev/dksubr.c cvs rdiff -u -r1.18 -r1.19 src/sys/dev/dkvar.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/xen/xen/xbd_xenbus.c diff -u src/sys/arch/xen/xen/xbd_xenbus.c:1.62 src/sys/arch/xen/xen/xbd_xenbus.c:1.63 --- src/sys/arch/xen/xen/xbd_xenbus.c:1.62 Sun Mar 16 05:20:26 2014 +++ src/sys/arch/xen/xen/xbd_xenbus.c Sun May 25 19:23:49 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: xbd_xenbus.c,v 1.62 2014/03/16 05:20:26 dholland Exp $ */ +/* $NetBSD: xbd_xenbus.c,v 1.63 2014/05/25 19:23:49 bouyer Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -50,7 +50,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.62 2014/03/16 05:20:26 dholland Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.63 2014/05/25 19:23:49 bouyer Exp $"); #include "opt_xen.h" @@ -168,7 +168,7 @@ static bool xbd_xenbus_suspend(device_t, static bool xbd_xenbus_resume(device_t, const pmf_qual_t *); static int xbd_handler(void *); -static int xbdstart(struct dk_softc *, struct buf *); +static void xbdstart(struct dk_softc *); static void xbd_backend_changed(void *, XenbusState); static void xbd_connect(struct xbd_xenbus_softc *); @@ -722,9 +722,10 @@ done: if (more_to_do) goto again; - dk_iodone(di, &sc->sc_dksc); if (sc->sc_xbdreq_wait) wakeup(&sc->sc_xbdreq_wait); + else + xbdstart(&sc->sc_dksc); return 1; } @@ -926,133 +927,155 @@ xbddump(dev_t dev, daddr_t blkno, void * return dk_dump(di, &sc->sc_dksc, dev, blkno, va, size); } -static int -xbdstart(struct dk_softc *dksc, struct buf *bp) +static void +xbdstart(struct dk_softc *dksc) { struct xbd_xenbus_softc *sc = (struct xbd_xenbus_softc *)dksc; + struct buf *bp; +#ifdef DIAGNOSTIC + struct buf *qbp; +#endif struct xbd_req *xbdreq; blkif_request_t *req; - int ret = 0, runqueue = 1; size_t bcount, off; paddr_t ma; vaddr_t va; int nsects, nbytes, seg; int notify; - DPRINTF(("xbdstart(%p): b_bcount = %ld\n", bp, (long)bp->b_bcount)); + while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) { - if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { - bp->b_error = EIO; - goto err; - } + DPRINTF(("xbdstart(%p): b_bcount = %ld\n", + bp, (long)bp->b_bcount)); - if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { - /* invalid block number */ - bp->b_error = EINVAL; - goto err; - } + if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { + bp->b_error = EIO; + goto err; + } - if (bp->b_rawblkno == sc->sc_xbdsize) { - /* at end of disk; return short read */ - bp->b_resid = bp->b_bcount; - biodone(bp); - return 0; - } + if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { + /* invalid block number */ + bp->b_error = EINVAL; + goto err; + } + + if (bp->b_rawblkno == sc->sc_xbdsize) { + /* at end of disk; return short read */ + bp->b_resid = bp->b_bcount; +#ifdef DIAGNOSTIC + qbp = bufq_get(dksc->sc_bufq); + KASSERT(bp == qbp); +#else + (void)bufq_get(dksc->sc_bufq); +#endif + biodone(bp); + continue; + } - if (__predict_false(sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) { - /* device is suspended, do not consume buffer */ - DPRINTF(("%s: (xbdstart) device suspended\n", - device_xname(sc->sc_dksc.sc_dev))); - ret = -1; - goto out; - } - - if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) { - DPRINTF(("xbdstart: ring_full\n")); - ret = -1; - goto out; - } - - xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head); - if (__predict_false(xbdreq == NULL)) { - DPRINTF(("xbdstart: no req\n")); - ret = -1; /* dk_start should not remove bp from queue */ - goto out; - } - - xbdreq->req_bp = bp; - xbdreq->req_data = bp->b_data; - if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) { - if (__predict_false(xbd_map_align(xbdreq) != 0)) { - ret = -1; + if (__predict_false( + sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) { + /* device is suspended, do not consume buffer */ + DPRINTF(("%s: (xbdstart) device suspended\n", + device_xname(sc->sc_dksc.sc_dev))); goto out; } - } - /* now we're sure we'll send this buf */ - disk_busy(&dksc->sc_dkdev); - SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); - req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt); - req->id = xbdreq->req_id; - req->operation = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; - req->sector_number = bp->b_rawblkno; - req->handle = sc->sc_handle; - - va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK; - off = (vaddr_t)xbdreq->req_data & PAGE_MASK; - if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >= sc->sc_xbdsize) { - bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE; - bp->b_resid = bp->b_bcount - bcount; - } else { - bcount = bp->b_bcount; - bp->b_resid = 0; - } - if (bcount > XBD_MAX_XFER) { - bp->b_resid += bcount - XBD_MAX_XFER; - bcount = XBD_MAX_XFER; - } - for (seg = 0; bcount > 0;) { - pmap_extract_ma(pmap_kernel(), va, &ma); - KASSERT((ma & (XEN_BSIZE - 1)) == 0); - if (bcount > PAGE_SIZE - off) - nbytes = PAGE_SIZE - off; - else - nbytes = bcount; - nsects = nbytes >> XEN_BSHIFT; - req->seg[seg].first_sect = off >> XEN_BSHIFT; - req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1; - KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect); - KASSERT(req->seg[seg].last_sect < 8); - if (__predict_false(xengnt_grant_access( - sc->sc_xbusd->xbusd_otherend_id, ma, - (bp->b_flags & B_READ) == 0, &xbdreq->req_gntref[seg]))) - panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! */ - req->seg[seg].gref = xbdreq->req_gntref[seg]; - seg++; - KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); - va += PAGE_SIZE; - off = 0; - bcount -= nbytes; - } - xbdreq->req_nr_segments = req->nr_segments = seg; - sc->sc_ring.req_prod_pvt++; - if (bufq_peek(sc->sc_dksc.sc_bufq)) { - /* we will be called again; don't notify guest yet */ - runqueue = 0; + + if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) { + DPRINTF(("xbdstart: ring_full\n")); + goto out; + } + + xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head); + if (__predict_false(xbdreq == NULL)) { + DPRINTF(("xbdstart: no req\n")); + goto out; + } + + xbdreq->req_bp = bp; + xbdreq->req_data = bp->b_data; + if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) { + if (__predict_false(xbd_map_align(xbdreq) != 0)) { + DPRINTF(("xbdstart: no align\n")); + goto out; + } + } + /* now we're sure we'll send this buf */ +#ifdef DIAGNOSTIC + qbp = bufq_get(dksc->sc_bufq); + KASSERT(bp == qbp); +#else + (void)bufq_get(dksc->sc_bufq); +#endif + disk_busy(&dksc->sc_dkdev); + + SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); + req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt); + req->id = xbdreq->req_id; + req->operation = + bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; + req->sector_number = bp->b_rawblkno; + req->handle = sc->sc_handle; + + va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK; + off = (vaddr_t)xbdreq->req_data & PAGE_MASK; + if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >= + sc->sc_xbdsize) { + bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE; + bp->b_resid = bp->b_bcount - bcount; + } else { + bcount = bp->b_bcount; + bp->b_resid = 0; + } + if (bcount > XBD_MAX_XFER) { + bp->b_resid += bcount - XBD_MAX_XFER; + bcount = XBD_MAX_XFER; + } + for (seg = 0; bcount > 0;) { + pmap_extract_ma(pmap_kernel(), va, &ma); + KASSERT((ma & (XEN_BSIZE - 1)) == 0); + if (bcount > PAGE_SIZE - off) + nbytes = PAGE_SIZE - off; + else + nbytes = bcount; + nsects = nbytes >> XEN_BSHIFT; + req->seg[seg].first_sect = off >> XEN_BSHIFT; + req->seg[seg].last_sect = + (off >> XEN_BSHIFT) + nsects - 1; + KASSERT(req->seg[seg].first_sect <= + req->seg[seg].last_sect); + KASSERT(req->seg[seg].last_sect < 8); + if (__predict_false(xengnt_grant_access( + sc->sc_xbusd->xbusd_otherend_id, ma, + (bp->b_flags & B_READ) == 0, + &xbdreq->req_gntref[seg]))) + panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! */ + req->seg[seg].gref = xbdreq->req_gntref[seg]; + seg++; + KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); + va += PAGE_SIZE; + off = 0; + bcount -= nbytes; + } + xbdreq->req_nr_segments = req->nr_segments = seg; + sc->sc_ring.req_prod_pvt++; } out: - if (runqueue) { - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); - if (notify) - hypervisor_notify_via_evtchn(sc->sc_evtchn); - } - - return ret; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); + if (notify) + hypervisor_notify_via_evtchn(sc->sc_evtchn); + return; err: +#ifdef DIAGNOSTIC + qbp = bufq_get(dksc->sc_bufq); + KASSERT(bp == qbp); +#else + (void)bufq_get(dksc->sc_bufq); +#endif bp->b_resid = bp->b_bcount; biodone(bp); - return 0; + return; } static int Index: src/sys/dev/cgd.c diff -u src/sys/dev/cgd.c:1.86 src/sys/dev/cgd.c:1.87 --- src/sys/dev/cgd.c:1.86 Sun May 25 19:15:50 2014 +++ src/sys/dev/cgd.c Sun May 25 19:23:49 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: cgd.c,v 1.86 2014/05/25 19:15:50 christos Exp $ */ +/* $NetBSD: cgd.c,v 1.87 2014/05/25 19:23:49 bouyer Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.86 2014/05/25 19:15:50 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cgd.c,v 1.87 2014/05/25 19:23:49 bouyer Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -100,7 +100,7 @@ static int cgd_destroy(device_t); /* Internal Functions */ -static int cgdstart(struct dk_softc *, struct buf *); +static void cgdstart(struct dk_softc *); static void cgdiodone(struct buf *); static int cgd_ioctl_set(struct cgd_softc *, void *, struct lwp *); @@ -382,69 +382,78 @@ cgd_putdata(struct dk_softc *dksc, void } } -static int -cgdstart(struct dk_softc *dksc, struct buf *bp) +static void +cgdstart(struct dk_softc *dksc) { struct cgd_softc *cs = (struct cgd_softc *)dksc; - struct buf *nbp; + struct buf *bp, *nbp; +#ifdef DIAGNOSTIC + struct buf *qbp; +#endif void * addr; void * newaddr; daddr_t bn; struct vnode *vp; - DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp)); - disk_busy(&dksc->sc_dkdev); /* XXX: put in dksubr.c */ - - bn = bp->b_rawblkno; + while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) { - /* - * We attempt to allocate all of our resources up front, so that - * we can fail quickly if they are unavailable. - */ + DPRINTF_FOLLOW(("cgdstart(%p, %p)\n", dksc, bp)); + disk_busy(&dksc->sc_dkdev); - nbp = getiobuf(cs->sc_tvn, false); - if (nbp == NULL) { - disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ)); - return -1; - } + bn = bp->b_rawblkno; - /* - * If we are writing, then we need to encrypt the outgoing - * block into a new block of memory. If we fail, then we - * return an error and let the dksubr framework deal with it. - */ - newaddr = addr = bp->b_data; - if ((bp->b_flags & B_READ) == 0) { - newaddr = cgd_getdata(dksc, bp->b_bcount); - if (!newaddr) { - putiobuf(nbp); + /* + * We attempt to allocate all of our resources up front, so that + * we can fail quickly if they are unavailable. + */ + nbp = getiobuf(cs->sc_tvn, false); + if (nbp == NULL) { disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ)); - return -1; + break; } - cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn, - DEV_BSIZE, CGD_CIPHER_ENCRYPT); - } - nbp->b_data = newaddr; - nbp->b_flags = bp->b_flags; - nbp->b_oflags = bp->b_oflags; - nbp->b_cflags = bp->b_cflags; - nbp->b_iodone = cgdiodone; - nbp->b_proc = bp->b_proc; - nbp->b_blkno = bn; - nbp->b_bcount = bp->b_bcount; - nbp->b_private = bp; - - BIO_COPYPRIO(nbp, bp); - - if ((nbp->b_flags & B_READ) == 0) { - vp = nbp->b_vp; - mutex_enter(vp->v_interlock); - vp->v_numoutput++; - mutex_exit(vp->v_interlock); + /* + * If we are writing, then we need to encrypt the outgoing + * block into a new block of memory. + */ + newaddr = addr = bp->b_data; + if ((bp->b_flags & B_READ) == 0) { + newaddr = cgd_getdata(dksc, bp->b_bcount); + if (!newaddr) { + putiobuf(nbp); + disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ)); + break; + } + cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn, + DEV_BSIZE, CGD_CIPHER_ENCRYPT); + } + /* we now have all needed resources to process this buf */ +#ifdef DIAGNOSTIC + qbp = bufq_get(dksc->sc_bufq); + KASSERT(bp == qbp); +#else + (void)bufq_get(dksc->sc_bufq); +#endif + nbp->b_data = newaddr; + nbp->b_flags = bp->b_flags; + nbp->b_oflags = bp->b_oflags; + nbp->b_cflags = bp->b_cflags; + nbp->b_iodone = cgdiodone; + nbp->b_proc = bp->b_proc; + nbp->b_blkno = bn; + nbp->b_bcount = bp->b_bcount; + nbp->b_private = bp; + + BIO_COPYPRIO(nbp, bp); + + if ((nbp->b_flags & B_READ) == 0) { + vp = nbp->b_vp; + mutex_enter(vp->v_interlock); + vp->v_numoutput++; + mutex_exit(vp->v_interlock); + } + VOP_STRATEGY(cs->sc_tvn, nbp); } - VOP_STRATEGY(cs->sc_tvn, nbp); - return 0; } static void @@ -493,7 +502,7 @@ cgdiodone(struct buf *nbp) disk_unbusy(&dksc->sc_dkdev, obp->b_bcount - obp->b_resid, (obp->b_flags & B_READ)); biodone(obp); - dk_iodone(di, dksc); + cgdstart(dksc); splx(s); } Index: src/sys/dev/dksubr.c diff -u src/sys/dev/dksubr.c:1.49 src/sys/dev/dksubr.c:1.50 --- src/sys/dev/dksubr.c:1.49 Sat Dec 28 19:25:07 2013 +++ src/sys/dev/dksubr.c Sun May 25 19:23:49 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: dksubr.c,v 1.49 2013/12/28 19:25:07 pgoyette Exp $ */ +/* $NetBSD: dksubr.c,v 1.50 2014/05/25 19:23:49 bouyer Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 1999, 2002, 2008 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.49 2013/12/28 19:25:07 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: dksubr.c,v 1.50 2014/05/25 19:23:49 bouyer Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -224,37 +224,11 @@ dk_strategy(struct dk_intf *di, struct d */ s = splbio(); bufq_put(dksc->sc_bufq, bp); - dk_start(di, dksc); + di->di_diskstart(dksc); splx(s); return; } -void -dk_start(struct dk_intf *di, struct dk_softc *dksc) -{ - struct buf *bp; - - DPRINTF_FOLLOW(("dk_start(%s, %p)\n", di->di_dkname, dksc)); - - /* Process the work queue */ - while ((bp = bufq_get(dksc->sc_bufq)) != NULL) { - if (di->di_diskstart(dksc, bp) != 0) { - bufq_put(dksc->sc_bufq, bp); - break; - } - } -} - -void -dk_iodone(struct dk_intf *di, struct dk_softc *dksc) -{ - - DPRINTF_FOLLOW(("dk_iodone(%s, %p)\n", di->di_dkname, dksc)); - - /* We kick the queue in case we are able to get more work done */ - dk_start(di, dksc); -} - int dk_size(struct dk_intf *di, struct dk_softc *dksc, dev_t dev) { Index: src/sys/dev/dkvar.h diff -u src/sys/dev/dkvar.h:1.18 src/sys/dev/dkvar.h:1.19 --- src/sys/dev/dkvar.h:1.18 Wed May 29 23:25:55 2013 +++ src/sys/dev/dkvar.h Sun May 25 19:23:49 2014 @@ -1,4 +1,4 @@ -/* $NetBSD: dkvar.h,v 1.18 2013/05/29 23:25:55 christos Exp $ */ +/* $NetBSD: dkvar.h,v 1.19 2014/05/25 19:23:49 bouyer Exp $ */ /*- * Copyright (c) 2002 The NetBSD Foundation, Inc. @@ -74,7 +74,7 @@ struct dk_intf { int (*di_open)(dev_t, int, int, struct lwp *); int (*di_close)(dev_t, int, int, struct lwp *); void (*di_strategy)(struct buf *); - int (*di_diskstart)(struct dk_softc *, struct buf *); + void (*di_diskstart)(struct dk_softc *); }; #define DK_BUSY(_dksc, _pmask) \ @@ -93,8 +93,6 @@ int dk_open(struct dk_intf *, struct dk_ int dk_close(struct dk_intf *, struct dk_softc *, dev_t, int, int, struct lwp *); void dk_strategy(struct dk_intf *, struct dk_softc *, struct buf *); -void dk_start(struct dk_intf *, struct dk_softc *); -void dk_iodone(struct dk_intf *, struct dk_softc *); int dk_size(struct dk_intf *, struct dk_softc *, dev_t); int dk_ioctl(struct dk_intf *, struct dk_softc *, dev_t, u_long, void *, int, struct lwp *);