comments inline...
On 04/04/2011, at 9:01 AM, Kenneth R Westerback wrote:
> Works on my crypto volume. People with other volume types would be nice
> to hear from.
>
> .... Ken
>
> Index: softraid.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid.c,v
> retrieving revision 1.222
> diff -u -p -r1.222 softraid.c
> --- softraid.c 15 Mar 2011 13:29:41 -0000 1.222
> +++ softraid.c 3 Apr 2011 21:07:48 -0000
> @@ -1805,7 +1805,7 @@ sr_wu_alloc(struct sr_discipline *sd)
> for (i = 0; i < no_wu; i++) {
> wu = &sd->sd_wu[i];
> wu->swu_dis = sd;
> - sr_wu_put(wu);
> + sr_wu_put(sd, wu);
> }
>
> return (0);
> @@ -1833,17 +1833,15 @@ sr_wu_free(struct sr_discipline *sd)
> }
>
> void
> -sr_wu_put(struct sr_workunit *wu)
> +sr_wu_put(void *xsd, void *xwu)
> {
> - struct sr_discipline *sd = wu->swu_dis;
> + struct sr_discipline *sd = (struct sr_discipline *)xsd;
> + struct sr_workunit *wu = (struct sr_workunit *)xwu;
> struct sr_ccb *ccb;
> -
> int s;
>
> DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
>
> - s = splbio();
> -
> wu->swu_xs = NULL;
> wu->swu_state = SR_WU_FREE;
> wu->swu_ios_complete = 0;
> @@ -1864,9 +1862,12 @@ sr_wu_put(struct sr_workunit *wu)
> }
> TAILQ_INIT(&wu->swu_ccb);
>
> + mtx_enter(&sd->sd_wu_mtx);
> TAILQ_INSERT_TAIL(&sd->sd_wu_freeq, wu, swu_link);
> sd->sd_wu_pending--;
> + mtx_leave(&sd->sd_wu_mtx);
>
> + s = splbio();
> /* wake up sleepers */
> #ifdef DIAGNOSTIC
> if (sd->sd_wu_sleep < 0)
> @@ -1874,34 +1875,23 @@ sr_wu_put(struct sr_workunit *wu)
> #endif /* DIAGNOSTIC */
> if (sd->sd_wu_sleep)
> wakeup(&sd->sd_wu_sleep);
> -
> splx(s);
> }
>
> -struct sr_workunit *
> -sr_wu_get(struct sr_discipline *sd, int canwait)
> +void *
> +sr_wu_get(void *xsd)
> {
> + struct sr_discipline *sd = (struct sr_discipline *)xsd;
> struct sr_workunit *wu;
> - int s;
> -
> - s = splbio();
>
> - for (;;) {
> - wu = TAILQ_FIRST(&sd->sd_wu_freeq);
> - if (wu) {
> - TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
> - wu->swu_state = SR_WU_INPROGRESS;
> - sd->sd_wu_pending++;
> - break;
> - } else if (wu == NULL && canwait) {
> - sd->sd_wu_sleep++;
> - tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
> - sd->sd_wu_sleep--;
> - } else
> - break;
> + mtx_enter(&sd->sd_wu_mtx);
> + wu = TAILQ_FIRST(&sd->sd_wu_freeq);
> + if (wu) {
> + TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
> + wu->swu_state = SR_WU_INPROGRESS;
> + sd->sd_wu_pending++;
> }
> -
> - splx(s);
> + mtx_leave(&sd->sd_wu_mtx);
>
> DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
>
> @@ -1949,19 +1939,9 @@ sr_scsi_cmd(struct scsi_xfer *xs)
> goto stuffup;
> }
>
> - /*
> - * we'll let the midlayer deal with stalls instead of being clever
> - * and sending sr_wu_get !(xs->flags & SCSI_NOSLEEP) in cansleep
> - */
> - if ((wu = sr_wu_get(sd, 0)) == NULL) {
> - DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc));
> - xs->error = XS_NO_CCB;
> - sr_scsi_done(sd, xs);
> - return;
> - }
> -
> - xs->error = XS_NOERROR;
> + wu = xs->io;
> wu->swu_xs = xs;
> + xs->error = XS_NOERROR;
scsi_xs_exec sets xs->error to XS_NOERROR before giving it to this func, so
this is technically unnecessary. not an issue with this diff exactly, just an
interesting factoid.
>
> /* the midlayer will query LUNs so report sense to stop scanning */
> if (link->target != 0 || link->lun != 0) {
> @@ -2049,10 +2029,9 @@ stuffup:
> xs->error = XS_DRIVER_STUFFUP;
> }
> complete:
> - if (wu)
> - sr_wu_put(wu);
> sr_scsi_done(sd, xs);
> }
> +
> int
> sr_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
> {
> @@ -3042,6 +3021,8 @@ sr_ioctl_createraid(struct sr_softc *sc,
> }
>
> /* setup scsi midlayer */
> + mtx_init(&sd->sd_wu_mtx, IPL_BIO);
> + scsi_iopool_init(&sd->sd_iopool, sd, sr_wu_get, sr_wu_put);
> if (sd->sd_openings)
> sd->sd_link.openings = sd->sd_openings(sd);
> else
> @@ -3051,6 +3032,7 @@ sr_ioctl_createraid(struct sr_softc *sc,
> sd->sd_link.adapter = &sr_switch;
> sd->sd_link.adapter_target = SR_MAX_LD;
> sd->sd_link.adapter_buswidth = 1;
> + sd->sd_link.pool = &sd->sd_iopool;
> bzero(&saa, sizeof(saa));
> saa.saa_sc_link = &sd->sd_link;
>
> @@ -3953,11 +3935,17 @@ sr_rebuild_thread(void *arg)
> mysize += sz;
> lba = blk * sz;
>
> - /* get some wu */
> - if ((wu_r = sr_wu_get(sd, 1)) == NULL)
> - panic("%s: rebuild exhausted wu_r", DEVNAME(sc));
> - if ((wu_w = sr_wu_get(sd, 1)) == NULL)
> - panic("%s: rebuild exhausted wu_w", DEVNAME(sc));
> + /* Waiting for the required work units is allowed. */
> + while ((wu_r = sr_wu_get(sd)) == NULL) {
> + sd->sd_wu_sleep++;
> + tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
> + sd->sd_wu_sleep--;
> + }
> + while ((wu_w = sr_wu_get(sd)) == NULL) {
> + sd->sd_wu_sleep++;
> + tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
> + sd->sd_wu_sleep--;
> + }
you should use scsi_io_get to fetch these as it can sleep waiting for a
resource to be returned to the pool.
>
> /* setup read io */
> bzero(&xs_r, sizeof xs_r);
> @@ -4009,11 +3997,8 @@ sr_rebuild_thread(void *arg)
> TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
>
> /* schedule io */
> - if (sr_check_io_collision(wu_r))
> - goto queued;
> -
> - sr_raid_startwu(wu_r);
> -queued:
> + if (sr_check_io_collision(wu_r) == 0)
> + sr_raid_startwu(wu_r);
> splx(s);
nice :)
>
> /* wait for read completion */
> @@ -4026,8 +4011,8 @@ queued:
> if (slept == 0)
> tsleep(sc, PWAIT, "sr_yield", 1);
>
> - sr_wu_put(wu_r);
> - sr_wu_put(wu_w);
> + sr_wu_put(sd, wu_r);
> + sr_wu_put(sd, wu_w);
use scsi_io_put so other stuff waiting on the ios can be woken up.
>
> sd->sd_meta->ssd_rebuild = lba;
>
> Index: softraid_aoe.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_aoe.c,v
> retrieving revision 1.17
> diff -u -p -r1.17 softraid_aoe.c
> --- softraid_aoe.c 2 Jul 2010 15:49:25 -0000 1.17
> +++ softraid_aoe.c 3 Apr 2011 22:14:12 -0000
> @@ -511,7 +511,7 @@ sr_aoe_input(struct aoe_handler *ah, str
> struct aoe_packet *ap;
> struct sr_workunit *wu, *wup;
> daddr64_t blk, offset;
> - int len, s;
> + int len, s, done = 0;
> int tag;
>
> ap = mtod(m, struct aoe_packet *);
> @@ -576,10 +576,9 @@ sr_aoe_input(struct aoe_handler *ah, str
> break;
> }
> }
> -
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> + splx(s);
> sr_scsi_done(sd, xs);
> + s = splbio();
> }
>
> out:
> Index: softraid_crypto.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_crypto.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 softraid_crypto.c
> --- softraid_crypto.c 6 Mar 2011 21:41:57 -0000 1.63
> +++ softraid_crypto.c 3 Apr 2011 22:16:36 -0000
> @@ -1134,7 +1134,6 @@ sr_crypto_rw(struct sr_workunit *wu)
> int
> sr_crypto_write(struct cryptop *crp)
> {
> - int s;
> struct sr_workunit *wu = crp->crp_opaque;
>
> DNPRINTF(SR_D_INTR, "%s: sr_crypto_write: wu %x xs: %x\n",
> @@ -1144,9 +1143,7 @@ sr_crypto_write(struct cryptop *crp)
> /* fail io */
> ((struct sr_workunit *)(crp->crp_opaque))->swu_xs->error =
> XS_DRIVER_STUFFUP;
> - s = splbio();
> sr_crypto_finish_io(crp->crp_opaque);
> - splx(s);
> }
is that safe?
>
> return (sr_crypto_rw2(wu, crp));
> @@ -1216,13 +1213,8 @@ sr_crypto_rw2(struct sr_workunit *wu, st
> ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
>
> s = splbio();
> -
> - if (sr_check_io_collision(wu))
> - goto queued;
> -
> - sr_raid_startwu(wu);
> -
> -queued:
> + if (sr_check_io_collision(wu) == 0)
> + sr_raid_startwu(wu);
> splx(s);
> return (0);
> bad:
> @@ -1313,7 +1305,9 @@ sr_crypto_intr(struct buf *bp)
> goto done;
> }
>
> + splx(s);
> sr_crypto_finish_io(wu);
> + s = splbio();
> }
>
> done:
> @@ -1329,12 +1323,13 @@ sr_crypto_finish_io(struct sr_workunit *
> #ifdef SR_DEBUG
> struct sr_softc *sc = sd->sd_sc;
> #endif /* SR_DEBUG */
> + int s;
>
> - splassert(IPL_BIO);
>
> DNPRINTF(SR_D_INTR, "%s: sr_crypto_finish_io: wu %x xs: %x\n",
> DEVNAME(sc), wu, xs);
>
> + s = splbio();
> xs->resid = 0;
>
> if (wu->swu_cb_active == 1)
> @@ -1345,18 +1340,16 @@ sr_crypto_finish_io(struct sr_workunit *
> sr_crypto_putcryptop(ccb->ccb_opaque);
> }
>
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - sr_scsi_done(sd, xs);
> -
> if (sd->sd_sync && sd->sd_wu_pending == 0)
> wakeup(sd);
> +
> + splx(s);
> + sr_scsi_done(sd, xs);
> }
>
> int
> sr_crypto_read(struct cryptop *crp)
> {
> - int s;
> struct sr_workunit *wu = crp->crp_opaque;
>
> DNPRINTF(SR_D_INTR, "%s: sr_crypto_read: wu %x xs: %x\n",
> @@ -1365,9 +1358,7 @@ sr_crypto_read(struct cryptop *crp)
> if (crp->crp_etype)
> wu->swu_xs->error = XS_DRIVER_STUFFUP;
>
> - s = splbio();
> sr_crypto_finish_io(wu);
> - splx(s);
>
> return (0);
> }
> Index: softraid_raid0.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_raid0.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 softraid_raid0.c
> --- softraid_raid0.c 2 Jul 2010 09:20:26 -0000 1.22
> +++ softraid_raid0.c 3 Apr 2011 22:18:12 -0000
> @@ -370,12 +370,8 @@ sr_raid0_rw(struct sr_workunit *wu)
> }
>
> s = splbio();
> -
> - if (sr_check_io_collision(wu))
> - goto queued;
> -
> - sr_raid_startwu(wu);
> -queued:
> + if (sr_check_io_collision(wu) == 0)
> + sr_raid_startwu(wu);
> splx(s);
> return (0);
> bad:
> @@ -456,9 +452,9 @@ sr_raid0_intr(struct buf *bp)
> printf("%s: wu: %p not on pending queue\n",
> DEVNAME(sc), wu);
>
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> + splx(s);
> sr_scsi_done(sd, xs);
> + s = splbio();
>
> if (sd->sd_sync && sd->sd_wu_pending == 0)
> wakeup(sd);
> @@ -468,7 +464,6 @@ sr_raid0_intr(struct buf *bp)
> return;
> bad:
> xs->error = XS_DRIVER_STUFFUP;
> - sr_wu_put(wu);
> - sr_scsi_done(sd, xs);
> splx(s);
> + sr_scsi_done(sd, xs);
> }
> Index: softraid_raid1.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_raid1.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 softraid_raid1.c
> --- softraid_raid1.c 6 Nov 2010 23:01:56 -0000 1.26
> +++ softraid_raid1.c 3 Apr 2011 22:04:56 -0000
> @@ -615,9 +615,9 @@ sr_raid1_intr(struct buf *bp)
> wakeup(wu);
> }
> } else {
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - scsi_done(xs);
> + splx(s);
> + sr_scsi_done(sd, xs);
> + s = splbio();
> }
>
> if (sd->sd_sync && sd->sd_wu_pending == 0)
> @@ -632,13 +632,11 @@ bad:
> if (wu->swu_flags & SR_WUF_REBUILD) {
> wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
> wakeup(wu);
> + splx(s);
> } else {
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - scsi_done(xs);
> + splx(s);
> + sr_scsi_done(sd, xs);
> }
> -
> - splx(s);
> }
>
> void
> Index: softraid_raid6.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_raid6.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 softraid_raid6.c
> --- softraid_raid6.c 7 Aug 2010 03:50:01 -0000 1.19
> +++ softraid_raid6.c 3 Apr 2011 21:19:24 -0000
> @@ -480,7 +480,7 @@ sr_raid6_rw(struct sr_workunit *wu)
> rwmode = (xs->flags & SCSI_DATA_IN) ? M_RFLG : M_WFLG;
> if (xs->flags & SCSI_DATA_OUT)
> /* create write workunit */
> - if ((wu_w = sr_wu_get(sd, 0)) == NULL) {
> + if ((wu_w = sr_wu_get(sd)) == NULL) {
scsi_io_get
> printf("%s: can't get wu_w", DEVNAME(sd->sd_sc));
> goto bad;
> }
> @@ -744,7 +744,7 @@ queued:
> bad:
> /* wu is unwound by sr_wu_put */
> if (wu_w)
> - sr_wu_put(wu_w);
> + sr_wu_put(sd, wu_w);
scsi_io_put
> return (1);
> }
>
> @@ -888,11 +888,12 @@ sr_raid6_intr(struct buf *bp)
> wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
> wakeup(wu);
> }
> + } else if (xs == NULL) {
> + sr_wu_put(sd, wu);
scsi_io_put
> } else {
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - if (xs != NULL)
> - scsi_done(xs);
> + splx(s);
> + sr_scsi_done(sd, xs);
> + s = splbio();
> }
>
> if (sd->sd_sync && sd->sd_wu_pending == 0)
> @@ -907,13 +908,11 @@ bad:
> if (wu->swu_flags & SR_WUF_REBUILD) {
> wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
> wakeup(wu);
> + splx(s);
> } else {
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - scsi_done(xs);
> + splx(s);
> + sr_scsi_done(sd, xs);
> }
> -
> - splx(s);
> }
>
> void
> Index: softraid_raidp.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraid_raidp.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 softraid_raidp.c
> --- softraid_raidp.c 7 Aug 2010 03:50:01 -0000 1.19
> +++ softraid_raidp.c 3 Apr 2011 21:19:58 -0000
> @@ -396,7 +396,7 @@ sr_raidp_rw(struct sr_workunit *wu)
>
> if (xs->flags & SCSI_DATA_OUT)
> /* create write workunit */
> - if ((wu_w = sr_wu_get(sd, 0)) == NULL) {
> + if ((wu_w = sr_wu_get(sd)) == NULL) {
scsi_io_get
> printf("%s: can't get wu_w", DEVNAME(sd->sd_sc));
> goto bad;
> }
> @@ -553,7 +553,7 @@ queued:
> bad:
> /* wu is unwound by sr_wu_put */
> if (wu_w)
> - sr_wu_put(wu_w);
> + sr_wu_put(sd, wu_w);
scsi_io_put
> return (1);
> }
>
> @@ -670,9 +670,13 @@ sr_raidp_intr(struct buf *bp)
> }
> } else {
> /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - if (xs != NULL)
> - scsi_done(xs);
> + if (xs == NULL)
> + sr_wu_put(sd, wu);
scsi_io_put
> + else {
> + splx(s);
> + sr_scsi_done(sd, xs);
> + s = splbio();
> + }
> }
>
> if (sd->sd_sync && sd->sd_wu_pending == 0)
> @@ -687,13 +691,11 @@ bad:
> if (wu->swu_flags & SR_WUF_REBUILD) {
> wu->swu_flags |= SR_WUF_REBUILDIOCOMP;
> wakeup(wu);
> + splx(s);
> } else {
> - /* do not change the order of these 2 functions */
> - sr_wu_put(wu);
> - scsi_done(xs);
> + splx(s);
> + sr_scsi_done(sd, xs);
> }
> -
> - splx(s);
> }
>
> void
> @@ -831,10 +833,17 @@ sr_raidp_scrub(struct sr_discipline *sd)
> int s, slept;
> void *xorbuf;
>
> - if ((wu_r = sr_wu_get(sd, 1)) == NULL)
> - goto done;
> - if ((wu_w = sr_wu_get(sd, 1)) == NULL)
> - goto done;
> + /* Waiting for the required work units is allowed. */
> + while ((wu_r = sr_wu_get(sd)) == NULL) {
> + sd->sd_wu_sleep++;
> + tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
> + sd->sd_wu_sleep--;
> + }
> + while ((wu_w = sr_wu_get(sd)) == NULL) {
> + sd->sd_wu_sleep++;
> + tsleep(&sd->sd_wu_sleep, PRIBIO, "sr_wu_get", 0);
> + sd->sd_wu_sleep--;
> + }
scsi_io_get
>
> no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 1;
> strip_size = sd->sd_meta->ssdi.ssd_strip_size;
> Index: softraidvar.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/softraidvar.h,v
> retrieving revision 1.98
> diff -u -p -r1.98 softraidvar.h
> --- softraidvar.h 15 Mar 2011 13:29:41 -0000 1.98
> +++ softraidvar.h 3 Apr 2011 20:46:10 -0000
> @@ -526,6 +526,9 @@ struct sr_discipline {
> struct sr_wu_list sd_wu_defq; /* deferred wu queue */
> int sd_wu_sleep; /* wu sleepers counter */
>
> + struct mutex sd_wu_mtx;
> + struct scsi_iopool sd_iopool;
> +
> /* discipline stats */
> int sd_wu_pending;
> u_int64_t sd_wu_collisions;
> @@ -604,8 +607,8 @@ struct sr_ccb *sr_ccb_get(struct sr_dis
> void sr_ccb_put(struct sr_ccb *);
> int sr_wu_alloc(struct sr_discipline *);
> void sr_wu_free(struct sr_discipline *);
> -struct sr_workunit *sr_wu_get(struct sr_discipline *, int);
> -void sr_wu_put(struct sr_workunit *);
> +void *sr_wu_get(void *);
> +void sr_wu_put(void *, void *);
>
> /* misc functions */
> int32_t sr_validate_stripsize(u_int32_t);
>
all access to the iopool should be through the scsi_io_get/put api so the
midlayer can do appropriate callbacks, sleeps, and wakeups.