> this is a new version of my diff, which is necessary following some
> changes that have been committed to the tree since my original diff.
> it also fixes a race in the scsi_scsi_cmd completion path, and locks
> the sd buffers consistently.
> 
> this diff has been tested on mpi, siop, isp, ami, ahci, and umass
> (usb). i have read through mfi, arc, and sili. if you want to avoid
> this diff breaking any other controllers, please try the diff out
> and report results.
> 
> On Thu, Aug 13, 2009 at 02:50:00AM +1000, David Gwynne wrote:
>> this diff starts to address several problems i have with the scsi
>> midlayer.
>> 
>> the most important at the moment is that the entrypoint into the
>> current midlayer is through a function called scsi_scsi_cmd. the
>> problem with this function is that it is impossible to start an
>> async scsi operation and then tell if the command has failed,
>> completed, or been queued without shoving a buf down with it.
>> 
>> this sucks for mpath because it simply wants to take commands from
>> sd/cd/etc and push them down a real physical path. the buf handling
>> will be done by the sd to mpath leg of the journey, trying to do
>> it again on the mpath to mpi/isp/etc leg of the journey will cause
>> a use after free.
>> 
>> this lets drivers like sd/cd/mpath allocate an xs, fill it in, and
>> supply a completion routine for it. in the mpath case this completion
>> routine goes on and completes the io it was asked to do on cd/sd/etc's
>> behalf.
>> 
>> i have tweaked sd to use this new interface too to verify it is
>> usable. i have also reimplemented scsi_scsi_cmd to retain backward
>> compat for old users (currently everything except sd).
>> 
>> these changes clear the way to make an hba's scsi_cmd routine not
>> need to return an error code. in the future the ability of the hba
>> to complete an xs will be reported by changing the state of the xs
>> and completing it. there is a huge amount of confusion in hba drivers
>> at the moment about the right way to report errors up to the midlayer
>> and then in turn up to the device drivers. forcing all reporting
>> to be done via the xs will simplify code hugely and make it more
>> robust.
>> 
>> the last benefit is this makes it easier to allow the hba to provide
>> the xs to cd/sd/mpath/etc.
>> 
>> i encourage everyone to test this diff and tell me what blows up.
> 
> Index: mpath.c
> ===================================================================
> RCS file: /cvs/src/sys/scsi/mpath.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 mpath.c
> --- mpath.c   9 Aug 2009 16:55:02 -0000       1.3
> +++ mpath.c   13 Aug 2009 21:44:17 -0000
> @@ -78,6 +78,8 @@ int         mpath_cmd(struct scsi_xfer *);
>  void         mpath_minphys(struct buf *, struct scsi_link *);
>  int          mpath_probe(struct scsi_link *);
>  
> +void         mpath_done(struct scsi_xfer *);
> +
>  struct scsi_adapter mpath_switch = {
>       mpath_cmd,
>       scsi_minphys,
> @@ -148,36 +150,53 @@ mpath_cmd(struct scsi_xfer *xs)
>       struct scsi_link *link = xs->sc_link;
>       struct mpath_node *n = mpath_nodes[link->target];
>       struct mpath_path *p = TAILQ_FIRST(&n->node_paths);
> -     int rv;
> -     int s;
> +     struct scsi_xfer *mxs;
>  
>       if (n == NULL || p == NULL) {
>               mpath_xs_stuffup(xs);
>               return (COMPLETE);
>       }
>  
> -     rv = scsi_scsi_cmd(p->path_link, xs->cmd, xs->cmdlen,
> -         xs->data, xs->datalen,
> -         2, xs->timeout, NULL, SCSI_POLL |
> -         (xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)));
> +     mxs = scsi_xs_get(p->path_link, xs->flags);
> +     if (mxs == NULL) {
> +             mpath_xs_stuffup(xs);
> +             return (COMPLETE);
> +     }
>  
> +     memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
> +     mxs->cmdlen = xs->cmdlen;
> +     mxs->data = xs->data;
> +     mxs->datalen = xs->datalen;
> +     mxs->retries = xs->retries;
> +     mxs->timeout = xs->timeout;
> +     mxs->req_sense_length = xs->req_sense_length;
>  
> -     xs->flags |= ITSDONE;
> -     if (rv == 0) {
> -             xs->error = XS_NOERROR;
> -             xs->status = SCSI_OK;
> -             xs->resid = 0;
> -     } else {
> -             printf("%s: t%dl%d rv %d cmd %x\n", DEVNAME(mpath),
> -                 link->target, link->lun, rv, xs->cmd->opcode);
> -             xs->error = XS_DRIVER_STUFFUP;
> -     }
> +     mxs->cookie = xs;
> +     mxs->done = mpath_done;
> +
> +     scsi_xs_exec(mxs);
> +
> +     return (COMPLETE); /* doesnt matter anymore */
> +}
> +
> +void
> +mpath_done(struct scsi_xfer *mxs)
> +{
> +     struct scsi_xfer *xs = mxs->cookie;
> +     int s;
> +
> +     xs->error = mxs->error;
> +     xs->status = mxs->status;
> +     xs->flags = mxs->flags;
> +     xs->resid = mxs->resid;
> +
> +     memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));
> +
> +     scsi_xs_put(mxs);
>  
>       s = splbio();
>       scsi_done(xs);
>       splx(s);
> -
> -     return (COMPLETE);
>  }
>  
>  void
> Index: scsi_base.c
> ===================================================================
> RCS file: /cvs/src/sys/scsi/scsi_base.c,v
> retrieving revision 1.134
> diff -u -p -r1.134 scsi_base.c
> --- scsi_base.c       13 Aug 2009 21:35:56 -0000      1.134
> +++ scsi_base.c       13 Aug 2009 21:44:17 -0000
> @@ -50,15 +50,14 @@
>  #include <scsi/scsi_disk.h>
>  #include <scsi/scsiconf.h>
>  
> -static __inline struct scsi_xfer *scsi_make_xs(struct scsi_link *,
> -    struct scsi_generic *, int cmdlen, u_char *data_addr,
> -    int datalen, int retries, int timeout, struct buf *, int flags);
>  static __inline void asc2ascii(u_int8_t, u_int8_t ascq, char *result,
>      size_t len);
>  int  sc_err1(struct scsi_xfer *);
>  int  scsi_interpret_sense(struct scsi_xfer *);
>  char   *scsi_decode_sense(struct scsi_sense_data *, int);
>  
> +void scsi_xs_done(struct scsi_xfer *);
> +
>  /* Values for flag parameter to scsi_decode_sense. */
>  #define      DECODE_SENSE_KEY        1
>  #define      DECODE_ASC_ASCQ         2
> @@ -94,6 +93,7 @@ scsi_init()
>       /* Initialize the scsi_xfer pool. */
>       pool_init(&scsi_xfer_pool, sizeof(struct scsi_xfer), 0,
>           0, 0, "scxspl", NULL);
> +     pool_setipl(&scsi_xfer_pool, IPL_BIO);
>       /* Initialize the scsi_plug pool */
>       pool_init(&scsi_plug_pool, sizeof(struct scsi_plug), 0,
>           0, 0, "scsiplug", NULL);
> @@ -188,42 +188,43 @@ scsi_deinit()
>   */
>  
>  struct scsi_xfer *
> -scsi_get_xs(struct scsi_link *sc_link, int flags)
> +scsi_xs_get(struct scsi_link *link, int flags)
>  {
> -     struct scsi_xfer                *xs;
> -     int                             s;
> -
> -     SC_DEBUG(sc_link, SDEV_DB3, ("scsi_get_xs\n"));
> +     struct scsi_xfer *xs;
>  
> -     s = splbio();
> -     while (sc_link->openings == 0) {
> -             SC_DEBUG(sc_link, SDEV_DB3, ("sleeping\n"));
> -             if ((flags & SCSI_NOSLEEP) != 0) {
> -                     splx(s);
> -                     return (NULL);
> -             }
> -             sc_link->flags |= SDEV_WAITING;
> -             if (tsleep(sc_link, PRIBIO|PCATCH, "getxs", 0)) {
> -                     /* Bail out on getting a signal. */
> -                     sc_link->flags &= ~SDEV_WAITING;
> -                     splx(s);
> +     mtx_enter(&link->mtx); /* link->mtx guards openings and flags */
> +     while (link->openings == 0) {
> +             if (ISSET(flags, SCSI_NOSLEEP)) { /* caller must not sleep */
> +                     mtx_leave(&link->mtx);
>                       return (NULL);
>               }
> +
> +             SET(link->flags, SDEV_WAITING);
> +             msleep(link, &link->mtx, PRIBIO, "getxs", 0);
>       }
> -     SC_DEBUG(sc_link, SDEV_DB3, ("calling pool_get\n"));
> +     link->openings--; /* reserve an opening before allocating the xs */
> +     mtx_leave(&link->mtx);
> +
> +     /* pool is shared, link mtx is not */
>       xs = pool_get(&scsi_xfer_pool,
> -         ((flags & SCSI_NOSLEEP) != 0 ? PR_NOWAIT : PR_WAITOK));
> -     if (xs != NULL) {
> -             bzero(xs, sizeof(*xs));
> -             sc_link->openings--;
> -             xs->flags = flags;
> +         ISSET(flags, SCSI_NOSLEEP) ? PR_NOWAIT : PR_WAITOK);
> +     if (xs == NULL) {
> +             mtx_enter(&link->mtx);
> +             link->openings++; /* allocation failed: give the opening back */
> +             mtx_leave(&link->mtx);
>       } else {
> -             sc_print_addr(sc_link);
> -             printf("cannot allocate scsi xs\n");
> +             xs->flags = flags;
> +             xs->sc_link = link;
> +             xs->retries = SCSI_RETRIES;
> +             xs->timeout = 0;
> +             bzero(&xs->cmdstore, sizeof(xs->cmdstore));
> +             xs->cmd = &xs->cmdstore;
> +             xs->cmdlen = 0;
> +             xs->data = NULL;
> +             xs->datalen = 0;
> +             xs->resid = 0;
> +             xs->bp = NULL;
>       }
> -     splx(s);
> -
> -     SC_DEBUG(sc_link, SDEV_DB3, ("returning\n"));
>  
>       return (xs);
>  }
> @@ -234,75 +235,21 @@ scsi_get_xs(struct scsi_link *sc_link, i
>   * If another process is waiting for an xs, do a wakeup, let it proceed
>   */
>  void
> -scsi_free_xs(struct scsi_xfer *xs, int start)
> +scsi_xs_put(struct scsi_xfer *xs)
>  {
> -     struct scsi_link *sc_link = xs->sc_link;
> -
> -     splassert(IPL_BIO);
> -
> -     SC_DEBUG(sc_link, SDEV_DB3, ("scsi_free_xs\n"));
> +     struct scsi_link *link = xs->sc_link;
>  
>       pool_put(&scsi_xfer_pool, xs);
> -     sc_link->openings++;
> +
> +     mtx_enter(&link->mtx);
> +     link->openings++;
>  
>       /* If someone is waiting for scsi_xfer, wake them up. */
> -     if ((sc_link->flags & SDEV_WAITING) != 0) {
> -             sc_link->flags &= ~SDEV_WAITING;
> -             wakeup(sc_link);
> -     } else if (start && sc_link->device->start) {
> -             SC_DEBUG(sc_link, SDEV_DB2,
> -                 ("calling private start()\n"));
> -             (*(sc_link->device->start)) (sc_link->device_softc);
> +     if (ISSET(link->flags, SDEV_WAITING)) {
> +             CLR(link->flags, SDEV_WAITING);
> +             wakeup(link);
>       }
> -}
> -
> -/*
> - * Make a scsi_xfer, and return a pointer to it.
> - */
> -static __inline struct scsi_xfer *
> -scsi_make_xs(struct scsi_link *sc_link, struct scsi_generic *scsi_cmd,
> -    int cmdlen, u_char *data_addr, int datalen, int retries, int timeout,
> -    struct buf *bp, int flags)
> -{
> -     struct scsi_xfer                *xs;
> -
> -     if ((xs = scsi_get_xs(sc_link, flags)) == NULL)
> -             return (NULL);
> -
> -     /*
> -      * Fill out the scsi_xfer structure.  We don't know whose context
> -      * the cmd is in, so copy it.
> -      */
> -     xs->sc_link = sc_link;
> -     bcopy(scsi_cmd, &xs->cmdstore, cmdlen);
> -     xs->cmd = &xs->cmdstore;
> -     xs->cmdlen = cmdlen;
> -     xs->data = data_addr;
> -     xs->datalen = datalen;
> -     xs->retries = retries;
> -     xs->timeout = timeout;
> -     xs->bp = bp;
> -
> -     /*
> -      * Set the LUN in the CDB if it fits in the three bits available. This
> -      * may only be needed if we have an older device.  However, we also set
> -      * it for more modern SCSI devices "just in case".  The old code
> -      * assumed everything newer than SCSI-2 would not need it, but why risk
> -      * it?  This was the old conditional:
> -      *
> -      * if ((SCSISPC(sc_link->inqdata.version) <= 2))
> -      */
> -     xs->cmd->bytes[0] &= ~SCSI_CMD_LUN_MASK;
> -     if (sc_link->lun < 8)
> -             xs->cmd->bytes[0] |= ((sc_link->lun << SCSI_CMD_LUN_SHIFT) &
> -                 SCSI_CMD_LUN_MASK);
> -
> -#ifdef       SCSIDEBUG
> -     if ((sc_link->flags & SDEV_DB1) != 0)
> -             show_scsi_xs(xs);
> -#endif /* SCSIDEBUG */
> -
> -     return (xs);
> +     mtx_leave(&link->mtx);
>  }
>  
>  /*
> @@ -764,18 +711,44 @@ scsi_report_luns(struct scsi_link *sc_li
>       return (error);
>  }
>  
> +void
> +scsi_xs_exec(struct scsi_xfer *xs)
> +{
> +     int rv;
> +     int s;
> +
> +     xs->flags &= ~ITSDONE; /* reset completion state before issuing */
> +     xs->error = XS_NOERROR;
> +     xs->resid = xs->datalen;
> +     xs->status = 0;
> +
> +     rv = xs->sc_link->adapter->scsi_cmd(xs); /* hand the xs to the adapter */
> +     switch (rv) { /* return values not listed below need no action here */
> +     case NO_CCB:
> +             if (!ISSET(xs->flags, SCSI_POLL) && xs->retries-- > 0) {
> +                     timeout_set(&xs->stimeout,
> +                         (void (*)(void *))scsi_xs_exec, xs);
> +                     timeout_add(&xs->stimeout, 1); /* re-run this function later */
> +                     break;
> +             }
> +             /* FALLTHROUGH */
> +     case TRY_AGAIN_LATER:
> +             /* no retry is attempted here: fail the xs and complete it */
> +
> +             xs->error = XS_DRIVER_STUFFUP;
> +             s = splbio(); /* scsi_done() asserts IPL_BIO */
> +             scsi_done(xs);
> +             splx(s);
> +             break;
> +     }
> +}
> +
>  /*
> - * This routine is called by the scsi interrupt when the transfer is complete.
> + * This routine is called by the adapter when its xs handling is done.
>   */
>  void
>  scsi_done(struct scsi_xfer *xs)
>  {
> -     struct scsi_link                        *sc_link = xs->sc_link;
> -     struct buf                              *bp;
> -     int                                     error;
> -
> -     SC_DEBUG(sc_link, SDEV_DB2, ("scsi_done\n"));
> -
>       splassert(IPL_BIO);
>  
>       xs->flags |= ITSDONE;
> @@ -786,159 +759,11 @@ scsi_done(struct scsi_xfer *xs)
>        * xs when the user returns (and restarting the device's queue).
>        */
>       if ((xs->flags & SCSI_USER) != 0) {
> -             SC_DEBUG(sc_link, SDEV_DB3, ("calling user done()\n"));
>               scsi_user_done(xs); /* to take a copy of the sense etc. */
> -             SC_DEBUG(sc_link, SDEV_DB3, ("returned from user done()\n"));
> -
> -             scsi_free_xs(xs, 1); /* restarts queue too */
> -             SC_DEBUG(sc_link, SDEV_DB3, ("returning to adapter\n"));
>               return;
>       }
>  
> -     if (!((xs->flags & (SCSI_NOSLEEP | SCSI_POLL)) == SCSI_NOSLEEP)) {
> -             /*
> -              * if it's a normal upper level request, then ask
> -              * the upper level code to handle error checking
> -              * rather than doing it here at interrupt time
> -              */
> -             wakeup(xs);
> -             return;
> -     }
> -
> -     /*
> -      * Go and handle errors now.
> -      * If it returns ERESTART then we should RETRY
> -      */
> -retry:
> -     error = sc_err1(xs);
> -     if (error == ERESTART) {
> -             switch ((*(sc_link->adapter->scsi_cmd)) (xs)) {
> -             case SUCCESSFULLY_QUEUED:
> -                     return;
> -
> -             case TRY_AGAIN_LATER:
> -                     xs->error = XS_BUSY;
> -                     /* FALLTHROUGH */
> -             case COMPLETE:
> -                     goto retry;
> -             }
> -     }
> -
> -     bp = xs->bp;
> -     if (bp != NULL) {
> -             if (error) {
> -                     bp->b_error = error;
> -                     bp->b_flags |= B_ERROR;
> -                     bp->b_resid = bp->b_bcount;
> -             } else {
> -                     bp->b_error = 0;
> -                     bp->b_resid = xs->resid;
> -             }
> -     }
> -
> -     if (sc_link->device->done) {
> -             /*
> -              * Tell the device the operation is actually complete.
> -              * No more will happen with this xfer.  This for
> -              * notification of the upper-level driver only; they
> -              * won't be returning any meaningful information to us.
> -              */
> -             (*sc_link->device->done)(xs);
> -     }
> -     scsi_free_xs(xs, 1);
> -     if (bp != NULL)
> -             biodone(bp);
> -}
> -
> -int
> -scsi_execute_xs(struct scsi_xfer *xs)
> -{
> -     int                                     error, flags, rslt, s;
> -
> -     xs->flags &= ~ITSDONE;
> -     xs->error = XS_NOERROR;
> -     xs->resid = xs->datalen;
> -     xs->status = 0;
> -
> -     /*
> -      * Do the transfer. If we are polling we will return:
> -      * COMPLETE,  Was poll, and scsi_done has been called
> -      * TRY_AGAIN_LATER, Adapter short resources, try again
> -      *
> -      * if under full steam (interrupts) it will return:
> -      * SUCCESSFULLY_QUEUED, will do a wakeup when complete
> -      * TRY_AGAIN_LATER, (as for polling)
> -      * After the wakeup, we must still check if it succeeded
> -      *
> -      * If we have a SCSI_NOSLEEP (typically because we have a buf)
> -      * we just return.  All the error processing and the buffer
> -      * code both expect us to return straight to them, so as soon
> -      * as the command is queued, return.
> -      */
> -
> -     /*
> -      * We save the flags here because the xs structure may already
> -      * be freed by scsi_done by the time adapter->scsi_cmd returns.
> -      *
> -      * scsi_done is responsible for freeing the xs if either
> -      * (flags & (SCSI_NOSLEEP | SCSI_POLL)) == SCSI_NOSLEEP
> -      * -or-
> -      * (flags & SCSI_USER) != 0
> -      *
> -      * Note: SCSI_USER must always be called with SCSI_NOSLEEP
> -      * and never with SCSI_POLL, so the second expression should be
> -      * is equivalent to the first.
> -      */
> -
> -     flags = xs->flags;
> -#ifdef DIAGNOSTIC
> -     if ((flags & (SCSI_USER | SCSI_NOSLEEP)) == SCSI_USER)
> -             panic("scsi_execute_xs: USER without NOSLEEP");
> -     if ((flags & (SCSI_USER | SCSI_POLL)) == (SCSI_USER | SCSI_POLL))
> -             panic("scsi_execute_xs: USER with POLL");
> -#endif
> -retry:
> -     rslt = (*(xs->sc_link->adapter->scsi_cmd))(xs);
> -     switch (rslt) {
> -     case SUCCESSFULLY_QUEUED:
> -             if ((flags & (SCSI_NOSLEEP | SCSI_POLL)) == SCSI_NOSLEEP)
> -                     return (EJUSTRETURN);
> -#ifdef DIAGNOSTIC
> -             if (flags & SCSI_NOSLEEP)
> -                     panic("scsi_execute_xs: NOSLEEP and POLL");
> -#endif
> -             s = splbio();
> -             /* Since the xs is active we can't bail out on a signal. */
> -             while ((xs->flags & ITSDONE) == 0)
> -                     tsleep(xs, PRIBIO + 1, "scsicmd", 0);
> -             splx(s);
> -             /* FALLTHROUGH */
> -     case COMPLETE:          /* Polling command completed ok */
> -             if ((flags & (SCSI_NOSLEEP | SCSI_POLL)) == SCSI_NOSLEEP)
> -                     return (EJUSTRETURN);
> -             if (xs->bp)
> -                     return (EJUSTRETURN);
> -     doit:
> -             SC_DEBUG(xs->sc_link, SDEV_DB3, ("back in cmd()\n"));
> -             if ((error = sc_err1(xs)) != ERESTART)
> -                     return (error);
> -             goto retry;
> -
> -     case TRY_AGAIN_LATER:   /* adapter resource shortage */
> -             xs->error = XS_BUSY;
> -             goto doit;
> -
> -     case NO_CCB:
> -             return (EAGAIN);
> -
> -     default:
> -             panic("scsi_execute_xs: invalid return code (%#x)", rslt);
> -     }
> -
> -#ifdef DIAGNOSTIC
> -     panic("scsi_execute_xs: impossible");
> -#endif
> -     return (EINVAL);
> +     xs->done(xs);
>  }
>  
>  /*
> @@ -948,54 +773,81 @@ retry:
>   * to associate with the transfer, we need that too.
>   */
>  int
> -scsi_scsi_cmd(struct scsi_link *sc_link, struct scsi_generic *scsi_cmd,
> +scsi_scsi_cmd(struct scsi_link *link, struct scsi_generic *scsi_cmd,
>      int cmdlen, u_char *data_addr, int datalen, int retries, int timeout,
>      struct buf *bp, int flags)
>  {
> -     struct scsi_xfer                        *xs;
> -     int                                     error;
> -     int                                     s;
> -
> -     SC_DEBUG(sc_link, SDEV_DB2, ("scsi_cmd\n"));
> +     struct scsi_xfer *xs;
> +     int error;
> +     int s;
>  
>  #ifdef DIAGNOSTIC
>       if (bp != NULL && (flags & SCSI_NOSLEEP) == 0)
>               panic("scsi_scsi_cmd: buffer without nosleep");
>  #endif
>  
> -     if ((xs = scsi_make_xs(sc_link, scsi_cmd, cmdlen, data_addr, datalen,
> -         retries, timeout, bp, flags)) == NULL)
> +     xs = scsi_xs_get(link, flags);
> +     if (xs == NULL)
>               return (ENOMEM);
>  
> -#ifdef       SCSIDEBUG
> -     if ((sc_link->flags & SDEV_DB1) != 0)
> -             if (xs->datalen && (xs->flags & SCSI_DATA_OUT))
> -                     show_mem(xs->data, min(64, xs->datalen));
> -#endif       /* SCSIDEBUG */
> -
> -     error = scsi_execute_xs(xs);
> -
> -#ifdef       SCSIDEBUG
> -     if ((sc_link->flags & SDEV_DB1) != 0)
> -             if (xs->datalen && (xs->flags & SCSI_DATA_IN))
> -                     show_mem(xs->data, min(64, xs->datalen));
> -#endif       /* SCSIDEBUG */
> +     memcpy(xs->cmd, scsi_cmd, cmdlen);
> +     xs->cmdlen = cmdlen;
> +     xs->data = data_addr;
> +     xs->datalen = datalen;
> +     xs->retries = retries;
> +     xs->timeout = timeout;
>  
> -     if (error == EJUSTRETURN)
> -             return (0);
> +     xs->done = scsi_xs_done;
> +
> +     do {
> +             scsi_xs_exec(xs);
> +             if (!ISSET(xs->flags, SCSI_POLL)) {
> +                     s = splbio();
> +                     while (!ISSET(xs->flags, ITSDONE))
> +                             tsleep(xs, PRIBIO, "scsicmd", 0);
> +                     splx(s);
> +             }
>  
> -     s = splbio();
> +             error = sc_err1(xs);
> +     } while (error == ERESTART);
>  
> -     if (error == EAGAIN)
> -             scsi_free_xs(xs, 0); /* Don't restart queue. */
> -     else
> -             scsi_free_xs(xs, 1);
> +     if (bp != NULL) {
> +             if (error) {
> +                     bp->b_error = error;
> +                     bp->b_flags |= B_ERROR;
> +                     bp->b_resid = bp->b_bcount;
> +             } else {  
> +                     bp->b_error = 0;
> +                     bp->b_resid = xs->resid;
> +             }
> +
> +             s = splbio();
> +             biodone(bp);
> +             splx(s);
> +     }
> +
> +     if (link->device->done) {
> +             /*
> +              * Tell the device the operation is actually complete.
> +              * No more will happen with this xfer.  This for
> +              * notification of the upper-level driver only; they
> +              * won't be returning any meaningful information to us.
> +              */
> +             link->device->done(xs);
> +     }
>  
> -     splx(s);
> +     scsi_xs_put(xs);
>  
>       return (error);
>  }
>  
> +void
> +scsi_xs_done(struct scsi_xfer *xs)
> +{
> +     if (!ISSET(xs->flags, SCSI_POLL)) /* polled callers do not sleep */
> +             wakeup_one(xs); /* scsi_scsi_cmd() tsleeps on the xs */
> +}
> +
>  int
>  sc_err1(struct scsi_xfer *xs)
>  {
> @@ -1115,10 +967,6 @@ scsi_interpret_sense(struct scsi_xfer *x
>           sense->flags & SSD_EOM ? 1 : 0,
>           sense->flags & SSD_FILEMARK ? 1 : 0,
>           sense->extra_len));
> -#ifdef       SCSIDEBUG
> -     if ((sc_link->flags & SDEV_DB1) != 0)
> -             show_mem((u_char *)&xs->sense, sizeof xs->sense);
> -#endif       /* SCSIDEBUG */
>  
>       /*
>        * If the device has its own error handler, call it first.
> @@ -2011,57 +1859,3 @@ scsi_decode_sense(struct scsi_sense_data
>  
>       return (rqsbuf);
>  }
> -
> -#ifdef SCSIDEBUG
> -/*
> - * Given a scsi_xfer, dump the request, in all its glory
> - */
> -void
> -show_scsi_xs(struct scsi_xfer *xs)
> -{
> -     u_char *b = (u_char *) xs->cmd;
> -     int i = 0;
> -
> -     sc_print_addr(xs->sc_link);
> -
> -     printf("xs(%p): ", xs);
> -
> -     printf("flg(0x%x)", xs->flags);
> -     printf("sc_link(%p)", xs->sc_link);
> -     printf("retr(0x%x)", xs->retries);
> -     printf("timo(0x%x)", xs->timeout);
> -     printf("cmd(%p)", xs->cmd);
> -     printf("len(0x%x)", xs->cmdlen);
> -     printf("data(%p)", xs->data);
> -     printf("len(0x%x)", xs->datalen);
> -     printf("res(0x%x)", xs->resid);
> -     printf("err(0x%x)", xs->error);
> -     printf("bp(%p)\n", xs->bp);
> -
> -     printf("command: ");
> -
> -     if ((xs->flags & SCSI_RESET) == 0) {
> -             while (i < xs->cmdlen) {
> -                     if (i)
> -                             printf(",");
> -                     printf("%x", b[i++]);
> -             }
> -             printf("-[%d bytes]\n", xs->datalen);
> -     } else
> -             printf("-RESET-\n");
> -}
> -
> -void
> -show_mem(u_char *address, int num)
> -{
> -     int                                     x;
> -
> -     printf("------------------------------");
> -     for (x = 0; x < num; x++) {
> -             if ((x % 16) == 0)
> -                     printf("\n%03d: ", x);
> -             printf("%02x ", *address++);
> -     }
> -     printf("\n------------------------------\n");
> -}
> -#endif /* SCSIDEBUG */
> Index: scsiconf.c
> ===================================================================
> RCS file: /cvs/src/sys/scsi/scsiconf.c,v
> retrieving revision 1.142
> diff -u -p -r1.142 scsiconf.c
> --- scsiconf.c        10 Aug 2009 11:02:38 -0000      1.142
> +++ scsiconf.c        13 Aug 2009 21:44:17 -0000
> @@ -738,6 +738,7 @@ scsi_probedev(struct scsibus_softc *scsi
>       sc_link->target = target;
>       sc_link->lun = lun;
>       sc_link->device = &probe_switch;
> +     mtx_init(&sc_link->mtx, IPL_BIO);
>       inqbuf = &sc_link->inqdata;
>  
>       SC_DEBUG(sc_link, SDEV_DB2, ("scsi_link created.\n"));
> Index: scsiconf.h
> ===================================================================
> RCS file: /cvs/src/sys/scsi/scsiconf.h,v
> retrieving revision 1.100
> diff -u -p -r1.100 scsiconf.h
> --- scsiconf.h        13 Aug 2009 19:49:31 -0000      1.100
> +++ scsiconf.h        13 Aug 2009 21:44:17 -0000
> @@ -53,6 +53,7 @@
>  #include <sys/queue.h>
>  #include <sys/timeout.h>
>  #include <sys/workq.h>
> +#include <sys/mutex.h>
>  #include <machine/cpu.h>
>  #include <scsi/scsi_debug.h>
>  
> @@ -369,6 +370,7 @@ struct scsi_link {
>       struct  scsibus_softc *bus;     /* link to the scsibus we're on */
>       struct  scsi_inquiry_data inqdata; /* copy of INQUIRY data from probe */
>       struct  devid id;
> +     struct  mutex mtx;
>  };
>  
>  int  scsiprint(void *, const char *);
> @@ -444,6 +446,8 @@ struct scsi_xfer {
>        * timeout structure for hba's to use for a command
>        */
>       struct timeout stimeout;
> +     void *cookie;
> +     void (*done)(struct scsi_xfer *);
>  };
>  
>  /*
> @@ -559,6 +563,10 @@ int      scsi_req_detach(struct scsibus_softc
>  
>  extern const u_int8_t version_to_spc[];
>  #define SCSISPC(x)(version_to_spc[(x) & SID_ANSII])
> +
> +struct scsi_xfer *   scsi_xs_get(struct scsi_link *, int);
> +void                 scsi_xs_exec(struct scsi_xfer *);
> +void                 scsi_xs_put(struct scsi_xfer *);
>  
>  /*
>   * Entrypoints for multipathing
> Index: sd.c
> ===================================================================
> RCS file: /cvs/src/sys/scsi/sd.c,v
> retrieving revision 1.157
> diff -u -p -r1.157 sd.c
> --- sd.c      13 Aug 2009 15:23:11 -0000      1.157
> +++ sd.c      13 Aug 2009 21:44:17 -0000
> @@ -83,10 +83,7 @@ int        sddetach(struct device *, int);
>  void sdminphys(struct buf *);
>  int  sdgetdisklabel(dev_t, struct sd_softc *, struct disklabel *, int);
>  void sdstart(void *);
> -void sdrestart(void *);
> -void sddone(struct scsi_xfer *);
>  void sd_shutdown(void *);
> -int  sd_reassign_blocks(struct sd_softc *, u_long);
>  int  sd_interpret_sense(struct scsi_xfer *);
>  int  sd_get_parms(struct sd_softc *, struct disk_parms *, int);
>  void sd_flush(struct sd_softc *, int);
> @@ -96,6 +93,16 @@ void       viscpy(u_char *, u_char *, int);
>  
>  int  sd_ioctl_inquiry(struct sd_softc *, struct dk_inquiry *);
>  
> +struct buf *sd_buf_dequeue(struct sd_softc *);
> +void sd_buf_requeue(struct sd_softc *, struct buf *);
> +
> +void sd_cmd_rw6(struct scsi_xfer *, int, daddr64_t, u_int);
> +void sd_cmd_rw10(struct scsi_xfer *, int, daddr64_t, u_int);
> +void sd_cmd_rw12(struct scsi_xfer *, int, daddr64_t, u_int);
> +void sd_cmd_rw16(struct scsi_xfer *, int, daddr64_t, u_int);
> +
> +void sd_buf_done(struct scsi_xfer *);
> +
>  struct cfattach sd_ca = {
>       sizeof(struct sd_softc), sdmatch, sdattach,
>       sddetach, sdactivate
> @@ -111,7 +118,7 @@ struct scsi_device sd_switch = {
>       sd_interpret_sense,     /* check out error handler first */
>       sdstart,                /* have a queue, served by this */
>       NULL,                   /* have no async handler */
> -     sddone,                 /* deal with stats at interrupt time */
> +     NULL,                   /* have no done handler */
>  };
>  
>  const struct scsi_inquiry_pattern sd_patterns[] = {
> @@ -163,6 +170,8 @@ sdattach(struct device *parent, struct d
>  
>       SC_DEBUG(sc_link, SDEV_DB2, ("sdattach:\n"));
>  
> +     mtx_init(&sd->sc_buf_mtx, IPL_BIO);
> +
>       /*
>        * Store information needed to contact our base driver
>        */
> @@ -197,7 +206,7 @@ sdattach(struct device *parent, struct d
>        */
>       printf("\n");
>  
> -     timeout_set(&sd->sc_timeout, sdrestart, sd);
> +     timeout_set(&sd->sc_timeout, sdstart, sd);
>  
>       /* Spin up non-UMASS devices ready or not. */
>       if ((sd->sc_link->flags & SDEV_UMASS) == 0)
> @@ -551,12 +560,12 @@ sdstrategy(struct buf *bp)
>           (sd->flags & (SDF_WLABEL|SDF_LABELLING)) != 0) <= 0)
>               goto done;
>  
> -     s = splbio();
> -
>       /*
>        * Place it in the queue of disk activities for this disk
>        */
> -     disksort(&sd->buf_queue, bp);
> +     mtx_enter(&sd->sc_buf_mtx);
> +     disksort(&sd->sc_buf_queue, bp);
> +     mtx_leave(&sd->sc_buf_mtx);
>  
>       /*
>        * Tell the device to get going on the transfer if it's
> @@ -564,8 +573,6 @@ sdstrategy(struct buf *bp)
>        */
>       sdstart(sd);
>  
> -     splx(s);
> -
>       device_unref(&sd->sc_dev);
>       return;
>  
> @@ -583,6 +590,77 @@ done:
>               device_unref(&sd->sc_dev);
>  }
>  
> +struct buf *
> +sd_buf_dequeue(struct sd_softc *sc)
> +{
> +     struct buf *bp;
> +
> +     mtx_enter(&sc->sc_buf_mtx); /* sc_buf_queue is changed under sc_buf_mtx */
> +     bp = sc->sc_buf_queue.b_actf; /* head of the queue, NULL if empty */
> +     if (bp != NULL)
> +             sc->sc_buf_queue.b_actf = bp->b_actf; /* unlink the head */
> +     mtx_leave(&sc->sc_buf_mtx);
> +
> +     return (bp);
> +}
> +
> +void
> +sd_buf_requeue(struct sd_softc *sc, struct buf *bp)
> +{
> +     mtx_enter(&sc->sc_buf_mtx); /* sc_buf_queue is changed under sc_buf_mtx */
> +     bp->b_actf = sc->sc_buf_queue.b_actf; /* bp goes in front of the old head */
> +     sc->sc_buf_queue.b_actf = bp;
> +     mtx_leave(&sc->sc_buf_mtx);
> +}
> +
> +void
> +sd_cmd_rw6(struct scsi_xfer *xs, int read, daddr64_t blkno, u_int nblks)
> +{
> +     struct scsi_rw *cmd = (struct scsi_rw *)xs->cmd; /* built in place */
> +
> +     cmd->opcode = read ? READ_COMMAND : WRITE_COMMAND;
> +     _lto3b(blkno, cmd->addr); /* NOTE(review): 64-bit blkno; confirm it fits */
> +     cmd->length = nblks; /* NOTE(review): may narrow; confirm caller bounds */
> +
> +     xs->cmdlen = sizeof(*cmd); /* command length is sizeof(struct scsi_rw) */
> +}
> +
> +void
> +sd_cmd_rw10(struct scsi_xfer *xs, int read, daddr64_t blkno, u_int nblks)
> +{
> +     struct scsi_rw_big *cmd = (struct scsi_rw_big *)xs->cmd; /* built in place */
> +
> +     cmd->opcode = read ? READ_BIG : WRITE_BIG;
> +     _lto4b(blkno, cmd->addr); /* NOTE(review): 64-bit blkno; confirm it fits */
> +     _lto2b(nblks, cmd->length); /* NOTE(review): confirm caller bounds nblks */
> +
> +     xs->cmdlen = sizeof(*cmd); /* length is sizeof(struct scsi_rw_big) */
> +}
> +
> +void
> +sd_cmd_rw12(struct scsi_xfer *xs, int read, daddr64_t blkno, u_int nblks)
> +{
> +     struct scsi_rw_12 *cmd = (struct scsi_rw_12 *)xs->cmd; /* built in place */
> +
> +     cmd->opcode = read ? READ_12 : WRITE_12;
> +     _lto4b(blkno, cmd->addr); /* NOTE(review): 64-bit blkno; confirm it fits */
> +     _lto4b(nblks, cmd->length);
> +
> +     xs->cmdlen = sizeof(*cmd); /* length is sizeof(struct scsi_rw_12) */
> +}
> +
> +void
> +sd_cmd_rw16(struct scsi_xfer *xs, int read, daddr64_t blkno, u_int nblks)
> +{
> +     struct scsi_rw_16 *cmd = (struct scsi_rw_16 *)xs->cmd; /* built in place */
> +
> +     cmd->opcode = read ? READ_16 : WRITE_16;
> +     _lto8b(blkno, cmd->addr); /* 16-byte r/w takes a 64-bit block address */
> +     _lto4b(nblks, cmd->length);
> +
> +     xs->cmdlen = sizeof(*cmd); /* length is sizeof(struct scsi_rw_16) */
> +}
> +
>  /*
>   * sdstart looks to see if there is a buf waiting for the device
>   * and that the device is not already busy. If both are true,
> @@ -595,62 +673,31 @@ done:
>   * This routine is also called after other non-queued requests
>   * have been made of the scsi driver, to ensure that the queue
>   * continues to be drained.
> - *
> - * must be called at the correct (highish) spl level
> - * sdstart() is called at splbio from sdstrategy, sdrestart and scsi_done
>   */
>  void
>  sdstart(void *v)
>  {
> -     struct sd_softc *sd = (struct sd_softc *)v;
> -     struct scsi_link *sc_link = sd->sc_link;
> -     struct buf *bp = 0;
> -     struct buf *dp;
> -     struct scsi_rw_big cmd_big;
> -     struct scsi_rw_12 cmd_12;
> -     struct scsi_rw_16 cmd_16;
> -     struct scsi_rw cmd_small;
> -     struct scsi_generic *cmdp;
> +     struct sd_softc *sc = (struct sd_softc *)v;
> +     struct scsi_link *link = sc->sc_link;
> +     struct scsi_xfer *xs;
> +     struct buf *bp;
>       daddr64_t blkno;
> -     int nblks, cmdlen, error;
> +     int nblks;
> +     int read;
>       struct partition *p;
>  
> -     if (sd->flags & SDF_DYING)
> +     if (sc->flags & SDF_DYING)
>               return;
>  
>       SC_DEBUG(sc_link, SDEV_DB2, ("sdstart\n"));
>  
> -     splassert(IPL_BIO);
> -
> -     /*
> -      * Check if the device has room for another command
> -      */
> -     while (sc_link->openings > 0) {
> -             /*
> -              * there is excess capacity, but a special waits
> -              * It'll need the adapter as soon as we clear out of the
> -              * way and let it run (user level wait).
> -              */
> -             if (sc_link->flags & SDEV_WAITING) {
> -                     sc_link->flags &= ~SDEV_WAITING;
> -                     wakeup((caddr_t)sc_link);
> -                     return;
> -             }
> -
> -             /*
> -              * See if there is a buf with work for us to do..
> -              */
> -             dp = &sd->buf_queue;
> -             if ((bp = dp->b_actf) == NULL)  /* yes, an assign */
> -                     return;
> -             dp->b_actf = bp->b_actf;
> -
> +     while ((bp = sd_buf_dequeue(sc)) != NULL) {
>               /*
>                * If the device has become invalid, abort all the
>                * reads and writes until all files have been closed and
>                * re-opened
>                */
> -             if ((sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
> +             if ((link->flags & SDEV_MEDIA_LOADED) == 0) {
>                       bp->b_error = EIO;
>                       bp->b_flags |= B_ERROR;
>                       bp->b_resid = bp->b_bcount;
> @@ -658,129 +705,77 @@ sdstart(void *v)
>                       continue;
>               }
>  
> -             /*
> -              * We have a buf, now we should make a command
> -              *
> -              * First, translate the block to absolute and put it in terms
> -              * of the logical blocksize of the device.
> -              */
> +             xs = scsi_xs_get(link, SCSI_NOSLEEP);
> +             if (xs == NULL) {
> +                     sd_buf_requeue(sc, bp);
> +                     return;
> +             }
> +
>               blkno =
> -                 bp->b_blkno / (sd->sc_dk.dk_label->d_secsize / DEV_BSIZE);
> -             p = &sd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
> +                 bp->b_blkno / (sc->sc_dk.dk_label->d_secsize / DEV_BSIZE);
> +             p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
>               blkno += DL_GETPOFFSET(p);
> -             nblks = howmany(bp->b_bcount, sd->sc_dk.dk_label->d_secsize);
> +             nblks = howmany(bp->b_bcount, sc->sc_dk.dk_label->d_secsize);
> +             read = bp->b_flags & B_READ;
>  
>               /*
>                *  Fill out the scsi command.  If the transfer will
>                *  fit in a "small" cdb, use it.
>                */
> -             if (!(sc_link->flags & SDEV_ATAPI) &&
> -                 !(sc_link->quirks & SDEV_ONLYBIG) &&
> +             if (!(link->flags & SDEV_ATAPI) &&
> +                 !(link->quirks & SDEV_ONLYBIG) &&
>                   ((blkno & 0x1fffff) == blkno) &&
> -                 ((nblks & 0xff) == nblks)) {
> -                     /*
> -                      * We can fit in a 6 byte cdb.
> -                      */
> -                     bzero(&cmd_small, sizeof(cmd_small));
> -                     cmd_small.opcode = (bp->b_flags & B_READ) ?
> -                         READ_COMMAND : WRITE_COMMAND;
> -                     _lto3b(blkno, cmd_small.addr);
> -                     cmd_small.length = nblks;
> -                     cmdlen = sizeof(cmd_small);
> -                     cmdp = (struct scsi_generic *)&cmd_small;
> -             } else if (((blkno & 0xffffffff) == blkno) &&
> -                 ((nblks & 0xffff) == nblks)) {
> -                     /*
> -                      * We can fit in a 10 byte cdb.
> -                      */
> -                     bzero(&cmd_big, sizeof(cmd_big));
> -                     cmd_big.opcode = (bp->b_flags & B_READ) ?
> -                         READ_BIG : WRITE_BIG;
> -                     _lto4b(blkno, cmd_big.addr);
> -                     _lto2b(nblks, cmd_big.length);
> -                     cmdlen = sizeof(cmd_big);
> -                     cmdp = (struct scsi_generic *)&cmd_big;
> -             } else if (((blkno & 0xffffffff) == blkno) &&
> -                 ((nblks & 0xffffffff) == nblks)) {
> -                     /*
> -                      * We can fit in a 12 byte cdb.
> -                      */
> -                     bzero(&cmd_12, sizeof(cmd_12));
> -                     cmd_12.opcode = (bp->b_flags & B_READ) ?
> -                         READ_12 : WRITE_12;
> -                     _lto4b(blkno, cmd_12.addr);
> -                     _lto4b(nblks, cmd_12.length);
> -                     cmdlen = sizeof(cmd_12);
> -                     cmdp = (struct scsi_generic *)&cmd_12;
> -             } else {
> -                     /*
> -                      * Need a 16 byte cdb. There's nothing bigger.
> -                      */
> -                     bzero(&cmd_16, sizeof(cmd_16));
> -                     cmd_16.opcode = (bp->b_flags & B_READ) ?
> -                         READ_16 : WRITE_16;
> -                     _lto8b(blkno, cmd_16.addr);
> -                     _lto4b(nblks, cmd_16.length);
> -                     cmdlen = sizeof(cmd_16);
> -                     cmdp = (struct scsi_generic *)&cmd_16;
> -             }
> +                 ((nblks & 0xff) == nblks))
> +                     sd_cmd_rw6(xs, read, blkno, nblks);
> +             else if (((blkno & 0xffffffff) == blkno) &&
> +                 ((nblks & 0xffff) == nblks))
> +                     sd_cmd_rw10(xs, read, blkno, nblks);
> +             else if (((blkno & 0xffffffff) == blkno) &&
> +                 ((nblks & 0xffffffff) == nblks))
> +                     sd_cmd_rw12(xs, read, blkno, nblks);
> +             else
> +                     sd_cmd_rw16(xs, read, blkno, nblks);
>  
> -             /* Instrumentation. */
> -             disk_busy(&sd->sc_dk);
> +             xs->flags |= (read ? SCSI_DATA_IN : SCSI_DATA_OUT);
> +             xs->timeout = 60000;
> +             xs->data = bp->b_data;
> +             xs->datalen = bp->b_bcount;
>  
> -             /*
> -              * Call the routine that chats with the adapter.
> -              * Note: we cannot sleep as we may be an interrupt
> -              */
> -             error = scsi_scsi_cmd(sc_link, cmdp, cmdlen,
> -                 (u_char *)bp->b_data, bp->b_bcount,
> -                 SCSI_RETRIES, 60000, bp, SCSI_NOSLEEP |
> -                 ((bp->b_flags & B_READ) ? SCSI_DATA_IN : SCSI_DATA_OUT));
> -             switch (error) {
> -             case 0:
> -                     /*
> -                      * Mark the disk dirty so that the cache will be
> -                      * flushed on close.
> -                      */
> -                     if ((bp->b_flags & B_READ) == 0)
> -                             sd->flags |= SDF_DIRTY;
> -                     timeout_del(&sd->sc_timeout);
> -                     break;
> -             case EAGAIN:
> -                     /*
> -                      * The device can't start another i/o. Try again later.
> -                      */
> -                     dp->b_actf = bp;
> -                     disk_unbusy(&sd->sc_dk, 0, 0);
> -                     timeout_add(&sd->sc_timeout, 1);
> -                     return;
> -             default:
> -                     disk_unbusy(&sd->sc_dk, 0, 0);
> -                     printf("%s: not queued, error %d\n",
> -                         sd->sc_dev.dv_xname, error);
> -                     break;
> -             }
> +             xs->done = sd_buf_done;
> +             xs->cookie = bp;
> +
> +             /* Instrumentation. */
> +             disk_busy(&sc->sc_dk);
> +             scsi_xs_exec(xs);
>       }
>  }
>  
>  void
> -sdrestart(void *v)
> +sd_buf_done(struct scsi_xfer *xs)
>  {
> +     struct sd_softc *sc = xs->sc_link->device_softc;
> +     struct buf *bp = xs->cookie;
>       int s;
>  
> +     disk_unbusy(&sc->sc_dk, bp->b_bcount - bp->b_resid,
> +         bp->b_flags & B_READ);
> +
> +     if (xs->error == XS_NOERROR) {
> +             bp->b_error = 0;
> +             bp->b_resid = xs->resid;
> +     } else {
> +             bp->b_error = EIO;
> +             bp->b_flags |= B_ERROR;
> +             bp->b_resid = bp->b_bcount;
> +     }
> +
>       s = splbio();
> -     sdstart(v);
> +     biodone(bp);
>       splx(s);
> -}
>  
> -void
> -sddone(struct scsi_xfer *xs)
> -{
> -     struct sd_softc *sd = xs->sc_link->device_softc;
> +     scsi_xs_put(xs);
>  
> -     if (xs->bp != NULL)
> -             disk_unbusy(&sd->sc_dk, (xs->bp->b_bcount - xs->bp->b_resid),
> -                 (xs->bp->b_flags & B_READ));
> +     sdstart(sc); /* XXX */
>  }
>  
>  void
> @@ -1081,27 +1076,6 @@ sd_shutdown(void *arg)
>  }
>  
>  /*
> - * Tell the device to map out a defective block
> - */
> -int
> -sd_reassign_blocks(struct sd_softc *sd, u_long blkno)
> -{
> -     struct scsi_reassign_blocks scsi_cmd;
> -     struct scsi_reassign_blocks_data rbdata;
> -
> -     bzero(&scsi_cmd, sizeof(scsi_cmd));
> -     bzero(&rbdata, sizeof(rbdata));
> -     scsi_cmd.opcode = REASSIGN_BLOCKS;
> -
> -     _lto2b(sizeof(rbdata.defect_descriptor[0]), rbdata.length);
> -     _lto4b(blkno, rbdata.defect_descriptor[0].dlbaddr);
> -
> -     return scsi_scsi_cmd(sd->sc_link, (struct scsi_generic *)&scsi_cmd,
> -         sizeof(scsi_cmd), (u_char *)&rbdata, sizeof(rbdata), SCSI_RETRIES,
> -         5000, NULL, SCSI_DATA_OUT);
> -}
> -
> -/*
>   * Check Errors
>   */
>  int
> @@ -1186,7 +1160,6 @@ sdsize(dev_t dev)
>  }
>  
>  /* #define SD_DUMP_NOT_TRUSTED if you just want to watch */
> -static struct scsi_xfer sx;
>  static int sddoingadump;
>  
>  /*
> @@ -1196,7 +1169,7 @@ static int sddoingadump;
>  int
>  sddump(dev_t dev, daddr64_t blkno, caddr_t va, size_t size)
>  {
> -     struct sd_softc *sd;    /* disk unit to do the I/O */
> +     struct sd_softc *sc;    /* disk unit to do the I/O */
>       struct disklabel *lp;   /* disk's disklabel */
>       int     unit, part;
>       int     sectorsize;     /* size of a disk sector */
> @@ -1204,9 +1177,7 @@ sddump(dev_t dev, daddr64_t blkno, caddr
>       daddr64_t       sectoff;        /* sector offset of partition */
>       int     totwrt;         /* total number of sectors left to write */
>       int     nwrt;           /* current number of sectors to write */
> -     struct scsi_rw_big cmd; /* write command */
>       struct scsi_xfer *xs;   /* ... convenience */
> -     int     retval;
>  
>       /* Check if recursive dump; if so, punt. */
>       if (sddoingadump)
> @@ -1219,7 +1190,7 @@ sddump(dev_t dev, daddr64_t blkno, caddr
>       part = DISKPART(dev);
>  
>       /* Check for acceptable drive number. */
> -     if (unit >= sd_cd.cd_ndevs || (sd = sd_cd.cd_devs[unit]) == NULL)
> +     if (unit >= sd_cd.cd_ndevs || (sc = sd_cd.cd_devs[unit]) == NULL)
>               return ENXIO;
>  
>       /*
> @@ -1229,12 +1200,12 @@ sddump(dev_t dev, daddr64_t blkno, caddr
>        */
>  #if 0
>       /* Make sure it was initialized. */
> -     if ((sd->sc_link->flags & SDEV_MEDIA_LOADED) != SDEV_MEDIA_LOADED)
> +     if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) != SDEV_MEDIA_LOADED)
>               return ENXIO;
>  #endif
>  
>       /* Convert to disk sectors.  Request must be a multiple of size. */
> -     lp = sd->sc_dk.dk_label;
> +     lp = sc->sc_dk.dk_label;
>       sectorsize = lp->d_secsize;
>       if ((size % sectorsize) != 0)
>               return EFAULT;
> @@ -1251,43 +1222,25 @@ sddump(dev_t dev, daddr64_t blkno, caddr
>       /* Offset block number to start of partition. */
>       blkno += sectoff;
>  
> -     xs = &sx;
> -
>       while (totwrt > 0) {
>               nwrt = totwrt;          /* XXX */
> +
>  #ifndef      SD_DUMP_NOT_TRUSTED
> -             /*
> -              *  Fill out the scsi command
> -              */
> -             bzero(&cmd, sizeof(cmd));
> -             cmd.opcode = WRITE_BIG;
> -             _lto4b(blkno, cmd.addr);
> -             _lto2b(nwrt, cmd.length);
> -             /*
> -              * Fill out the scsi_xfer structure
> -              *    Note: we cannot sleep as we may be an interrupt
> -              * don't use scsi_scsi_cmd() as it may want
> -              * to wait for an xs.
> -              */
> -             bzero(xs, sizeof(sx));
> -             xs->flags |= SCSI_AUTOCONF | SCSI_DATA_OUT;
> -             xs->sc_link = sd->sc_link;
> -             xs->retries = SCSI_RETRIES;
> -             xs->timeout = 10000;    /* 10000 millisecs for a disk ! */
> -             xs->cmd = (struct scsi_generic *)&cmd;
> -             xs->cmdlen = sizeof(cmd);
> -             xs->resid = nwrt * sectorsize;
> -             xs->error = XS_NOERROR;
> -             xs->bp = NULL;
> +             xs = scsi_xs_get(sc->sc_link, SCSI_NOSLEEP);
> +             if (xs == NULL)
> +                     return (ENOMEM);
> +
> +             xs->timeout = 10000;
> +             xs->flags = SCSI_POLL | SCSI_NOSLEEP | SCSI_DATA_OUT;
>               xs->data = va;
>               xs->datalen = nwrt * sectorsize;
>  
> -             /*
> -              * Pass all this info to the scsi driver.
> -              */
> -             retval = (*(sd->sc_link->adapter->scsi_cmd)) (xs);
> -             if (retval != COMPLETE)
> -                     return ENXIO;
> +             sd_cmd_rw10(xs, 0, blkno, nwrt); /* XXX */
> +
> +             scsi_xs_exec(xs);
> +             if (xs->error != XS_NOERROR)
> +                     return (ENXIO);
> +             scsi_xs_put(xs);
>  #else        /* SD_DUMP_NOT_TRUSTED */
>               /* Let's just talk about this first... */
>               printf("sd%d: dump addr 0x%x, blk %d\n", unit, va, blkno);
> @@ -1299,8 +1252,10 @@ sddump(dev_t dev, daddr64_t blkno, caddr
>               blkno += nwrt;
>               va += sectorsize * nwrt;
>       }
> +
>       sddoingadump = 0;
> -     return 0;
> +
> +     return (0);
>  }
>  
>  /*
> @@ -1469,14 +1424,17 @@ validate:
>  
>       return (SDGP_RESULT_OK);
>  }
> +void
> +sd_flush_done(struct scsi_xfer *xs);
>  
>  void
> -sd_flush(struct sd_softc *sd, int flags)
> +sd_flush(struct sd_softc *sc, int flags)
>  {
> -     struct scsi_link *sc_link = sd->sc_link;
> -     struct scsi_synchronize_cache cmd;
> +     struct scsi_link *link = sc->sc_link;
> +     struct scsi_xfer *xs;
> +     struct scsi_synchronize_cache *cmd;
>  
> -     if (sc_link->quirks & SDEV_NOSYNCCACHE)
> +     if (link->quirks & SDEV_NOSYNCCACHE)
>               return;
>  
>       /*
> @@ -1485,15 +1443,38 @@ sd_flush(struct sd_softc *sd, int flags)
>        * that the command is not supported by the device.
>        */
>  
> -     bzero(&cmd, sizeof(cmd));
> -     cmd.opcode = SYNCHRONIZE_CACHE;
> -             
> -     if (scsi_scsi_cmd(sc_link, (struct scsi_generic *)&cmd, sizeof(cmd),
> -         NULL, 0, SCSI_RETRIES, 100000, NULL,
> -         flags | SCSI_IGNORE_ILLEGAL_REQUEST)) {
> +     xs = scsi_xs_get(link, flags);
> +     if (xs == NULL) {
> +             SC_DEBUG(sc_link, SDEV_DB1, ("cache sync failed to get xs\n"));
> +             return;
> +     }
> +
> +     cmd = (struct scsi_synchronize_cache *)xs->cmd;
> +     cmd->opcode = SYNCHRONIZE_CACHE;
> +
> +     xs->timeout = 100000;
> +
> +     xs->done = sd_flush_done;
> +
> +     scsi_xs_exec(xs);
> +     if (!ISSET(xs->flags, SCSI_POLL)) {
> +             while (!ISSET(xs->flags, ITSDONE))
> +                     tsleep(xs, PRIBIO, "sdflush", 0);
> +     }
> +
> +     if (xs->error != XS_NOERROR)
>               SC_DEBUG(sc_link, SDEV_DB1, ("cache sync failed\n"));
> -     } else
> -             sd->flags &= ~SDF_DIRTY;
> +     else
> +             sc->flags &= ~SDF_DIRTY;
> +
> +     scsi_xs_put(xs);
> +}
> +
> +void
> +sd_flush_done(struct scsi_xfer *xs)
> +{
> +     if (!ISSET(xs->flags, SCSI_POLL))
> +             wakeup_one(xs);
>  }
>  
>  /*
> @@ -1502,16 +1483,11 @@ sd_flush(struct sd_softc *sd, int flags)
>  void
>  sd_kill_buffers(struct sd_softc *sd)
>  {
> -     struct buf *dp, *bp;
> -     int s;
> -
> -     s = splbio();
> -     for (dp = &sd->buf_queue; (bp = dp->b_actf) != NULL; ) {
> -             dp->b_actf = bp->b_actf;
> +     struct buf *bp;
>  
> +     while ((bp = sd_buf_dequeue(sd)) != NULL) {
>               bp->b_error = ENXIO;
>               bp->b_flags |= B_ERROR;
>               biodone(bp);
>       }
> -     splx(s);
>  }
> Index: sdvar.h
> ===================================================================
> RCS file: /cvs/src/sys/scsi/sdvar.h,v
> retrieving revision 1.18
> diff -u -p -r1.18 sdvar.h
> --- sdvar.h   17 Jun 2009 01:30:32 -0000      1.18
> +++ sdvar.h   13 Aug 2009 21:44:17 -0000
> @@ -49,10 +49,10 @@
>  
>  #ifdef _KERNEL
>  struct sd_softc {
> -     struct device sc_dev;
> -     struct disk sc_dk;
> +     struct device           sc_dev;
> +     struct disk             sc_dk;
>  
> -     int flags;
> +     int                     flags;
>  #define      SDF_LOCKED      0x01
>  #define      SDF_WANTED      0x02
>  #define      SDF_WLABEL      0x04            /* label is writable */
> @@ -60,7 +60,7 @@ struct sd_softc {
>  #define      SDF_ANCIENT     0x10            /* disk is ancient; for minphys */
>  #define      SDF_DIRTY       0x20            /* disk is dirty; needs cache flush */
>  #define      SDF_DYING       0x40            /* dying, when deactivated */
> -     struct scsi_link *sc_link;      /* contains our targ, lun, etc. */
> +     struct scsi_link        *sc_link; /* contains our targ, lun, etc. */
>       struct disk_parms {
>               u_long  heads;          /* number of heads */
>               u_long  cyls;           /* number of cylinders */
> @@ -69,9 +69,11 @@ struct sd_softc {
>               u_long  rot_rate;       /* rotational rate, in RPM */
>               daddr64_t       disksize;       /* total number sectors */
>       } params;
> -     struct buf buf_queue;
> +     struct mutex sc_buf_mtx;
> +     struct buf sc_buf_queue;
>       void *sc_sdhook;                /* our shutdown hook */
>       struct timeout sc_timeout;
> +     
>  };
>  
>  #define      SDGP_RESULT_OK          0       /* parameters obtained */
This second patch applies cleanly, and no errors were generated in dmesg.
However, subsequent use of growisofs fails to burn a DVD and hangs the
computer.  The following error message was displayed on the console:

cd0(atapiscsi0:0:0):User Command with no buffer

Thanks, Dave.
Dmesg is attached below:
OpenBSD 4.6-current (GENERIC.MP) #3: Fri Aug 14 07:28:33 EDT 2009
    [email protected]:/usr/src/sys/arch/i386/compile/GENERIC.MP
cpu0: Intel(R) Core(TM) Duo CPU L2500 @ 1.83GHz ("GenuineIntel" 686-class) 1.83 
GHz
cpu0: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,VMX,EST,TM2,xTPR
real mem  = 2137419776 (2038MB)
avail mem = 2062163968 (1966MB)
mainbus0 at root
bios0 at mainbus0: AT/286+ BIOS, date 11/20/08, BIOS32 rev. 0 @ 0xfd690, SMBIOS 
rev. 2.4 @ 0xe0010 (67 entries)
bios0: vendor LENOVO version "7BETD7WW (2.18 )" date 11/20/2008
bios0: LENOVO 1702H7U
acpi0 at bios0: rev 2
acpi0: tables DSDT FACP SSDT ECDT TCPA APIC MCFG HPET BOOT SSDT SSDT SSDT SSDT
acpi0: wakeup devices LID_(S3) SLPB(S3) DURT(S3) EXP0(S4) EXP1(S4) EXP2(S4) 
EXP3(S4) PCI1(S4) USB0(S3) USB1(S3) USB2(S3) USB7(S3) HDEF(S4)
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: apic clock running at 166MHz
cpu1 at mainbus0: apid 1 (application processor)
cpu1: Intel(R) Core(TM) Duo CPU L2500 @ 1.83GHz ("GenuineIntel" 686-class) 1.83 
GHz
cpu1: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,VMX,EST,TM2,xTPR
ioapic0 at mainbus0: apid 1 pa 0xfec00000, version 20, 24 pins
ioapic0: misconfigured as apic 2, remapped to apid 1
acpihpet0 at acpi0: 14318179 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (AGP_)
acpiprt2 at acpi0: bus 2 (EXP0)
acpiprt3 at acpi0: bus 3 (EXP1)
acpiprt4 at acpi0: bus 4 (EXP2)
acpiprt5 at acpi0: bus 12 (EXP3)
acpiprt6 at acpi0: bus 21 (PCI1)
acpiec0 at acpi0
acpicpu0 at acpi0: C3, C2, C1, PSS
acpicpu1 at acpi0: C3, C2, C1, PSS
acpipwrres0 at acpi0: PUBS
acpitz0 at acpi0: critical temperature 127 degC
acpitz1 at acpi0: critical temperature 97 degC
acpibtn0 at acpi0: LID_
acpibtn1 at acpi0: SLPB
acpibat0 at acpi0: BAT0 model "42T4571" serial  6761 type LION oem "SONY"
acpibat1 at acpi0: BAT1 not present
acpibat2 at acpi0: BAT2 not present
acpiac0 at acpi0: AC unit online
acpithinkpad0 at acpi0
acpidock0 at acpi0: GDCK docked (15)
bios0: ROM list: 0xc0000/0xea00! 0xcf000/0x1000 0xd0000/0x1000 0xdc000/0x4000! 
0xe0000/0x10000!
cpu0: Enhanced SpeedStep 1829 MHz: speeds: 1833, 1333, 1000 MHz
pci0 at mainbus0 bus 0: configuration mode 1 (bios)
pchb0 at pci0 dev 0 function 0 "Intel 82945GM Host" rev 0x03
vga1 at pci0 dev 2 function 0 "Intel 82945GM Video" rev 0x03
wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation)
wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
intagp0 at vga1
agp0 at intagp0: aperture at 0xd0000000, size 0x10000000
inteldrm0 at vga1: apic 1 int 16 (irq 11)
drm0 at inteldrm0
"Intel 82945GM Video" rev 0x03 at pci0 dev 2 function 1 not configured
azalia0 at pci0 dev 27 function 0 "Intel 82801GB HD Audio" rev 0x02: apic 1 int 
17 (irq 11)
azalia0: RIRB time out
azalia0: codecs: Analog Devices AD1981HD, 0x0000/0x0000, using Analog Devices 
AD1981HD
azalia0: RIRB time out
audio0 at azalia0
ppb0 at pci0 dev 28 function 0 "Intel 82801GB PCIE" rev 0x02: apic 1 int 20 
(irq 11)
pci1 at ppb0 bus 2
em0 at pci1 dev 0 function 0 "Intel PRO/1000MT (82573L)" rev 0x00: apic 1 int 
16 (irq 11), address 00:16:d3:32:eb:43
ppb1 at pci0 dev 28 function 1 "Intel 82801GB PCIE" rev 0x02: apic 1 int 21 
(irq 11)
pci2 at ppb1 bus 3
wpi0 at pci2 dev 0 function 0 "Intel PRO/Wireless 3945ABG" rev 0x02: apic 1 int 
17 (irq 11), MoW1, address 00:19:d2:28:0b:06
ppb2 at pci0 dev 28 function 2 "Intel 82801GB PCIE" rev 0x02: apic 1 int 22 
(irq 11)
pci3 at ppb2 bus 4
ppb3 at pci0 dev 28 function 3 "Intel 82801GB PCIE" rev 0x02: apic 1 int 23 
(irq 11)
pci4 at ppb3 bus 12
uhci0 at pci0 dev 29 function 0 "Intel 82801GB USB" rev 0x02: apic 1 int 16 
(irq 11)
uhci1 at pci0 dev 29 function 1 "Intel 82801GB USB" rev 0x02: apic 1 int 17 
(irq 11)
uhci2 at pci0 dev 29 function 2 "Intel 82801GB USB" rev 0x02: apic 1 int 18 
(irq 11)
uhci3 at pci0 dev 29 function 3 "Intel 82801GB USB" rev 0x02: apic 1 int 19 
(irq 11)
ehci0 at pci0 dev 29 function 7 "Intel 82801GB USB" rev 0x02: apic 1 int 19 
(irq 11)
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 "Intel EHCI root hub" rev 2.00/1.00 addr 1
ppb4 at pci0 dev 30 function 0 "Intel 82801BAM Hub-to-PCI" rev 0xe2
pci5 at ppb4 bus 21
cbb0 at pci5 dev 0 function 0 "Ricoh 5C476 CardBus" rev 0xb4: apic 1 int 16 
(irq 11)
"Ricoh 5C552 Firewire" rev 0x09 at pci5 dev 0 function 1 not configured
sdhc0 at pci5 dev 0 function 2 "Ricoh 5C822 SD/MMC" rev 0x18: apic 1 int 18 
(irq 11)
sdmmc0 at sdhc0
cardslot0 at cbb0 slot 0 flags 0
cardbus0 at cardslot0: bus 22 device 0 cacheline 0x8, lattimer 0xb0
pcmcia0 at cardslot0
ichpcib0 at pci0 dev 31 function 0 "Intel 82801GBM LPC" rev 0x02: PM disabled
pciide0 at pci0 dev 31 function 1 "Intel 82801GB IDE" rev 0x02: DMA, channel 0 
configured to compatibility, channel 1 configured to compatibility
atapiscsi0 at pciide0 channel 0 drive 0
scsibus0 at atapiscsi0: 2 targets
cd0 at scsibus0 targ 0 lun 0: <HL-DT-ST, DVDRAM GSA-4083N, 1.08> ATAPI 5/cdrom 
removable
cd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 2
pciide0: channel 1 ignored (disabled)
ahci0 at pci0 dev 31 function 2 "Intel 82801GBM AHCI" rev 0x02: apic 1 int 16 
(irq 11), AHCI 1.1
scsibus1 at ahci0: 32 targets
sd0 at scsibus1 targ 0 lun 0: <ATA, MR25.2-S128G, B8K2> SCSI3 0/direct fixed
sd0: 122879MB, 512 bytes/sec, 251658239 sec total
ichiic0 at pci0 dev 31 function 3 "Intel 82801GB SMBus" rev 0x02: apic 1 int 23 
(irq 11)
iic0 at ichiic0
usb1 at uhci0: USB revision 1.0
uhub1 at usb1 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb2 at uhci1: USB revision 1.0
uhub2 at usb2 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb3 at uhci2: USB revision 1.0
uhub3 at usb3 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb4 at uhci3: USB revision 1.0
uhub4 at usb4 "Intel UHCI root hub" rev 1.00/1.00 addr 1
isa0 at ichpcib0
isadma0 at isa0
com0 at isa0 port 0x3f8/8 irq 4: ns16550a, 16 byte fifo
com1 at isa0 port 0x2f8/8 irq 3: ns16550a, 16 byte fifo
pckbc0 at isa0 port 0x60/5
pckbd0 at pckbc0 (kbd slot)
pckbc0: using irq 1 for kbd slot
wskbd0 at pckbd0: console keyboard, using wsdisplay0
pms0 at pckbc0 (aux slot)
pckbc0: using irq 12 for aux slot
wsmouse0 at pms0 mux 0
pcppi0 at isa0 port 0x61
midi0 at pcppi0: <PC speaker>
spkr0 at pcppi0
lpt0 at isa0 port 0x378/4 irq 7
aps0 at isa0 port 0x1600/31
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
mtrr: Pentium Pro MTRR support
uhub5 at uhub0 port 6 "vendor 0x17ef product 0x1000" rev 2.00/0.01 addr 2
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
root on sd0a swap on sd0b dump on sd0b

Reply via email to