Module Name:	src
Committed By:	martin
Date:		Sun Mar 18 11:05:27 UTC 2018
Modified Files:
	src/sys/dev/ic [netbsd-8]: ld_nvme.c nvme.c nvmevar.h

Log Message:
Pull up following revision(s) (requested by jdolecek in ticket #641):

	sys/dev/ic/nvme.c: revision 1.34
	sys/dev/ic/nvme.c: revision 1.35
	sys/dev/ic/nvme.c: revision 1.36
	sys/dev/ic/nvme.c: revision 1.37
	sys/dev/ic/ld_nvme.c: revision 1.19
	sys/dev/ic/nvmevar.h: revision 1.15

refactor the locking code around DIOCGCACHE handling to be reusable for
other infrequent commands; it uses a single condvar for simplicity, and
uses it both when waiting for a ccb and when waiting for command
completion - this is fine, since usually there will be just one such
command queued anyway

use this to finally properly implement DIOCCACHESYNC - return only after
the command is confirmed as completed by the controller

switch handling of passthrough commands to use the queue instead of
polling; should fix PR kern/53059 by Frank Kardel

fix passthrough command usage also in nvme_get_number_of_queues(); this
fixes memory corruption and a possible panic on boot

also remove the now-duplicate nvme_ccb_put() call from
nvme_get_number_of_queues()


To generate a diff of this commit:
cvs rdiff -u -r1.16.2.1 -r1.16.2.2 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.30.2.1 -r1.30.2.2 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.13.6.1 -r1.13.6.2 src/sys/dev/ic/nvmevar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
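The core of the change is a small wait-for-completion primitive: the
submitting thread queues the command, then sleeps on the queue's condvar
until a caller-supplied predicate confirms that the completion handler has
run. The sketch below illustrates that pattern in userland C, with pthreads
standing in for the kernel's kmutex(9)/condvar(9) primitives and a worker
thread standing in for the NVMe completion interrupt; all names here
(cmd_state, queue_wait_complete, completion_handler) are illustrative, not
the driver's own.

	/*
	 * Minimal sketch of the condvar wait-for-completion pattern from
	 * the log message above. Illustrative only.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <unistd.h>

	struct queue {
		pthread_mutex_t mtx;
		pthread_cond_t wait;	/* single condvar, as in the commit */
	};

	struct cmd_state {
		struct queue *q;
		int result;	/* 0 = pending, >0 = success, <0 = error */
	};

	static bool
	cmd_finished(void *cookie)
	{
		struct cmd_state *st = cookie;

		return st->result != 0;
	}

	/* Completion side: what a *_done() callback does, schematically. */
	static void *
	completion_handler(void *cookie)
	{
		struct cmd_state *st = cookie;

		usleep(100 * 1000);	/* pretend the controller is working */

		pthread_mutex_lock(&st->q->mtx);
		st->result = 1;				/* record the outcome */
		pthread_cond_broadcast(&st->q->wait);	/* wake the waiter */
		pthread_mutex_unlock(&st->q->mtx);
		return NULL;
	}

	/* Submission side: what nvme_q_wait_complete() does, schematically. */
	static void
	queue_wait_complete(struct queue *q, bool (*finished)(void *),
	    void *cookie)
	{
		pthread_mutex_lock(&q->mtx);
		while (!finished(cookie))
			pthread_cond_wait(&q->wait, &q->mtx);
		pthread_mutex_unlock(&q->mtx);
	}

	int
	main(void)
	{
		struct queue q = {
			.mtx = PTHREAD_MUTEX_INITIALIZER,
			.wait = PTHREAD_COND_INITIALIZER,
		};
		struct cmd_state st = { .q = &q, .result = 0 };
		pthread_t intr;

		/* "Submit" the command; the thread plays the interrupt. */
		pthread_create(&intr, NULL, completion_handler, &st);

		/* Return only once the command is confirmed complete. */
		queue_wait_complete(&q, cmd_finished, &st);
		printf("command completed, result=%d\n", st.result);

		pthread_join(&intr, NULL);
		return 0;
	}

Sharing one condvar for both "a ccb became free" and "the command
completed" keeps the queue structure small; the cost is an occasional
broadcast wakeup, which is acceptable because such commands are rare.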
Modified files:

Index: src/sys/dev/ic/ld_nvme.c
diff -u src/sys/dev/ic/ld_nvme.c:1.16.2.1 src/sys/dev/ic/ld_nvme.c:1.16.2.2
--- src/sys/dev/ic/ld_nvme.c:1.16.2.1	Fri Sep  1 09:59:11 2017
+++ src/sys/dev/ic/ld_nvme.c	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ld_nvme.c,v 1.16.2.1 2017/09/01 09:59:11 martin Exp $	*/
+/*	$NetBSD: ld_nvme.c,v 1.16.2.2 2018/03/18 11:05:27 martin Exp $	*/
 
 /*-
  * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org>
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.16.2.1 2017/09/01 09:59:11 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.16.2.2 2018/03/18 11:05:27 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -49,14 +49,6 @@ struct ld_nvme_softc {
 	struct nvme_softc	*sc_nvme;
 	uint16_t		sc_nsid;
-
-	/* getcache handling */
-	kmutex_t		sc_getcache_lock;
-	kcondvar_t		sc_getcache_cv;
-	kcondvar_t		sc_getcache_ready_cv;
-	bool			sc_getcache_waiting;
-	bool			sc_getcache_ready;
-	int			sc_getcache_result;
 };
 
 static int	ld_nvme_match(device_t, cfdata_t, void *);
@@ -73,8 +65,6 @@ static int ld_nvme_getcache(struct ld_so
 static int	ld_nvme_ioctl(struct ld_softc *, u_long, void *, int32_t, bool);
 
 static void	ld_nvme_biodone(void *, struct buf *, uint16_t, uint32_t);
-static void	ld_nvme_syncdone(void *, struct buf *, uint16_t, uint32_t);
-static void	ld_nvme_getcache_done(void *, struct buf *, uint16_t, uint32_t);
 
 static int
 ld_nvme_match(device_t parent, cfdata_t match, void *aux)
@@ -103,10 +93,6 @@ ld_nvme_attach(device_t parent, device_t
 	sc->sc_nvme = nsc;
 	sc->sc_nsid = naa->naa_nsid;
 
-	mutex_init(&sc->sc_getcache_lock, MUTEX_DEFAULT, IPL_SOFTBIO);
-	cv_init(&sc->sc_getcache_cv, "nvmegcq");
-	cv_init(&sc->sc_getcache_ready_cv, "nvmegcr");
-
 	aprint_naive("\n");
 	aprint_normal("\n");
@@ -203,116 +189,16 @@ ld_nvme_flush(struct ld_softc *ld, bool
 {
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 
-	if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
-		/* cache not present, no value in trying to flush it */
-		return 0;
-	}
-
-	return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid, sc,
-	    poll ? NVME_NS_CTX_F_POLL : 0,
-	    ld_nvme_syncdone);
-}
-
-static void
-ld_nvme_syncdone(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0)
-{
-	/* nothing to do */
+	return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid,
+	    poll ? NVME_NS_CTX_F_POLL : 0);
 }
 
 static int
 ld_nvme_getcache(struct ld_softc *ld, int *addr)
 {
-	int error;
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 
-	/*
-	 * DPO not supported, Dataset Management (DSM) field doesn't specify
-	 * the same semantics.
-	 */
-	*addr = DKCACHE_FUA;
-
-	if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
-		/* cache simply not present */
-		return 0;
-	}
-
-	/*
-	 * This is admin queue request. The queue is relatively limited in size,
-	 * and this is not performance critical call, so have at most one pending
-	 * cache request at a time to avoid spurious EWOULDBLOCK failures.
-	 */
-	mutex_enter(&sc->sc_getcache_lock);
-	while (sc->sc_getcache_waiting) {
-		error = cv_wait_sig(&sc->sc_getcache_cv, &sc->sc_getcache_lock);
-		if (error)
-			goto out;
-	}
-	sc->sc_getcache_waiting = true;
-	sc->sc_getcache_ready = false;
-	mutex_exit(&sc->sc_getcache_lock);
-
-	error = nvme_admin_getcache(sc->sc_nvme, sc, ld_nvme_getcache_done);
-	if (error) {
-		mutex_enter(&sc->sc_getcache_lock);
-		goto out;
-	}
-
-	mutex_enter(&sc->sc_getcache_lock);
-	while (!sc->sc_getcache_ready) {
-		error = cv_wait_sig(&sc->sc_getcache_ready_cv,
-		    &sc->sc_getcache_lock);
-		if (error)
-			goto out;
-	}
-
-	KDASSERT(sc->sc_getcache_ready);
-
-	if (sc->sc_getcache_result >= 0)
-		*addr |= sc->sc_getcache_result;
-	else
-		error = EINVAL;
-
- out:
-	sc->sc_getcache_waiting = false;
-
-	/* wake one of eventual waiters */
-	cv_signal(&sc->sc_getcache_cv);
-
-	mutex_exit(&sc->sc_getcache_lock);
-
-	return error;
-}
-
-static void
-ld_nvme_getcache_done(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0)
-{
-	struct ld_nvme_softc *sc = xc;
-	uint16_t status = NVME_CQE_SC(cmd_status);
-	int result;
-
-	if (status == NVME_CQE_SC_SUCCESS) {
-		result = 0;
-
-		if (cdw0 & NVME_CQE_CDW0_VWC_WCE)
-			result |= DKCACHE_WRITE;
-
-		/*
-		 * If volatile write cache is present, the flag shall also be
-		 * settable.
-		 */
-		result |= DKCACHE_WCHANGE;
-	} else {
-		result = -1;
-	}
-
-	mutex_enter(&sc->sc_getcache_lock);
-	sc->sc_getcache_result = result;
-	sc->sc_getcache_ready = true;
-
-	/* wake up the waiter */
-	cv_signal(&sc->sc_getcache_ready_cv);
-
-	mutex_exit(&sc->sc_getcache_lock);
+	return nvme_admin_getcache(sc->sc_nvme, addr);
 }
 
 static int

Index: src/sys/dev/ic/nvme.c
diff -u src/sys/dev/ic/nvme.c:1.30.2.1 src/sys/dev/ic/nvme.c:1.30.2.2
--- src/sys/dev/ic/nvme.c:1.30.2.1	Sat Mar 17 08:11:18 2018
+++ src/sys/dev/ic/nvme.c	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvme.c,v 1.30.2.1 2018/03/17 08:11:18 martin Exp $	*/
+/*	$NetBSD: nvme.c,v 1.30.2.2 2018/03/18 11:05:27 martin Exp $	*/
 /*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $	*/
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.30.2.1 2018/03/17 08:11:18 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.30.2.2 2018/03/18 11:05:27 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -61,7 +61,7 @@ static int nvme_ccbs_alloc(struct nvme_q
 static void	nvme_ccbs_free(struct nvme_queue *);
 
 static struct nvme_ccb *
-		nvme_ccb_get(struct nvme_queue *);
+		nvme_ccb_get(struct nvme_queue *, bool);
 static void	nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);
 
 static int	nvme_poll(struct nvme_softc *, struct nvme_queue *,
@@ -83,6 +83,8 @@ static void nvme_q_submit(struct nvme_so
 		    struct nvme_ccb *, void *));
 static int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
 static void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
+static void	nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *,
+		    bool (*)(void *), void *);
 
 static struct nvme_dmamem *
 		nvme_dmamem_alloc(struct nvme_softc *, size_t);
@@ -564,7 +566,7 @@ nvme_ns_identify(struct nvme_softc *sc, 
 
 	KASSERT(nsid > 0);
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
@@ -617,7 +619,7 @@ nvme_ns_dobio(struct nvme_softc *sc, uin
 	bus_dmamap_t dmap;
 	int i, error;
 
-	ccb = nvme_ccb_get(q);
+	ccb = nvme_ccb_get(q, false);
 	if (ccb == NULL)
 		return EAGAIN;
 
@@ -736,31 +738,44 @@ nvme_ns_io_done(struct nvme_queue *q, st
  * If there is no volatile write cache, it makes no sense to issue
  * flush commands or query for the status.
  */
-bool
+static bool
 nvme_has_volatile_write_cache(struct nvme_softc *sc)
 {
	/* sc_identify is filled during attachment */
	return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0);
 }
 
+static bool
+nvme_ns_sync_finished(void *cookie)
+{
+	int *result = cookie;
+
+	return (*result != 0);
+}
+
 int
-nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, void *cookie,
-    int flags, nvme_nnc_done nnc_done)
+nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags)
 {
	struct nvme_queue *q = nvme_get_q(sc);
	struct nvme_ccb *ccb;
+	int result = 0;
+
+	if (!nvme_has_volatile_write_cache(sc)) {
+		/* cache not present, no value in trying to flush it */
+		return 0;
+	}
 
-	ccb = nvme_ccb_get(q);
+	ccb = nvme_ccb_get(q, true);
	if (ccb == NULL)
		return EAGAIN;
 
	ccb->ccb_done = nvme_ns_sync_done;
-	ccb->ccb_cookie = cookie;
+	ccb->ccb_cookie = &result;
 
	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
-	ccb->nnc_done = nnc_done;
+	ccb->nnc_done = NULL;
 
	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
		if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0)
@@ -769,7 +784,12 @@ nvme_ns_sync(struct nvme_softc *sc, uint
	}
 
	nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);
-	return 0;
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result);
+	KASSERT(result != 0);
+
+	return (result > 0) ? 0 : EIO;
 }
 
 static void
@@ -785,36 +805,64 @@ static void
 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
     struct nvme_cqe *cqe)
 {
-	void *cookie = ccb->ccb_cookie;
-	nvme_nnc_done nnc_done = ccb->nnc_done;
+	int *result = ccb->ccb_cookie;
+	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
+
+	if (status == NVME_CQE_SC_SUCCESS)
+		*result = 1;
+	else
+		*result = -1;
 
	nvme_ccb_put(q, ccb);
+}
 
-	nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
+static bool
+nvme_getcache_finished(void *xc)
+{
+	int *addr = xc;
+
+	return (*addr != 0);
 }
 
 /*
  * Get status of volatile write cache. Always asynchronous.
  */
 int
-nvme_admin_getcache(struct nvme_softc *sc, void *cookie, nvme_nnc_done nnc_done)
+nvme_admin_getcache(struct nvme_softc *sc, int *addr)
 {
	struct nvme_ccb *ccb;
	struct nvme_queue *q = sc->sc_admin_q;
+	int result = 0, error;
 
-	ccb = nvme_ccb_get(q);
-	if (ccb == NULL)
-		return EAGAIN;
+	if (!nvme_has_volatile_write_cache(sc)) {
+		/* cache simply not present */
+		*addr = 0;
+		return 0;
+	}
+
+	ccb = nvme_ccb_get(q, true);
+	KASSERT(ccb != NULL);
 
	ccb->ccb_done = nvme_getcache_done;
-	ccb->ccb_cookie = cookie;
+	ccb->ccb_cookie = &result;
 
	/* namespace context */
	ccb->nnc_flags = 0;
-	ccb->nnc_done = nnc_done;
+	ccb->nnc_done = NULL;
 
	nvme_q_submit(sc, q, ccb, nvme_getcache_fill);
-	return 0;
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result);
+	KASSERT(result != 0);
+
+	if (result > 0) {
+		*addr = result;
+		error = 0;
+	} else
+		error = EINVAL;
+
+	return error;
 }
 
 static void
@@ -830,12 +878,35 @@ static void
 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb,
     struct nvme_cqe *cqe)
 {
-	void *cookie = ccb->ccb_cookie;
-	nvme_nnc_done nnc_done = ccb->nnc_done;
+	int *addr = ccb->ccb_cookie;
+	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
+	uint32_t cdw0 = lemtoh32(&cqe->cdw0);
+	int result;
 
-	nvme_ccb_put(q, ccb);
+	if (status == NVME_CQE_SC_SUCCESS) {
+		result = 0;
 
-	nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
+		/*
+		 * DPO not supported, Dataset Management (DSM) field doesn't
+		 * specify the same semantics. FUA is always supported.
+		 */
+		result = DKCACHE_FUA;
+
+		if (cdw0 & NVME_CQE_CDW0_VWC_WCE)
+			result |= DKCACHE_WRITE;
+
+		/*
+		 * If volatile write cache is present, the flag shall also be
+		 * settable.
+		 */
+		result |= DKCACHE_WCHANGE;
+	} else {
+		result = -1;
+	}
+
+	*addr = result;
+
+	nvme_ccb_put(q, ccb);
 }
 
 void
@@ -853,12 +924,18 @@ nvme_ns_free(struct nvme_softc *sc, uint
	kmem_free(identify, sizeof(*identify));
 }
 
+struct nvme_pt_state {
+	struct nvme_pt_command *pt;
+	bool finished;
+};
+
 static void
 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
 {
	struct nvme_softc *sc = q->q_sc;
	struct nvme_sqe *sqe = slot;
-	struct nvme_pt_command *pt = ccb->ccb_cookie;
+	struct nvme_pt_state *state = ccb->ccb_cookie;
+	struct nvme_pt_command *pt = state->pt;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	int i;
 
@@ -900,7 +977,8 @@ static void
 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
 {
	struct nvme_softc *sc = q->q_sc;
-	struct nvme_pt_command *pt = ccb->ccb_cookie;
+	struct nvme_pt_state *state = ccb->ccb_cookie;
+	struct nvme_pt_command *pt = state->pt;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
 
	if (pt->buf != NULL && pt->len > 0) {
@@ -919,6 +997,18 @@ nvme_pt_done(struct nvme_queue *q, struc
 
	pt->cpl.cdw0 = lemtoh32(&cqe->cdw0);
	pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE;
+
+	state->finished = true;
+
+	nvme_ccb_put(q, ccb);
+}
+
+static bool
+nvme_pt_finished(void *cookie)
+{
+	struct nvme_pt_state *state = cookie;
+
+	return state->finished;
 }
 
 static int
@@ -928,6 +1018,7 @@ nvme_command_passthrough(struct nvme_sof
	struct nvme_queue *q;
	struct nvme_ccb *ccb;
	void *buf = NULL;
+	struct nvme_pt_state state;
	int error;
 
	/* limit command size to maximum data transfer size */
@@ -936,9 +1027,8 @@ nvme_command_passthrough(struct nvme_sof
		return EINVAL;
 
	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
-	ccb = nvme_ccb_get(q);
-	if (ccb == NULL)
-		return EBUSY;
+	ccb = nvme_ccb_get(q, true);
+	KASSERT(ccb != NULL);
 
	if (pt->buf != NULL) {
		KASSERT(pt->len > 0);
@@ -959,24 +1049,30 @@ nvme_command_passthrough(struct nvme_sof
		    pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
	}
 
+	memset(&state, 0, sizeof(state));
+	state.pt = pt;
+	state.finished = false;
+
	ccb->ccb_done = nvme_pt_done;
-	ccb->ccb_cookie = pt;
+	ccb->ccb_cookie = &state;
 
	pt->cmd.nsid = nsid;
-	if (nvme_poll(sc, q, ccb, nvme_pt_fill, NVME_TIMO_PT)) {
-		error = EIO;
-		goto out;
-	}
+
+	nvme_q_submit(sc, q, ccb, nvme_pt_fill);
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_pt_finished, &state);
+	KASSERT(state.finished);
 
	error = 0;
-out:
+
	if (buf != NULL) {
		if (error == 0 && pt->is_read)
			error = copyout(buf, pt->buf, pt->len);
		kmem_free(buf, pt->len);
	}
 
-	nvme_ccb_put(q, ccb);
+
	return error;
 }
 
@@ -1009,6 +1105,8 @@ nvme_q_submit(struct nvme_softc *sc, str
 struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
+	void *cookie;
+	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
 };
 
 static int
@@ -1016,8 +1114,6 @@ nvme_poll(struct nvme_softc *sc, struct 
     void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
 {
	struct nvme_poll_state state;
-	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
-	void *cookie;
	uint16_t flags;
	int step = 10;
	int maxloop = timo_sec * 1000000 / step;
@@ -1026,8 +1122,8 @@ nvme_poll(struct nvme_softc *sc, struct 
	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);
 
-	done = ccb->ccb_done;
-	cookie = ccb->ccb_cookie;
+	state.done = ccb->ccb_done;
+	state.cookie = ccb->ccb_cookie;
 
	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;
@@ -1043,13 +1139,22 @@ nvme_poll(struct nvme_softc *sc, struct 
		}
	}
 
-	ccb->ccb_cookie = cookie;
-	done(q, ccb, &state.c);
-
	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
+		/*
+		 * If it succeds later, it would hit ccb which will have been
+		 * already reused for something else. Not good. Cross
+		 * fingers and hope for best. XXX do controller reset?
+		 */
+		aprint_error_dev(sc->sc_dev, "polled command timed out\n");
+
+		/* Invoke the callback to clean state anyway */
+		struct nvme_cqe cqe;
+		memset(&cqe, 0, sizeof(cqe));
+		ccb->ccb_done(q, ccb, &cqe);
+
		return 1;
	}
 }
@@ -1071,6 +1176,9 @@ nvme_poll_done(struct nvme_queue *q, str
 
	SET(cqe->flags, htole16(NVME_CQE_PHASE));
	state->c = *cqe;
+
+	ccb->ccb_cookie = state->cookie;
+	state->done(q, ccb, &state->c);
 }
 
 static void
@@ -1147,6 +1255,26 @@ nvme_q_complete(struct nvme_softc *sc, s
	return rv;
 }
 
+static void
+nvme_q_wait_complete(struct nvme_softc *sc,
+    struct nvme_queue *q, bool (*finished)(void *), void *cookie)
+{
+	mutex_enter(&q->q_ccb_mtx);
+	if (finished(cookie))
+		goto out;
+
+	for(;;) {
+		q->q_ccb_waiting = true;
+		cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
+
+		if (finished(cookie))
+			break;
+	}
+
+out:
+	mutex_exit(&q->q_ccb_mtx);
+}
+
 static int
 nvme_identify(struct nvme_softc *sc, u_int mps)
 {
@@ -1157,7 +1285,7 @@ nvme_identify(struct nvme_softc *sc, u_i
	u_int mdts;
	int rv = 1;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
@@ -1214,7 +1342,7 @@ nvme_q_create(struct nvme_softc *sc, str
	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
		return 1;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL);
 
	ccb->ccb_done = nvme_empty_done;
@@ -1260,7 +1388,7 @@ nvme_q_delete(struct nvme_softc *sc, str
	struct nvme_ccb *ccb;
	int rv;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL);
 
	ccb->ccb_done = nvme_empty_done;
@@ -1310,25 +1438,28 @@ nvme_fill_identify(struct nvme_queue *q,
 static int
 nvme_get_number_of_queues(struct nvme_softc *sc, u_int *nqap)
 {
+	struct nvme_pt_state state;
	struct nvme_pt_command pt;
	struct nvme_ccb *ccb;
	uint16_t ncqa, nsqa;
	int rv;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
	memset(&pt, 0, sizeof(pt));
	pt.cmd.opcode = NVM_ADMIN_GET_FEATURES;
	pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES;
 
+	memset(&state, 0, sizeof(state));
+	state.pt = &pt;
+	state.finished = false;
+
	ccb->ccb_done = nvme_pt_done;
-	ccb->ccb_cookie = &pt;
+	ccb->ccb_cookie = &state;
 
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP);
 
-	nvme_ccb_put(sc->sc_admin_q, ccb);
-
	if (rv != 0) {
		*nqap = 0;
		return EIO;
@@ -1351,6 +1482,8 @@ nvme_ccbs_alloc(struct nvme_queue *q, ui
	u_int i;
 
	mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
+	cv_init(&q->q_ccb_wait, "nvmeqw");
+	q->q_ccb_waiting = false;
	SIMPLEQ_INIT(&q->q_ccb_list);
 
	q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);
@@ -1390,17 +1523,24 @@ free_maps:
 }
 
 static struct nvme_ccb *
-nvme_ccb_get(struct nvme_queue *q)
+nvme_ccb_get(struct nvme_queue *q, bool wait)
 {
	struct nvme_ccb *ccb = NULL;
 
	mutex_enter(&q->q_ccb_mtx);
+again:
	ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
	if (ccb != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
 #ifdef DEBUG
		ccb->ccb_cookie = NULL;
 #endif
+	} else {
+		if (__predict_false(wait)) {
+			q->q_ccb_waiting = true;
+			cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
+			goto again;
+		}
	}
	mutex_exit(&q->q_ccb_mtx);
 
@@ -1416,6 +1556,13 @@ nvme_ccb_put(struct nvme_queue *q, struc
	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
 #endif
	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);
+
+	/* It's unlikely there are any waiters, it's not used for regular I/O */
+	if (__predict_false(q->q_ccb_waiting)) {
+		q->q_ccb_waiting = false;
+		cv_broadcast(&q->q_ccb_wait);
+	}
+
	mutex_exit(&q->q_ccb_mtx);
 }
 
@@ -1435,6 +1582,7 @@ nvme_ccbs_free(struct nvme_queue *q)
	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
+	cv_destroy(&q->q_ccb_wait);
	mutex_destroy(&q->q_ccb_mtx);
 }
 

Index: src/sys/dev/ic/nvmevar.h
diff -u src/sys/dev/ic/nvmevar.h:1.13.6.1 src/sys/dev/ic/nvmevar.h:1.13.6.2
--- src/sys/dev/ic/nvmevar.h:1.13.6.1	Sat Mar 17 08:11:18 2018
+++ src/sys/dev/ic/nvmevar.h	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmevar.h,v 1.13.6.1 2018/03/17 08:11:18 martin Exp $	*/
+/*	$NetBSD: nvmevar.h,v 1.13.6.2 2018/03/18 11:05:27 martin Exp $	*/
 /*	$OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $	*/
 
 /*
@@ -87,6 +87,8 @@ struct nvme_queue {
	uint16_t q_cq_phase;
	kmutex_t q_ccb_mtx;
+	kcondvar_t q_ccb_wait;		/* wait for ccb avail/finish */
+	bool q_ccb_waiting;		/* whether there are waiters */
	uint16_t q_nccbs;		/* total number of ccbs */
	struct nvme_ccb *q_ccbs;
	SIMPLEQ_HEAD(, nvme_ccb) q_ccb_list;
@@ -179,6 +181,5 @@ int nvme_ns_identify(struct nvme_softc *
 void	nvme_ns_free(struct nvme_softc *, uint16_t);
 int	nvme_ns_dobio(struct nvme_softc *, uint16_t, void *, struct buf *,
	    void *, size_t, int, daddr_t, int, nvme_nnc_done);
-int	nvme_ns_sync(struct nvme_softc *, uint16_t, void *, int, nvme_nnc_done);
-bool	nvme_has_volatile_write_cache(struct nvme_softc *);
-int	nvme_admin_getcache(struct nvme_softc *, void *, nvme_nnc_done);
+int	nvme_ns_sync(struct nvme_softc *, uint16_t, int);
+int	nvme_admin_getcache(struct nvme_softc *, int *);
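
A note on the nvme_ccb_put() hunk above: since regular I/O never sleeps for
a ccb, the free path only touches the condvar when a waiter has announced
itself through q_ccb_waiting, which keeps condvar traffic out of the hot
path; the flag is only read and written with q_ccb_mtx held, so no wakeup
can be lost. Below is a minimal userland pthread sketch of that "wake only
when someone is waiting" pattern; the resq_* names are illustrative, not
the driver's own.

	/*
	 * Sketch of the conditional-wakeup pattern used by nvme_ccb_put()
	 * and the waiting nvme_ccb_get(). Illustrative only.
	 */
	#include <pthread.h>
	#include <stdbool.h>

	struct resq {
		pthread_mutex_t mtx;
		pthread_cond_t wait;
		bool waiting;	/* set by waiters before sleeping */
		int free_items;
	};

	/* Hot path: no condvar traffic unless a waiter announced itself. */
	static void
	resq_put(struct resq *q)
	{
		pthread_mutex_lock(&q->mtx);
		q->free_items++;
		if (q->waiting) {	/* __predict_false() in the driver */
			q->waiting = false;
			pthread_cond_broadcast(&q->wait);
		}
		pthread_mutex_unlock(&q->mtx);
	}

	/* Rare path (ioctl-style commands): willing to sleep for an item. */
	static void
	resq_get_wait(struct resq *q)
	{
		pthread_mutex_lock(&q->mtx);
		while (q->free_items == 0) {
			q->waiting = true;	/* announce before sleeping */
			pthread_cond_wait(&q->wait, &q->mtx);
		}
		q->free_items--;
		pthread_mutex_unlock(&q->mtx);
	}

	int
	main(void)
	{
		struct resq q = {
			.mtx = PTHREAD_MUTEX_INITIALIZER,
			.wait = PTHREAD_COND_INITIALIZER,
			.waiting = false,
			.free_items = 1,
		};

		resq_get_wait(&q);	/* takes the free item, no sleep */
		resq_put(&q);		/* no waiter, so no broadcast */
		return 0;
	}

Broadcast rather than signal is the safe choice here because the one flag
covers two different conditions (a free ccb and a finished command), so
every waiter must recheck its own predicate after waking.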