Module Name: src Committed By: jdolecek Date: Fri Mar 16 23:31:19 UTC 2018
Modified Files: src/sys/dev/ic: ld_nvme.c nvme.c nvmevar.h Log Message: refactor the locking code around DIOCGCACHE handling to be reusable for other infrequent commands it uses single condvar for simplicity, and uses it both when waiting for ccb or command completion - this is fine, since usually there will be just one such command queued anyway use this to finally properly implement DIOCCACHESYNC - return only after the command is confirmed as completed by the controller To generate a diff of this commit: cvs rdiff -u -r1.18 -r1.19 src/sys/dev/ic/ld_nvme.c cvs rdiff -u -r1.33 -r1.34 src/sys/dev/ic/nvme.c cvs rdiff -u -r1.14 -r1.15 src/sys/dev/ic/nvmevar.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/dev/ic/ld_nvme.c diff -u src/sys/dev/ic/ld_nvme.c:1.18 src/sys/dev/ic/ld_nvme.c:1.19 --- src/sys/dev/ic/ld_nvme.c:1.18 Tue Jan 23 22:42:29 2018 +++ src/sys/dev/ic/ld_nvme.c Fri Mar 16 23:31:19 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: ld_nvme.c,v 1.18 2018/01/23 22:42:29 pgoyette Exp $ */ +/* $NetBSD: ld_nvme.c,v 1.19 2018/03/16 23:31:19 jdolecek Exp $ */ /*- * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org> @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.18 2018/01/23 22:42:29 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.19 2018/03/16 23:31:19 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -49,14 +49,6 @@ struct ld_nvme_softc { struct nvme_softc *sc_nvme; uint16_t sc_nsid; - - /* getcache handling */ - kmutex_t sc_getcache_lock; - kcondvar_t sc_getcache_cv; - kcondvar_t sc_getcache_ready_cv; - bool sc_getcache_waiting; - bool sc_getcache_ready; - int sc_getcache_result; }; static int ld_nvme_match(device_t, cfdata_t, void *); @@ -73,8 +65,6 @@ static int ld_nvme_getcache(struct ld_so static int ld_nvme_ioctl(struct ld_softc *, u_long, void *, int32_t, bool); static void ld_nvme_biodone(void *, struct buf *, uint16_t, uint32_t); -static void ld_nvme_syncdone(void *, struct buf *, uint16_t, uint32_t); -static void ld_nvme_getcache_done(void *, struct buf *, uint16_t, uint32_t); static int ld_nvme_match(device_t parent, cfdata_t match, void *aux) @@ -103,10 +93,6 @@ ld_nvme_attach(device_t parent, device_t sc->sc_nvme = nsc; sc->sc_nsid = naa->naa_nsid; - mutex_init(&sc->sc_getcache_lock, MUTEX_DEFAULT, IPL_SOFTBIO); - cv_init(&sc->sc_getcache_cv, "nvmegcq"); - cv_init(&sc->sc_getcache_ready_cv, "nvmegcr"); - aprint_naive("\n"); aprint_normal("\n"); @@ -203,116 +189,16 @@ ld_nvme_flush(struct ld_softc *ld, bool { struct ld_nvme_softc *sc = device_private(ld->sc_dv); - if (!nvme_has_volatile_write_cache(sc->sc_nvme)) { - /* cache not present, no value in trying to 
flush it */ - return 0; - } - - return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid, sc, - poll ? NVME_NS_CTX_F_POLL : 0, - ld_nvme_syncdone); -} - -static void -ld_nvme_syncdone(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0) -{ - /* nothing to do */ + return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid, + poll ? NVME_NS_CTX_F_POLL : 0); } static int ld_nvme_getcache(struct ld_softc *ld, int *addr) { - int error; struct ld_nvme_softc *sc = device_private(ld->sc_dv); - /* - * DPO not supported, Dataset Management (DSM) field doesn't specify - * the same semantics. - */ - *addr = DKCACHE_FUA; - - if (!nvme_has_volatile_write_cache(sc->sc_nvme)) { - /* cache simply not present */ - return 0; - } - - /* - * This is admin queue request. The queue is relatively limited in size, - * and this is not performance critical call, so have at most one pending - * cache request at a time to avoid spurious EWOULDBLOCK failures. - */ - mutex_enter(&sc->sc_getcache_lock); - while (sc->sc_getcache_waiting) { - error = cv_wait_sig(&sc->sc_getcache_cv, &sc->sc_getcache_lock); - if (error) - goto out; - } - sc->sc_getcache_waiting = true; - sc->sc_getcache_ready = false; - mutex_exit(&sc->sc_getcache_lock); - - error = nvme_admin_getcache(sc->sc_nvme, sc, ld_nvme_getcache_done); - if (error) { - mutex_enter(&sc->sc_getcache_lock); - goto out; - } - - mutex_enter(&sc->sc_getcache_lock); - while (!sc->sc_getcache_ready) { - error = cv_wait_sig(&sc->sc_getcache_ready_cv, - &sc->sc_getcache_lock); - if (error) - goto out; - } - - KDASSERT(sc->sc_getcache_ready); - - if (sc->sc_getcache_result >= 0) - *addr |= sc->sc_getcache_result; - else - error = EINVAL; - - out: - sc->sc_getcache_waiting = false; - - /* wake one of eventual waiters */ - cv_signal(&sc->sc_getcache_cv); - - mutex_exit(&sc->sc_getcache_lock); - - return error; -} - -static void -ld_nvme_getcache_done(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0) -{ - struct ld_nvme_softc *sc = xc; - uint16_t status = 
NVME_CQE_SC(cmd_status); - int result; - - if (status == NVME_CQE_SC_SUCCESS) { - result = 0; - - if (cdw0 & NVME_CQE_CDW0_VWC_WCE) - result |= DKCACHE_WRITE; - - /* - * If volatile write cache is present, the flag shall also be - * settable. - */ - result |= DKCACHE_WCHANGE; - } else { - result = -1; - } - - mutex_enter(&sc->sc_getcache_lock); - sc->sc_getcache_result = result; - sc->sc_getcache_ready = true; - - /* wake up the waiter */ - cv_signal(&sc->sc_getcache_ready_cv); - - mutex_exit(&sc->sc_getcache_lock); + return nvme_admin_getcache(sc->sc_nvme, addr); } static int Index: src/sys/dev/ic/nvme.c diff -u src/sys/dev/ic/nvme.c:1.33 src/sys/dev/ic/nvme.c:1.34 --- src/sys/dev/ic/nvme.c:1.33 Fri Mar 16 18:49:18 2018 +++ src/sys/dev/ic/nvme.c Fri Mar 16 23:31:19 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvme.c,v 1.33 2018/03/16 18:49:18 jdolecek Exp $ */ +/* $NetBSD: nvme.c,v 1.34 2018/03/16 23:31:19 jdolecek Exp $ */ /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ /* @@ -18,7 +18,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.33 2018/03/16 18:49:18 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.34 2018/03/16 23:31:19 jdolecek Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -63,7 +63,7 @@ static int nvme_ccbs_alloc(struct nvme_q static void nvme_ccbs_free(struct nvme_queue *); static struct nvme_ccb * - nvme_ccb_get(struct nvme_queue *); + nvme_ccb_get(struct nvme_queue *, bool); static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); static int nvme_poll(struct nvme_softc *, struct nvme_queue *, @@ -85,6 +85,8 @@ static void nvme_q_submit(struct nvme_so struct nvme_ccb *, void *)); static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); static void nvme_q_free(struct nvme_softc *, struct nvme_queue *); +static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, + bool (*)(void *), void *); static struct nvme_dmamem * nvme_dmamem_alloc(struct nvme_softc *, size_t); @@ 
-566,7 +568,7 @@ nvme_ns_identify(struct nvme_softc *sc, KASSERT(nsid > 0); - ccb = nvme_ccb_get(sc->sc_admin_q); + ccb = nvme_ccb_get(sc->sc_admin_q, false); KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ mem = nvme_dmamem_alloc(sc, sizeof(*identify)); @@ -622,7 +624,7 @@ nvme_ns_dobio(struct nvme_softc *sc, uin bus_dmamap_t dmap; int i, error; - ccb = nvme_ccb_get(q); + ccb = nvme_ccb_get(q, false); if (ccb == NULL) return EAGAIN; @@ -741,31 +743,44 @@ nvme_ns_io_done(struct nvme_queue *q, st * If there is no volatile write cache, it makes no sense to issue * flush commands or query for the status. */ -bool +static bool nvme_has_volatile_write_cache(struct nvme_softc *sc) { /* sc_identify is filled during attachment */ return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); } +static bool +nvme_ns_sync_finished(void *cookie) +{ + int *result = cookie; + + return (*result != 0); +} + int -nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, void *cookie, - int flags, nvme_nnc_done nnc_done) +nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) { struct nvme_queue *q = nvme_get_q(sc); struct nvme_ccb *ccb; + int result = 0; + + if (!nvme_has_volatile_write_cache(sc)) { + /* cache not present, no value in trying to flush it */ + return 0; + } - ccb = nvme_ccb_get(q); + ccb = nvme_ccb_get(q, true); if (ccb == NULL) return EAGAIN; ccb->ccb_done = nvme_ns_sync_done; - ccb->ccb_cookie = cookie; + ccb->ccb_cookie = &result; /* namespace context */ ccb->nnc_nsid = nsid; ccb->nnc_flags = flags; - ccb->nnc_done = nnc_done; + ccb->nnc_done = NULL; if (ISSET(flags, NVME_NS_CTX_F_POLL)) { if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) @@ -774,7 +789,12 @@ nvme_ns_sync(struct nvme_softc *sc, uint } nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); - return 0; + + /* wait for completion */ + nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); + KASSERT(result != 0); + + return (result > 0) ? 
0 : EIO; } static void @@ -790,36 +810,64 @@ static void nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) { - void *cookie = ccb->ccb_cookie; - nvme_nnc_done nnc_done = ccb->nnc_done; + int *result = ccb->ccb_cookie; + uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); + + if (status == NVME_CQE_SC_SUCCESS) + *result = 1; + else + *result = -1; nvme_ccb_put(q, ccb); +} + +static bool +nvme_getcache_finished(void *xc) +{ + int *addr = xc; - nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); + return (*addr != 0); } /* * Get status of volatile write cache. Always asynchronous. */ int -nvme_admin_getcache(struct nvme_softc *sc, void *cookie, nvme_nnc_done nnc_done) +nvme_admin_getcache(struct nvme_softc *sc, int *addr) { struct nvme_ccb *ccb; struct nvme_queue *q = sc->sc_admin_q; + int result = 0, error; - ccb = nvme_ccb_get(q); - if (ccb == NULL) - return EAGAIN; + if (!nvme_has_volatile_write_cache(sc)) { + /* cache simply not present */ + *addr = 0; + return 0; + } + + ccb = nvme_ccb_get(q, true); + KASSERT(ccb != NULL); ccb->ccb_done = nvme_getcache_done; - ccb->ccb_cookie = cookie; + ccb->ccb_cookie = &result; /* namespace context */ ccb->nnc_flags = 0; - ccb->nnc_done = nnc_done; + ccb->nnc_done = NULL; nvme_q_submit(sc, q, ccb, nvme_getcache_fill); - return 0; + + /* wait for completion */ + nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); + KASSERT(result != 0); + + if (result > 0) { + *addr = result; + error = 0; + } else + error = EINVAL; + + return error; } static void @@ -835,12 +883,35 @@ static void nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) { - void *cookie = ccb->ccb_cookie; - nvme_nnc_done nnc_done = ccb->nnc_done; + int *addr = ccb->ccb_cookie; + uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); + uint32_t cdw0 = lemtoh32(&cqe->cdw0); + int result; - nvme_ccb_put(q, ccb); + if (status == NVME_CQE_SC_SUCCESS) { + result = 0; - 
nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); + /* + * DPO not supported, Dataset Management (DSM) field doesn't + * specify the same semantics. FUA is always supported. + */ + result = DKCACHE_FUA; + + if (cdw0 & NVME_CQE_CDW0_VWC_WCE) + result |= DKCACHE_WRITE; + + /* + * If volatile write cache is present, the flag shall also be + * settable. + */ + result |= DKCACHE_WCHANGE; + } else { + result = -1; + } + + *addr = result; + + nvme_ccb_put(q, ccb); } void @@ -941,9 +1012,8 @@ nvme_command_passthrough(struct nvme_sof return EINVAL; q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc); - ccb = nvme_ccb_get(q); - if (ccb == NULL) - return EBUSY; + ccb = nvme_ccb_get(q, true); + KASSERT(ccb != NULL); if (pt->buf != NULL) { KASSERT(pt->len > 0); @@ -1014,6 +1084,8 @@ nvme_q_submit(struct nvme_softc *sc, str struct nvme_poll_state { struct nvme_sqe s; struct nvme_cqe c; + void *cookie; + void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *); }; static int @@ -1021,8 +1093,6 @@ nvme_poll(struct nvme_softc *sc, struct void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec) { struct nvme_poll_state state; - void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *); - void *cookie; uint16_t flags; int step = 10; int maxloop = timo_sec * 1000000 / step; @@ -1031,8 +1101,8 @@ nvme_poll(struct nvme_softc *sc, struct memset(&state, 0, sizeof(state)); (*fill)(q, ccb, &state.s); - done = ccb->ccb_done; - cookie = ccb->ccb_cookie; + state.done = ccb->ccb_done; + state.cookie = ccb->ccb_cookie; ccb->ccb_done = nvme_poll_done; ccb->ccb_cookie = &state; @@ -1048,13 +1118,22 @@ nvme_poll(struct nvme_softc *sc, struct } } - ccb->ccb_cookie = cookie; - done(q, ccb, &state.c); - if (error == 0) { flags = lemtoh16(&state.c.flags); return flags & ~NVME_CQE_PHASE; } else { + /* + * If it succeds later, it would hit ccb which will have been + * already reused for something else. Not good. 
Cross + * fingers and hope for best. XXX do controller reset? + */ + aprint_error_dev(sc->sc_dev, "polled command timed out\n"); + + /* Invoke the callback to clean state anyway */ + struct nvme_cqe cqe; + memset(&cqe, 0, sizeof(cqe)); + ccb->ccb_done(q, ccb, &cqe); + return 1; } } @@ -1076,6 +1155,9 @@ nvme_poll_done(struct nvme_queue *q, str SET(cqe->flags, htole16(NVME_CQE_PHASE)); state->c = *cqe; + + ccb->ccb_cookie = state->cookie; + state->done(q, ccb, &state->c); } static void @@ -1152,6 +1234,26 @@ nvme_q_complete(struct nvme_softc *sc, s return rv; } +static void +nvme_q_wait_complete(struct nvme_softc *sc, + struct nvme_queue *q, bool (*finished)(void *), void *cookie) +{ + mutex_enter(&q->q_ccb_mtx); + if (finished(cookie)) + goto out; + + for(;;) { + q->q_ccb_waiting = true; + cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); + + if (finished(cookie)) + break; + } + +out: + mutex_exit(&q->q_ccb_mtx); +} + static int nvme_identify(struct nvme_softc *sc, u_int mps) { @@ -1162,7 +1264,7 @@ nvme_identify(struct nvme_softc *sc, u_i u_int mdts; int rv = 1; - ccb = nvme_ccb_get(sc->sc_admin_q); + ccb = nvme_ccb_get(sc->sc_admin_q, false); KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ mem = nvme_dmamem_alloc(sc, sizeof(*identify)); @@ -1219,7 +1321,7 @@ nvme_q_create(struct nvme_softc *sc, str if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) return 1; - ccb = nvme_ccb_get(sc->sc_admin_q); + ccb = nvme_ccb_get(sc->sc_admin_q, false); KASSERT(ccb != NULL); ccb->ccb_done = nvme_empty_done; @@ -1265,7 +1367,7 @@ nvme_q_delete(struct nvme_softc *sc, str struct nvme_ccb *ccb; int rv; - ccb = nvme_ccb_get(sc->sc_admin_q); + ccb = nvme_ccb_get(sc->sc_admin_q, false); KASSERT(ccb != NULL); ccb->ccb_done = nvme_empty_done; @@ -1320,7 +1422,7 @@ nvme_get_number_of_queues(struct nvme_so uint16_t ncqa, nsqa; int rv; - ccb = nvme_ccb_get(sc->sc_admin_q); + ccb = nvme_ccb_get(sc->sc_admin_q, false); KASSERT(ccb != NULL); /* it's a bug if we 
don't have spare ccb here */ memset(&pt, 0, sizeof(pt)); @@ -1356,6 +1458,8 @@ nvme_ccbs_alloc(struct nvme_queue *q, ui u_int i; mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); + cv_init(&q->q_ccb_wait, "nvmeqw"); + q->q_ccb_waiting = false; SIMPLEQ_INIT(&q->q_ccb_list); q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); @@ -1395,17 +1499,24 @@ free_maps: } static struct nvme_ccb * -nvme_ccb_get(struct nvme_queue *q) +nvme_ccb_get(struct nvme_queue *q, bool wait) { struct nvme_ccb *ccb = NULL; mutex_enter(&q->q_ccb_mtx); +again: ccb = SIMPLEQ_FIRST(&q->q_ccb_list); if (ccb != NULL) { SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); #ifdef DEBUG ccb->ccb_cookie = NULL; #endif + } else { + if (__predict_false(wait)) { + q->q_ccb_waiting = true; + cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); + goto again; + } } mutex_exit(&q->q_ccb_mtx); @@ -1421,6 +1532,13 @@ nvme_ccb_put(struct nvme_queue *q, struc ccb->ccb_cookie = (void *)NVME_CCB_FREE; #endif SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry); + + /* It's unlikely there are any waiters, it's not used for regular I/O */ + if (__predict_false(q->q_ccb_waiting)) { + q->q_ccb_waiting = false; + cv_broadcast(&q->q_ccb_wait); + } + mutex_exit(&q->q_ccb_mtx); } @@ -1440,6 +1558,7 @@ nvme_ccbs_free(struct nvme_queue *q) nvme_dmamem_free(sc, q->q_ccb_prpls); kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs); q->q_ccbs = NULL; + cv_destroy(&q->q_ccb_wait); mutex_destroy(&q->q_ccb_mtx); } Index: src/sys/dev/ic/nvmevar.h diff -u src/sys/dev/ic/nvmevar.h:1.14 src/sys/dev/ic/nvmevar.h:1.15 --- src/sys/dev/ic/nvmevar.h:1.14 Fri Mar 16 18:49:18 2018 +++ src/sys/dev/ic/nvmevar.h Fri Mar 16 23:31:19 2018 @@ -1,4 +1,4 @@ -/* $NetBSD: nvmevar.h,v 1.14 2018/03/16 18:49:18 jdolecek Exp $ */ +/* $NetBSD: nvmevar.h,v 1.15 2018/03/16 23:31:19 jdolecek Exp $ */ /* $OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */ /* @@ -87,6 +87,8 @@ struct nvme_queue { uint16_t q_cq_phase; kmutex_t q_ccb_mtx; + kcondvar_t q_ccb_wait; /* wait 
for ccb avail/finish */ + bool q_ccb_waiting; /* whether there are waiters */ uint16_t q_nccbs; /* total number of ccbs */ struct nvme_ccb *q_ccbs; SIMPLEQ_HEAD(, nvme_ccb) q_ccb_list; @@ -179,6 +181,5 @@ int nvme_ns_identify(struct nvme_softc * void nvme_ns_free(struct nvme_softc *, uint16_t); int nvme_ns_dobio(struct nvme_softc *, uint16_t, void *, struct buf *, void *, size_t, int, daddr_t, int, nvme_nnc_done); -int nvme_ns_sync(struct nvme_softc *, uint16_t, void *, int, nvme_nnc_done); -bool nvme_has_volatile_write_cache(struct nvme_softc *); -int nvme_admin_getcache(struct nvme_softc *, void *, nvme_nnc_done); +int nvme_ns_sync(struct nvme_softc *, uint16_t, int); +int nvme_admin_getcache(struct nvme_softc *, int *);