Module Name:    src
Committed By:   martin
Date:           Sun Mar 18 11:05:27 UTC 2018

Modified Files:
        src/sys/dev/ic [netbsd-8]: ld_nvme.c nvme.c nvmevar.h

Log Message:
Pull up following revision(s) (requested by jdolecek in ticket #641):
        sys/dev/ic/nvme.c: revision 1.34
        sys/dev/ic/nvme.c: revision 1.35
        sys/dev/ic/nvme.c: revision 1.36
        sys/dev/ic/nvme.c: revision 1.37
        sys/dev/ic/ld_nvme.c: revision 1.19
        sys/dev/ic/nvmevar.h: revision 1.15

refactor the locking code around DIOCGCACHE handling to be reusable
for other infrequent commands; it uses a single condvar for simplicity,
and uses it both when waiting for a ccb and when waiting for command
completion - this is fine, since usually there will be just one such
command queued anyway
use this to finally implement DIOCCACHESYNC properly - return only
after the controller confirms the command as completed.
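
In outline, the new nvme_ns_sync() (see the nvme.c diff below) submits
the flush and then sleeps on the per-queue condvar until the completion
handler sets the result; excerpted here for illustration, not as
standalone code:

	/* queue the flush command */
	nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);

	/* wait for completion: sleep on the queue's condvar until
	 * nvme_ns_sync_done() stores a nonzero result */
	nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result);
	KASSERT(result != 0);

	return (result > 0) ? 0 : EIO;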

switch handling of passthrough commands to use the queue instead of
polling; this should fix PR kern/53059 by Frank Kardel
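
The core of the change in nvme_command_passthrough() (nvme.c diff
below): instead of spinning in nvme_poll(), the command is submitted to
the queue and the caller sleeps until nvme_pt_done() marks the on-stack
state as finished; excerpt for illustration:

	struct nvme_pt_state state;

	memset(&state, 0, sizeof(state));
	state.pt = pt;
	state.finished = false;

	ccb->ccb_done = nvme_pt_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_pt_fill);

	/* wait for completion */
	nvme_q_wait_complete(sc, q, nvme_pt_finished, &state);
	KASSERT(state.finished);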

fix passthrough command usage also in nvme_get_number_of_queues() -
nvme_pt_done() now expects a struct nvme_pt_state as the ccb cookie, so
passing the bare nvme_pt_command caused memory corruption and a
possible panic on boot

also remove the now-duplicate nvme_ccb_put() call from
nvme_get_number_of_queues()
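
For reference, nvme_pt_done() in the diff below now ends by releasing
the ccb itself:

	state->finished = true;

	nvme_ccb_put(q, ccb);

so the caller-side nvme_ccb_put() in nvme_get_number_of_queues() had
become a double release.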


To generate a diff of this commit:
cvs rdiff -u -r1.16.2.1 -r1.16.2.2 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.30.2.1 -r1.30.2.2 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.13.6.1 -r1.13.6.2 src/sys/dev/ic/nvmevar.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/ld_nvme.c
diff -u src/sys/dev/ic/ld_nvme.c:1.16.2.1 src/sys/dev/ic/ld_nvme.c:1.16.2.2
--- src/sys/dev/ic/ld_nvme.c:1.16.2.1	Fri Sep  1 09:59:11 2017
+++ src/sys/dev/ic/ld_nvme.c	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: ld_nvme.c,v 1.16.2.1 2017/09/01 09:59:11 martin Exp $	*/
+/*	$NetBSD: ld_nvme.c,v 1.16.2.2 2018/03/18 11:05:27 martin Exp $	*/
 
 /*-
  * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org>
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.16.2.1 2017/09/01 09:59:11 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.16.2.2 2018/03/18 11:05:27 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -49,14 +49,6 @@ struct ld_nvme_softc {
 	struct nvme_softc	*sc_nvme;
 
 	uint16_t		sc_nsid;
-
-	/* getcache handling */
-	kmutex_t		sc_getcache_lock;
-	kcondvar_t		sc_getcache_cv;
-	kcondvar_t		sc_getcache_ready_cv;
-	bool			sc_getcache_waiting;
-	bool			sc_getcache_ready;
-	int			sc_getcache_result;
 };
 
 static int	ld_nvme_match(device_t, cfdata_t, void *);
@@ -73,8 +65,6 @@ static int	ld_nvme_getcache(struct ld_so
 static int	ld_nvme_ioctl(struct ld_softc *, u_long, void *, int32_t, bool);
 
 static void	ld_nvme_biodone(void *, struct buf *, uint16_t, uint32_t);
-static void	ld_nvme_syncdone(void *, struct buf *, uint16_t, uint32_t);
-static void	ld_nvme_getcache_done(void *, struct buf *, uint16_t, uint32_t);
 
 static int
 ld_nvme_match(device_t parent, cfdata_t match, void *aux)
@@ -103,10 +93,6 @@ ld_nvme_attach(device_t parent, device_t
 	sc->sc_nvme = nsc;
 	sc->sc_nsid = naa->naa_nsid;
 
-	mutex_init(&sc->sc_getcache_lock, MUTEX_DEFAULT, IPL_SOFTBIO);
-	cv_init(&sc->sc_getcache_cv, "nvmegcq");
-	cv_init(&sc->sc_getcache_ready_cv, "nvmegcr");
-
 	aprint_naive("\n");
 	aprint_normal("\n");
 
@@ -203,116 +189,16 @@ ld_nvme_flush(struct ld_softc *ld, bool 
 {
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 
-	if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
-		/* cache not present, no value in trying to flush it */
-		return 0;
-	}
-
-	return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid, sc,
-	    poll ? NVME_NS_CTX_F_POLL : 0,
-	    ld_nvme_syncdone);
-}
-
-static void
-ld_nvme_syncdone(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0)
-{
-	/* nothing to do */
+	return nvme_ns_sync(sc->sc_nvme, sc->sc_nsid,
+	    poll ? NVME_NS_CTX_F_POLL : 0);
 }
 
 static int
 ld_nvme_getcache(struct ld_softc *ld, int *addr)
 {
-	int error;
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 
-	/*
-	 * DPO not supported, Dataset Management (DSM) field doesn't specify
-	 * the same semantics.
-	 */ 
-	*addr = DKCACHE_FUA;
-
-	if (!nvme_has_volatile_write_cache(sc->sc_nvme)) {
-		/* cache simply not present */
-		return 0;
-	}
-
-	/*
-	 * This is admin queue request. The queue is relatively limited in size,
-	 * and this is not performance critical call, so have at most one pending
-	 * cache request at a time to avoid spurious EWOULDBLOCK failures.
-	 */ 
-	mutex_enter(&sc->sc_getcache_lock);
-	while (sc->sc_getcache_waiting) {
-		error = cv_wait_sig(&sc->sc_getcache_cv, &sc->sc_getcache_lock);
-		if (error)
-			goto out;
-	}
-	sc->sc_getcache_waiting = true;
-	sc->sc_getcache_ready = false;
-	mutex_exit(&sc->sc_getcache_lock);
-
-	error = nvme_admin_getcache(sc->sc_nvme, sc, ld_nvme_getcache_done);
-	if (error) {
-		mutex_enter(&sc->sc_getcache_lock);
-		goto out;
-	}
-
-	mutex_enter(&sc->sc_getcache_lock);
-	while (!sc->sc_getcache_ready) {
-		error = cv_wait_sig(&sc->sc_getcache_ready_cv,
-		    &sc->sc_getcache_lock);
-		if (error)
-			goto out;
-	}
-
-	KDASSERT(sc->sc_getcache_ready);
-
-	if (sc->sc_getcache_result >= 0)
-		*addr |= sc->sc_getcache_result;
-	else
-		error = EINVAL;
-
-    out:
-	sc->sc_getcache_waiting = false;
-
-	/* wake one of eventual waiters */
-	cv_signal(&sc->sc_getcache_cv);
-
-	mutex_exit(&sc->sc_getcache_lock);
-
-	return error;
-}
-
-static void
-ld_nvme_getcache_done(void *xc, struct buf *bp, uint16_t cmd_status, uint32_t cdw0)
-{
-	struct ld_nvme_softc *sc = xc;
-	uint16_t status = NVME_CQE_SC(cmd_status);
-	int result;
-
-	if (status == NVME_CQE_SC_SUCCESS) {
-		result = 0;
-
-		if (cdw0 & NVME_CQE_CDW0_VWC_WCE)
-			result |= DKCACHE_WRITE;
-
-		/*
-		 * If volatile write cache is present, the flag shall also be
-		 * settable.
-		 */
-		result |= DKCACHE_WCHANGE;
-	} else {
-		result = -1;
-	}
-
-	mutex_enter(&sc->sc_getcache_lock);
-	sc->sc_getcache_result = result;
-	sc->sc_getcache_ready = true;
-
-	/* wake up the waiter */
-	cv_signal(&sc->sc_getcache_ready_cv);
-
-	mutex_exit(&sc->sc_getcache_lock);
+	return nvme_admin_getcache(sc->sc_nvme, addr);
 }
 
 static int

Index: src/sys/dev/ic/nvme.c
diff -u src/sys/dev/ic/nvme.c:1.30.2.1 src/sys/dev/ic/nvme.c:1.30.2.2
--- src/sys/dev/ic/nvme.c:1.30.2.1	Sat Mar 17 08:11:18 2018
+++ src/sys/dev/ic/nvme.c	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvme.c,v 1.30.2.1 2018/03/17 08:11:18 martin Exp $	*/
+/*	$NetBSD: nvme.c,v 1.30.2.2 2018/03/18 11:05:27 martin Exp $	*/
 /*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.30.2.1 2018/03/17 08:11:18 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.30.2.2 2018/03/18 11:05:27 martin Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -61,7 +61,7 @@ static int	nvme_ccbs_alloc(struct nvme_q
 static void	nvme_ccbs_free(struct nvme_queue *);
 
 static struct nvme_ccb *
-		nvme_ccb_get(struct nvme_queue *);
+		nvme_ccb_get(struct nvme_queue *, bool);
 static void	nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);
 
 static int	nvme_poll(struct nvme_softc *, struct nvme_queue *,
@@ -83,6 +83,8 @@ static void	nvme_q_submit(struct nvme_so
 		    struct nvme_ccb *, void *));
 static int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
 static void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
+static void	nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *,
+		    bool (*)(void *), void *);
 
 static struct nvme_dmamem *
 		nvme_dmamem_alloc(struct nvme_softc *, size_t);
@@ -564,7 +566,7 @@ nvme_ns_identify(struct nvme_softc *sc, 
 
 	KASSERT(nsid > 0);
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
@@ -617,7 +619,7 @@ nvme_ns_dobio(struct nvme_softc *sc, uin
 	bus_dmamap_t dmap;
 	int i, error;
 
-	ccb = nvme_ccb_get(q);
+	ccb = nvme_ccb_get(q, false);
 	if (ccb == NULL)
 		return EAGAIN;
 
@@ -736,31 +738,44 @@ nvme_ns_io_done(struct nvme_queue *q, st
  * If there is no volatile write cache, it makes no sense to issue
  * flush commands or query for the status.
  */
-bool
+static bool
 nvme_has_volatile_write_cache(struct nvme_softc *sc)
 {
 	/* sc_identify is filled during attachment */
 	return  ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0);
 }
 
+static bool
+nvme_ns_sync_finished(void *cookie)
+{
+	int *result = cookie;
+
+	return (*result != 0);
+}
+
 int
-nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, void *cookie,
-    int flags, nvme_nnc_done nnc_done)
+nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags)
 {
 	struct nvme_queue *q = nvme_get_q(sc);
 	struct nvme_ccb *ccb;
+	int result = 0;
+
+	if (!nvme_has_volatile_write_cache(sc)) {
+		/* cache not present, no value in trying to flush it */
+		return 0;
+	}
 
-	ccb = nvme_ccb_get(q);
+	ccb = nvme_ccb_get(q, true);
 	if (ccb == NULL)
 		return EAGAIN;
 
 	ccb->ccb_done = nvme_ns_sync_done;
-	ccb->ccb_cookie = cookie;
+	ccb->ccb_cookie = &result;
 
 	/* namespace context */
 	ccb->nnc_nsid = nsid;
 	ccb->nnc_flags = flags;
-	ccb->nnc_done = nnc_done;
+	ccb->nnc_done = NULL;
 
 	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
 		if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0)
@@ -769,7 +784,12 @@ nvme_ns_sync(struct nvme_softc *sc, uint
 	}
 
 	nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);
-	return 0;
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result);
+	KASSERT(result != 0);
+
+	return (result > 0) ? 0 : EIO;
 }
 
 static void
@@ -785,36 +805,64 @@ static void
 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
     struct nvme_cqe *cqe)
 {
-	void *cookie = ccb->ccb_cookie;
-	nvme_nnc_done nnc_done = ccb->nnc_done;
+	int *result = ccb->ccb_cookie;
+	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
+
+	if (status == NVME_CQE_SC_SUCCESS)
+		*result = 1;
+	else
+		*result = -1;
 
 	nvme_ccb_put(q, ccb);
+}
 
-	nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
+static bool
+nvme_getcache_finished(void *xc)
+{
+	int *addr = xc;
+
+	return (*addr != 0);
 }
 
 /*
  * Get status of volatile write cache. Always asynchronous.
  */
 int
-nvme_admin_getcache(struct nvme_softc *sc, void *cookie, nvme_nnc_done nnc_done)
+nvme_admin_getcache(struct nvme_softc *sc, int *addr)
 {
 	struct nvme_ccb *ccb;
 	struct nvme_queue *q = sc->sc_admin_q;
+	int result = 0, error;
 
-	ccb = nvme_ccb_get(q);
-	if (ccb == NULL)
-		return EAGAIN;
+	if (!nvme_has_volatile_write_cache(sc)) {
+		/* cache simply not present */
+		*addr = 0;
+		return 0;
+	}
+
+	ccb = nvme_ccb_get(q, true);
+	KASSERT(ccb != NULL);
 
 	ccb->ccb_done = nvme_getcache_done;
-	ccb->ccb_cookie = cookie;
+	ccb->ccb_cookie = &result;
 
 	/* namespace context */
 	ccb->nnc_flags = 0;
-	ccb->nnc_done = nnc_done;
+	ccb->nnc_done = NULL;
 
 	nvme_q_submit(sc, q, ccb, nvme_getcache_fill);
-	return 0;
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result);
+	KASSERT(result != 0);
+
+	if (result > 0) {
+		*addr = result;
+		error = 0;
+	} else
+		error = EINVAL;
+
+	return error;
 }
 
 static void
@@ -830,12 +878,35 @@ static void
 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb,
     struct nvme_cqe *cqe)
 {
-	void *cookie = ccb->ccb_cookie;
-	nvme_nnc_done nnc_done = ccb->nnc_done;
+	int *addr = ccb->ccb_cookie;
+	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
+	uint32_t cdw0 = lemtoh32(&cqe->cdw0);
+	int result;
 
-	nvme_ccb_put(q, ccb);
+	if (status == NVME_CQE_SC_SUCCESS) {
+		result = 0;
 
-	nnc_done(cookie, NULL, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
+		/*
+		 * DPO not supported, Dataset Management (DSM) field doesn't
+		 * specify the same semantics. FUA is always supported.
+		 */ 
+		result = DKCACHE_FUA;
+
+		if (cdw0 & NVME_CQE_CDW0_VWC_WCE)
+			result |= DKCACHE_WRITE;
+
+		/*
+		 * If volatile write cache is present, the flag shall also be
+		 * settable.
+		 */
+		result |= DKCACHE_WCHANGE;
+	} else {
+		result = -1;
+	}
+
+	*addr = result;
+
+	nvme_ccb_put(q, ccb);
 }
 
 void
@@ -853,12 +924,18 @@ nvme_ns_free(struct nvme_softc *sc, uint
 		kmem_free(identify, sizeof(*identify));
 }
 
+struct nvme_pt_state {
+	struct nvme_pt_command *pt;
+	bool finished;
+};
+
 static void
 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
 {
 	struct nvme_softc *sc = q->q_sc;
 	struct nvme_sqe *sqe = slot;
-	struct nvme_pt_command *pt = ccb->ccb_cookie;
+	struct nvme_pt_state *state = ccb->ccb_cookie;
+	struct nvme_pt_command *pt = state->pt;
 	bus_dmamap_t dmap = ccb->ccb_dmamap;
 	int i;
 
@@ -900,7 +977,8 @@ static void
 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
 {
 	struct nvme_softc *sc = q->q_sc;
-	struct nvme_pt_command *pt = ccb->ccb_cookie;
+	struct nvme_pt_state *state = ccb->ccb_cookie;
+	struct nvme_pt_command *pt = state->pt;
 	bus_dmamap_t dmap = ccb->ccb_dmamap;
 
 	if (pt->buf != NULL && pt->len > 0) {
@@ -919,6 +997,18 @@ nvme_pt_done(struct nvme_queue *q, struc
 
 	pt->cpl.cdw0 = lemtoh32(&cqe->cdw0);
 	pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE;
+
+	state->finished = true;
+
+	nvme_ccb_put(q, ccb);
+}
+
+static bool
+nvme_pt_finished(void *cookie)
+{
+	struct nvme_pt_state *state = cookie;
+
+	return state->finished;
 }
 
 static int
@@ -928,6 +1018,7 @@ nvme_command_passthrough(struct nvme_sof
 	struct nvme_queue *q;
 	struct nvme_ccb *ccb;
 	void *buf = NULL;
+	struct nvme_pt_state state;
 	int error;
 
 	/* limit command size to maximum data transfer size */
@@ -936,9 +1027,8 @@ nvme_command_passthrough(struct nvme_sof
 		return EINVAL;
 
 	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
-	ccb = nvme_ccb_get(q);
-	if (ccb == NULL)
-		return EBUSY;
+	ccb = nvme_ccb_get(q, true);
+	KASSERT(ccb != NULL);
 
 	if (pt->buf != NULL) {
 		KASSERT(pt->len > 0);
@@ -959,24 +1049,30 @@ nvme_command_passthrough(struct nvme_sof
 		    pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
 	}
 
+	memset(&state, 0, sizeof(state));
+	state.pt = pt;
+	state.finished = false;
+
 	ccb->ccb_done = nvme_pt_done;
-	ccb->ccb_cookie = pt;
+	ccb->ccb_cookie = &state;
 
 	pt->cmd.nsid = nsid;
-	if (nvme_poll(sc, q, ccb, nvme_pt_fill, NVME_TIMO_PT)) {
-		error = EIO;
-		goto out;
-	}
+
+	nvme_q_submit(sc, q, ccb, nvme_pt_fill);
+
+	/* wait for completion */
+	nvme_q_wait_complete(sc, q, nvme_pt_finished, &state);
+	KASSERT(state.finished);
 
 	error = 0;
-out:
+
 	if (buf != NULL) {
 		if (error == 0 && pt->is_read)
 			error = copyout(buf, pt->buf, pt->len);
 kmem_free:
 		kmem_free(buf, pt->len);
 	}
-	nvme_ccb_put(q, ccb);
+
 	return error;
 }
 
@@ -1009,6 +1105,8 @@ nvme_q_submit(struct nvme_softc *sc, str
 struct nvme_poll_state {
 	struct nvme_sqe s;
 	struct nvme_cqe c;
+	void *cookie;
+	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
 };
 
 static int
@@ -1016,8 +1114,6 @@ nvme_poll(struct nvme_softc *sc, struct 
     void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
 {
 	struct nvme_poll_state state;
-	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
-	void *cookie;
 	uint16_t flags;
 	int step = 10;
 	int maxloop = timo_sec * 1000000 / step;
@@ -1026,8 +1122,8 @@ nvme_poll(struct nvme_softc *sc, struct 
 	memset(&state, 0, sizeof(state));
 	(*fill)(q, ccb, &state.s);
 
-	done = ccb->ccb_done;
-	cookie = ccb->ccb_cookie;
+	state.done = ccb->ccb_done;
+	state.cookie = ccb->ccb_cookie;
 
 	ccb->ccb_done = nvme_poll_done;
 	ccb->ccb_cookie = &state;
@@ -1043,13 +1139,22 @@ nvme_poll(struct nvme_softc *sc, struct 
 		}
 	}
 
-	ccb->ccb_cookie = cookie;
-	done(q, ccb, &state.c);
-
 	if (error == 0) {
 		flags = lemtoh16(&state.c.flags);
 		return flags & ~NVME_CQE_PHASE;
 	} else {
+		/*
+		 * If it succeeds later, it would hit a ccb which will have
+		 * already been reused for something else. Not good. Cross
+		 * fingers and hope for the best. XXX do controller reset?
+		 */
+		aprint_error_dev(sc->sc_dev, "polled command timed out\n");
+
+		/* Invoke the callback to clean state anyway */
+		struct nvme_cqe cqe;
+		memset(&cqe, 0, sizeof(cqe));
+		ccb->ccb_done(q, ccb, &cqe);
+
 		return 1;
 	}
 }
@@ -1071,6 +1176,9 @@ nvme_poll_done(struct nvme_queue *q, str
 
 	SET(cqe->flags, htole16(NVME_CQE_PHASE));
 	state->c = *cqe;
+
+	ccb->ccb_cookie = state->cookie;
+	state->done(q, ccb, &state->c);
 }
 
 static void
@@ -1147,6 +1255,26 @@ nvme_q_complete(struct nvme_softc *sc, s
 	return rv;
 }
 
+static void
+nvme_q_wait_complete(struct nvme_softc *sc,
+    struct nvme_queue *q, bool (*finished)(void *), void *cookie)
+{
+	mutex_enter(&q->q_ccb_mtx);
+	if (finished(cookie))
+		goto out;
+
+	for(;;) {
+		q->q_ccb_waiting = true;
+		cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
+
+		if (finished(cookie))
+			break;
+	}
+
+out:
+	mutex_exit(&q->q_ccb_mtx);
+}
+
 static int
 nvme_identify(struct nvme_softc *sc, u_int mps)
 {
@@ -1157,7 +1285,7 @@ nvme_identify(struct nvme_softc *sc, u_i
 	u_int mdts;
 	int rv = 1;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
@@ -1214,7 +1342,7 @@ nvme_q_create(struct nvme_softc *sc, str
 	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
 		return 1;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL);
 
 	ccb->ccb_done = nvme_empty_done;
@@ -1260,7 +1388,7 @@ nvme_q_delete(struct nvme_softc *sc, str
 	struct nvme_ccb *ccb;
 	int rv;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL);
 
 	ccb->ccb_done = nvme_empty_done;
@@ -1310,25 +1438,28 @@ nvme_fill_identify(struct nvme_queue *q,
 static int
 nvme_get_number_of_queues(struct nvme_softc *sc, u_int *nqap)
 {
+	struct nvme_pt_state state;
 	struct nvme_pt_command pt;
 	struct nvme_ccb *ccb;
 	uint16_t ncqa, nsqa;
 	int rv;
 
-	ccb = nvme_ccb_get(sc->sc_admin_q);
+	ccb = nvme_ccb_get(sc->sc_admin_q, false);
 	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
 
 	memset(&pt, 0, sizeof(pt));
 	pt.cmd.opcode = NVM_ADMIN_GET_FEATURES;
 	pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES;
 
+	memset(&state, 0, sizeof(state));
+	state.pt = &pt;
+	state.finished = false;
+
 	ccb->ccb_done = nvme_pt_done;
-	ccb->ccb_cookie = &pt;
+	ccb->ccb_cookie = &state;
 
 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP);
 
-	nvme_ccb_put(sc->sc_admin_q, ccb);
-
 	if (rv != 0) {
 		*nqap = 0;
 		return EIO;
@@ -1351,6 +1482,8 @@ nvme_ccbs_alloc(struct nvme_queue *q, ui
 	u_int i;
 
 	mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
+	cv_init(&q->q_ccb_wait, "nvmeqw");
+	q->q_ccb_waiting = false;
 	SIMPLEQ_INIT(&q->q_ccb_list);
 
 	q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);
@@ -1390,17 +1523,24 @@ free_maps:
 }
 
 static struct nvme_ccb *
-nvme_ccb_get(struct nvme_queue *q)
+nvme_ccb_get(struct nvme_queue *q, bool wait)
 {
 	struct nvme_ccb *ccb = NULL;
 
 	mutex_enter(&q->q_ccb_mtx);
+again:
 	ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
 	if (ccb != NULL) {
 		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
 #ifdef DEBUG
 		ccb->ccb_cookie = NULL;
 #endif
+	} else {
+		if (__predict_false(wait)) {
+			q->q_ccb_waiting = true;
+			cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
+			goto again;
+		}
 	}
 	mutex_exit(&q->q_ccb_mtx);
 
@@ -1416,6 +1556,13 @@ nvme_ccb_put(struct nvme_queue *q, struc
 	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
 #endif
 	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);
+
+	/* It's unlikely there are any waiters, it's not used for regular I/O */
+	if (__predict_false(q->q_ccb_waiting)) {
+		q->q_ccb_waiting = false;
+		cv_broadcast(&q->q_ccb_wait);
+	}
+
 	mutex_exit(&q->q_ccb_mtx);
 }
 
@@ -1435,6 +1582,7 @@ nvme_ccbs_free(struct nvme_queue *q)
 	nvme_dmamem_free(sc, q->q_ccb_prpls);
 	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
 	q->q_ccbs = NULL;
+	cv_destroy(&q->q_ccb_wait);
 	mutex_destroy(&q->q_ccb_mtx);
 }
 

Index: src/sys/dev/ic/nvmevar.h
diff -u src/sys/dev/ic/nvmevar.h:1.13.6.1 src/sys/dev/ic/nvmevar.h:1.13.6.2
--- src/sys/dev/ic/nvmevar.h:1.13.6.1	Sat Mar 17 08:11:18 2018
+++ src/sys/dev/ic/nvmevar.h	Sun Mar 18 11:05:27 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmevar.h,v 1.13.6.1 2018/03/17 08:11:18 martin Exp $	*/
+/*	$NetBSD: nvmevar.h,v 1.13.6.2 2018/03/18 11:05:27 martin Exp $	*/
 /*	$OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
 
 /*
@@ -87,6 +87,8 @@ struct nvme_queue {
 	uint16_t		q_cq_phase;
 
 	kmutex_t		q_ccb_mtx;
+	kcondvar_t		q_ccb_wait;	/* wait for ccb avail/finish */
+	bool			q_ccb_waiting;	/* whether there are waiters */
 	uint16_t		q_nccbs;	/* total number of ccbs */
 	struct nvme_ccb		*q_ccbs;
 	SIMPLEQ_HEAD(, nvme_ccb) q_ccb_list;
@@ -179,6 +181,5 @@ int	nvme_ns_identify(struct nvme_softc *
 void	nvme_ns_free(struct nvme_softc *, uint16_t);
 int	nvme_ns_dobio(struct nvme_softc *, uint16_t, void *,
     struct buf *, void *, size_t, int, daddr_t, int, nvme_nnc_done);
-int	nvme_ns_sync(struct nvme_softc *, uint16_t, void *, int, nvme_nnc_done);
-bool	nvme_has_volatile_write_cache(struct nvme_softc *);
-int	nvme_admin_getcache(struct nvme_softc *, void *, nvme_nnc_done);
+int	nvme_ns_sync(struct nvme_softc *, uint16_t, int);
+int	nvme_admin_getcache(struct nvme_softc *, int *);
