Module Name:    src
Committed By:   jdolecek
Date:           Sun Sep 18 21:19:39 UTC 2016

Modified Files:
        src/sys/dev/ic: ld_nvme.c nvme.c nvmevar.h
        src/sys/dev/pci: nvme_pci.c

Log Message:
fix several bugs, make nvme(4) MPSAFE by default and also bump default
number of ioq from 128 to 1024; tested with VirtualBox and QEMU

* remove NVME_INTMC/NVME_INTMS writes in hw intr handler as this is not MPSAFE,
  fortunately they don't seem to be necessary; shaves two register writes
* need to use full mutex_enter() in nvme_q_complete(), to avoid small
  race between one handler exiting the loop and another entering
* for MSI, handover the command result processing to softintr; unfortunately
  can't easily do that for INTx interrupts as they require doorbell write
  to deassert
* unlock/relock q->q_cq_mtx before calling ccb_done to avoid potential deadlocks
* make sure to destroy queue mutexes when destroying the queue (LOCKDEBUG)
* make ns ctx pool per-device, so that it's deallocated properly on module
  unload
* handle ctx allocation failure in ld_nvme_dobio()
* remove splbio() calls in ld_nvme_dobio() and sync, the paths are exercised
  only for dump/shutdown, and that already disables interrupts
* free the ns ctx in ld_nvme_biodone() before calling lddone() to avoid
  memory starvation, as lddone() can trigger another i/o request
* be more careful with using PR_WAITOK, the paths are called from interrupt
  context and there we can't wait


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/sys/dev/ic/ld_nvme.c
cvs rdiff -u -r1.8 -r1.9 src/sys/dev/ic/nvme.c
cvs rdiff -u -r1.2 -r1.3 src/sys/dev/ic/nvmevar.h
cvs rdiff -u -r1.13 -r1.14 src/sys/dev/pci/nvme_pci.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/ld_nvme.c
diff -u src/sys/dev/ic/ld_nvme.c:1.3 src/sys/dev/ic/ld_nvme.c:1.4
--- src/sys/dev/ic/ld_nvme.c:1.3	Fri Sep 16 15:24:47 2016
+++ src/sys/dev/ic/ld_nvme.c	Sun Sep 18 21:19:39 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: ld_nvme.c,v 1.3 2016/09/16 15:24:47 jdolecek Exp $	*/
+/*	$NetBSD: ld_nvme.c,v 1.4 2016/09/18 21:19:39 jdolecek Exp $	*/
 
 /*-
  * Copyright (C) 2016 NONAKA Kimihiro <non...@netbsd.org>
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.3 2016/09/16 15:24:47 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ld_nvme.c,v 1.4 2016/09/18 21:19:39 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -108,8 +108,8 @@ ld_nvme_attach(device_t parent, device_t
 
 	ld->sc_secsize = 1 << f->lbads;
 	ld->sc_secperunit = nsze;
-	ld->sc_maxxfer = MAXPHYS;
-	ld->sc_maxqueuecnt = naa->naa_qentries;
+	ld->sc_maxxfer = MAXPHYS; /* XXX set according to device */
+	ld->sc_maxqueuecnt = naa->naa_qentries; /* XXX set to max allowed by device, not current config */
 	ld->sc_start = ld_nvme_start;
 	ld->sc_dump = ld_nvme_dump;
 	ld->sc_flush = ld_nvme_flush;
@@ -156,9 +156,12 @@ ld_nvme_dobio(struct ld_nvme_softc *sc, 
 {
 	struct nvme_ns_context *ctx;
 	int error;
-	int s;
+	int waitok = (bp != NULL && !cpu_softintr_p() && !cpu_intr_p());
+
+	ctx = nvme_ns_get_ctx(sc, waitok ? PR_WAITOK : PR_NOWAIT);
+	if (ctx == NULL)
+		return EAGAIN;
 
-	ctx = nvme_ns_get_ctx(bp != NULL ? PR_WAITOK : PR_NOWAIT);
 	ctx->nnc_cookie = sc;
 	ctx->nnc_nsid = sc->sc_nsid;
 	ctx->nnc_done = ld_nvme_biodone;
@@ -168,14 +171,12 @@ ld_nvme_dobio(struct ld_nvme_softc *sc, 
 	ctx->nnc_secsize = sc->sc_ld.sc_secsize;
 	ctx->nnc_blkno = blkno;
 	ctx->nnc_flags = dowrite ? 0 : NVME_NS_CTX_F_READ;
-	if (bp == NULL) {
+	if (bp == NULL)
 		SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
-		s = splbio();
-	}
+
 	error = nvme_ns_dobio(sc->sc_nvme, ctx);
-	if (bp == NULL) {
-		splx(s);
-	}
+	if (error)
+		nvme_ns_put_ctx(sc, ctx);
 
 	return error;
 }
@@ -187,6 +188,10 @@ ld_nvme_biodone(struct nvme_ns_context *
 	struct buf *bp = ctx->nnc_buf;
 	int status = NVME_CQE_SC(ctx->nnc_status);
 
+	/* free before processing to avoid starvation, lddone() could trigger
+	 * another i/o request */
+	nvme_ns_put_ctx(sc, ctx);
+
 	if (bp != NULL) {
 		if (status != NVME_CQE_SC_SUCCESS) {
 			bp->b_error = EIO;
@@ -201,7 +206,6 @@ ld_nvme_biodone(struct nvme_ns_context *
 			aprint_error_dev(sc->sc_ld.sc_dv, "I/O error\n");
 		}
 	}
-	nvme_ns_put_ctx(ctx);
 }
 
 static int
@@ -210,21 +214,23 @@ ld_nvme_flush(struct ld_softc *ld, int f
 	struct ld_nvme_softc *sc = device_private(ld->sc_dv);
 	struct nvme_ns_context *ctx;
 	int error;
-	int s;
+	int waitok = (!ISSET(flags, LDFL_POLL)
+	    && !cpu_softintr_p() && !cpu_intr_p());
+
+	ctx = nvme_ns_get_ctx(sc, waitok ? PR_WAITOK : PR_NOWAIT);
+	if (ctx == NULL)
+		return EAGAIN;
 
-	ctx = nvme_ns_get_ctx((flags & LDFL_POLL) ? PR_NOWAIT : PR_WAITOK);
 	ctx->nnc_cookie = sc;
 	ctx->nnc_nsid = sc->sc_nsid;
 	ctx->nnc_done = ld_nvme_syncdone;
 	ctx->nnc_flags = 0;
-	if (flags & LDFL_POLL) {
+	if (flags & LDFL_POLL)
 		SET(ctx->nnc_flags, NVME_NS_CTX_F_POLL);
-		s = splbio();
-	}
+
 	error = nvme_ns_sync(sc->sc_nvme, ctx);
-	if (flags & LDFL_POLL) {
-		splx(s);
-	}
+	if (error)
+		nvme_ns_put_ctx(sc, ctx);
 
 	return error;
 }
@@ -232,6 +238,7 @@ ld_nvme_flush(struct ld_softc *ld, int f
 static void
 ld_nvme_syncdone(struct nvme_ns_context *ctx)
 {
+	struct ld_nvme_softc *sc = ctx->nnc_cookie;
 
-	nvme_ns_put_ctx(ctx);
+	nvme_ns_put_ctx(sc, ctx);
 }

Index: src/sys/dev/ic/nvme.c
diff -u src/sys/dev/ic/nvme.c:1.8 src/sys/dev/ic/nvme.c:1.9
--- src/sys/dev/ic/nvme.c:1.8	Sat Sep 17 19:52:16 2016
+++ src/sys/dev/ic/nvme.c	Sun Sep 18 21:19:39 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvme.c,v 1.8 2016/09/17 19:52:16 jdolecek Exp $	*/
+/*	$NetBSD: nvme.c,v 1.9 2016/09/18 21:19:39 jdolecek Exp $	*/
 /*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */
 
 /*
@@ -18,7 +18,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.8 2016/09/17 19:52:16 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.9 2016/09/18 21:19:39 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -41,7 +41,7 @@ __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.8
 #include <dev/ic/nvmeio.h>
 
 int nvme_adminq_size = 128;
-int nvme_ioq_size = 128;
+int nvme_ioq_size = 1024;
 
 static int	nvme_print(void *, const char *);
 
@@ -156,18 +156,6 @@ nvme_write8(struct nvme_softc *sc, bus_s
 #define nvme_barrier(_s, _r, _l, _f) \
 	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))
 
-pool_cache_t nvme_ns_ctx_cache;
-ONCE_DECL(nvme_init_once);
-
-static int
-nvme_init(void)
-{
-	nvme_ns_ctx_cache = pool_cache_init(sizeof(struct nvme_ns_context),
-	    0, 0, 0, "nvme_ns_ctx", NULL, IPL_BIO, NULL, NULL, NULL);
-	KASSERT(nvme_ns_ctx_cache != NULL);
-	return 0;
-}
-
 static void
 nvme_version(struct nvme_softc *sc, uint32_t ver)
 {
@@ -350,8 +338,6 @@ nvme_attach(struct nvme_softc *sc)
 	int ioq_entries = nvme_ioq_size;
 	int i;
 
-	RUN_ONCE(&nvme_init_once, nvme_init);
-
 	reg = nvme_read4(sc, NVME_VS);
 	if (reg == 0xffffffff) {
 		aprint_error_dev(sc->sc_dev, "invalid mapping\n");
@@ -431,8 +417,20 @@ nvme_attach(struct nvme_softc *sc)
 	if (!sc->sc_use_mq)
 		nvme_write4(sc, NVME_INTMC, 1);
 
+	snprintf(sc->sc_ctxpoolname, sizeof(sc->sc_ctxpoolname),
+	    "%s_ns_ctx", device_xname(sc->sc_dev));
+	sc->sc_ctxpool = pool_cache_init(sizeof(struct nvme_ns_context),
+	    0, 0, 0, sc->sc_ctxpoolname, NULL, IPL_BIO, NULL, NULL, NULL);
+	if (sc->sc_ctxpool == NULL) {
+		aprint_error_dev(sc->sc_dev, "unable to create ctx pool\n");
+		goto free_q;
+	}
+
+	/* probe subdevices */
 	sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn,
 	    KM_SLEEP);
+	if (sc->sc_namespaces == NULL)
+		goto free_q;
 	for (i = 0; i < sc->sc_nn; i++) {
 		memset(&naa, 0, sizeof(naa));
 		naa.naa_nsid = i + 1;
@@ -485,6 +483,9 @@ nvme_detach(struct nvme_softc *sc, int f
 	if (error)
 		return error;
 
+	/* from now on we are committed to detach, following will never fail */
+	pool_cache_destroy(sc->sc_ctxpool);
+
 	for (i = 0; i < sc->sc_nq; i++)
 		nvme_q_free(sc, sc->sc_q[i]);
 	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
@@ -564,7 +565,8 @@ nvme_ns_identify(struct nvme_softc *sc, 
 	KASSERT(nsid > 0);
 
 	ccb = nvme_ccb_get(sc->sc_admin_q);
-	KASSERT(ccb != NULL);
+	if (ccb == NULL)
+		return EAGAIN;
 
 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
 	if (mem == NULL)
@@ -856,8 +858,9 @@ nvme_command_passthrough(struct nvme_sof
 	void *buf = NULL;
 	int error;
 
+	/* limit command size to maximum data transfer size */
 	if ((pt->buf == NULL && pt->len > 0) ||
-	    (pt->buf != NULL && pt->len == 0))
+	    (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts)))
 		return EINVAL;
 
 	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
@@ -865,7 +868,8 @@ nvme_command_passthrough(struct nvme_sof
 	if (ccb == NULL)
 		return EBUSY;
 
-	if (pt->buf != NULL && pt->len > 0) {
+	if (pt->buf != NULL) {
+		KASSERT(pt->len > 0);
 		buf = kmem_alloc(pt->len, KM_SLEEP);
 		if (buf == NULL) {
 			error = ENOMEM;
@@ -1022,35 +1026,42 @@ nvme_q_complete(struct nvme_softc *sc, s
 {
 	struct nvme_ccb *ccb;
 	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
-	uint32_t head;
 	uint16_t flags;
 	int rv = 0;
 
-	if (!mutex_tryenter(&q->q_cq_mtx))
-		return -1;
+	mutex_enter(&q->q_cq_mtx);
 
 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
-	head = q->q_cq_head;
 	for (;;) {
-		cqe = &ring[head];
+		cqe = &ring[q->q_cq_head];
 		flags = lemtoh16(&cqe->flags);
 		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
 			break;
 
 		ccb = &q->q_ccbs[cqe->cid];
-		ccb->ccb_done(q, ccb, cqe);
 
-		if (++head >= q->q_entries) {
-			head = 0;
+		if (++q->q_cq_head >= q->q_entries) {
+			q->q_cq_head = 0;
 			q->q_cq_phase ^= NVME_CQE_PHASE;
 		}
 
 		rv = 1;
+
+		/*
+		 * Unlock the mutext before calling the ccb_done callback
+		 * and re-lock afterwards. The callback triggers lddone()
+		 * which schedules another i/o, and also calls nvme_ccb_put().
+		 * Unlock/relock avoids possibility of deadlock.
+		 */
+		mutex_exit(&q->q_cq_mtx);
+		ccb->ccb_done(q, ccb, cqe);
+		mutex_enter(&q->q_cq_mtx);
 	}
 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
 
 	if (rv)
-		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
+		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
+
 	mutex_exit(&q->q_cq_mtx);
 
 	return rv;
@@ -1121,7 +1132,7 @@ nvme_q_create(struct nvme_softc *sc, str
 	struct nvme_ccb *ccb;
 	int rv;
 
-	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q))
+	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
 		return 1;
 
 	ccb = nvme_ccb_get(sc->sc_admin_q);
@@ -1367,6 +1378,8 @@ static void
 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
 {
 	nvme_ccbs_free(q);
+	mutex_destroy(&q->q_sq_mtx);
+	mutex_destroy(&q->q_cq_mtx);
 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
 	nvme_dmamem_free(sc, q->q_cq_dmamem);
@@ -1380,46 +1393,38 @@ nvme_intr(void *xsc)
 	struct nvme_softc *sc = xsc;
 	int rv = 0;
 
-	nvme_write4(sc, NVME_INTMS, 1);
-
+	/* INTx is level triggered, controller deasserts the interrupt only
+	 * when we advance command queue head via write to the doorbell */
 	if (nvme_q_complete(sc, sc->sc_admin_q))
-		rv = 1;
+	        rv = 1;
 	if (sc->sc_q != NULL)
-		if (nvme_q_complete(sc, sc->sc_q[0]))
-			rv = 1;
-
-	nvme_write4(sc, NVME_INTMC, 1);
+	        if (nvme_q_complete(sc, sc->sc_q[0]))
+	                rv = 1;
 
 	return rv;
 }
 
 int
-nvme_mq_msi_intr(void *xq)
+nvme_intr_msi(void *xq)
 {
 	struct nvme_queue *q = xq;
-	struct nvme_softc *sc = q->q_sc;
-	int rv = 0;
-
-	nvme_write4(sc, NVME_INTMS, 1U << q->q_id);
 
-	if (nvme_q_complete(sc, q))
-		rv = 1;
+	KASSERT(q && q->q_sc && q->q_sc->sc_softih
+	    && q->q_sc->sc_softih[q->q_id]);
 
-	nvme_write4(sc, NVME_INTMC, 1U << q->q_id);
+	/* MSI are edge triggered, so can handover processing to softint */
+	softint_schedule(q->q_sc->sc_softih[q->q_id]);
 
-	return rv;
+	return 1;
 }
 
-int
-nvme_mq_msix_intr(void *xq)
+void
+nvme_softintr_msi(void *xq)
 {
 	struct nvme_queue *q = xq;
-	int rv = 0;
-
-	if (nvme_q_complete(q->q_sc, q))
-		rv = 1;
+	struct nvme_softc *sc = q->q_sc;
 
-	return rv;
+	nvme_q_complete(sc, q);
 }
 
 static struct nvme_dmamem *

Index: src/sys/dev/ic/nvmevar.h
diff -u src/sys/dev/ic/nvmevar.h:1.2 src/sys/dev/ic/nvmevar.h:1.3
--- src/sys/dev/ic/nvmevar.h:1.2	Sat Jun  4 16:11:51 2016
+++ src/sys/dev/ic/nvmevar.h	Sun Sep 18 21:19:39 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvmevar.h,v 1.2 2016/06/04 16:11:51 nonaka Exp $	*/
+/*	$NetBSD: nvmevar.h,v 1.3 2016/09/18 21:19:39 jdolecek Exp $	*/
 /*	$OpenBSD: nvmevar.h,v 1.8 2016/04/14 11:18:32 dlg Exp $ */
 
 /*
@@ -95,17 +95,18 @@ struct nvme_softc {
 				    uint16_t qid, struct nvme_queue *);
 	int			(*sc_intr_disestablish)(struct nvme_softc *,
 				    uint16_t qid);
-	void			**sc_ih;
+	void			**sc_ih;	/* interrupt handlers */
+	void			**sc_softih;	/* softintr handlers */
 
-	u_int			sc_rdy_to;
-	size_t			sc_mps;
-	size_t			sc_mdts;
-	u_int			sc_max_sgl;
+	u_int			sc_rdy_to;	/* RDY timeout */
+	size_t			sc_mps;		/* memory page size */  
+	size_t			sc_mdts;	/* max data trasfer size */
+	u_int			sc_max_sgl;	/* max S/G segments */
 
 	struct nvm_identify_controller
 				sc_identify;
 
-	u_int			sc_nn;
+	u_int			sc_nn;		/* namespace count */
 	struct nvme_namespace	*sc_namespaces;
 
 	bool			sc_use_mq;
@@ -113,6 +114,9 @@ struct nvme_softc {
 	struct nvme_queue	*sc_admin_q;
 	struct nvme_queue	**sc_q;
 
+	pool_cache_t		sc_ctxpool;
+	char			sc_ctxpoolname[16];	/* pool wchan */
+
 	uint32_t		sc_flags;
 #define	NVME_F_ATTACHED	__BIT(0)
 #define	NVME_F_OPEN	__BIT(1)
@@ -134,8 +138,8 @@ int	nvme_attach(struct nvme_softc *);
 int	nvme_detach(struct nvme_softc *, int flags);
 void	nvme_childdet(device_t, device_t);
 int	nvme_intr(void *);
-int	nvme_mq_msi_intr(void *);
-int	nvme_mq_msix_intr(void *);
+int	nvme_intr_msi(void *);
+void	nvme_softintr_msi(void *);
 
 static inline struct nvme_queue *
 nvme_get_q(struct nvme_softc *sc)
@@ -174,10 +178,10 @@ struct nvme_ns_context {
 	int		nnc_status;
 };
 
-extern pool_cache_t nvme_ns_ctx_cache;
-
-#define	nvme_ns_get_ctx(flags)	pool_cache_get(nvme_ns_ctx_cache, (flags))
-#define	nvme_ns_put_ctx(ctx)	pool_cache_put(nvme_ns_ctx_cache, (ctx))
+#define	nvme_ns_get_ctx(sc, flags) \
+	pool_cache_get((sc)->sc_nvme->sc_ctxpool, (flags))
+#define	nvme_ns_put_ctx(sc, ctx) \
+	pool_cache_put((sc)->sc_nvme->sc_ctxpool, (ctx))
 
 int	nvme_ns_dobio(struct nvme_softc *, struct nvme_ns_context *);
 int	nvme_ns_sync(struct nvme_softc *, struct nvme_ns_context *);

Index: src/sys/dev/pci/nvme_pci.c
diff -u src/sys/dev/pci/nvme_pci.c:1.13 src/sys/dev/pci/nvme_pci.c:1.14
--- src/sys/dev/pci/nvme_pci.c:1.13	Sun Sep 18 11:58:35 2016
+++ src/sys/dev/pci/nvme_pci.c	Sun Sep 18 21:19:39 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: nvme_pci.c,v 1.13 2016/09/18 11:58:35 jdolecek Exp $	*/
+/*	$NetBSD: nvme_pci.c,v 1.14 2016/09/18 21:19:39 jdolecek Exp $	*/
 /*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
 
 /*
@@ -43,7 +43,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.13 2016/09/18 11:58:35 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.14 2016/09/18 21:19:39 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -64,7 +64,7 @@ __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v
 #include <dev/ic/nvmevar.h>
 
 int nvme_pci_force_intx = 0;
-int nvme_pci_mpsafe = 0;
+int nvme_pci_mpsafe = 1;
 int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */
 
 #define NVME_PCI_BAR		0x10
@@ -192,9 +192,19 @@ nvme_pci_attach(device_t parent, device_
 		goto intr_release;
 	}
 
+	if (sc->sc_use_mq) {
+		sc->sc_softih = kmem_zalloc(
+		    sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);
+		if (sc->sc_softih == NULL) {
+			aprint_error_dev(self,
+			    "unable to allocate softih memory\n");
+			goto intr_free;
+		}
+	}
+
 	if (nvme_attach(sc) != 0) {
 		/* error printed by nvme_attach() */
-		goto intr_free;
+		goto softintr_free;
 	}
 
 	if (!pmf_device_register(self, NULL, NULL))
@@ -203,6 +213,11 @@ nvme_pci_attach(device_t parent, device_
 	SET(sc->sc_flags, NVME_F_ATTACHED);
 	return;
 
+softintr_free:
+	if (sc->sc_softih) {
+		kmem_free(sc->sc_softih,
+		    sizeof(*sc->sc_softih) * psc->psc_nintrs);
+	}
 intr_free:
 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
 	sc->sc_nq = 0;
@@ -228,6 +243,11 @@ nvme_pci_detach(device_t self, int flags
 	if (error)
 		return error;
 
+	if (sc->sc_softih) {
+		kmem_free(sc->sc_softih,
+		    sizeof(*sc->sc_softih) * psc->psc_nintrs);
+		sc->sc_softih = NULL;
+	}
 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
 	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
@@ -274,13 +294,11 @@ nvme_pci_intr_establish(struct nvme_soft
 			    device_xname(sc->sc_dev), qid);
 		}
 		ih_arg = q;
-		if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
-		    == PCI_INTR_TYPE_MSIX)
-			ih_func = nvme_mq_msix_intr;
-		else
-			ih_func = nvme_mq_msi_intr;
+		ih_func = nvme_intr_msi;
 	}
 #endif /* __HAVE_PCI_MSI_MSIX */
+
+	/* establish hardware interrupt */
 	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
 	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
 	if (sc->sc_ih[qid] == NULL) {
@@ -288,6 +306,23 @@ nvme_pci_intr_establish(struct nvme_soft
 		    "unable to establish %s interrupt\n", intr_xname);
 		return 1;
 	}
+
+	/* if MSI, establish also the software interrupt */
+	if (sc->sc_softih) {
+		sc->sc_softih[qid] = softint_establish(
+		    SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
+		    nvme_softintr_msi, q);
+		if (sc->sc_softih[qid] == NULL) {
+			pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
+			sc->sc_ih[qid] = NULL;
+	
+			aprint_error_dev(sc->sc_dev,
+			    "unable to establish %s soft interrupt\n",
+			    intr_xname);
+			return 1;
+		}
+	}
+	
 	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
 	    sizeof(intrbuf));
 	if (!sc->sc_use_mq) {
@@ -324,11 +359,14 @@ nvme_pci_intr_disestablish(struct nvme_s
 {
 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
 
-	if (!sc->sc_use_mq && qid > 0)
-		return 0;
-
+	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
 	KASSERT(sc->sc_ih[qid] != NULL);
 
+	if (sc->sc_softih) {
+		softint_disestablish(sc->sc_softih[qid]);
+		sc->sc_softih[qid] = NULL;
+	}
+
 	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
 	sc->sc_ih[qid] = NULL;
 

Reply via email to