Hi tech@, while trying to add bio(4) support to mfii(4), we noticed that SAS2208-based cards with cache memory hang on boot; see https://marc.info/?t=147738428600001&r=1&w=2
After investigation, we concluded that the MPII SCSI PASSTHRU feature needs to be used for sending MFI commands (such as DCMD_*) instead of MFA requests. This patch shouldn't change any functionality/behavior, but it is required for the upcoming bio(4) support. Tested on: SAS2208, SAS3008, and SAS3108 --- sys/dev/pci/mfii.c +++ sys/dev/pci/mfii.c @@ -44,6 +44,8 @@ #define MFII_OSTS_INTR_VALID 0x00000009 #define MFII_RPI 0x6c /* reply post host index */ +#define MFII_OSP2 0xb4 /* outbound scratch pad 2 */ +#define MFII_OSP3 0xb8 /* outbound scratch pad 3 */ #define MFII_REQ_TYPE_SCSI MPII_REQ_DESCR_SCSI_IO #define MFII_REQ_TYPE_LDIO (0x7 << 1) @@ -51,20 +53,42 @@ #define MFII_REQ_TYPE_NO_LOCK (0x2 << 1) #define MFII_REQ_TYPE_HI_PRI (0x6 << 1) -#define MFII_REQ_MFA(_a) htole64((_a) | MFII_REQ_TYPE_MFA) - #define MFII_FUNCTION_PASSTHRU_IO (0xf0) #define MFII_FUNCTION_LDIO_REQUEST (0xf1) -struct mfii_request_descr { +#define MFII_MAX_CHAIN_UNIT 0x00400000 +#define MFII_MAX_CHAIN_MASK 0x000003E0 +#define MFII_MAX_CHAIN_SHIFT 5 + +#define MFII_256K_IO 128 +#define MFII_1MB_IO (MFII_256K_IO * 4) + +#define MFII_CHAIN_FRAME_MIN 1024 + +struct mfii_scsi_io_request_descr { u_int8_t flags; u_int8_t msix_index; u_int16_t smid; - u_int16_t lmid; u_int16_t dev_handle; } __packed; +struct mfii_mfa_io_request_descr { + u_int32_t flags:8; + u_int32_t addr1:24; + u_int32_t addr2; +} __packed; + +union mfii_request_descr { + struct mfii_scsi_io_request_descr scsi; + struct mfii_mfa_io_request_descr mfa; + struct { + u_int32_t lo; + u_int32_t hi; + } __packed u; + u_int64_t word; +}; + #define MFII_RAID_CTX_IO_TYPE_SYSPD (0x1 << 4) #define MFII_RAID_CTX_TYPE_CUDA (0x2 << 4) @@ -180,6 +204,10 @@ struct mfii_ccb { u_int64_t ccb_request_dva; bus_addr_t ccb_request_offset; + void *ccb_mfi; + u_int64_t ccb_mfi_dva; + bus_addr_t ccb_mfi_offset; + struct mfi_sense *ccb_sense; u_int64_t ccb_sense_dva; bus_addr_t ccb_sense_offset; @@ -189,7 +217,7 @@ struct mfii_ccb { bus_addr_t 
ccb_sgl_offset; u_int ccb_sgl_len; - struct mfii_request_descr ccb_req; + union mfii_request_descr ccb_req; bus_dmamap_t ccb_dmamap; @@ -250,6 +278,7 @@ struct mfii_softc { struct mutex sc_ccb_mtx; struct mutex sc_post_mtx; + u_int sc_max_fw_cmds; u_int sc_max_cmds; u_int sc_max_sgl; @@ -259,6 +288,7 @@ struct mfii_softc { struct mfii_dmamem *sc_reply_postq; struct mfii_dmamem *sc_requests; + struct mfii_dmamem *sc_mfi; struct mfii_dmamem *sc_sense; struct mfii_dmamem *sc_sgl; @@ -280,6 +310,32 @@ struct mfii_softc { struct mfi_ctrl_info sc_info; }; +#ifdef MFII_DEBUG +#define DPRINTF(x...) do { if (mfii_debug) printf(x); } while(0) +#define DNPRINTF(n,x...) do { if (mfii_debug & n) printf(x); } while(0) +#define MFII_D_CMD 0x0001 +#define MFII_D_INTR 0x0002 +#define MFII_D_MISC 0x0004 +#define MFII_D_DMA 0x0008 +#define MFII_D_IOCTL 0x0010 +#define MFII_D_RW 0x0020 +#define MFII_D_MEM 0x0040 +#define MFII_D_CCB 0x0080 +uint32_t mfii_debug = 0 +/* | MFII_D_CMD */ +/* | MFII_D_INTR */ +/* | MFII_D_MISC */ +/* | MFII_D_DMA */ +/* | MFII_D_IOCTL */ +/* | MFII_D_RW */ +/* | MFII_D_MEM */ +/* | MFII_D_CCB */ + ; +#else +#define DPRINTF(x...) +#define DNPRINTF(n,x...) 
+#endif + int mfii_match(struct device *, void *, void *); void mfii_attach(struct device *, struct device *, void *); int mfii_detach(struct device *, int); @@ -356,6 +412,7 @@ int mfii_mfa_poll(struct mfii_softc *, struct mfii_ccb *); int mfii_mgmt(struct mfii_softc *, struct mfii_ccb *, u_int32_t, const union mfi_mbox *, void *, size_t, int); +void mfii_empty_done(struct mfii_softc *, struct mfii_ccb *); int mfii_scsi_cmd_io(struct mfii_softc *, struct scsi_xfer *); @@ -517,7 +574,8 @@ mfii_attach(struct device *parent, struct device *self, void *aux) pcireg_t memtype; pci_intr_handle_t ih; struct scsibus_attach_args saa; - u_int32_t status; + u_int32_t status, scpad2, scpad3; + int chain_frame_sz, nsge_in_io, nsge_in_chain; /* init sc */ sc->sc_iop = mfii_find_iop(aux); @@ -556,9 +614,43 @@ mfii_attach(struct device *parent, struct device *self, void *aux) if (mfii_transition_firmware(sc)) goto pci_unmap; + /* determine max_cmds (refer to the Linux megaraid_sas driver) */ + scpad3 = mfii_read(sc, MFII_OSP3); status = mfii_fw_state(sc); - sc->sc_max_cmds = status & MFI_STATE_MAXCMD_MASK; - sc->sc_max_sgl = (status & MFI_STATE_MAXSGL_MASK) >> 16; + sc->sc_max_fw_cmds = scpad3 & MFI_STATE_MAXCMD_MASK; + if (sc->sc_max_fw_cmds == 0) + sc->sc_max_fw_cmds = status & MFI_STATE_MAXCMD_MASK; + /* + * reduce max_cmds by 1 to ensure that the reply queue depth does not + * exceed FW supplied max_fw_cmds. + */ + sc->sc_max_cmds = min(sc->sc_max_fw_cmds, 1024) - 1; + + /* determine max_sgl (refer to the Linux megaraid_sas driver) */ + scpad2 = mfii_read(sc, MFII_OSP2); + chain_frame_sz = + ((scpad2 & MFII_MAX_CHAIN_MASK) >> MFII_MAX_CHAIN_SHIFT) * + ((scpad2 & MFII_MAX_CHAIN_UNIT) ? 
MFII_1MB_IO : MFII_256K_IO); + if (chain_frame_sz < MFII_CHAIN_FRAME_MIN) + chain_frame_sz = MFII_CHAIN_FRAME_MIN; + + nsge_in_io = (MFII_REQUEST_SIZE - + sizeof(struct mpii_msg_scsi_io) - + sizeof(struct mfii_raid_context)) / sizeof(struct mfii_sge); + nsge_in_chain = chain_frame_sz / sizeof(struct mfii_sge); + + /* round down to nearlest power of two */ + sc->sc_max_sgl = 1; + while ((sc->sc_max_sgl << 1) <= (nsge_in_io + nsge_in_chain)) + sc->sc_max_sgl <<= 1; + + DNPRINTF(MFII_D_MISC, "%s: OSP 0x%08x, OSP2 0x%08x, OSP3 0x%08x\n", + __func__, status, scpad2, scpad3); + DNPRINTF(MFII_D_MISC, "%s: max_fw_cmds %d, max_cmds %d\n", + __func__, sc->sc_max_fw_cmds, sc->sc_max_cmds); + DNPRINTF(MFII_D_MISC, "%s: nsge_in_io %d, nsge_in_chain %d, " + "max_sgl %d\n", __func__, nsge_in_io, nsge_in_chain, + sc->sc_max_sgl); /* sense memory */ CTASSERT(sizeof(struct mfi_sense) == MFI_SENSE_SIZE); @@ -568,7 +660,8 @@ mfii_attach(struct device *parent, struct device *self, void *aux) goto pci_unmap; } - sc->sc_reply_postq_depth = roundup(sc->sc_max_cmds, 16); + /* reply post queue */ + sc->sc_reply_postq_depth = roundup(sc->sc_max_fw_cmds, 16); sc->sc_reply_postq = mfii_dmamem_alloc(sc, sc->sc_reply_postq_depth * sizeof(struct mpii_reply_descr)); @@ -578,15 +671,22 @@ mfii_attach(struct device *parent, struct device *self, void *aux) memset(MFII_DMA_KVA(sc->sc_reply_postq), 0xff, MFII_DMA_LEN(sc->sc_reply_postq)); + /* MPII request frame array */ sc->sc_requests = mfii_dmamem_alloc(sc, MFII_REQUEST_SIZE * (sc->sc_max_cmds + 1)); if (sc->sc_requests == NULL) goto free_reply_postq; + /* MFI command frame array */ + sc->sc_mfi = mfii_dmamem_alloc(sc, sc->sc_max_cmds * MFI_FRAME_SIZE); + if (sc->sc_mfi == NULL) + goto free_requests; + + /* MPII SGL array */ sc->sc_sgl = mfii_dmamem_alloc(sc, sc->sc_max_cmds * sizeof(struct mfii_sge) * sc->sc_max_sgl); if (sc->sc_sgl == NULL) - goto free_requests; + goto free_mfi; if (mfii_init_ccb(sc) != 0) { printf("%s: could not init ccb list\n", 
DEVNAME(sc)); @@ -644,6 +744,8 @@ intr_disestablish: pci_intr_disestablish(sc->sc_pc, sc->sc_ih); free_sgl: mfii_dmamem_free(sc, sc->sc_sgl); +free_mfi: + mfii_dmamem_free(sc, sc->sc_mfi); free_requests: mfii_dmamem_free(sc, sc->sc_requests); free_reply_postq: @@ -762,6 +864,7 @@ mfii_detach(struct device *self, int flags) mfii_aen_unregister(sc); pci_intr_disestablish(sc->sc_pc, sc->sc_ih); mfii_dmamem_free(sc, sc->sc_sgl); + mfii_dmamem_free(sc, sc->sc_mfi); mfii_dmamem_free(sc, sc->sc_requests); mfii_dmamem_free(sc, sc->sc_reply_postq); mfii_dmamem_free(sc, sc->sc_sense); @@ -853,8 +956,8 @@ mfii_dcmd_start(struct mfii_softc *sc, struct mfii_ccb *ccb) htolem32(&sge->sg_len, sizeof(*ccb->ccb_sense)); sge->sg_flags = MFII_SGE_CHAIN_ELEMENT | MFII_SGE_ADDR_IOCPLBNTA; - ccb->ccb_req.flags = MFII_REQ_TYPE_SCSI; - ccb->ccb_req.smid = letoh16(ccb->ccb_smid); + ccb->ccb_req.scsi.flags = MFII_REQ_TYPE_SCSI; + ccb->ccb_req.scsi.smid = letoh16(ccb->ccb_smid); mfii_start(sc, ccb); } @@ -1279,8 +1382,14 @@ mfii_get_info(struct mfii_softc *sc) int mfii_mfa_poll(struct mfii_softc *sc, struct mfii_ccb *ccb) { + /* + * Since the frame address passed to the F/W through Inbound Queue + * Port register must be aligned to 256 byte boundary, this means + * that we can not use MFI frame memory (ccb->ccb_mfi) which is 128 + * byte boundary here. Caller must ensure that the MFI frame is built + * in the MPII frame memory (ccb->ccb_request). 
+ */ struct mfi_frame_header *hdr = ccb->ccb_request; - u_int64_t r; int to = 0, rv = 0; #ifdef DIAGNOSTIC @@ -1289,11 +1398,12 @@ mfii_mfa_poll(struct mfii_softc *sc, struct mfii_ccb *ccb) #endif hdr->mfh_context = ccb->ccb_smid; - hdr->mfh_cmd_status = 0xff; + hdr->mfh_cmd_status = MFI_STAT_INVALID_STATUS; hdr->mfh_flags |= htole16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE); - r = MFII_REQ_MFA(ccb->ccb_request_dva); - memcpy(&ccb->ccb_req, &r, sizeof(ccb->ccb_req)); + ccb->ccb_req.u.lo = htole32(ccb->ccb_request_dva); + ccb->ccb_req.u.hi = htole32(ccb->ccb_request_dva >> 32); + ccb->ccb_req.mfa.flags |= MFII_REQ_TYPE_MFA; mfii_start(sc, ccb); @@ -1302,7 +1412,7 @@ mfii_mfa_poll(struct mfii_softc *sc, struct mfii_ccb *ccb) ccb->ccb_request_offset, MFII_REQUEST_SIZE, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); - if (hdr->mfh_cmd_status != 0xff) + if (hdr->mfh_cmd_status != MFI_STAT_INVALID_STATUS) break; if (to++ > 5000) { /* XXX 5 seconds busywait sucks */ @@ -1379,6 +1489,8 @@ mfii_exec(struct mfii_softc *sc, struct mfii_ccb *ccb) ccb->ccb_cookie = &m; ccb->ccb_done = mfii_exec_done; + mfii_start(sc, ccb); + mtx_enter(&m); while (ccb->ccb_cookie != NULL) msleep(ccb, &m, PRIBIO, "mfiiexec", 0); @@ -1403,11 +1515,13 @@ mfii_mgmt(struct mfii_softc *sc, struct mfii_ccb *ccb, u_int32_t opc, const union mfi_mbox *mbox, void *buf, size_t len, int flags) { - struct mfi_dcmd_frame *dcmd = ccb->ccb_request; - struct mfi_frame_header *hdr = &dcmd->mdf_header; - u_int64_t r; + struct mpii_msg_scsi_io *io = ccb->ccb_request; + struct mfii_raid_context *ctx = (struct mfii_raid_context *)(io + 1); + struct mfii_sge *sge = (struct mfii_sge *)(ctx + 1); + struct mfi_dcmd_frame *dcmd = ccb->ccb_mfi; + struct mfi_frame_header *hdr = &dcmd->mdf_header; u_int8_t *dma_buf; - int rv = EIO; + int rv = 0; dma_buf = dma_alloc(len, PR_WAITOK); if (dma_buf == NULL) @@ -1444,27 +1558,55 @@ mfii_mgmt(struct mfii_softc *sc, struct mfii_ccb *ccb, if (mbox != NULL) memcpy(&dcmd->mdf_mbox, mbox, 
sizeof(dcmd->mdf_mbox)); - if (ISSET(flags, SCSI_NOSLEEP)) - mfii_mfa_poll(sc, ccb); - else { - r = MFII_REQ_MFA(ccb->ccb_request_dva); - memcpy(&ccb->ccb_req, &r, sizeof(ccb->ccb_req)); + io->function = MFII_FUNCTION_PASSTHRU_IO; + io->sgl_offset0 = ((u_int8_t *)sge - (u_int8_t *)io) / 4; + io->chain_offset = ((u_int8_t *)sge - (u_int8_t *)io) / 16; + + htolem64(&sge->sg_addr, ccb->ccb_mfi_dva); + htolem32(&sge->sg_len, MFI_FRAME_SIZE); + sge->sg_flags = MFII_SGE_CHAIN_ELEMENT | MFII_SGE_ADDR_IOCPLBNTA; + + ccb->ccb_req.scsi.flags = MFII_REQ_TYPE_SCSI; + ccb->ccb_req.scsi.smid = letoh16(ccb->ccb_smid); + + if (ISSET(flags, SCSI_NOSLEEP)) { + /* busy-loop polling with done handler */ + ccb->ccb_cookie = NULL; + ccb->ccb_done = mfii_empty_done; + mfii_poll(sc, ccb); + } else { + /* sleep/wakeup without done handler */ + ccb->ccb_cookie = NULL; + ccb->ccb_done = NULL; mfii_exec(sc, ccb); } - if (hdr->mfh_cmd_status == MFI_STAT_OK) { - rv = 0; + if (ccb->ccb_len > 0) { + bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, + 0, ccb->ccb_dmamap->dm_mapsize, + (ccb->ccb_direction == MFII_DATA_IN) ? + BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); - if (ccb->ccb_direction == MFII_DATA_IN) - memcpy(buf, dma_buf, len); + bus_dmamap_unload(sc->sc_dmat, ccb->ccb_dmamap); } + rv = hdr->mfh_cmd_status == MFI_STAT_OK ? 
0 : 1; + + if (rv == 0 && ccb->ccb_direction == MFII_DATA_IN) + memcpy(buf, dma_buf, len); + done: dma_free(dma_buf, len); return (rv); } +void +mfii_empty_done(struct mfii_softc *sc, struct mfii_ccb *ccb) +{ + return; +} + int mfii_load_mfa(struct mfii_softc *sc, struct mfii_ccb *ccb, void *sglp, int nosleep) @@ -1500,21 +1642,22 @@ mfii_load_mfa(struct mfii_softc *sc, struct mfii_ccb *ccb, void mfii_start(struct mfii_softc *sc, struct mfii_ccb *ccb) { - u_long *r = (u_long *)&ccb->ccb_req; - bus_dmamap_sync(sc->sc_dmat, MFII_DMA_MAP(sc->sc_requests), ccb->ccb_request_offset, MFII_REQUEST_SIZE, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); #if defined(__LP64__) - bus_space_write_raw_8(sc->sc_iot, sc->sc_ioh, MFI_IQPL, *r); + bus_space_write_raw_8(sc->sc_iot, sc->sc_ioh, + MFI_IQPL, ccb->ccb_req.word); #else mtx_enter(&sc->sc_post_mtx); - bus_space_write_raw_4(sc->sc_iot, sc->sc_ioh, MFI_IQPL, r[0]); + bus_space_write_raw_4(sc->sc_iot, sc->sc_ioh, + MFI_IQPL, ccb->ccb_req.u.lo); bus_space_barrier(sc->sc_iot, sc->sc_ioh, MFI_IQPL, 8, BUS_SPACE_BARRIER_WRITE); - bus_space_write_raw_4(sc->sc_iot, sc->sc_ioh, MFI_IQPH, r[1]); + bus_space_write_raw_4(sc->sc_iot, sc->sc_ioh, + MFI_IQPH, ccb->ccb_req.u.hi); bus_space_barrier(sc->sc_iot, sc->sc_ioh, MFI_IQPH, 8, BUS_SPACE_BARRIER_WRITE); mtx_leave(&sc->sc_post_mtx); @@ -1839,8 +1982,8 @@ mfii_scsi_cmd_io(struct mfii_softc *sc, struct scsi_xfer *xs) break; } - ccb->ccb_req.flags = sc->sc_iop->ldio_req_type; - ccb->ccb_req.smid = letoh16(ccb->ccb_smid); + ccb->ccb_req.scsi.flags = sc->sc_iop->ldio_req_type; + ccb->ccb_req.scsi.smid = letoh16(ccb->ccb_smid); return (0); } @@ -1886,8 +2029,8 @@ mfii_scsi_cmd_cdb(struct mfii_softc *sc, struct scsi_xfer *xs) ctx->num_sge = (ccb->ccb_len == 0) ? 
0 : ccb->ccb_dmamap->dm_nsegs; - ccb->ccb_req.flags = MFII_REQ_TYPE_SCSI; - ccb->ccb_req.smid = letoh16(ccb->ccb_smid); + ccb->ccb_req.scsi.flags = MFII_REQ_TYPE_SCSI; + ccb->ccb_req.scsi.smid = letoh16(ccb->ccb_smid); return (0); } @@ -2007,9 +2150,9 @@ mfii_pd_scsi_cmd_cdb(struct mfii_softc *sc, struct scsi_xfer *xs) ctx->num_sge = (ccb->ccb_len == 0) ? 0 : ccb->ccb_dmamap->dm_nsegs; - ccb->ccb_req.flags = MFII_REQ_TYPE_HI_PRI; - ccb->ccb_req.smid = letoh16(ccb->ccb_smid); - ccb->ccb_req.dev_handle = dev_handle; + ccb->ccb_req.scsi.flags = MFII_REQ_TYPE_HI_PRI; + ccb->ccb_req.scsi.smid = letoh16(ccb->ccb_smid); + ccb->ccb_req.scsi.dev_handle = dev_handle; return (XS_NOERROR); } @@ -2153,8 +2296,8 @@ mfii_abort(struct mfii_softc *sc, struct mfii_ccb *accb, uint16_t dev_handle, htolem16(&req->task_mid, smid); msg->flags = flags; - accb->ccb_req.flags = MFII_REQ_TYPE_HI_PRI; - accb->ccb_req.smid = letoh16(accb->ccb_smid); + accb->ccb_req.scsi.flags = MFII_REQ_TYPE_HI_PRI; + accb->ccb_req.scsi.smid = letoh16(accb->ccb_smid); } void @@ -2200,6 +2343,7 @@ mfii_scrub_ccb(struct mfii_ccb *ccb) memset(&ccb->ccb_req, 0, sizeof(ccb->ccb_req)); memset(ccb->ccb_request, 0, MFII_REQUEST_SIZE); + memset(ccb->ccb_mfi, 0, MFI_FRAME_SIZE); } void @@ -2218,6 +2362,7 @@ mfii_init_ccb(struct mfii_softc *sc) { struct mfii_ccb *ccb; u_int8_t *request = MFII_DMA_KVA(sc->sc_requests); + u_int8_t *mfi = MFII_DMA_KVA(sc->sc_mfi); u_int8_t *sense = MFII_DMA_KVA(sc->sc_sense); u_int8_t *sgl = MFII_DMA_KVA(sc->sc_sgl); u_int i; @@ -2246,6 +2391,12 @@ mfii_init_ccb(struct mfii_softc *sc) ccb->ccb_request_dva = MFII_DMA_DVA(sc->sc_requests) + ccb->ccb_request_offset; + /* select i'th MFI command frame */ + ccb->ccb_mfi_offset = MFI_FRAME_SIZE * i; + ccb->ccb_mfi = mfi + ccb->ccb_mfi_offset; + ccb->ccb_mfi_dva = MFII_DMA_DVA(sc->sc_mfi) + + ccb->ccb_mfi_offset; + /* select i'th sense */ ccb->ccb_sense_offset = MFI_SENSE_SIZE * i; ccb->ccb_sense = (struct mfi_sense *)(sense + -- FUKAUMI Naoki