Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.2 ("Asynchronous Event Request command").
Modified from Keith's qemu-nvme tree. Signed-off-by: Klaus Birkelund Jensen <klaus.jen...@cnexlabs.com> --- hw/block/nvme.c | 88 ++++++++++++++++++++++++++++++++++++++++++- hw/block/nvme.h | 7 ++++ hw/block/trace-events | 7 ++++ 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index eb6af6508e2d..a20576654f1b 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -39,6 +39,7 @@ #include "nvme.h" #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE +#define NVME_AERL 3 #define NVME_OP_ABORTED 0xff #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -318,6 +319,51 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } +static void nvme_process_aers(void *opaque) +{ + NvmeCtrl *n = opaque; + NvmeRequest *req; + NvmeAerResult *result; + NvmeAsyncEvent *event, *next; + + trace_nvme_process_aers(); + + QSIMPLEQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + /* can't post cqe if there is nothing to complete */ + if (!n->outstanding_aers) { + trace_nvme_no_outstanding_aers(); + break; + } + + /* ignore if masked (cqe posted, but event not cleared) */ + if (n->aer_mask & (1 << event->result.event_type)) { + trace_nvme_aer_masked(event->result.event_type, n->aer_mask); + continue; + } + + QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry); + + n->aer_mask |= 1 << event->result.event_type; + n->aer_mask_queued &= ~(1 << event->result.event_type); + n->outstanding_aers--; + + req = n->aer_reqs[n->outstanding_aers]; + + result = (NvmeAerResult *) &req->cqe.result; + result->event_type = event->result.event_type; + result->event_info = event->result.event_info; + result->log_page = event->result.log_page; + g_free(event); + + req->status = NVME_SUCCESS; + + trace_nvme_aer_post_cqe(result->event_type, result->event_info, + result->log_page); + + nvme_enqueue_req_completion(&n->admin_cq, req); + } +} + static void nvme_rw_cb(void *opaque, int ret) { NvmeRequest *req = opaque; @@ -796,6 +842,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); + case NVME_ASYNCHRONOUS_EVENT_CONF: + result = cpu_to_le32(n->features.async_config); break; default: trace_nvme_err_invalid_getfeat(dw10); @@ -841,11 +889,11 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->cqe.result = cpu_to_le32((n->params.num_queues - 2) | ((n->params.num_queues - 2) << 16)); break; - case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); + case NVME_ASYNCHRONOUS_EVENT_CONF: + n->features.async_config = dw11; break; - default: trace_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -854,6 +902,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + trace_nvme_aer(req->cqe.cid); + + if (n->outstanding_aers > NVME_AERL) { + trace_nvme_aer_aerl_exceeded(); + return NVME_AER_LIMIT_EXCEEDED; + } + + n->aer_reqs[n->outstanding_aers] = req; + timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + n->outstanding_aers++; + + return NVME_NO_COMPLETE; +} + static uint16_t nvme_abort(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { NvmeSQueue *sq; @@ -918,6 +982,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); + case NVME_ADM_CMD_ASYNC_EV_REQ: + return nvme_aer(n, cmd, req); case NVME_ADM_CMD_ABORT: return nvme_abort(n, cmd, req); default: @@ -963,6 +1029,7 @@ static void nvme_process_sq(void *opaque) static void nvme_clear_ctrl(NvmeCtrl *n) { + NvmeAsyncEvent *event; int i; blk_drain(n->conf.blk); @@ -978,8 +1045,19 @@ static void nvme_clear_ctrl(NvmeCtrl *n) } } + if (n->aer_timer) { + timer_del(n->aer_timer); + timer_free(n->aer_timer); + n->aer_timer = NULL; + } + while ((event = QSIMPLEQ_FIRST(&n->aer_queue)) != NULL) { + QSIMPLEQ_REMOVE_HEAD(&n->aer_queue, entry); + g_free(event); + } + blk_flush(n->conf.blk); n->bar.cc = 0; + n->outstanding_aers = 0; } static int nvme_start_ctrl(NvmeCtrl *n) @@ -1074,6 +1152,9 @@ static int nvme_start_ctrl(NvmeCtrl *n) nvme_set_timestamp(n, 0ULL); + n->aer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_aers, n); + QSIMPLEQ_INIT(&n->aer_queue); + return 0; } @@ -1405,6 +1486,7 @@ static void nvme_init_state(NvmeCtrl *n) n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1); } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1472,6 +1554,7 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->ver = cpu_to_le32(0x00010201); id->oacs = cpu_to_le16(0); id->acl = 3; + id->aerl = NVME_AERL; id->frmw = 7 << 1; id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; @@ -1583,6 +1666,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_clear_ctrl(n); g_free(n->cq); g_free(n->sq); + g_free(n->aer_reqs); if (n->params.cmb_size_mb) { g_free(n->cmbuf); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index bea622ea71e0..48d26108eef1 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -86,18 +86,25 @@ typedef struct NvmeCtrl { uint32_t num_namespaces; uint32_t max_q_ents; uint64_t ns_size; + uint8_t outstanding_aers; uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; uint64_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ + QEMUTimer *aer_timer; + uint8_t aer_mask; + uint8_t aer_mask_queued; + NvmeRequest **aer_reqs; + QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue; NvmeNamespace namespace; NvmeSQueue **sq; NvmeCQueue **cq; NvmeSQueue admin_sq; NvmeCQueue admin_cq; + NvmeFeatureVal features; NvmeIdCtrl id_ctrl; } NvmeCtrl; diff --git a/hw/block/trace-events b/hw/block/trace-events index 13d1b21dd4e4..5e3919752a90 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,13 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +nvme_process_aers(void) "processing aers" +nvme_aer(uint16_t cid) "cid %"PRIu16"" +nvme_aer_aerl_exceeded(void) "aerl exceeded" +nvme_aer_masked(uint8_t type, uint8_t mask) "type 0x%"PRIx8" mask 0x%"PRIx8"" +nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" +nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -- 2.20.1