Add support for the Get Log Page command and minimal stub implementations of the mandatory Error Information, SMART/Health Information and Firmware Slot Information log pages.
Required for compliance with NVMe revision 1.2.1. See NVM Express 1.2.1, Section 5.10 ("Get Log Page command"). Signed-off-by: Klaus Birkelund Jensen <klaus.jen...@cnexlabs.com> --- hw/block/nvme.c | 209 ++++++++++++++++++++++++++++++++++++++++++ hw/block/nvme.h | 3 + hw/block/trace-events | 3 + include/block/nvme.h | 4 +- 4 files changed, 217 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a20576654f1b..93f5dff197e0 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -39,6 +39,8 @@ #include "nvme.h" #define NVME_MAX_QS PCI_MSIX_FLAGS_QSIZE +#define NVME_TEMPERATURE 0x143 +#define NVME_ELPE 3 #define NVME_AERL 3 #define NVME_OP_ABORTED 0xff #define NVME_GUEST_ERR(trace, fmt, ...) \ @@ -319,6 +321,36 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req) timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); } +static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, + uint8_t event_info, uint8_t log_page) +{ + NvmeAsyncEvent *event; + + trace_nvme_enqueue_event(event_type, event_info, log_page); + + /* + * Do not enqueue the event if something of this type is already queued. + * This bounds the size of the event queue and makes sure it does not grow + * indefinitely when events are not processed by the host (i.e. does not + * issue any AERs). 
+ */ + if (n->aer_mask_queued & (1 << event_type)) { + return; + } + n->aer_mask_queued |= (1 << event_type); + + event = g_new(NvmeAsyncEvent, 1); + event->result = (NvmeAerResult) { + .event_type = event_type, + .event_info = event_info, + .log_page = log_page, + }; + + QSIMPLEQ_INSERT_TAIL(&n->aer_queue, event, entry); + + timer_mod(n->aer_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); +} + static void nvme_process_aers(void *opaque) { NvmeCtrl *n = opaque; @@ -831,6 +863,10 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t result; switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + result = cpu_to_le32(n->features.temp_thresh); + break; + case NVME_ERROR_RECOVERY: case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); @@ -878,6 +914,13 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t dw11 = le32_to_cpu(cmd->cdw11); switch (dw10) { + case NVME_TEMPERATURE_THRESHOLD: + n->features.temp_thresh = dw11; + if (n->features.temp_thresh <= n->temperature) { + nvme_enqueue_event(n, NVME_AER_TYPE_SMART, + NVME_AER_INFO_SMART_TEMP_THRESH, NVME_LOG_SMART_INFO); + } + break; case NVME_VOLATILE_WRITE_CACHE: blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; @@ -902,6 +945,137 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_SUCCESS; } +static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) +{ + n->aer_mask &= ~(1 << event_type); + if (!QSIMPLEQ_EMPTY(&n->aer_queue)) { + timer_mod(n->aer_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static uint16_t nvme_error_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, + uint32_t buf_len, uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + if (off > sizeof(*n->elpes) * (NVME_ELPE + 1)) { + return 
NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(*n->elpes) * (NVME_ELPE + 1) - off, buf_len); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_ERROR); + } + + return nvme_dma_read_prp(n, (uint8_t *) n->elpes + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae, + uint32_t buf_len, uint64_t off, NvmeRequest *req) +{ + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + + uint32_t trans_len; + time_t current_ms; + NvmeSmartLog smart; + + if (cmd->nsid != 0 && cmd->nsid != 0xffffffff) { + trace_nvme_err(req->cqe.cid, "smart log not supported for namespace", + NVME_INVALID_FIELD | NVME_DNR); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (off > sizeof(smart)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trans_len = MIN(sizeof(smart) - off, buf_len); + + memset(&smart, 0x0, sizeof(smart)); + smart.number_of_error_log_entries[0] = cpu_to_le64(0); + smart.temperature[0] = n->temperature & 0xff; + smart.temperature[1] = (n->temperature >> 8) & 0xff; + + if (n->features.temp_thresh <= n->temperature) { + smart.critical_warning |= NVME_SMART_TEMPERATURE; + } + + current_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + smart.power_on_hours[0] = cpu_to_le64( + (((current_ms - n->starttime_ms) / 1000) / 60) / 60); + + if (!rae) { + nvme_clear_events(n, NVME_AER_TYPE_SMART); + } + + return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1, + prp2); +} + +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len, + uint64_t off, NvmeRequest *req) +{ + uint32_t trans_len; + uint64_t prp1 = le64_to_cpu(cmd->prp1); + uint64_t prp2 = le64_to_cpu(cmd->prp2); + NvmeFwSlotInfoLog fw_log; + + if (off > sizeof(fw_log)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + memset(&fw_log, 0, sizeof(NvmeFwSlotInfoLog)); + + trans_len = MIN(sizeof(fw_log) - off, buf_len); + + return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1, + 
prp2); +} + +static uint16_t nvme_get_log(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t dw12 = le32_to_cpu(cmd->cdw12); + uint32_t dw13 = le32_to_cpu(cmd->cdw13); + uint16_t lid = dw10 & 0xff; + uint8_t rae = (dw10 >> 15) & 0x1; + uint32_t numdl, numdu, len; + uint64_t off, lpol, lpou; + + numdl = (dw10 >> 16); + numdu = (dw11 & 0xffff); + lpol = dw12; + lpou = dw13; + + len = (((numdu << 16) | numdl) + 1) << 2; + off = (lpou << 32ULL) | lpol; + + if (off & 0x3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + trace_nvme_get_log(req->cqe.cid, lid); + + switch (lid) { + case NVME_LOG_ERROR_INFO: + return nvme_error_log_info(n, cmd, rae, len, off, req); + case NVME_LOG_SMART_INFO: + return nvme_smart_info(n, cmd, rae, len, off, req); + case NVME_LOG_FW_SLOT_INFO: + return nvme_fw_log_info(n, cmd, len, off, req); + default: + trace_nvme_err_invalid_log_page(req->cqe.cid, lid); + return NVME_INVALID_LOG_ID | NVME_DNR; + } +} + static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { trace_nvme_aer(req->cqe.cid); @@ -982,6 +1156,8 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return nvme_set_feature(n, cmd, req); case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); + case NVME_ADM_CMD_GET_LOG_PAGE: + return nvme_get_log(n, cmd, req); case NVME_ADM_CMD_ASYNC_EV_REQ: return nvme_aer(n, cmd, req); case NVME_ADM_CMD_ABORT: @@ -1347,6 +1523,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "completion queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1357,6 +1540,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_head=%"PRIu16", ignoring", 
qid, new_head); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1385,6 +1574,13 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) "submission queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER, + NVME_LOG_ERROR_INFO); + } + return; } @@ -1395,6 +1591,12 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) " beyond queue size, sqid=%"PRIu32"," " new_tail=%"PRIu16", ignoring", qid, new_tail); + + if (n->outstanding_aers) { + nvme_enqueue_event(n, NVME_AER_TYPE_ERROR, + NVME_AER_INFO_ERR_INVALID_DB_VALUE, NVME_LOG_ERROR_INFO); + } + return; } @@ -1484,9 +1686,13 @@ static void nvme_init_state(NvmeCtrl *n) { n->num_namespaces = 1; n->reg_size = pow2ceil(0x1004 + 2 * (n->params.num_queues + 1) * 4); + n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); n->sq = g_new0(NvmeSQueue *, n->params.num_queues); n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->elpes = g_new0(NvmeErrorLog, NVME_ELPE + 1); n->aer_reqs = g_new0(NvmeRequest *, NVME_AERL + 1); + n->temperature = NVME_TEMPERATURE; + n->features.temp_thresh = 0x14d; } static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) @@ -1556,6 +1762,8 @@ static void nvme_init_ctrl(NvmeCtrl *n) id->acl = 3; id->aerl = NVME_AERL; id->frmw = 7 << 1; + id->lpa = 1 << 2; + id->elpe = NVME_ELPE; id->sqes = (0x6 << 4) | 0x6; id->cqes = (0x4 << 4) | 0x4; id->nn = cpu_to_le32(n->num_namespaces); @@ -1666,6 +1874,7 @@ static void nvme_exit(PCIDevice *pci_dev) nvme_clear_ctrl(n); g_free(n->cq); g_free(n->sq); + g_free(n->elpes); g_free(n->aer_reqs); if (n->params.cmb_size_mb) { diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 48d26108eef1..ed3fa3faa718 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -77,6 +77,8 @@ typedef struct 
NvmeCtrl { BlockConf conf; NvmeParams params; + uint64_t starttime_ms; + uint16_t temperature; uint32_t page_size; uint16_t page_bits; uint16_t max_prp_ents; @@ -99,6 +101,7 @@ typedef struct NvmeCtrl { NvmeRequest **aer_reqs; QSIMPLEQ_HEAD(, NvmeAsyncEvent) aer_queue; + NvmeErrorLog *elpes; NvmeNamespace namespace; NvmeSQueue **sq; NvmeCQueue **cq; diff --git a/hw/block/trace-events b/hw/block/trace-events index 5e3919752a90..ed666bbc94f2 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -46,6 +46,7 @@ nvme_getfeat_numq(int result) "get feature number of queues, result=%d" nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +nvme_get_log(uint16_t cid, uint16_t lid) "cid %"PRIu16" lid 0x%"PRIx16"" nvme_process_aers(void) "processing aers" nvme_aer(uint16_t cid) "cid %"PRIu16"" nvme_aer_aerl_exceeded(void) "aerl exceeded" @@ -67,6 +68,7 @@ nvme_mmio_shutdown_set(void) "shutdown bit set" nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions +nvme_err(uint16_t cid, const char *s, uint16_t status) "cid %"PRIu16" \"%s\" status 0x%"PRIx16"" nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" @@ -92,6 +94,7 @@ nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion q nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +nvme_err_invalid_log_page(uint16_t cid, uint16_t lid) "cid 
%"PRIu16" lid 0x%"PRIx16"" nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" diff --git a/include/block/nvme.h b/include/block/nvme.h index 1b0accd4fe2b..5a10b8b67468 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -386,8 +386,8 @@ enum NvmeAsyncEventRequest { NVME_AER_TYPE_SMART = 1, NVME_AER_TYPE_IO_SPECIFIC = 6, NVME_AER_TYPE_VENDOR_SPECIFIC = 7, - NVME_AER_INFO_ERR_INVALID_SQ = 0, - NVME_AER_INFO_ERR_INVALID_DB = 1, + NVME_AER_INFO_ERR_INVALID_DB_REGISTER = 0, + NVME_AER_INFO_ERR_INVALID_DB_VALUE = 1, NVME_AER_INFO_ERR_DIAG_FAIL = 2, NVME_AER_INFO_ERR_PERS_INTERNAL_ERR = 3, NVME_AER_INFO_ERR_TRANS_INTERNAL_ERR = 4, -- 2.20.1