Physical NVMe disk hardware very rarely hits a critical warning condition, which makes it hard to write and test a monitoring agent service.
For debugging purposes, add a new 'smart_critical_warning' property to emulate this situation. The original version of this change was implemented by adding a fixed property which could be initialized from the QEMU command line. As suggested by Philippe & Klaus, it was reworked into the current version. Testing with this patch: 1. Change the smart_critical_warning property of a running VM: #virsh qemu-monitor-command nvme-upstream '{ "execute": "qom-set", "arguments": { "path": "/machine/peripheral-anon/device[0]", "property": "smart_critical_warning", "value":16 } }' 2. Run smartctl in the guest: #smartctl -H -l error /dev/nvme0n1 === START OF SMART DATA SECTION === SMART overall-health self-assessment test result: FAILED! - volatile memory backup device has failed Reviewed-by: Philippe Mathieu-Daudé <phi...@redhat.com> Signed-off-by: zhenwei pi <pizhen...@bytedance.com> --- hw/block/nvme.c | 45 +++++++++++++++++++++++++++++++++++++++++--- hw/block/nvme.h | 1 + include/block/nvme.h | 1 + 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 0854ee3072..2d71ebeb30 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -2425,6 +2425,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, } trans_len = MIN(sizeof(smart) - off, buf_len); + smart.critical_warning = n->smart_critical_warning; smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read, 1000)); @@ -4346,6 +4347,40 @@ static Property nvme_props[] = { DEFINE_PROP_END_OF_LIST(), }; +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value = n->smart_critical_warning; + + visit_type_uint8(v, name, &value, errp); +} + +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NvmeCtrl *n = NVME(obj); + uint8_t value, cap = 0; + + if (!visit_type_uint8(v, name, &value, errp)) { + return; + } + + cap = 
NVME_SMART_SPARE | NVME_SMART_TEMPERATURE | NVME_SMART_RELIABILITY + | NVME_SMART_MEDIA_READ_ONLY | NVME_SMART_FAILED_VOLATILE_MEDIA; + if (NVME_CAP_PMR(n->bar.cap)) { + cap |= NVME_SMART_PMR_UNRELIABLE; + } + + if ((value & cap) != value) { + error_setg(errp, "unsupported smart critical warning bits: 0x%x", + value & ~cap); + return; + } + + n->smart_critical_warning = value; +} + static const VMStateDescription nvme_vmstate = { .name = "nvme", .unmigratable = 1, @@ -4369,13 +4404,17 @@ static void nvme_class_init(ObjectClass *oc, void *data) static void nvme_instance_init(Object *obj) { - NvmeCtrl *s = NVME(obj); + NvmeCtrl *n = NVME(obj); - if (s->namespace.blkconf.blk) { - device_add_bootindex_property(obj, &s->namespace.blkconf.bootindex, + if (n->namespace.blkconf.blk) { + device_add_bootindex_property(obj, &n->namespace.blkconf.bootindex, "bootindex", "/namespace@1,0", DEVICE(obj)); } + + object_property_add(obj, "smart_critical_warning", "uint8", + nvme_get_smart_warning, + nvme_set_smart_warning, NULL, NULL); } static const TypeInfo nvme_info = { diff --git a/hw/block/nvme.h b/hw/block/nvme.h index b7fbcca39d..bd5a2b416f 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -145,6 +145,7 @@ typedef struct NvmeCtrl { uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ uint64_t starttime_ms; uint16_t temperature; + uint8_t smart_critical_warning; HostMemoryBackend *pmrdev; diff --git a/include/block/nvme.h b/include/block/nvme.h index 0baf4b27cd..520bc2e4cf 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -60,6 +60,7 @@ enum NvmeCapMask { #define NVME_CAP_CSS(cap) (((cap) >> CAP_CSS_SHIFT) & CAP_CSS_MASK) #define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK) #define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK) +#define NVME_CAP_PMR(cap) (((cap) >> CAP_PMR_SHIFT) & CAP_PMR_MASK) #define NVME_CAP_SET_MQES(cap, val) (cap |= (uint64_t)(val & CAP_MQES_MASK) \ << CAP_MQES_SHIFT) -- 2.25.1