From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com> This device works like a virtual DIMM stick: it allows inserting extra RAM into the guest at run time and later removing it without having to duplicate all of the address space management logic of TYPE_MEMORY_DEVICE in each memory hot-add protocol driver.
This device is not meant to be instantiated or removed by the QEMU user directly: rather, the protocol driver is supposed to add and remove it as required. In fact, its very existence is supposed to be an implementation detail, transparent to the QEMU user. To prevent the user from accidentally manually creating an instance of this device the protocol driver is supposed to place the qdev_device_add*() call (that is uses to add this device) between hapvdimm_allow_adding() and hapvdimm_disallow_adding() calls in order to temporary authorize the operation. Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com> --- hw/i386/Kconfig | 2 + hw/i386/pc.c | 4 +- hw/mem/Kconfig | 4 + hw/mem/hapvdimm.c | 221 ++++++++++++++++++++++++++++++++++++++ hw/mem/meson.build | 1 + include/hw/mem/hapvdimm.h | 27 +++++ 6 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 hw/mem/hapvdimm.c create mode 100644 include/hw/mem/hapvdimm.h diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index d22ac4a4b9..c248cd3734 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -69,6 +69,7 @@ config I440FX imply E1000_PCI imply VMPORT imply VMMOUSE + imply HAPVDIMM select PC_PCI select PC_ACPI select ACPI_SMBUS @@ -96,6 +97,7 @@ config Q35 imply E1000E_PCI_EXPRESS imply VMPORT imply VMMOUSE + imply HAPVDIMM select PC_PCI select PC_ACPI select PCI_EXPRESS_Q35 diff --git a/hw/i386/pc.c b/hw/i386/pc.c index d489ecc0d1..62b532f9e9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -73,6 +73,7 @@ #include "hw/acpi/acpi.h" #include "hw/acpi/cpu_hotplug.h" #include "acpi-build.h" +#include "hw/mem/hapvdimm.h" #include "hw/mem/pc-dimm.h" #include "hw/mem/nvdimm.h" #include "hw/cxl/cxl.h" @@ -1610,7 +1611,8 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine, object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_PMEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MEM_PCI) || object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || - object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) { + object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE) || + object_dynamic_cast(OBJECT(dev), TYPE_HAPVDIMM)) { return HOTPLUG_HANDLER(machine); } diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 73c5ae8ad9..d8c1feafed 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -16,3 +16,7 @@ config CXL_MEM_DEVICE bool default y if CXL select MEM_DEVICE + +config HAPVDIMM + bool + select MEM_DEVICE diff --git a/hw/mem/hapvdimm.c b/hw/mem/hapvdimm.c new file mode 100644 index 0000000000..9ae82edb2c --- /dev/null +++ b/hw/mem/hapvdimm.c @@ -0,0 +1,221 @@ +/* + * A memory hot-add protocol vDIMM device + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * Heavily based on pc-dimm.c: + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "exec/memory.h" +#include "hw/boards.h" +#include "hw/mem/hapvdimm.h" +#include "hw/mem/memory-device.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/module.h" +#include "sysemu/hostmem.h" +#include "trace.h" + +typedef struct HAPVDIMMDevice { + /* private */ + DeviceState parent_obj; + + /* public */ + bool ever_realized; + uint64_t addr; + uint64_t align; + uint32_t node; + HostMemoryBackend *hostmem; +} HAPVDIMMDevice; + +typedef struct HAPVDIMMDeviceClass { + /* private */ + DeviceClass parent_class; +} HAPVDIMMDeviceClass; + +static bool hapvdimm_adding_allowed; +static Property hapvdimm_properties[] = { + DEFINE_PROP_UINT64(HAPVDIMM_ADDR_PROP, HAPVDIMMDevice, addr, 0), + DEFINE_PROP_UINT64(HAPVDIMM_ALIGN_PROP, HAPVDIMMDevice, align, 0), + DEFINE_PROP_LINK(HAPVDIMM_MEMDEV_PROP, HAPVDIMMDevice, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +void hapvdimm_allow_adding(void) +{ + hapvdimm_adding_allowed = true; +} + +void hapvdimm_disallow_adding(void) +{ + hapvdimm_adding_allowed = false; +} + +static void hapvdimm_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + ERRP_GUARD(); + uint64_t value; + + value = memory_device_get_region_size(MEMORY_DEVICE(obj), errp); + if (*errp) { + return; + } + + visit_type_uint64(v, name, &value, errp); +} + +static void hapvdimm_init(Object *obj) +{ + object_property_add(obj, HAPVDIMM_SIZE_PROP, "uint64", hapvdimm_get_size, + NULL, NULL, NULL); +} + +static void hapvdimm_realize(DeviceState *dev, Error **errp) +{ + ERRP_GUARD(); + HAPVDIMMDevice *hapvdimm = HAPVDIMM(dev); + MachineState *ms = MACHINE(qdev_get_machine()); + + if (!hapvdimm->ever_realized) { + if (!hapvdimm_adding_allowed) { + error_setg(errp, "direct adding not allowed"); + return; + } + + hapvdimm->ever_realized = true; + } + + memory_device_pre_plug(MEMORY_DEVICE(hapvdimm), ms, + hapvdimm->align ? &hapvdimm->align : NULL, + errp); + if (*errp) { + return; + } + + if (!hapvdimm->hostmem) { + error_setg(errp, "'" HAPVDIMM_MEMDEV_PROP "' property is not set"); + return; + } else if (host_memory_backend_is_mapped(hapvdimm->hostmem)) { + const char *path; + + path = object_get_canonical_path_component(OBJECT(hapvdimm->hostmem)); + error_setg(errp, "can't use already busy memdev: %s", path); + return; + } + + host_memory_backend_set_mapped(hapvdimm->hostmem, true); + + memory_device_plug(MEMORY_DEVICE(hapvdimm), ms); + vmstate_register_ram(host_memory_backend_get_memory(hapvdimm->hostmem), + dev); +} + +static void hapvdimm_unrealize(DeviceState *dev) +{ + HAPVDIMMDevice *hapvdimm = HAPVDIMM(dev); + MachineState *ms = MACHINE(qdev_get_machine()); + + memory_device_unplug(MEMORY_DEVICE(hapvdimm), ms); + vmstate_unregister_ram(host_memory_backend_get_memory(hapvdimm->hostmem), + dev); + + host_memory_backend_set_mapped(hapvdimm->hostmem, false); +} + +static uint64_t hapvdimm_md_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), HAPVDIMM_ADDR_PROP, + &error_abort); +} + +static void hapvdimm_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), HAPVDIMM_ADDR_PROP, addr, errp); +} + +static MemoryRegion *hapvdimm_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + HAPVDIMMDevice *hapvdimm = HAPVDIMM(md); + + if (!hapvdimm->hostmem) { + error_setg(errp, "'" HAPVDIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(hapvdimm->hostmem); +} + +static void hapvdimm_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + const DeviceClass *dc = DEVICE_GET_CLASS(md); + const HAPVDIMMDevice *hapvdimm = HAPVDIMM(md); + const DeviceState *dev = DEVICE(md); + + if (dev->id) { + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = hapvdimm->addr; + di->slot = -1; + di->node = 0; /* FIXME: report proper node */ + di->size = object_property_get_uint(OBJECT(hapvdimm), HAPVDIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(hapvdimm->hostmem)); + + info->u.dimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_DIMM; +} + +static void hapvdimm_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + + dc->realize = hapvdimm_realize; + dc->unrealize = hapvdimm_unrealize; + device_class_set_props(dc, hapvdimm_properties); + dc->desc = "vDIMM for a hot add protocol"; + + mdc->get_addr = hapvdimm_md_get_addr; + mdc->set_addr = hapvdimm_md_set_addr; + mdc->get_plugged_size = memory_device_get_region_size; + mdc->get_memory_region = hapvdimm_md_get_memory_region; + mdc->fill_device_info = hapvdimm_md_fill_device_info; +} + +static const TypeInfo hapvdimm_info = { + .name = TYPE_HAPVDIMM, + .parent = TYPE_DEVICE, + .instance_size = sizeof(HAPVDIMMDevice), + .instance_init = hapvdimm_init, + .class_init = hapvdimm_class_init, + .class_size = sizeof(HAPVDIMMDeviceClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void hapvdimm_register_types(void) +{ + type_register_static(&hapvdimm_info); +} + +type_init(hapvdimm_register_types) diff --git a/hw/mem/meson.build b/hw/mem/meson.build index 609b2b36fc..5f7a0181d3 100644 --- a/hw/mem/meson.build +++ b/hw/mem/meson.build @@ -4,6 +4,7 @@ mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) +mem_ss.add(when: 'CONFIG_HAPVDIMM', if_true: files('hapvdimm.c')) softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) diff --git a/include/hw/mem/hapvdimm.h b/include/hw/mem/hapvdimm.h new file mode 100644 index 0000000000..bb9a135a52 --- /dev/null +++ b/include/hw/mem/hapvdimm.h @@ -0,0 +1,27 @@ +/* + * A memory hot-add protocol vDIMM device + * + * Copyright (C) 2020-2023 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_HAPVDIMM_H +#define QEMU_HAPVDIMM_H + +#include "qom/object.h" + +#define TYPE_HAPVDIMM "mem-hapvdimm" +OBJECT_DECLARE_SIMPLE_TYPE(HAPVDIMMDevice, HAPVDIMM) + +#define HAPVDIMM_ADDR_PROP "addr" +#define HAPVDIMM_ALIGN_PROP "align" +#define HAPVDIMM_SIZE_PROP "size" +#define HAPVDIMM_MEMDEV_PROP "memdev" + +void hapvdimm_allow_adding(void); +void hapvdimm_disallow_adding(void); + +#endif