On Thu, 31 Jul 2025 15:20:29 +0300 Manos Pitsidianakis <manos.pitsidiana...@linaro.org> wrote:
> On Thu, Jul 31, 2025 at 2:14 PM Shameerali Kolothum Thodi > <shameerali.kolothum.th...@huawei.com> wrote: > > > > > > > > > -----Original Message----- > > > From: Jonathan Cameron <jonathan.came...@huawei.com> > > > Sent: Thursday, July 31, 2025 11:01 AM > > > To: Manos Pitsidianakis <manos.pitsidiana...@linaro.org> > > > Cc: qemu-devel@nongnu.org; Peter Maydell <peter.mayd...@linaro.org>; > > > qemu-...@nongnu.org; Gustavo Romero <gustavo.rom...@linaro.org>; > > > Shameerali Kolothum Thodi <shameerali.kolothum.th...@huawei.com> > > > Subject: Re: [PATCH] hw/arm: add static NVDIMMs in device tree > > > > > > On Wed, 30 Jul 2025 15:21:41 +0300 > > > Manos Pitsidianakis <manos.pitsidiana...@linaro.org> wrote: > > > > > > > NVDIMM is used for fast rootfs with EROFS, for example by kata > > > > containers. To allow booting with static NVDIMM memory, add them to > > > > the device tree in arm virt machine. > > > > > > > > This allows users to boot directly with nvdimm memory devices without > > > > having to rely on ACPI and hotplug. > > > > > > > > Verified to work with command invocation: > > > > > > > > ./qemu-system-aarch64 \ > > > > -M virt,nvdimm=on \ > > > > -cpu cortex-a57 \ > > > > -m 4G,slots=2,maxmem=8G \ > > > > -object memory-backend-file,id=mem1,share=on,mem- > > > path=/tmp/nvdimm,size=4G,readonly=off \ > > > > -device nvdimm,id=nvdimm1,memdev=mem1,unarmed=off \ > > > > -drive file=./debian-12-nocloud-arm64-commited.qcow2,format=qcow2 \ > > > > -kernel ./vmlinuz-6.1.0-13-arm64 \ > > > > -append "root=/dev/vda1 console=ttyAMA0,115200 acpi=off" > > > > -initrd ./initrd.img-6.1.0-13-arm64 \ > > > > -nographic \ > > > > -serial mon:stdio > > > > > > > > Signed-off-by: Manos Pitsidianakis <manos.pitsidiana...@linaro.org> > > > > > > +CC shameer who might be able to remember how the nvdimm stuff works > > > in > > > +ACPI better > > > than I can. I think this is fine but more eyes would be good. > > > > Hello Shameer, > > > The cold plug DT support was part of the initial NVDIMM series, > > https://lore.kernel.org/qemu-devel/20191004155302.4632-5-shameerali.kolothum.th...@huawei.com/ > > > > But I can't remember the reason for dropping it, other than the comment from > > Igor, that why we should do it for NVDIMM but not PC-DIMM. > > https://lore.kernel.org/qemu-devel/20191111154627.63fc0...@redhat.com/ > > > > So, I guess there was not a strong use case for that at that time. > > Yes. Our motivation for this patch is just feature parity with x86. > It's a nice-to-have, if it's possible. if it's parity with x86, then why not use acpi like x86 does? If I'm not mistaken, DT in arm/virt was mostly intended to bootstrap 1st steps of firmware, and the preferred way do get other info from QEMU was via fw_cfg or standard enumeration methods (PCI/ACPI/...). (point is: do not duplicate acpi features in DT for arm/virt machine type, perhaps arm/sbsa-ref is a better target for DT). > > > > The PC-DIMM DT cold plug was dropped due to the issues/obstacles mentioned > > here, > > https://lore.kernel.org/qemu-devel/5fc3163cfd30c246abaa99954a238fa83f1b6...@lhreml524-mbs.china.huawei.com/ > > > > Thank you very much for this link. On a first glance, if this problem > is still happening with edk2, perhaps a temporary fix would be to only > put coldplugged DIMMs in the device tree when the machine has acpi=off > explicitly to prevent the potential for firmware confusion? > > > > > +CC: Igor and Eric. > > > > Thanks, > > Shameer > > > > > > --- > > > > hw/arm/boot.c | 39 +++++++++++++++++++++++++++++++++++++++ > > > > hw/arm/virt.c | 8 +++++--- > > > > 2 files changed, 44 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/hw/arm/boot.c b/hw/arm/boot.c index > > > > > > > d391cd01bb1b92ff213e69b84e5a69413b36c4f8..a0c1bcdf946ca98bb5da63f1 > > > a518 > > > > 018eb578dd81 100644 > > > > --- a/hw/arm/boot.c > > > > +++ b/hw/arm/boot.c > > > > @@ -25,6 +25,7 @@ > > > > #include "hw/boards.h" > > > > #include "system/reset.h" > > > > #include "hw/loader.h" > > > > +#include "hw/mem/memory-device.h" > > > > #include "elf.h" > > > > #include "system/device_tree.h" > > > > #include "qemu/config-file.h" > > > > @@ -515,6 +516,26 @@ static void fdt_add_psci_node(void *fdt, > > > ARMCPU *armcpu) > > > > qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn); } > > > > > > > > +static int fdt_add_pmem_node(void *fdt, uint32_t acells, uint32_t > > > > scells, > > > > + int64_t mem_base, int64_t size, int64_t > > > > +node) { > > > > + int ret; > > > > + > > > > + g_autofree char *nodename = g_strdup_printf("/pmem@%" PRIx64, > > > > + mem_base); > > > > + > > > > + qemu_fdt_add_subnode(fdt, nodename); > > > > + qemu_fdt_setprop_string(fdt, nodename, "compatible", "pmem- > > > region"); > > > > + ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", acells, > > > > + mem_base, scells, size); > > > > > > I'd burn some lines to avoid a comment covering unrelated ret handling > > > > > > if (ret) > > > return ret; > > > > > > if (node >= 0) { > > > return qem_fdt_setprop_cell() > > > } > > > > > > return 0; > > > > > > > + /* only set the NUMA ID if it is specified */ > > > > + if (!ret && node >= 0) { > > > > + ret = qemu_fdt_setprop_cell(fdt, nodename, "numa-node-id", > > > > + node); > > > > + } > > > > + > > > > + return ret; > > > > +} > > > > + > > > > int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, > > > > hwaddr addr_limit, AddressSpace *as, MachineState *ms, > > > > ARMCPU *cpu) > > > > @@ -525,6 +546,7 @@ int arm_load_dtb(hwaddr addr, const struct > > > arm_boot_info *binfo, > > > > unsigned int i; > > > > hwaddr mem_base, mem_len; > > > > char **node_path; > > > > + g_autofree MemoryDeviceInfoList *md_list = NULL; > > > > Error *err = NULL; > > > > > > > > if (binfo->dtb_filename) { > > > > @@ -628,6 +650,23 @@ int arm_load_dtb(hwaddr addr, const struct > > > arm_boot_info *binfo, > > > > } > > > > } > > > > > > > > + md_list = qmp_memory_device_list(); > > > > + for (MemoryDeviceInfoList *m = md_list; m != NULL; m = m->next) { > > > > + MemoryDeviceInfo *mi = m->value; > > > > + > > > > + if (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) { > > > > + PCDIMMDeviceInfo *di = mi->u.nvdimm.data; > > > > + > > > > + rc = fdt_add_pmem_node(fdt, acells, scells, > > > > + di->addr, di->size, di->node); > > > > + if (rc < 0) { > > > > + fprintf(stderr, "couldn't add NVDIMM /pmem@%"PRIx64" > > > node\n", > > > > + di->addr); > > > > + goto fail; > > > > + } > > > > + } > > > > + } > > > > + > > > > rc = fdt_path_offset(fdt, "/chosen"); > > > > if (rc < 0) { > > > > qemu_fdt_add_subnode(fdt, "/chosen"); diff --git > > > > a/hw/arm/virt.c b/hw/arm/virt.c index > > > > > > > ef6be3660f5fb38da84235c32dc2d13a5c61889c..910f5bb5f66ee217a9140f912 > > > 880 > > > > 4a5b9f69b5b6 100644 > > > > --- a/hw/arm/virt.c > > > > +++ b/hw/arm/virt.c > > > > @@ -2917,7 +2917,7 @@ static void > > > virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > > > > const MachineState *ms = MACHINE(hotplug_dev); > > > > const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), > > > > TYPE_NVDIMM); > > > > > > > > - if (!vms->acpi_dev) { > > > > + if (!vms->acpi_dev && !(is_nvdimm && !dev->hotplugged)) { > > > > error_setg(errp, > > > > "memory hotplug is not enabled: missing acpi-ged > > > > device"); > > > > return; > > > > @@ -2949,8 +2949,10 @@ static void virt_memory_plug(HotplugHandler > > > *hotplug_dev, > > > > nvdimm_plug(ms->nvdimms_state); > > > > } > > > > > > > > - hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev), > > > > - dev, &error_abort); > > > > + if (vms->acpi_dev) { > > > > + hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev), > > > > + dev, &error_abort); > > > > + } > > > > } > > > > > > > > static void virt_machine_device_pre_plug_cb(HotplugHandler > > > > *hotplug_dev, > > > > > > > > --- > > > > base-commit: 9b80226ece693197af8a981b424391b68b5bc38e > > > > change-id: 20250730-nvdimm_arm64_virt-931a764bbe0c > > > > > > > > -- > > > > γαῖα πυρί μιχθήτω > > > > > > > > > > >