On Wed, 30 Jul 2025 15:21:41 +0300
Manos Pitsidianakis <manos.pitsidiana...@linaro.org> wrote:

> NVDIMM is used for fast rootfs with EROFS, for example by kata
> containers. To allow booting with static NVDIMM memory, add them to the
> device tree in arm virt machine.
> 
> This allows users to boot directly with nvdimm memory devices without
> having to rely on ACPI and hotplug.
> 
> Verified to work with command invocation:
> 
> ./qemu-system-aarch64 \
>   -M virt,nvdimm=on \
>   -cpu cortex-a57 \
>   -m 4G,slots=2,maxmem=8G \
>   -object 
> memory-backend-file,id=mem1,share=on,mem-path=/tmp/nvdimm,size=4G,readonly=off
>  \
>   -device nvdimm,id=nvdimm1,memdev=mem1,unarmed=off \
>   -drive file=./debian-12-nocloud-arm64-commited.qcow2,format=qcow2 \
>   -kernel ./vmlinuz-6.1.0-13-arm64 \
>   -append "root=/dev/vda1 console=ttyAMA0,115200 acpi=off"
>   -initrd ./initrd.img-6.1.0-13-arm64 \
>   -nographic \
>   -serial mon:stdio
> 
> Signed-off-by: Manos Pitsidianakis <manos.pitsidiana...@linaro.org>

+CC shameer who might be able to remember how the nvdimm stuff works in ACPI 
better
than I can.  I think this is fine but more eyes would be good.

> ---
>  hw/arm/boot.c | 39 +++++++++++++++++++++++++++++++++++++++
>  hw/arm/virt.c |  8 +++++---
>  2 files changed, 44 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
> index 
> d391cd01bb1b92ff213e69b84e5a69413b36c4f8..a0c1bcdf946ca98bb5da63f1a518018eb578dd81
>  100644
> --- a/hw/arm/boot.c
> +++ b/hw/arm/boot.c
> @@ -25,6 +25,7 @@
>  #include "hw/boards.h"
>  #include "system/reset.h"
>  #include "hw/loader.h"
> +#include "hw/mem/memory-device.h"
>  #include "elf.h"
>  #include "system/device_tree.h"
>  #include "qemu/config-file.h"
> @@ -515,6 +516,26 @@ static void fdt_add_psci_node(void *fdt, ARMCPU *armcpu)
>      qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
>  }
>  
> +static int fdt_add_pmem_node(void *fdt, uint32_t acells, uint32_t scells,
> +                             int64_t mem_base, int64_t size, int64_t node)
> +{
> +    int ret;
> +
> +    g_autofree char *nodename = g_strdup_printf("/pmem@%" PRIx64, mem_base);
> +
> +    qemu_fdt_add_subnode(fdt, nodename);
> +    qemu_fdt_setprop_string(fdt, nodename, "compatible", "pmem-region");
> +    ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", acells,
> +                                       mem_base, scells, size);

I'd burn some lines to avoid a comment covering unrelated ret handling

        if (ret)
                return ret;

        if (node >= 0) {
                return qem_fdt_setprop_cell()
        }

        return 0;

> +    /* only set the NUMA ID if it is specified */
> +    if (!ret && node >= 0) {
> +        ret = qemu_fdt_setprop_cell(fdt, nodename, "numa-node-id",
> +                                    node);
> +    }
> +
> +    return ret;
> +}
> +
>  int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
>                   hwaddr addr_limit, AddressSpace *as, MachineState *ms,
>                   ARMCPU *cpu)
> @@ -525,6 +546,7 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info 
> *binfo,
>      unsigned int i;
>      hwaddr mem_base, mem_len;
>      char **node_path;
> +    g_autofree MemoryDeviceInfoList *md_list = NULL;
>      Error *err = NULL;
>  
>      if (binfo->dtb_filename) {
> @@ -628,6 +650,23 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info 
> *binfo,
>          }
>      }
>  
> +    md_list = qmp_memory_device_list();
> +    for (MemoryDeviceInfoList *m = md_list; m != NULL; m = m->next) {
> +        MemoryDeviceInfo *mi = m->value;
> +
> +        if (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) {
> +            PCDIMMDeviceInfo *di = mi->u.nvdimm.data;
> +
> +            rc = fdt_add_pmem_node(fdt, acells, scells,
> +                                   di->addr, di->size, di->node);
> +            if (rc < 0) {
> +                fprintf(stderr, "couldn't add NVDIMM /pmem@%"PRIx64" node\n",
> +                        di->addr);
> +                goto fail;
> +            }
> +        }
> +    }
> +
>      rc = fdt_path_offset(fdt, "/chosen");
>      if (rc < 0) {
>          qemu_fdt_add_subnode(fdt, "/chosen");
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 
> ef6be3660f5fb38da84235c32dc2d13a5c61889c..910f5bb5f66ee217a9140f9128804a5b9f69b5b6
>  100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2917,7 +2917,7 @@ static void virt_memory_pre_plug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
>      const MachineState *ms = MACHINE(hotplug_dev);
>      const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
>  
> -    if (!vms->acpi_dev) {
> +    if (!vms->acpi_dev && !(is_nvdimm && !dev->hotplugged)) {
>          error_setg(errp,
>                     "memory hotplug is not enabled: missing acpi-ged device");
>          return;
> @@ -2949,8 +2949,10 @@ static void virt_memory_plug(HotplugHandler 
> *hotplug_dev,
>          nvdimm_plug(ms->nvdimms_state);
>      }
>  
> -    hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev),
> -                         dev, &error_abort);
> +    if (vms->acpi_dev) {
> +        hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev),
> +                             dev, &error_abort);
> +    }
>  }
>  
>  static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
> 
> ---
> base-commit: 9b80226ece693197af8a981b424391b68b5bc38e
> change-id: 20250730-nvdimm_arm64_virt-931a764bbe0c
> 
> --
> γαῖα πυρί μιχθήτω
> 
> 


Reply via email to