Memory hotplug can fail for some combinations of RAM and maxmem when DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends on the maximum addressable memory reported by the guest, and this value is currently calculated incorrectly by the guest kernel routine memory_hotplug_max(). While there is an attempt to fix the guest kernel, this patch works around the problem within QEMU itself.
The memory_hotplug_max() routine in the guest kernel arrives at the maximum addressable memory by multiplying the lmb-size by the lmb-count obtained from the ibm,dynamic-memory property. There are two assumptions here: - All LMBs are part of ibm,dynamic-memory: This is not true for PowerKVM, where only hot-pluggable LMBs are present in this property. - The memory area comprising RAM and the hotplug region is contiguous: This need not always be true for PowerKVM, as there can be a gap between boot-time RAM and the hotplug region. To work around this guest kernel bug, ensure that ibm,dynamic-memory has information about all the LMBs (RMA, boot-time LMBs, future hotpluggable LMBs, and dummy LMBs to cover the gap between RAM and the hotpluggable region). RMA is represented separately by the memory@0 node. Hence mark the RMA LMBs, and also the LMBs for the gap between RAM and the hotpluggable region, as reserved so that these LMBs are not counted (or recounted) by the guest. Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> --- Changes in v2: - Dropped the patch that removed the alignment gap between RAM and the hotplug region, and instead populated ibm,dynamic-memory with LMBs marked as RESERVED for that gap. We create DRC objects for these LMBs as it simplifies the logic of populating ibm,dynamic-memory. There can be at most 4 such DRC objects for the gap area (1GB max), and hence it should be fine. 
v1: https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg00627.html hw/ppc/spapr.c | 51 ++++++++++++++++++++++++++++++++++++-------------- include/hw/ppc/spapr.h | 5 +++-- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0636642..0f4f7a3 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -762,18 +762,14 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) int ret, i, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; - uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; + uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size; + uint32_t nr_lmbs = (spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr)) / + lmb_size; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; /* - * Don't create the node if there are no DR LMBs. - */ - if (!nr_lmbs) { - return 0; - } - - /* * Allocate enough buffer size to fit in ibm,dynamic-memory * or ibm,associativity-lookup-arrays */ @@ -805,11 +801,18 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) for (i = 0; i < nr_lmbs; i++) { sPAPRDRConnector *drc; sPAPRDRConnectorClass *drck; - uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;; + uint64_t addr; uint32_t *dynamic_memory = cur_index; + if (i < hotplug_lmb_start) { + addr = i * lmb_size; + } else { + addr = (i - hotplug_lmb_start) * lmb_size + + spapr->hotplug_memory.base; + } + drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, - addr/lmb_size); + addr / lmb_size); g_assert(drc); drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); @@ -820,7 +823,14 @@ static int spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); if (addr < machine->ram_size || memory_region_present(get_system_memory(), addr)) { - dynamic_memory[5] = 
cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + if (addr < spapr->rma_size) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED); + } else { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } + } else if (addr >= machine->ram_size && + addr < spapr->hotplug_memory.base) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED); } else { dynamic_memory[5] = cpu_to_be32(0); } @@ -1652,16 +1662,29 @@ static void spapr_drc_reset(void *opaque) static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr) { - MachineState *machine = MACHINE(spapr); uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; - uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; + uint32_t hotplug_lmb_start = spapr->hotplug_memory.base / lmb_size; + uint32_t nr_lmbs = (spapr->hotplug_memory.base + + memory_region_size(&spapr->hotplug_memory.mr)) / + lmb_size; int i; for (i = 0; i < nr_lmbs; i++) { sPAPRDRConnector *drc; uint64_t addr; - addr = i * lmb_size + spapr->hotplug_memory.base; + /* + * Create DRC objects for entire memory range including RMA, boot-time + * memory and hotplug memory and for the gap b/n RAM and hotplug + * memory region. + */ + if (i < hotplug_lmb_start) { + addr = i * lmb_size; + } else { + addr = (i - hotplug_lmb_start) * lmb_size + + spapr->hotplug_memory.base; + } + drc = spapr_dr_connector_new(OBJECT(spapr), SPAPR_DR_CONNECTOR_TYPE_LMB, addr/lmb_size); qemu_register_reset(spapr_drc_reset, drc); diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 971df3d..bb265a2 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -620,9 +620,10 @@ int spapr_rng_populate_dt(void *fdt); #define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6 /* - * This flag value defines the LMB as assigned in ibm,dynamic-memory - * property under ibm,dynamic-reconfiguration-memory node. + * Defines for flag value in ibm,dynamic-memory property under + * ibm,dynamic-reconfiguration-memory node. 
*/ #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 +#define SPAPR_LMB_FLAGS_RESERVED 0x00000080 #endif /* !defined (__HW_SPAPR_H__) */ -- 2.1.0