On Fri, Jun 03, 2016 at 11:19:44AM +0530, Bharata B Rao wrote: > Memory hotplug can fail for some combinations of RAM and maxmem when > DDW is enabled in the presence of devices like nec-usb-xhci. DDW depends > on maximum addressable memory returned by guest and this value is currently > being calculated wrongly by the guest kernel routine memory_hotplug_max(). > While there is an attempt to fix the guest kernel, this patch works > around the problem within QEMU itself. > > memory_hotplug_max() routine in the guest kernel arrives at max > addressable memory by multiplying lmb-size with the lmb-count obtained > from ibm,dynamic-memory property. There are two assumptions here: > > - All LMBs are part of ibm,dynamic-memory: This is not true for PowerKVM > where only hot-pluggable LMBs are present in this property. > - The memory area comprising RAM and hotplug region is contiguous: This > needn't be true always for PowerKVM as there can be a gap between > boot time RAM and hotplug region. > > This workaround involves having all the LMBs (RMA, rest of the boot time > LMBs and hot-pluggable LMBs) as part of ibm,dynamic-memory so that > guest kernel's calculation of max addressable memory comes out correct > resulting in correct DDW value which prevents memory hotplug failures. > memory@0 is created for RMA, but RMA LMBs are also represented as > "reserved" LMBs in ibm,dynamic-memory. Parts of this are essentially a > revert of e8f986fc57a664a74b9f685b466506366a15201b. > > In addition to this, the alignment of hotplug memory region is reduced from > current 1G to 256M (LMB size in PowerKVM) so that we don't end up with any > gaps between boot time RAM and hotplug region.
Hmm.. could we work around the problem without altering the memory alignment by inserting extra dummy reserved LMBs covering the gap? > > Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 59 > +++++++++++++++++++++++++++++++++++--------------- > include/hw/ppc/spapr.h | 5 +++-- > 2 files changed, 45 insertions(+), 19 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 623c35f..3dfbc37 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -569,7 +569,6 @@ static int spapr_populate_memory(sPAPRMachineState > *spapr, void *fdt) > } > if (!mem_start) { > /* ppc_spapr_init() checks for rma_size <= node0_size already */ > - spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); > mem_start += spapr->rma_size; > node_size -= spapr->rma_size; > } > @@ -762,18 +761,13 @@ static int > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > int ret, i, offset; > uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; > uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; > - uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; > + uint32_t nr_rma_lmbs = spapr->rma_size / lmb_size; > + uint32_t nr_lmbs = machine->maxram_size / lmb_size; > + uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size; > uint32_t *int_buf, *cur_index, buf_len; > int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; > > /* > - * Don't create the node if there are no DR LMBs. 
> - */ > - if (!nr_lmbs) { > - return 0; > - } > - > - /* > * Allocate enough buffer size to fit in ibm,dynamic-memory > * or ibm,associativity-lookup-arrays > */ > @@ -805,9 +799,15 @@ static int > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > for (i = 0; i < nr_lmbs; i++) { > sPAPRDRConnector *drc; > sPAPRDRConnectorClass *drck; > - uint64_t addr = i * lmb_size + spapr->hotplug_memory.base;; > + uint64_t addr; > uint32_t *dynamic_memory = cur_index; > > + if (i < nr_assigned_lmbs) { > + addr = i * lmb_size; > + } else { > + addr = (i - nr_assigned_lmbs) * lmb_size + > + spapr->hotplug_memory.base; > + } > drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB, > addr/lmb_size); > g_assert(drc); > @@ -820,7 +820,11 @@ static int > spapr_populate_drconf_memory(sPAPRMachineState *spapr, void *fdt) > dynamic_memory[4] = cpu_to_be32(numa_get_node(addr, NULL)); > if (addr < machine->ram_size || > memory_region_present(get_system_memory(), addr)) { > - dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); > + if (i < nr_rma_lmbs) { > + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED); > + } else { > + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); > + } > } else { > dynamic_memory[5] = cpu_to_be32(0); > } > @@ -882,6 +886,8 @@ int spapr_h_cas_compose_response(sPAPRMachineState *spapr, > /* Generate ibm,dynamic-reconfiguration-memory node if required */ > if (memory_update && smc->dr_lmb_enabled) { > _FDT((spapr_populate_drconf_memory(spapr, fdt))); > + } else { > + _FDT((spapr_populate_memory(spapr, fdt))); > } > > /* Pack resulting tree */ > @@ -919,10 +925,23 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr, > /* open out the base tree into a temp buffer for the final tweaks */ > _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE))); > > - ret = spapr_populate_memory(spapr, fdt); > - if (ret < 0) { > - fprintf(stderr, "couldn't setup memory nodes in fdt\n"); > - exit(1); > + /* > + * Add memory@0 node 
to represent RMA. Rest of the memory is either > + * represented by memory nodes or ibm,dynamic-reconfiguration-memory > + * node later during ibm,client-architecture-support call. > + * > + * If NUMA is configured, ensure that memory@0 ends up in the > + * first memory-less node. > + */ > + if (nb_numa_nodes) { > + for (i = 0; i < nb_numa_nodes; ++i) { > + if (numa_info[i].node_mem) { > + spapr_populate_memory_node(fdt, i, 0, spapr->rma_size); > + break; > + } > + } > + } else { > + spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size); > } > > ret = spapr_populate_vdevice(spapr->vio_bus, fdt); > @@ -1654,14 +1673,20 @@ static void > spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr) > { > MachineState *machine = MACHINE(spapr); > uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; > - uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size)/lmb_size; > + uint32_t nr_lmbs = machine->maxram_size / lmb_size; > + uint32_t nr_assigned_lmbs = machine->ram_size / lmb_size; > int i; > > for (i = 0; i < nr_lmbs; i++) { > sPAPRDRConnector *drc; > uint64_t addr; > > - addr = i * lmb_size + spapr->hotplug_memory.base; > + if (i < nr_assigned_lmbs) { > + addr = i * lmb_size; > + } else { > + addr = (i - nr_assigned_lmbs) * lmb_size + > + spapr->hotplug_memory.base; > + } > drc = spapr_dr_connector_new(OBJECT(spapr), > SPAPR_DR_CONNECTOR_TYPE_LMB, > addr/lmb_size); > qemu_register_reset(spapr_drc_reset, drc); > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index b2aeb15..e5ef979 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -619,9 +619,10 @@ int spapr_rng_populate_dt(void *fdt); > #define SPAPR_DR_LMB_LIST_ENTRY_SIZE 6 > > /* > - * This flag value defines the LMB as assigned in ibm,dynamic-memory > - * property under ibm,dynamic-reconfiguration-memory node. > + * Defines for flag value in ibm,dynamic-memory property under > + * ibm,dynamic-reconfiguration-memory node. 
> */ > #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008 > +#define SPAPR_LMB_FLAGS_RESERVED 0x00000080 > > #endif /* !defined (__HW_SPAPR_H__) */ -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature