Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-09-10 Thread Igor Mammedov
On Fri, 7 Sep 2018 16:44:34 -0400
"Michael S. Tsirkin"  wrote:

> On Wed, Aug 22, 2018 at 11:46:44AM +0200, Igor Mammedov wrote:
> > Commit
> >   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
> > entry size"
> > attemped to fix hotplug regression introduced by
> >   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
> > devices"
> > 
> > fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> > kernels (RHEL6) to the point where guest might crash at boot.
> > Reason is that 2.6 kernel discards SRAT table due too small last entry
> > which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
> > not ACPI spec compliant according to which whole possible RAM should be
> > described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
> > kernels.
> > 
> > With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> > in different ways %/node and %/slot but Windows still fails to online
> > 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> > sometimes even coldplugged pc-dimms where affected with static SRAT
> > partitioning.
> > The only known so far way where Windows stays happy is when we have 1
> > SRAT entry in the last node covering all hotplug area.
> > 
> > Revert 848a1cc1e until we come up with a way to avoid regression
> > on Windows with hotplug area split in several entries.
> > Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> > 
> > Signed-off-by: Igor Mammedov   
> 
> BTW should this cause any aml test blobs to change?
> Does not seem to ...
test variants memhp and dimmpxm should be affected by this
(I see your pull req has relevant updates to reference SRAT tables)

> 
> > ---
> > CC: haozhong.zh...@intel.com
> > CC: m...@redhat.com
> > CC: qemu-sta...@nongnu.org
> > CC: ehabk...@redhat.com
> > CC: ler...@redhat.com
> > ---
> >  hw/i386/acpi-build.c | 73 
> > +---
> >  1 file changed, 12 insertions(+), 61 deletions(-)
> > 
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index e1ee8ae..1599caa 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> > GArray *tcpalog)
> >  #define HOLE_640K_START  (640 * KiB)
> >  #define HOLE_640K_END   (1 * MiB)
> >  
> > -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t 
> > base,
> > -   uint64_t len, int default_node)
> > -{
> > -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> > -MemoryDeviceInfoList *info;
> > -MemoryDeviceInfo *mi;
> > -PCDIMMDeviceInfo *di;
> > -uint64_t end = base + len, cur, size;
> > -bool is_nvdimm;
> > -AcpiSratMemoryAffinity *numamem;
> > -MemoryAffinityFlags flags;
> > -
> > -for (cur = base, info = info_list;
> > - cur < end;
> > - cur += size, info = info->next) {
> > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > -
> > -if (!info) {
> > -/*
> > - * Entry is required for Windows to enable memory hotplug in OS
> > - * and for Linux to enable SWIOTLB when booted with less than
> > - * 4G of RAM. Windows works better if the entry sets proximity
> > - * to the highest NUMA node in the machine at the end of the
> > - * reserved space.
> > - * Memory devices may override proximity set by this entry,
> > - * providing _PXM method if necessary.
> > - */
> > -build_srat_memory(numamem, end - 1, 1, default_node,
> > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > MEM_AFFINITY_ENABLED);
> > -break;
> > -}
> > -
> > -mi = info->value;
> > -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> > -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> > -
> > -if (cur < di->addr) {
> > -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > MEM_AFFINITY_ENABLED);
> > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > -}
> > -
> > -size = di->size;
> > -
> > -flags = MEM_AFFINITY_ENABLED;
> > -if (di->hotpluggable) {
> > -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> > -}
> > -if (is_nvdimm) {
> > -flags |= MEM_AFFINITY_NON_VOLATILE;
> > -}
> > -
> > -build_srat_memory(numamem, di->addr, size, di->node, flags);
> > -}
> > -
> > -qapi_free_MemoryDeviceInfoList(info_list);
> > -}
> > -
> >  static void
> >  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
> >  {
> > @@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> > MachineState *machine)
> 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-09-07 Thread Michael S. Tsirkin
On Wed, Aug 22, 2018 at 11:46:44AM +0200, Igor Mammedov wrote:
> Commit
>   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
> entry size"
> attemped to fix hotplug regression introduced by
>   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
> devices"
> 
> fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> kernels (RHEL6) to the point where guest might crash at boot.
> Reason is that 2.6 kernel discards SRAT table due too small last entry
> which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
> not ACPI spec compliant according to which whole possible RAM should be
> described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.
> 
> With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> in different ways %/node and %/slot but Windows still fails to online
> 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> sometimes even coldplugged pc-dimms where affected with static SRAT
> partitioning.
> The only known so far way where Windows stays happy is when we have 1
> SRAT entry in the last node covering all hotplug area.
> 
> Revert 848a1cc1e until we come up with a way to avoid regression
> on Windows with hotplug area split in several entries.
> Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> 
> Signed-off-by: Igor Mammedov 

BTW should this cause any aml test blobs to change?
Does not seem to ...

> ---
> CC: haozhong.zh...@intel.com
> CC: m...@redhat.com
> CC: qemu-sta...@nongnu.org
> CC: ehabk...@redhat.com
> CC: ler...@redhat.com
> ---
>  hw/i386/acpi-build.c | 73 
> +---
>  1 file changed, 12 insertions(+), 61 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index e1ee8ae..1599caa 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> GArray *tcpalog)
>  #define HOLE_640K_START  (640 * KiB)
>  #define HOLE_640K_END   (1 * MiB)
>  
> -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
> -   uint64_t len, int default_node)
> -{
> -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> -MemoryDeviceInfoList *info;
> -MemoryDeviceInfo *mi;
> -PCDIMMDeviceInfo *di;
> -uint64_t end = base + len, cur, size;
> -bool is_nvdimm;
> -AcpiSratMemoryAffinity *numamem;
> -MemoryAffinityFlags flags;
> -
> -for (cur = base, info = info_list;
> - cur < end;
> - cur += size, info = info->next) {
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -
> -if (!info) {
> -/*
> - * Entry is required for Windows to enable memory hotplug in OS
> - * and for Linux to enable SWIOTLB when booted with less than
> - * 4G of RAM. Windows works better if the entry sets proximity
> - * to the highest NUMA node in the machine at the end of the
> - * reserved space.
> - * Memory devices may override proximity set by this entry,
> - * providing _PXM method if necessary.
> - */
> -build_srat_memory(numamem, end - 1, 1, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -break;
> -}
> -
> -mi = info->value;
> -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> -
> -if (cur < di->addr) {
> -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -}
> -
> -size = di->size;
> -
> -flags = MEM_AFFINITY_ENABLED;
> -if (di->hotpluggable) {
> -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> -}
> -if (is_nvdimm) {
> -flags |= MEM_AFFINITY_NON_VOLATILE;
> -}
> -
> -build_srat_memory(numamem, di->addr, size, di->node, flags);
> -}
> -
> -qapi_free_MemoryDeviceInfoList(info_list);
> -}
> -
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
> @@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> MachineState *machine)
>  build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
>  }
>  
> +/*
> + * Entry is required for Windows to enable memory hotplug in OS
> + * and for Linux to enable SWIOTLB when booted with less than
> + * 4G of RAM. Windows works better if the entry sets proximity
> + * to the highest NUMA node in the machine.
> + * Memory devices may override proximity set by this entry,
> + * 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-24 Thread Eduardo Habkost
On Fri, Aug 24, 2018 at 10:03:05AM +0200, Igor Mammedov wrote:
> On Thu, 23 Aug 2018 14:25:01 -0300
> Eduardo Habkost  wrote:
> 
> > On Thu, Aug 23, 2018 at 10:14:06AM +0200, Igor Mammedov wrote:
> > > On Wed, 22 Aug 2018 15:01:12 -0300
> > > Eduardo Habkost  wrote:  
> > [...]
> > > > However, have you considered keeping adding separate entries for
> > > > NVDIMM devices only (so we follow the spec), but add a single
> > > > (numa_nodes-1, MEM_AFFINITY_HOTPLUGGABLE|MEM_AFFINITY_ENABLED)
> > > > entry to the rest?  
> > > Indeed, I did. It doesn't work either.  
> > 
> > When exactly it didn't work?  Did nvdimm + memory hotplug ever
> > worked together on Windows guests?
> before 848a1cc1e QEMU CLI with nvdimm + memory hotplug worked
> as expected for Windows.

OK, so my suggestion would still have a regression.  Nevermind.

> With approach you suggested, it would create several SRAT entries
> X for nvdimm and Y for the rest (1 in the best case or many if
> nvdimms/pc-dimms are interleaved) and that breaks memory hotplug.
>  
> > For all the other cases there should be absolutely no difference:
> > 
> > nvdimm users would still get a spec-compliant SRAT table (like on
> > QEMU 3.0).
> > 
> > Memory hotplug users w/o nvdimm would get the same ACPI table
> > that they would get after applying this patch (i.e. the one we
> > had before commit 848a1cc1e ("hw/acpi-build: build SRAT memory
> > affinity structures for DIMM devices").
> I did consider it. It still would be a regression but a minor one
> (only Windows nvdimm enabled cases will have regressed memory
> hotplug). I even have a patch for it, but it's still a regression,
> that's why I've posted full 848a1cc1e revert.
> 
> If it were a bug in the newest version of windows (assuming it's
> proven Windows bug), I'd be inclined towards being spec compliant
> and let MS fix Windows as it was with CPU hotplug (hijacked ACPI0010
> container) which they eventually fixed, but in this case it affects
> all guests versions and there is no proof that's a Windows bug.
> 
> So my hope here is that Intel has resources to figure out what
> Windows expectations are wrt SRAT/memory layout and memory hotplug.

Agreed.

-- 
Eduardo



Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-24 Thread Igor Mammedov
On Thu, 23 Aug 2018 14:25:01 -0300
Eduardo Habkost  wrote:

> On Thu, Aug 23, 2018 at 10:14:06AM +0200, Igor Mammedov wrote:
> > On Wed, 22 Aug 2018 15:01:12 -0300
> > Eduardo Habkost  wrote:  
> [...]
> > > However, have you considered keeping adding separate entries for
> > > NVDIMM devices only (so we follow the spec), but add a single
> > > (numa_nodes-1, MEM_AFFINITY_HOTPLUGGABLE|MEM_AFFINITY_ENABLED)
> > > entry to the rest?  
> > Indeed, I did. It doesn't work either.  
> 
> When exactly it didn't work?  Did nvdimm + memory hotplug ever
> worked together on Windows guests?
before 848a1cc1e QEMU CLI with nvdimm + memory hotplug worked
as expected for Windows.
With approach you suggested, it would create several SRAT entries
X for nvdimm and Y for the rest (1 in the best case or many if
nvdimms/pc-dimms are interleaved) and that breaks memory hotplug.
 
> For all the other cases there should be absolutely no difference:
> 
> nvdimm users would still get a spec-compliant SRAT table (like on
> QEMU 3.0).
> 
> Memory hotplug users w/o nvdimm would get the same ACPI table
> that they would get after applying this patch (i.e. the one we
> had before commit 848a1cc1e ("hw/acpi-build: build SRAT memory
> affinity structures for DIMM devices").
I did consider it. It still would be a regression but a minor one
(only Windows nvdimm enabled cases will have regressed memory
hotplug). I even have a patch for it, but it's still a regression,
that's why I've posted full 848a1cc1e revert.

If it were a bug in the newest version of windows (assuming it's
proven Windows bug), I'd be inclined towards being spec compliant
and let MS fix Windows as it was with CPU hotplug (hijacked ACPI0010
container) which they eventually fixed, but in this case it affects
all guests versions and there is no proof that's a Windows bug.

So my hope here is that Intel has resources to figure out what
Windows expectations are wrt SRAT/memory layout and memory hotplug.



Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-24 Thread Yu Zhang
On Thu, Aug 23, 2018 at 02:34:31PM +0200, Igor Mammedov wrote:
> On Thu, 23 Aug 2018 17:01:33 +0800
> Yu Zhang  wrote:
> 
> > On 8/23/2018 2:01 AM, Eduardo Habkost wrote:
> > > On Wed, Aug 22, 2018 at 03:05:36PM +0200, Igor Mammedov wrote:  
> > >> On Wed, 22 Aug 2018 12:06:26 +0200
> > >> Laszlo Ersek  wrote:
> > >>  
> > >>> On 08/22/18 11:46, Igor Mammedov wrote:  
> >  Commit
> > 10efd7e108 "pc: acpi: fix memory hotplug regression by reducing 
> >  stub SRAT entry size"
> >  attemped to fix hotplug regression introduced by
> > 848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for 
> >  DIMM devices"
> > 
> >  fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 
> >  based
> >  kernels (RHEL6) to the point where guest might crash at boot.
> >  Reason is that 2.6 kernel discards SRAT table due too small last entry
> >  which down the road leads to crashes. Hack I've tried in 10efd7e108 is 
> >  also
> >  not ACPI spec compliant according to which whole possible RAM should be
> >  described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
> >  kernels.
> > 
> >  With 10efd7e108 reverted, I've also tried splitting SRAT table 
> >  statically
> >  in different ways %/node and %/slot but Windows still fails to online
> >  2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> >  sometimes even coldplugged pc-dimms where affected with static SRAT
> >  partitioning.
> >  The only known so far way where Windows stays happy is when we have 1
> >  SRAT entry in the last node covering all hotplug area.
> > 
> >  Revert 848a1cc1e until we come up with a way to avoid regression
> >  on Windows with hotplug area split in several entries.
> >  Tested this with 2.6/3.0 based kernels (RHEL6/7) and 
> >  WS20[08/12/12R2/16]).
> > 
> >  Signed-off-by: Igor Mammedov 
> >  ---
> >  CC: haozhong.zh...@intel.com
> >  CC: m...@redhat.com
> >  CC: qemu-sta...@nongnu.org
> >  CC: ehabk...@redhat.com
> >  CC: ler...@redhat.com
> >  ---
> >    hw/i386/acpi-build.c | 73 
> >  +---
> >    1 file changed, 12 insertions(+), 61 deletions(-)
> > 
> >  diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> >  index e1ee8ae..1599caa 100644
> >  --- a/hw/i386/acpi-build.c
> >  +++ b/hw/i386/acpi-build.c
> >  @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker 
> >  *linker, GArray *tcpalog)
> >    #define HOLE_640K_START  (640 * KiB)
> >    #define HOLE_640K_END   (1 * MiB)
> >    
> >  -static void build_srat_hotpluggable_memory(GArray *table_data, 
> >  uint64_t base,
> >  -   uint64_t len, int 
> >  default_node)
> >  -{
> >  -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> >  -MemoryDeviceInfoList *info;
> >  -MemoryDeviceInfo *mi;
> >  -PCDIMMDeviceInfo *di;
> >  -uint64_t end = base + len, cur, size;
> >  -bool is_nvdimm;
> >  -AcpiSratMemoryAffinity *numamem;
> >  -MemoryAffinityFlags flags;
> >  -
> >  -for (cur = base, info = info_list;
> >  - cur < end;
> >  - cur += size, info = info->next) {
> >  -numamem = acpi_data_push(table_data, sizeof *numamem);
> >  -
> >  -if (!info) {
> >  -/*
> >  - * Entry is required for Windows to enable memory hotplug 
> >  in OS
> >  - * and for Linux to enable SWIOTLB when booted with less 
> >  than
> >  - * 4G of RAM. Windows works better if the entry sets 
> >  proximity
> >  - * to the highest NUMA node in the machine at the end of 
> >  the
> >  - * reserved space.
> >  - * Memory devices may override proximity set by this 
> >  entry,
> >  - * providing _PXM method if necessary.
> >  - */
> >  -build_srat_memory(numamem, end - 1, 1, default_node,
> >  -  MEM_AFFINITY_HOTPLUGGABLE | 
> >  MEM_AFFINITY_ENABLED);
> >  -break;
> >  -}
> >  -
> >  -mi = info->value;
> >  -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> >  -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> >  -
> >  -if (cur < di->addr) {
> >  -build_srat_memory(numamem, cur, di->addr - cur, 
> >  default_node,
> >  -  MEM_AFFINITY_HOTPLUGGABLE | 
> >  MEM_AFFINITY_ENABLED);
> >  -numamem = acpi_data_push(table_data, sizeof *numamem);
> >  -}
> >  -
> >  -size = di->size;
> >  -
> > 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-23 Thread Eduardo Habkost
On Thu, Aug 23, 2018 at 10:14:06AM +0200, Igor Mammedov wrote:
> On Wed, 22 Aug 2018 15:01:12 -0300
> Eduardo Habkost  wrote:
[...]
> > However, have you considered keeping adding separate entries for
> > NVDIMM devices only (so we follow the spec), but add a single
> > (numa_nodes-1, MEM_AFFINITY_HOTPLUGGABLE|MEM_AFFINITY_ENABLED)
> > entry to the rest?
> Indeed, I did. It doesn't work either.

When exactly it didn't work?  Did nvdimm + memory hotplug ever
worked together on Windows guests?

For all the other cases there should be absolutely no difference:

nvdimm users would still get a spec-compliant SRAT table (like on
QEMU 3.0).

Memory hotplug users w/o nvdimm would get the same ACPI table
that they would get after applying this patch (i.e. the one we
had before commit 848a1cc1e ("hw/acpi-build: build SRAT memory
affinity structures for DIMM devices").

-- 
Eduardo



Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-23 Thread Igor Mammedov
On Thu, 23 Aug 2018 17:01:33 +0800
Yu Zhang  wrote:

> On 8/23/2018 2:01 AM, Eduardo Habkost wrote:
> > On Wed, Aug 22, 2018 at 03:05:36PM +0200, Igor Mammedov wrote:  
> >> On Wed, 22 Aug 2018 12:06:26 +0200
> >> Laszlo Ersek  wrote:
> >>  
> >>> On 08/22/18 11:46, Igor Mammedov wrote:  
>  Commit
> 10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub 
>  SRAT entry size"
>  attemped to fix hotplug regression introduced by
> 848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for 
>  DIMM devices"
> 
>  fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 
>  based
>  kernels (RHEL6) to the point where guest might crash at boot.
>  Reason is that 2.6 kernel discards SRAT table due too small last entry
>  which down the road leads to crashes. Hack I've tried in 10efd7e108 is 
>  also
>  not ACPI spec compliant according to which whole possible RAM should be
>  described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
>  kernels.
> 
>  With 10efd7e108 reverted, I've also tried splitting SRAT table statically
>  in different ways %/node and %/slot but Windows still fails to online
>  2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
>  sometimes even coldplugged pc-dimms where affected with static SRAT
>  partitioning.
>  The only known so far way where Windows stays happy is when we have 1
>  SRAT entry in the last node covering all hotplug area.
> 
>  Revert 848a1cc1e until we come up with a way to avoid regression
>  on Windows with hotplug area split in several entries.
>  Tested this with 2.6/3.0 based kernels (RHEL6/7) and 
>  WS20[08/12/12R2/16]).
> 
>  Signed-off-by: Igor Mammedov 
>  ---
>  CC: haozhong.zh...@intel.com
>  CC: m...@redhat.com
>  CC: qemu-sta...@nongnu.org
>  CC: ehabk...@redhat.com
>  CC: ler...@redhat.com
>  ---
>    hw/i386/acpi-build.c | 73 
>  +---
>    1 file changed, 12 insertions(+), 61 deletions(-)
> 
>  diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
>  index e1ee8ae..1599caa 100644
>  --- a/hw/i386/acpi-build.c
>  +++ b/hw/i386/acpi-build.c
>  @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker 
>  *linker, GArray *tcpalog)
>    #define HOLE_640K_START  (640 * KiB)
>    #define HOLE_640K_END   (1 * MiB)
>    
>  -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t 
>  base,
>  -   uint64_t len, int 
>  default_node)
>  -{
>  -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
>  -MemoryDeviceInfoList *info;
>  -MemoryDeviceInfo *mi;
>  -PCDIMMDeviceInfo *di;
>  -uint64_t end = base + len, cur, size;
>  -bool is_nvdimm;
>  -AcpiSratMemoryAffinity *numamem;
>  -MemoryAffinityFlags flags;
>  -
>  -for (cur = base, info = info_list;
>  - cur < end;
>  - cur += size, info = info->next) {
>  -numamem = acpi_data_push(table_data, sizeof *numamem);
>  -
>  -if (!info) {
>  -/*
>  - * Entry is required for Windows to enable memory hotplug 
>  in OS
>  - * and for Linux to enable SWIOTLB when booted with less 
>  than
>  - * 4G of RAM. Windows works better if the entry sets 
>  proximity
>  - * to the highest NUMA node in the machine at the end of the
>  - * reserved space.
>  - * Memory devices may override proximity set by this entry,
>  - * providing _PXM method if necessary.
>  - */
>  -build_srat_memory(numamem, end - 1, 1, default_node,
>  -  MEM_AFFINITY_HOTPLUGGABLE | 
>  MEM_AFFINITY_ENABLED);
>  -break;
>  -}
>  -
>  -mi = info->value;
>  -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
>  -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
>  -
>  -if (cur < di->addr) {
>  -build_srat_memory(numamem, cur, di->addr - cur, 
>  default_node,
>  -  MEM_AFFINITY_HOTPLUGGABLE | 
>  MEM_AFFINITY_ENABLED);
>  -numamem = acpi_data_push(table_data, sizeof *numamem);
>  -}
>  -
>  -size = di->size;
>  -
>  -flags = MEM_AFFINITY_ENABLED;
>  -if (di->hotpluggable) {
>  -flags |= MEM_AFFINITY_HOTPLUGGABLE;
>  -}
>  -if (is_nvdimm) {
>  -flags |= MEM_AFFINITY_NON_VOLATILE;
>  -}
>  -
>  -build_srat_memory(numamem, 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-23 Thread Yu Zhang




On 8/23/2018 2:01 AM, Eduardo Habkost wrote:

On Wed, Aug 22, 2018 at 03:05:36PM +0200, Igor Mammedov wrote:

On Wed, 22 Aug 2018 12:06:26 +0200
Laszlo Ersek  wrote:


On 08/22/18 11:46, Igor Mammedov wrote:

Commit
   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT entry 
size"
attemped to fix hotplug regression introduced by
   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
devices"

fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
kernels (RHEL6) to the point where guest might crash at boot.
Reason is that 2.6 kernel discards SRAT table due too small last entry
which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
not ACPI spec compliant according to which whole possible RAM should be
described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.

With 10efd7e108 reverted, I've also tried splitting SRAT table statically
in different ways %/node and %/slot but Windows still fails to online
2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
sometimes even coldplugged pc-dimms where affected with static SRAT
partitioning.
The only known so far way where Windows stays happy is when we have 1
SRAT entry in the last node covering all hotplug area.

Revert 848a1cc1e until we come up with a way to avoid regression
on Windows with hotplug area split in several entries.
Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).

Signed-off-by: Igor Mammedov 
---
CC: haozhong.zh...@intel.com
CC: m...@redhat.com
CC: qemu-sta...@nongnu.org
CC: ehabk...@redhat.com
CC: ler...@redhat.com
---
  hw/i386/acpi-build.c | 73 +---
  1 file changed, 12 insertions(+), 61 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index e1ee8ae..1599caa 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
GArray *tcpalog)
  #define HOLE_640K_START  (640 * KiB)
  #define HOLE_640K_END   (1 * MiB)
  
-static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,

-   uint64_t len, int default_node)
-{
-MemoryDeviceInfoList *info_list = qmp_memory_device_list();
-MemoryDeviceInfoList *info;
-MemoryDeviceInfo *mi;
-PCDIMMDeviceInfo *di;
-uint64_t end = base + len, cur, size;
-bool is_nvdimm;
-AcpiSratMemoryAffinity *numamem;
-MemoryAffinityFlags flags;
-
-for (cur = base, info = info_list;
- cur < end;
- cur += size, info = info->next) {
-numamem = acpi_data_push(table_data, sizeof *numamem);
-
-if (!info) {
-/*
- * Entry is required for Windows to enable memory hotplug in OS
- * and for Linux to enable SWIOTLB when booted with less than
- * 4G of RAM. Windows works better if the entry sets proximity
- * to the highest NUMA node in the machine at the end of the
- * reserved space.
- * Memory devices may override proximity set by this entry,
- * providing _PXM method if necessary.
- */
-build_srat_memory(numamem, end - 1, 1, default_node,
-  MEM_AFFINITY_HOTPLUGGABLE | 
MEM_AFFINITY_ENABLED);
-break;
-}
-
-mi = info->value;
-is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
-di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
-
-if (cur < di->addr) {
-build_srat_memory(numamem, cur, di->addr - cur, default_node,
-  MEM_AFFINITY_HOTPLUGGABLE | 
MEM_AFFINITY_ENABLED);
-numamem = acpi_data_push(table_data, sizeof *numamem);
-}
-
-size = di->size;
-
-flags = MEM_AFFINITY_ENABLED;
-if (di->hotpluggable) {
-flags |= MEM_AFFINITY_HOTPLUGGABLE;
-}
-if (is_nvdimm) {
-flags |= MEM_AFFINITY_NON_VOLATILE;
-}
-
-build_srat_memory(numamem, di->addr, size, di->node, flags);
-}
-
-qapi_free_MemoryDeviceInfoList(info_list);
-}
-
  static void
  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
  {
@@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
  build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
  }
  
+/*

+ * Entry is required for Windows to enable memory hotplug in OS
+ * and for Linux to enable SWIOTLB when booted with less than
+ * 4G of RAM. Windows works better if the entry sets proximity
+ * to the highest NUMA node in the machine.
+ * Memory devices may override proximity set by this entry,
+ * providing _PXM method if necessary.
+ */
  if (hotplugabble_address_space_size) {
-build_srat_hotpluggable_memory(table_data, 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-23 Thread Igor Mammedov
On Wed, 22 Aug 2018 15:01:12 -0300
Eduardo Habkost  wrote:

> On Wed, Aug 22, 2018 at 03:05:36PM +0200, Igor Mammedov wrote:
> > On Wed, 22 Aug 2018 12:06:26 +0200
> > Laszlo Ersek  wrote:
> >   
> > > On 08/22/18 11:46, Igor Mammedov wrote:  
> > > > Commit
> > > >   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub 
> > > > SRAT entry size"
> > > > attemped to fix hotplug regression introduced by
> > > >   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for 
> > > > DIMM devices"
> > > > 
> > > > fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 
> > > > based
> > > > kernels (RHEL6) to the point where guest might crash at boot.
> > > > Reason is that 2.6 kernel discards SRAT table due too small last entry
> > > > which down the road leads to crashes. Hack I've tried in 10efd7e108 is 
> > > > also
> > > > not ACPI spec compliant according to which whole possible RAM should be
> > > > described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
> > > > kernels.
> > > > 
> > > > With 10efd7e108 reverted, I've also tried splitting SRAT table 
> > > > statically
> > > > in different ways %/node and %/slot but Windows still fails to online
> > > > 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> > > > sometimes even coldplugged pc-dimms where affected with static SRAT
> > > > partitioning.
> > > > The only known so far way where Windows stays happy is when we have 1
> > > > SRAT entry in the last node covering all hotplug area.
> > > > 
> > > > Revert 848a1cc1e until we come up with a way to avoid regression
> > > > on Windows with hotplug area split in several entries.
> > > > Tested this with 2.6/3.0 based kernels (RHEL6/7) and 
> > > > WS20[08/12/12R2/16]).
> > > > 
> > > > Signed-off-by: Igor Mammedov 
> > > > ---
> > > > CC: haozhong.zh...@intel.com
> > > > CC: m...@redhat.com
> > > > CC: qemu-sta...@nongnu.org
> > > > CC: ehabk...@redhat.com
> > > > CC: ler...@redhat.com
> > > > ---
> > > >  hw/i386/acpi-build.c | 73 
> > > > +---
> > > >  1 file changed, 12 insertions(+), 61 deletions(-)
> > > > 
> > > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > > index e1ee8ae..1599caa 100644
> > > > --- a/hw/i386/acpi-build.c
> > > > +++ b/hw/i386/acpi-build.c
> > > > @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker 
> > > > *linker, GArray *tcpalog)
> > > >  #define HOLE_640K_START  (640 * KiB)
> > > >  #define HOLE_640K_END   (1 * MiB)
> > > >  
> > > > -static void build_srat_hotpluggable_memory(GArray *table_data, 
> > > > uint64_t base,
> > > > -   uint64_t len, int 
> > > > default_node)
> > > > -{
> > > > -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> > > > -MemoryDeviceInfoList *info;
> > > > -MemoryDeviceInfo *mi;
> > > > -PCDIMMDeviceInfo *di;
> > > > -uint64_t end = base + len, cur, size;
> > > > -bool is_nvdimm;
> > > > -AcpiSratMemoryAffinity *numamem;
> > > > -MemoryAffinityFlags flags;
> > > > -
> > > > -for (cur = base, info = info_list;
> > > > - cur < end;
> > > > - cur += size, info = info->next) {
> > > > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > > > -
> > > > -if (!info) {
> > > > -/*
> > > > - * Entry is required for Windows to enable memory hotplug 
> > > > in OS
> > > > - * and for Linux to enable SWIOTLB when booted with less 
> > > > than
> > > > - * 4G of RAM. Windows works better if the entry sets 
> > > > proximity
> > > > - * to the highest NUMA node in the machine at the end of 
> > > > the
> > > > - * reserved space.
> > > > - * Memory devices may override proximity set by this entry,
> > > > - * providing _PXM method if necessary.
> > > > - */
> > > > -build_srat_memory(numamem, end - 1, 1, default_node,
> > > > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > > > MEM_AFFINITY_ENABLED);
> > > > -break;
> > > > -}
> > > > -
> > > > -mi = info->value;
> > > > -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> > > > -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> > > > -
> > > > -if (cur < di->addr) {
> > > > -build_srat_memory(numamem, cur, di->addr - cur, 
> > > > default_node,
> > > > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > > > MEM_AFFINITY_ENABLED);
> > > > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > > > -}
> > > > -
> > > > -size = di->size;
> > > > -
> > > > -flags = MEM_AFFINITY_ENABLED;
> > > > -if (di->hotpluggable) {
> > > > -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> > > > -}
> > > > -if (is_nvdimm) {
> > > > -flags |= 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-22 Thread Eduardo Habkost


CCing NVDIMM maintainer, and Intel people who may be able to
help.

Summary: SRAT changes added for NVDIMM break memory hotplug on
Windows, and our best option right now is to revert the changes.


On Wed, Aug 22, 2018 at 11:46:44AM +0200, Igor Mammedov wrote:
> Commit
>   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
> entry size"
> attemped to fix hotplug regression introduced by
>   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
> devices"
> 
> fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> kernels (RHEL6) to the point where guest might crash at boot.
> Reason is that 2.6 kernel discards SRAT table due too small last entry
> which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
> not ACPI spec compliant according to which whole possible RAM should be
> described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.
> 
> With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> in different ways %/node and %/slot but Windows still fails to online
> 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> sometimes even coldplugged pc-dimms where affected with static SRAT
> partitioning.
> The only known so far way where Windows stays happy is when we have 1
> SRAT entry in the last node covering all hotplug area.
> 
> Revert 848a1cc1e until we come up with a way to avoid regression
> on Windows with hotplug area split in several entries.
> Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> 
> Signed-off-by: Igor Mammedov 
> ---
> CC: haozhong.zh...@intel.com
> CC: m...@redhat.com
> CC: qemu-sta...@nongnu.org
> CC: ehabk...@redhat.com
> CC: ler...@redhat.com
> ---
>  hw/i386/acpi-build.c | 73 
> +---
>  1 file changed, 12 insertions(+), 61 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index e1ee8ae..1599caa 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> GArray *tcpalog)
>  #define HOLE_640K_START  (640 * KiB)
>  #define HOLE_640K_END   (1 * MiB)
>  
> -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
> -   uint64_t len, int default_node)
> -{
> -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> -MemoryDeviceInfoList *info;
> -MemoryDeviceInfo *mi;
> -PCDIMMDeviceInfo *di;
> -uint64_t end = base + len, cur, size;
> -bool is_nvdimm;
> -AcpiSratMemoryAffinity *numamem;
> -MemoryAffinityFlags flags;
> -
> -for (cur = base, info = info_list;
> - cur < end;
> - cur += size, info = info->next) {
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -
> -if (!info) {
> -/*
> - * Entry is required for Windows to enable memory hotplug in OS
> - * and for Linux to enable SWIOTLB when booted with less than
> - * 4G of RAM. Windows works better if the entry sets proximity
> - * to the highest NUMA node in the machine at the end of the
> - * reserved space.
> - * Memory devices may override proximity set by this entry,
> - * providing _PXM method if necessary.
> - */
> -build_srat_memory(numamem, end - 1, 1, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -break;
> -}
> -
> -mi = info->value;
> -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> -
> -if (cur < di->addr) {
> -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -}
> -
> -size = di->size;
> -
> -flags = MEM_AFFINITY_ENABLED;
> -if (di->hotpluggable) {
> -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> -}
> -if (is_nvdimm) {
> -flags |= MEM_AFFINITY_NON_VOLATILE;
> -}
> -
> -build_srat_memory(numamem, di->addr, size, di->node, flags);
> -}
> -
> -qapi_free_MemoryDeviceInfoList(info_list);
> -}
> -
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
> @@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> MachineState *machine)
>  build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
>  }
>  
> +/*
> + * Entry is required for Windows to enable memory hotplug in OS
> + * and for Linux to enable SWIOTLB when booted with less than
> + * 4G of RAM. Windows works better if the entry sets proximity
> + * 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-22 Thread Eduardo Habkost
On Wed, Aug 22, 2018 at 03:05:36PM +0200, Igor Mammedov wrote:
> On Wed, 22 Aug 2018 12:06:26 +0200
> Laszlo Ersek  wrote:
> 
> > On 08/22/18 11:46, Igor Mammedov wrote:
> > > Commit
> > >   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub 
> > > SRAT entry size"
> > > attemped to fix hotplug regression introduced by
> > >   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for 
> > > DIMM devices"
> > > 
> > > fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> > > kernels (RHEL6) to the point where guest might crash at boot.
> > > Reason is that 2.6 kernel discards SRAT table due too small last entry
> > > which down the road leads to crashes. Hack I've tried in 10efd7e108 is 
> > > also
> > > not ACPI spec compliant according to which whole possible RAM should be
> > > described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
> > > kernels.
> > > 
> > > With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> > > in different ways %/node and %/slot but Windows still fails to online
> > > 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> > > sometimes even coldplugged pc-dimms where affected with static SRAT
> > > partitioning.
> > > The only known so far way where Windows stays happy is when we have 1
> > > SRAT entry in the last node covering all hotplug area.
> > > 
> > > Revert 848a1cc1e until we come up with a way to avoid regression
> > > on Windows with hotplug area split in several entries.
> > > Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> > > 
> > > Signed-off-by: Igor Mammedov 
> > > ---
> > > CC: haozhong.zh...@intel.com
> > > CC: m...@redhat.com
> > > CC: qemu-sta...@nongnu.org
> > > CC: ehabk...@redhat.com
> > > CC: ler...@redhat.com
> > > ---
> > >  hw/i386/acpi-build.c | 73 
> > > +---
> > >  1 file changed, 12 insertions(+), 61 deletions(-)
> > > 
> > > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > > index e1ee8ae..1599caa 100644
> > > --- a/hw/i386/acpi-build.c
> > > +++ b/hw/i386/acpi-build.c
> > > @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> > > GArray *tcpalog)
> > >  #define HOLE_640K_START  (640 * KiB)
> > >  #define HOLE_640K_END   (1 * MiB)
> > >  
> > > -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t 
> > > base,
> > > -   uint64_t len, int 
> > > default_node)
> > > -{
> > > -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> > > -MemoryDeviceInfoList *info;
> > > -MemoryDeviceInfo *mi;
> > > -PCDIMMDeviceInfo *di;
> > > -uint64_t end = base + len, cur, size;
> > > -bool is_nvdimm;
> > > -AcpiSratMemoryAffinity *numamem;
> > > -MemoryAffinityFlags flags;
> > > -
> > > -for (cur = base, info = info_list;
> > > - cur < end;
> > > - cur += size, info = info->next) {
> > > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > > -
> > > -if (!info) {
> > > -/*
> > > - * Entry is required for Windows to enable memory hotplug in 
> > > OS
> > > - * and for Linux to enable SWIOTLB when booted with less than
> > > - * 4G of RAM. Windows works better if the entry sets 
> > > proximity
> > > - * to the highest NUMA node in the machine at the end of the
> > > - * reserved space.
> > > - * Memory devices may override proximity set by this entry,
> > > - * providing _PXM method if necessary.
> > > - */
> > > -build_srat_memory(numamem, end - 1, 1, default_node,
> > > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > > MEM_AFFINITY_ENABLED);
> > > -break;
> > > -}
> > > -
> > > -mi = info->value;
> > > -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> > > -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> > > -
> > > -if (cur < di->addr) {
> > > -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> > > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > > MEM_AFFINITY_ENABLED);
> > > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > > -}
> > > -
> > > -size = di->size;
> > > -
> > > -flags = MEM_AFFINITY_ENABLED;
> > > -if (di->hotpluggable) {
> > > -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> > > -}
> > > -if (is_nvdimm) {
> > > -flags |= MEM_AFFINITY_NON_VOLATILE;
> > > -}
> > > -
> > > -build_srat_memory(numamem, di->addr, size, di->node, flags);
> > > -}
> > > -
> > > -qapi_free_MemoryDeviceInfoList(info_list);
> > > -}
> > > -
> > >  static void
> > >  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
> > >  {
> > > @@ -2414,10 +2356,19 

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-22 Thread Igor Mammedov
On Wed, 22 Aug 2018 12:06:26 +0200
Laszlo Ersek  wrote:

> On 08/22/18 11:46, Igor Mammedov wrote:
> > Commit
> >   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
> > entry size"
> > attemped to fix hotplug regression introduced by
> >   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
> > devices"
> > 
> > fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> > kernels (RHEL6) to the point where guest might crash at boot.
> > Reason is that 2.6 kernel discards SRAT table due too small last entry
> > which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
> > not ACPI spec compliant according to which whole possible RAM should be
> > described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based 
> > kernels.
> > 
> > With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> > in different ways %/node and %/slot but Windows still fails to online
> > 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> > sometimes even coldplugged pc-dimms where affected with static SRAT
> > partitioning.
> > The only known so far way where Windows stays happy is when we have 1
> > SRAT entry in the last node covering all hotplug area.
> > 
> > Revert 848a1cc1e until we come up with a way to avoid regression
> > on Windows with hotplug area split in several entries.
> > Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> > CC: haozhong.zh...@intel.com
> > CC: m...@redhat.com
> > CC: qemu-sta...@nongnu.org
> > CC: ehabk...@redhat.com
> > CC: ler...@redhat.com
> > ---
> >  hw/i386/acpi-build.c | 73 
> > +---
> >  1 file changed, 12 insertions(+), 61 deletions(-)
> > 
> > diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> > index e1ee8ae..1599caa 100644
> > --- a/hw/i386/acpi-build.c
> > +++ b/hw/i386/acpi-build.c
> > @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> > GArray *tcpalog)
> >  #define HOLE_640K_START  (640 * KiB)
> >  #define HOLE_640K_END   (1 * MiB)
> >  
> > -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t 
> > base,
> > -   uint64_t len, int default_node)
> > -{
> > -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> > -MemoryDeviceInfoList *info;
> > -MemoryDeviceInfo *mi;
> > -PCDIMMDeviceInfo *di;
> > -uint64_t end = base + len, cur, size;
> > -bool is_nvdimm;
> > -AcpiSratMemoryAffinity *numamem;
> > -MemoryAffinityFlags flags;
> > -
> > -for (cur = base, info = info_list;
> > - cur < end;
> > - cur += size, info = info->next) {
> > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > -
> > -if (!info) {
> > -/*
> > - * Entry is required for Windows to enable memory hotplug in OS
> > - * and for Linux to enable SWIOTLB when booted with less than
> > - * 4G of RAM. Windows works better if the entry sets proximity
> > - * to the highest NUMA node in the machine at the end of the
> > - * reserved space.
> > - * Memory devices may override proximity set by this entry,
> > - * providing _PXM method if necessary.
> > - */
> > -build_srat_memory(numamem, end - 1, 1, default_node,
> > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > MEM_AFFINITY_ENABLED);
> > -break;
> > -}
> > -
> > -mi = info->value;
> > -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> > -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> > -
> > -if (cur < di->addr) {
> > -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> > -  MEM_AFFINITY_HOTPLUGGABLE | 
> > MEM_AFFINITY_ENABLED);
> > -numamem = acpi_data_push(table_data, sizeof *numamem);
> > -}
> > -
> > -size = di->size;
> > -
> > -flags = MEM_AFFINITY_ENABLED;
> > -if (di->hotpluggable) {
> > -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> > -}
> > -if (is_nvdimm) {
> > -flags |= MEM_AFFINITY_NON_VOLATILE;
> > -}
> > -
> > -build_srat_memory(numamem, di->addr, size, di->node, flags);
> > -}
> > -
> > -qapi_free_MemoryDeviceInfoList(info_list);
> > -}
> > -
> >  static void
> >  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
> >  {
> > @@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> > MachineState *machine)
> >  build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
> >  }
> >  
> > +/*
> > + * Entry is required for Windows to enable memory hotplug in OS
> > + * and for Linux to enable SWIOTLB when booted with less than

Re: [Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-22 Thread Laszlo Ersek
On 08/22/18 11:46, Igor Mammedov wrote:
> Commit
>   10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
> entry size"
> attemped to fix hotplug regression introduced by
>   848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
> devices"
> 
> fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
> kernels (RHEL6) to the point where guest might crash at boot.
> Reason is that 2.6 kernel discards SRAT table due too small last entry
> which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
> not ACPI spec compliant according to which whole possible RAM should be
> described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.
> 
> With 10efd7e108 reverted, I've also tried splitting SRAT table statically
> in different ways %/node and %/slot but Windows still fails to online
> 2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
> sometimes even coldplugged pc-dimms where affected with static SRAT
> partitioning.
> The only known so far way where Windows stays happy is when we have 1
> SRAT entry in the last node covering all hotplug area.
> 
> Revert 848a1cc1e until we come up with a way to avoid regression
> on Windows with hotplug area split in several entries.
> Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).
> 
> Signed-off-by: Igor Mammedov 
> ---
> CC: haozhong.zh...@intel.com
> CC: m...@redhat.com
> CC: qemu-sta...@nongnu.org
> CC: ehabk...@redhat.com
> CC: ler...@redhat.com
> ---
>  hw/i386/acpi-build.c | 73 
> +---
>  1 file changed, 12 insertions(+), 61 deletions(-)
> 
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index e1ee8ae..1599caa 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
> GArray *tcpalog)
>  #define HOLE_640K_START  (640 * KiB)
>  #define HOLE_640K_END   (1 * MiB)
>  
> -static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
> -   uint64_t len, int default_node)
> -{
> -MemoryDeviceInfoList *info_list = qmp_memory_device_list();
> -MemoryDeviceInfoList *info;
> -MemoryDeviceInfo *mi;
> -PCDIMMDeviceInfo *di;
> -uint64_t end = base + len, cur, size;
> -bool is_nvdimm;
> -AcpiSratMemoryAffinity *numamem;
> -MemoryAffinityFlags flags;
> -
> -for (cur = base, info = info_list;
> - cur < end;
> - cur += size, info = info->next) {
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -
> -if (!info) {
> -/*
> - * Entry is required for Windows to enable memory hotplug in OS
> - * and for Linux to enable SWIOTLB when booted with less than
> - * 4G of RAM. Windows works better if the entry sets proximity
> - * to the highest NUMA node in the machine at the end of the
> - * reserved space.
> - * Memory devices may override proximity set by this entry,
> - * providing _PXM method if necessary.
> - */
> -build_srat_memory(numamem, end - 1, 1, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -break;
> -}
> -
> -mi = info->value;
> -is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
> -di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
> -
> -if (cur < di->addr) {
> -build_srat_memory(numamem, cur, di->addr - cur, default_node,
> -  MEM_AFFINITY_HOTPLUGGABLE | 
> MEM_AFFINITY_ENABLED);
> -numamem = acpi_data_push(table_data, sizeof *numamem);
> -}
> -
> -size = di->size;
> -
> -flags = MEM_AFFINITY_ENABLED;
> -if (di->hotpluggable) {
> -flags |= MEM_AFFINITY_HOTPLUGGABLE;
> -}
> -if (is_nvdimm) {
> -flags |= MEM_AFFINITY_NON_VOLATILE;
> -}
> -
> -build_srat_memory(numamem, di->addr, size, di->node, flags);
> -}
> -
> -qapi_free_MemoryDeviceInfoList(info_list);
> -}
> -
>  static void
>  build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
>  {
> @@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
> MachineState *machine)
>  build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
>  }
>  
> +/*
> + * Entry is required for Windows to enable memory hotplug in OS
> + * and for Linux to enable SWIOTLB when booted with less than
> + * 4G of RAM. Windows works better if the entry sets proximity
> + * to the highest NUMA node in the machine.
> + * Memory devices may override proximity set by this entry,
> + * providing _PXM method if necessary.
> + */
>  if (hotplugabble_address_space_size) {
> -

[Qemu-devel] [PATCH] pc: acpi: revert back to 1 SRAT entry for hotpluggable area

2018-08-22 Thread Igor Mammedov
Commit
  10efd7e108 "pc: acpi: fix memory hotplug regression by reducing stub SRAT 
entry size"
attemped to fix hotplug regression introduced by
  848a1cc1e "hw/acpi-build: build SRAT memory affinity structures for DIMM 
devices"

fixed issue for Windows/3.0+ linux kernels, however it regressed 2.6 based
kernels (RHEL6) to the point where guest might crash at boot.
Reason is that 2.6 kernel discards SRAT table due too small last entry
which down the road leads to crashes. Hack I've tried in 10efd7e108 is also
not ACPI spec compliant according to which whole possible RAM should be
described in SRAT. Revert 10efd7e108 to fix regression for 2.6 based kernels.

With 10efd7e108 reverted, I've also tried splitting SRAT table statically
in different ways %/node and %/slot but Windows still fails to online
2nd pc-dimm hot-plugged into node 0 (as described in 10efd7e108) and
sometimes even coldplugged pc-dimms where affected with static SRAT
partitioning.
The only known so far way where Windows stays happy is when we have 1
SRAT entry in the last node covering all hotplug area.

Revert 848a1cc1e until we come up with a way to avoid regression
on Windows with hotplug area split in several entries.
Tested this with 2.6/3.0 based kernels (RHEL6/7) and WS20[08/12/12R2/16]).

Signed-off-by: Igor Mammedov 
---
CC: haozhong.zh...@intel.com
CC: m...@redhat.com
CC: qemu-sta...@nongnu.org
CC: ehabk...@redhat.com
CC: ler...@redhat.com
---
 hw/i386/acpi-build.c | 73 +---
 1 file changed, 12 insertions(+), 61 deletions(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index e1ee8ae..1599caa 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2251,64 +2251,6 @@ build_tpm2(GArray *table_data, BIOSLinker *linker, 
GArray *tcpalog)
 #define HOLE_640K_START  (640 * KiB)
 #define HOLE_640K_END   (1 * MiB)
 
-static void build_srat_hotpluggable_memory(GArray *table_data, uint64_t base,
-   uint64_t len, int default_node)
-{
-MemoryDeviceInfoList *info_list = qmp_memory_device_list();
-MemoryDeviceInfoList *info;
-MemoryDeviceInfo *mi;
-PCDIMMDeviceInfo *di;
-uint64_t end = base + len, cur, size;
-bool is_nvdimm;
-AcpiSratMemoryAffinity *numamem;
-MemoryAffinityFlags flags;
-
-for (cur = base, info = info_list;
- cur < end;
- cur += size, info = info->next) {
-numamem = acpi_data_push(table_data, sizeof *numamem);
-
-if (!info) {
-/*
- * Entry is required for Windows to enable memory hotplug in OS
- * and for Linux to enable SWIOTLB when booted with less than
- * 4G of RAM. Windows works better if the entry sets proximity
- * to the highest NUMA node in the machine at the end of the
- * reserved space.
- * Memory devices may override proximity set by this entry,
- * providing _PXM method if necessary.
- */
-build_srat_memory(numamem, end - 1, 1, default_node,
-  MEM_AFFINITY_HOTPLUGGABLE | 
MEM_AFFINITY_ENABLED);
-break;
-}
-
-mi = info->value;
-is_nvdimm = (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM);
-di = !is_nvdimm ? mi->u.dimm.data : mi->u.nvdimm.data;
-
-if (cur < di->addr) {
-build_srat_memory(numamem, cur, di->addr - cur, default_node,
-  MEM_AFFINITY_HOTPLUGGABLE | 
MEM_AFFINITY_ENABLED);
-numamem = acpi_data_push(table_data, sizeof *numamem);
-}
-
-size = di->size;
-
-flags = MEM_AFFINITY_ENABLED;
-if (di->hotpluggable) {
-flags |= MEM_AFFINITY_HOTPLUGGABLE;
-}
-if (is_nvdimm) {
-flags |= MEM_AFFINITY_NON_VOLATILE;
-}
-
-build_srat_memory(numamem, di->addr, size, di->node, flags);
-}
-
-qapi_free_MemoryDeviceInfoList(info_list);
-}
-
 static void
 build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine)
 {
@@ -2414,10 +2356,19 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 build_srat_memory(numamem, 0, 0, 0, MEM_AFFINITY_NOFLAGS);
 }
 
+/*
+ * Entry is required for Windows to enable memory hotplug in OS
+ * and for Linux to enable SWIOTLB when booted with less than
+ * 4G of RAM. Windows works better if the entry sets proximity
+ * to the highest NUMA node in the machine.
+ * Memory devices may override proximity set by this entry,
+ * providing _PXM method if necessary.
+ */
 if (hotplugabble_address_space_size) {
-build_srat_hotpluggable_memory(table_data, 
machine->device_memory->base,
-   hotplugabble_address_space_size,
-   pcms->numa_nodes - 1);
+numamem = acpi_data_push(table_data, sizeof *numamem);
+