Re: [Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-08 Thread David Gibson
On Wed, Jun 08, 2016 at 04:09:57PM +1000, Alexey Kardashevskiy wrote:
> On 08/06/16 15:57, David Gibson wrote:
> > On Mon, Jun 06, 2016 at 06:12:58PM +1000, Alexey Kardashevskiy wrote:
> >> On 06/06/16 15:57, David Gibson wrote:
> >>> On Wed, Jun 01, 2016 at 06:57:42PM +1000, Alexey Kardashevskiy wrote:
>  This adds support for Dynamic DMA Windows (DDW) option defined by
>  the SPAPR specification which allows to have additional DMA window(s)
> 
>  The "ddw" property is enabled by default on a PHB but for compatibility
>  the pseries-2.5 machine (TODO: update version) and older disable it.
> >>>
> >>> Looks like your todo is now todone, but you need to update the commit
> >>> message.
> >>>
>  This also creates a single DMA window for the older machines to
>  maintain backward migration.
> 
>  This implements DDW for PHB with emulated and VFIO devices. The host
>  kernel support is required. The advertised IOMMU page sizes are 4K and
>  64K; 16M pages are supported but not advertised by default, in order to
>  enable them, the user has to specify "pgsz" property for PHB and
>  enable huge pages for RAM.
> 
>  The existing linux guests try creating one additional huge DMA window
>  with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
>  the guest switches to dma_direct_ops and never calls TCE hypercalls
>  (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
>  and not waste time on map/unmap later. This adds a "dma64_win_addr"
>  property which is a bus address for the 64bit window and by default
>  set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
>  uses and this allows having emulated and VFIO devices on the same bus.
> 
>  This adds 4 RTAS handlers:
>  * ibm,query-pe-dma-window
>  * ibm,create-pe-dma-window
>  * ibm,remove-pe-dma-window
>  * ibm,reset-pe-dma-window
>  These are registered from type_init() callback.
> 
>  These RTAS handlers are implemented in a separate file to avoid polluting
>  spapr_iommu.c with PCI.
> 
>  This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.
> 
>  Signed-off-by: Alexey Kardashevskiy 
> >>>
> >>> Looks pretty close to ready.
> >>>
> >>> There are a handful of nits and one real error noted below.
> >>>
>  ---
>  Changes:
>  v17:
>  * fixed: "query" did return non-page-shifted value when memory hotplug 
>  is enabled
> 
>  v16:
>  * s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
>  * s/SPAPR_PCI_LIOBN()/dma_liobn[]/
> 
>  v15:
>  * moved page mask filtering to PHB realize(), use "-mempath" to know
>  if there are huge pages
>  * fixed error reporting in RTAS handlers
>  * max window size accounts now hotpluggable memory boundaries
>  ---
>   hw/ppc/Makefile.objs|   1 +
>   hw/ppc/spapr.c  |   5 +
>   hw/ppc/spapr_pci.c  |  77 +---
>   hw/ppc/spapr_rtas_ddw.c | 293 
>  
>   include/hw/pci-host/spapr.h |   8 +-
>   include/hw/ppc/spapr.h  |  16 ++-
>   trace-events|   4 +
>   7 files changed, 383 insertions(+), 21 deletions(-)
>   create mode 100644 hw/ppc/spapr_rtas_ddw.c
> 
>  diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>  index c1ffc77..986b36f 100644
>  --- a/hw/ppc/Makefile.objs
>  +++ b/hw/ppc/Makefile.objs
>  @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
>  spapr_drc.o spapr_rng.o
>   ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>   obj-y += spapr_pci_vfio.o
>   endif
>  +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>   # PowerPC 4xx boards
>   obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>   obj-y += ppc4xx_pci.o
>  diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>  index 44e401a..6ddcda9 100644
>  --- a/hw/ppc/spapr.c
>  +++ b/hw/ppc/spapr.c
>  @@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>   .driver   = "spapr-vlan", \
>   .property = "use-rx-buffer-pools", \
>   .value= "off", \
>  +}, \
>  +{\
>  +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
>  +.property = "ddw",\
>  +.value= stringify(off),\
>   },
>   
>   static void spapr_machine_2_5_instance_options(MachineState *machine)
>  diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>  index 68de523..bcf0360 100644
>  --- a/hw/ppc/spapr_pci.c
>  +++ b/hw/ppc/spapr_pci.c
>  @@ -35,6 +35,7 @@
>   #include "hw/ppc/spapr.h"
>   #include "hw/pci-host/spapr.h"
>   #include "exec/address-spaces.h"
>  +#include "exec/ram_addr.h"
>  #include <libfdt.h>
>   #include "trace.h"
>   

Re: [Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-08 Thread Alexey Kardashevskiy
On 08/06/16 15:57, David Gibson wrote:
> On Mon, Jun 06, 2016 at 06:12:58PM +1000, Alexey Kardashevskiy wrote:
>> On 06/06/16 15:57, David Gibson wrote:
>>> On Wed, Jun 01, 2016 at 06:57:42PM +1000, Alexey Kardashevskiy wrote:
 This adds support for Dynamic DMA Windows (DDW) option defined by
 the SPAPR specification which allows to have additional DMA window(s)

 The "ddw" property is enabled by default on a PHB but for compatibility
 the pseries-2.5 machine (TODO: update version) and older disable it.
>>>
>>> Looks like your todo is now todone, but you need to update the commit
>>> message.
>>>
 This also creates a single DMA window for the older machines to
 maintain backward migration.

 This implements DDW for PHB with emulated and VFIO devices. The host
 kernel support is required. The advertised IOMMU page sizes are 4K and
 64K; 16M pages are supported but not advertised by default, in order to
 enable them, the user has to specify "pgsz" property for PHB and
 enable huge pages for RAM.

 The existing linux guests try creating one additional huge DMA window
 with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
 the guest switches to dma_direct_ops and never calls TCE hypercalls
 (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
 and not waste time on map/unmap later. This adds a "dma64_win_addr"
 property which is a bus address for the 64bit window and by default
 set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
 uses and this allows having emulated and VFIO devices on the same bus.

 This adds 4 RTAS handlers:
 * ibm,query-pe-dma-window
 * ibm,create-pe-dma-window
 * ibm,remove-pe-dma-window
 * ibm,reset-pe-dma-window
 These are registered from type_init() callback.

 These RTAS handlers are implemented in a separate file to avoid polluting
 spapr_iommu.c with PCI.

 This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.

 Signed-off-by: Alexey Kardashevskiy 
>>>
>>> Looks pretty close to ready.
>>>
>>> There are a handful of nits and one real error noted below.
>>>
 ---
 Changes:
 v17:
 * fixed: "query" did return non-page-shifted value when memory hotplug is 
 enabled

 v16:
 * s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
 * s/SPAPR_PCI_LIOBN()/dma_liobn[]/

 v15:
 * moved page mask filtering to PHB realize(), use "-mempath" to know
 if there are huge pages
 * fixed error reporting in RTAS handlers
 * max window size accounts now hotpluggable memory boundaries
 ---
  hw/ppc/Makefile.objs|   1 +
  hw/ppc/spapr.c  |   5 +
  hw/ppc/spapr_pci.c  |  77 +---
  hw/ppc/spapr_rtas_ddw.c | 293 
 
  include/hw/pci-host/spapr.h |   8 +-
  include/hw/ppc/spapr.h  |  16 ++-
  trace-events|   4 +
  7 files changed, 383 insertions(+), 21 deletions(-)
  create mode 100644 hw/ppc/spapr_rtas_ddw.c

 diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
 index c1ffc77..986b36f 100644
 --- a/hw/ppc/Makefile.objs
 +++ b/hw/ppc/Makefile.objs
 @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
 spapr_drc.o spapr_rng.o
  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
  obj-y += spapr_pci_vfio.o
  endif
 +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
  # PowerPC 4xx boards
  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
  obj-y += ppc4xx_pci.o
 diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
 index 44e401a..6ddcda9 100644
 --- a/hw/ppc/spapr.c
 +++ b/hw/ppc/spapr.c
 @@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
  .driver   = "spapr-vlan", \
  .property = "use-rx-buffer-pools", \
  .value= "off", \
 +}, \
 +{\
 +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
 +.property = "ddw",\
 +.value= stringify(off),\
  },
  
  static void spapr_machine_2_5_instance_options(MachineState *machine)
 diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
 index 68de523..bcf0360 100644
 --- a/hw/ppc/spapr_pci.c
 +++ b/hw/ppc/spapr_pci.c
 @@ -35,6 +35,7 @@
  #include "hw/ppc/spapr.h"
  #include "hw/pci-host/spapr.h"
  #include "exec/address-spaces.h"
 +#include "exec/ram_addr.h"
 #include <libfdt.h>
  #include "trace.h"
  #include "qemu/error-report.h"
 @@ -45,6 +46,7 @@
  #include "hw/ppc/spapr_drc.h"
  #include "sysemu/device_tree.h"
  #include "sysemu/kvm.h"
 +#include "sysemu/hostmem.h"
  
  #include "hw/vfio/vfio.h"
  
 @@ -1088,7 +1090,7 @@ static void 
 

Re: [Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-08 Thread David Gibson
On Mon, Jun 06, 2016 at 06:12:58PM +1000, Alexey Kardashevskiy wrote:
> On 06/06/16 15:57, David Gibson wrote:
> > On Wed, Jun 01, 2016 at 06:57:42PM +1000, Alexey Kardashevskiy wrote:
> >> This adds support for Dynamic DMA Windows (DDW) option defined by
> >> the SPAPR specification which allows to have additional DMA window(s)
> >>
> >> The "ddw" property is enabled by default on a PHB but for compatibility
> >> the pseries-2.5 machine (TODO: update version) and older disable it.
> > 
> > Looks like your todo is now todone, but you need to update the commit
> > message.
> > 
> >> This also creates a single DMA window for the older machines to
> >> maintain backward migration.
> >>
> >> This implements DDW for PHB with emulated and VFIO devices. The host
> >> kernel support is required. The advertised IOMMU page sizes are 4K and
> >> 64K; 16M pages are supported but not advertised by default, in order to
> >> enable them, the user has to specify "pgsz" property for PHB and
> >> enable huge pages for RAM.
> >>
> >> The existing linux guests try creating one additional huge DMA window
> >> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> >> the guest switches to dma_direct_ops and never calls TCE hypercalls
> >> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> >> and not waste time on map/unmap later. This adds a "dma64_win_addr"
> >> property which is a bus address for the 64bit window and by default
> >> set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
> >> uses and this allows having emulated and VFIO devices on the same bus.
> >>
> >> This adds 4 RTAS handlers:
> >> * ibm,query-pe-dma-window
> >> * ibm,create-pe-dma-window
> >> * ibm,remove-pe-dma-window
> >> * ibm,reset-pe-dma-window
> >> These are registered from type_init() callback.
> >>
> >> These RTAS handlers are implemented in a separate file to avoid polluting
> >> spapr_iommu.c with PCI.
> >>
> >> This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.
> >>
> >> Signed-off-by: Alexey Kardashevskiy 
> > 
> > Looks pretty close to ready.
> > 
> > There are a handful of nits and one real error noted below.
> > 
> >> ---
> >> Changes:
> >> v17:
> >> * fixed: "query" did return non-page-shifted value when memory hotplug is 
> >> enabled
> >>
> >> v16:
> >> * s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
> >> * s/SPAPR_PCI_LIOBN()/dma_liobn[]/
> >>
> >> v15:
> >> * moved page mask filtering to PHB realize(), use "-mempath" to know
> >> if there are huge pages
> >> * fixed error reporting in RTAS handlers
> >> * max window size accounts now hotpluggable memory boundaries
> >> ---
> >>  hw/ppc/Makefile.objs|   1 +
> >>  hw/ppc/spapr.c  |   5 +
> >>  hw/ppc/spapr_pci.c  |  77 +---
> >>  hw/ppc/spapr_rtas_ddw.c | 293 
> >> 
> >>  include/hw/pci-host/spapr.h |   8 +-
> >>  include/hw/ppc/spapr.h  |  16 ++-
> >>  trace-events|   4 +
> >>  7 files changed, 383 insertions(+), 21 deletions(-)
> >>  create mode 100644 hw/ppc/spapr_rtas_ddw.c
> >>
> >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> >> index c1ffc77..986b36f 100644
> >> --- a/hw/ppc/Makefile.objs
> >> +++ b/hw/ppc/Makefile.objs
> >> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
> >> spapr_drc.o spapr_rng.o
> >>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> >>  obj-y += spapr_pci_vfio.o
> >>  endif
> >> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
> >>  # PowerPC 4xx boards
> >>  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
> >>  obj-y += ppc4xx_pci.o
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 44e401a..6ddcda9 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
> >>  .driver   = "spapr-vlan", \
> >>  .property = "use-rx-buffer-pools", \
> >>  .value= "off", \
> >> +}, \
> >> +{\
> >> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> >> +.property = "ddw",\
> >> +.value= stringify(off),\
> >>  },
> >>  
> >>  static void spapr_machine_2_5_instance_options(MachineState *machine)
> >> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >> index 68de523..bcf0360 100644
> >> --- a/hw/ppc/spapr_pci.c
> >> +++ b/hw/ppc/spapr_pci.c
> >> @@ -35,6 +35,7 @@
> >>  #include "hw/ppc/spapr.h"
> >>  #include "hw/pci-host/spapr.h"
> >>  #include "exec/address-spaces.h"
> >> +#include "exec/ram_addr.h"
> >>  #include <libfdt.h>
> >>  #include "trace.h"
> >>  #include "qemu/error-report.h"
> >> @@ -45,6 +46,7 @@
> >>  #include "hw/ppc/spapr_drc.h"
> >>  #include "sysemu/device_tree.h"
> >>  #include "sysemu/kvm.h"
> >> +#include "sysemu/hostmem.h"
> >>  
> >>  #include "hw/vfio/vfio.h"
> >>  
> >> @@ -1088,7 +1090,7 @@ static void 
> >> spapr_phb_add_pci_device(sPAPRDRConnector *drc,
> >>  int 

Re: [Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-06 Thread Alexey Kardashevskiy
On 06/06/16 15:57, David Gibson wrote:
> On Wed, Jun 01, 2016 at 06:57:42PM +1000, Alexey Kardashevskiy wrote:
>> This adds support for Dynamic DMA Windows (DDW) option defined by
>> the SPAPR specification which allows to have additional DMA window(s)
>>
>> The "ddw" property is enabled by default on a PHB but for compatibility
>> the pseries-2.5 machine (TODO: update version) and older disable it.
> 
> Looks like your todo is now todone, but you need to update the commit
> message.
> 
>> This also creates a single DMA window for the older machines to
>> maintain backward migration.
>>
>> This implements DDW for PHB with emulated and VFIO devices. The host
>> kernel support is required. The advertised IOMMU page sizes are 4K and
>> 64K; 16M pages are supported but not advertised by default, in order to
>> enable them, the user has to specify "pgsz" property for PHB and
>> enable huge pages for RAM.
>>
>> The existing linux guests try creating one additional huge DMA window
>> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
>> the guest switches to dma_direct_ops and never calls TCE hypercalls
>> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
>> and not waste time on map/unmap later. This adds a "dma64_win_addr"
>> property which is a bus address for the 64bit window and by default
>> set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
>> uses and this allows having emulated and VFIO devices on the same bus.
>>
>> This adds 4 RTAS handlers:
>> * ibm,query-pe-dma-window
>> * ibm,create-pe-dma-window
>> * ibm,remove-pe-dma-window
>> * ibm,reset-pe-dma-window
>> These are registered from type_init() callback.
>>
>> These RTAS handlers are implemented in a separate file to avoid polluting
>> spapr_iommu.c with PCI.
>>
>> This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.
>>
>> Signed-off-by: Alexey Kardashevskiy 
> 
> Looks pretty close to ready.
> 
> There are a handful of nits and one real error noted below.
> 
>> ---
>> Changes:
>> v17:
>> * fixed: "query" did return non-page-shifted value when memory hotplug is 
>> enabled
>>
>> v16:
>> * s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
>> * s/SPAPR_PCI_LIOBN()/dma_liobn[]/
>>
>> v15:
>> * moved page mask filtering to PHB realize(), use "-mempath" to know
>> if there are huge pages
>> * fixed error reporting in RTAS handlers
>> * max window size accounts now hotpluggable memory boundaries
>> ---
>>  hw/ppc/Makefile.objs|   1 +
>>  hw/ppc/spapr.c  |   5 +
>>  hw/ppc/spapr_pci.c  |  77 +---
>>  hw/ppc/spapr_rtas_ddw.c | 293 
>> 
>>  include/hw/pci-host/spapr.h |   8 +-
>>  include/hw/ppc/spapr.h  |  16 ++-
>>  trace-events|   4 +
>>  7 files changed, 383 insertions(+), 21 deletions(-)
>>  create mode 100644 hw/ppc/spapr_rtas_ddw.c
>>
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index c1ffc77..986b36f 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
>> spapr_rng.o
>>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>>  obj-y += spapr_pci_vfio.o
>>  endif
>> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>>  # PowerPC 4xx boards
>>  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>>  obj-y += ppc4xx_pci.o
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 44e401a..6ddcda9 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>>  .driver   = "spapr-vlan", \
>>  .property = "use-rx-buffer-pools", \
>>  .value= "off", \
>> +}, \
>> +{\
>> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
>> +.property = "ddw",\
>> +.value= stringify(off),\
>>  },
>>  
>>  static void spapr_machine_2_5_instance_options(MachineState *machine)
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index 68de523..bcf0360 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -35,6 +35,7 @@
>>  #include "hw/ppc/spapr.h"
>>  #include "hw/pci-host/spapr.h"
>>  #include "exec/address-spaces.h"
>> +#include "exec/ram_addr.h"
>>  #include <libfdt.h>
>>  #include "trace.h"
>>  #include "qemu/error-report.h"
>> @@ -45,6 +46,7 @@
>>  #include "hw/ppc/spapr_drc.h"
>>  #include "sysemu/device_tree.h"
>>  #include "sysemu/kvm.h"
>> +#include "sysemu/hostmem.h"
>>  
>>  #include "hw/vfio/vfio.h"
>>  
>> @@ -1088,7 +1090,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
>> *drc,
>>  int fdt_start_offset = 0, fdt_size;
>>  
>>  if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
>> -sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
>> +sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
>>  
>>  spapr_tce_set_need_vfio(tcet, true);
>>  }
> 
> Hang 

Re: [Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-06 Thread David Gibson
On Wed, Jun 01, 2016 at 06:57:42PM +1000, Alexey Kardashevskiy wrote:
> This adds support for Dynamic DMA Windows (DDW) option defined by
> the SPAPR specification which allows to have additional DMA window(s)
> 
> The "ddw" property is enabled by default on a PHB but for compatibility
> the pseries-2.5 machine (TODO: update version) and older disable it.

Looks like your todo is now todone, but you need to update the commit
message.

> This also creates a single DMA window for the older machines to
> maintain backward migration.
> 
> This implements DDW for PHB with emulated and VFIO devices. The host
> kernel support is required. The advertised IOMMU page sizes are 4K and
> 64K; 16M pages are supported but not advertised by default, in order to
> enable them, the user has to specify "pgsz" property for PHB and
> enable huge pages for RAM.
> 
> The existing linux guests try creating one additional huge DMA window
> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> the guest switches to dma_direct_ops and never calls TCE hypercalls
> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> and not waste time on map/unmap later. This adds a "dma64_win_addr"
> property which is a bus address for the 64bit window and by default
> set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
> uses and this allows having emulated and VFIO devices on the same bus.
> 
> This adds 4 RTAS handlers:
> * ibm,query-pe-dma-window
> * ibm,create-pe-dma-window
> * ibm,remove-pe-dma-window
> * ibm,reset-pe-dma-window
> These are registered from type_init() callback.
> 
> These RTAS handlers are implemented in a separate file to avoid polluting
> spapr_iommu.c with PCI.
> 
> This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.
> 
> Signed-off-by: Alexey Kardashevskiy 

Looks pretty close to ready.

There are a handful of nits and one real error noted below.

> ---
> Changes:
> v17:
> * fixed: "query" did return non-page-shifted value when memory hotplug is 
> enabled
> 
> v16:
> * s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
> * s/SPAPR_PCI_LIOBN()/dma_liobn[]/
> 
> v15:
> * moved page mask filtering to PHB realize(), use "-mempath" to know
> if there are huge pages
> * fixed error reporting in RTAS handlers
> * max window size accounts now hotpluggable memory boundaries
> ---
>  hw/ppc/Makefile.objs|   1 +
>  hw/ppc/spapr.c  |   5 +
>  hw/ppc/spapr_pci.c  |  77 +---
>  hw/ppc/spapr_rtas_ddw.c | 293 
> 
>  include/hw/pci-host/spapr.h |   8 +-
>  include/hw/ppc/spapr.h  |  16 ++-
>  trace-events|   4 +
>  7 files changed, 383 insertions(+), 21 deletions(-)
>  create mode 100644 hw/ppc/spapr_rtas_ddw.c
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..986b36f 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
> spapr_rng.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>  # PowerPC 4xx boards
>  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>  obj-y += ppc4xx_pci.o
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 44e401a..6ddcda9 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>  .driver   = "spapr-vlan", \
>  .property = "use-rx-buffer-pools", \
>  .value= "off", \
> +}, \
> +{\
> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> +.property = "ddw",\
> +.value= stringify(off),\
>  },
>  
>  static void spapr_machine_2_5_instance_options(MachineState *machine)
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 68de523..bcf0360 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -35,6 +35,7 @@
>  #include "hw/ppc/spapr.h"
>  #include "hw/pci-host/spapr.h"
>  #include "exec/address-spaces.h"
> +#include "exec/ram_addr.h"
>  #include <libfdt.h>
>  #include "trace.h"
>  #include "qemu/error-report.h"
> @@ -45,6 +46,7 @@
>  #include "hw/ppc/spapr_drc.h"
>  #include "sysemu/device_tree.h"
>  #include "sysemu/kvm.h"
> +#include "sysemu/hostmem.h"
>  
>  #include "hw/vfio/vfio.h"
>  
> @@ -1088,7 +1090,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
> *drc,
>  int fdt_start_offset = 0, fdt_size;
>  
>  if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> -sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
> +sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
>  
>  spapr_tce_set_need_vfio(tcet, true);
>  }

Hang on.. I thought you'd got rid of the need for this explicit
set_need_vfio() stuff.

> @@ -1310,11 +1312,14 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)

[Qemu-devel] [PATCH qemu v17 11/12] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-06-01 Thread Alexey Kardashevskiy
This adds support for the Dynamic DMA Windows (DDW) option defined by
the SPAPR specification, which allows having additional DMA window(s).

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine (TODO: update version) and older disable it.
This also creates a single DMA window for the older machines to
maintain backward migration.

This implements DDW for PHB with emulated and VFIO devices. Host
kernel support is required. The advertised IOMMU page sizes are 4K and
64K; 16M pages are supported but not advertised by default. In order to
enable them, the user has to specify the "pgsz" property for the PHB and
enable huge pages for RAM.

The existing Linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to it. If this succeeds,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and is by default
set to 0x800.0000.0000.0000 as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

This changes sPAPRPHBState::dma_liobn to an array to allow 2 LIOBNs.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v17:
* fixed: "query" returned a non-page-shifted value when memory hotplug is
enabled

v16:
* s/dma_liobn/dma_liobn[SPAPR_PCI_DMA_MAX_WINDOWS]/
* s/SPAPR_PCI_LIOBN()/dma_liobn[]/

v15:
* moved page mask filtering to PHB realize(), use "-mem-path" to know
if there are huge pages
* fixed error reporting in RTAS handlers
* max window size now accounts for hotpluggable memory boundaries
---
 hw/ppc/Makefile.objs|   1 +
 hw/ppc/spapr.c  |   5 +
 hw/ppc/spapr_pci.c  |  77 +---
 hw/ppc/spapr_rtas_ddw.c | 293 
 include/hw/pci-host/spapr.h |   8 +-
 include/hw/ppc/spapr.h  |  16 ++-
 trace-events|   4 +
 7 files changed, 383 insertions(+), 21 deletions(-)
 create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 44e401a..6ddcda9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2366,6 +2366,11 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
 .driver   = "spapr-vlan", \
 .property = "use-rx-buffer-pools", \
 .value= "off", \
+}, \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
 },
 
 static void spapr_machine_2_5_instance_options(MachineState *machine)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 68de523..bcf0360 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -35,6 +35,7 @@
 #include "hw/ppc/spapr.h"
 #include "hw/pci-host/spapr.h"
 #include "exec/address-spaces.h"
+#include "exec/ram_addr.h"
 #include <libfdt.h>
 #include "trace.h"
 #include "qemu/error-report.h"
@@ -45,6 +46,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hostmem.h"
 
 #include "hw/vfio/vfio.h"
 
@@ -1088,7 +1090,7 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
 int fdt_start_offset = 0, fdt_size;
 
 if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
-sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
 
 spapr_tce_set_need_vfio(tcet, true);
 }
@@ -1310,11 +1312,14 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
 PCIBus *bus;
 uint64_t msi_window_size = 4096;
 sPAPRTCETable *tcet;
+const unsigned windows_supported =
+sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 
 if (sphb->index != (uint32_t)-1) {
 hwaddr windows_base;
 
-if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1)
+if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != (uint32_t)-1)
+|| ((sphb->dma_liobn[1] != (uint32_t)-1) && (windows_supported > 1))
 || (sphb->mem_win_addr != (hwaddr)-1)
 ||