Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread David Gibson
On Tue, Apr 21, 2015 at 09:47:54PM +1000, Alexey Kardashevskiy wrote:
> On 04/21/2015 07:43 PM, David Gibson wrote:
> >On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:
> >>On 04/20/2015 12:44 PM, David Gibson wrote:
> >>>On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
> On 04/16/2015 04:07 PM, David Gibson wrote:
> >On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
> >>At the moment the iommu_table struct has a set_bypass() which enables/
> >>disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
> >>which calls this callback when external IOMMU users such as VFIO are
> >>about to get over a PHB.
> >>
> >>The set_bypass() callback is not really an iommu_table function but
> >>IOMMU/PE function. This introduces a iommu_table_group_ops struct and
> >>adds a set_ownership() callback to it which is called when an external
> >>user takes control over the IOMMU.
> >
> >Do you really need separate ops structures at both the single table
> >and table group level?  The different tables in a group will all
> >belong to the same basic iommu won't they?
> 
> 
> IOMMU tables exist alone in VIO. Also, the platform code uses just a table
> (or it is in bypass mode) and does not care about table groups. It looked
> more clean for myself to keep them separated. Should I still merge
> those?
> >>>
> >>>Ok, that sounds like a reasonable argument for keeping them separate,
> >>>at least for now.
> >>>
> >>This renames set_bypass() to set_ownership() as it is not necessarily
> >>just enabling bypassing, it can be something else/more so let's give it
> >>more generic name. The bool parameter is inverted.
> >>
> >>The callback is implemented for IODA2 only. Other platforms (P5IOC2,
> >>IODA1) will use the old iommu_take_ownership/iommu_release_ownership 
> >>API.
> >>
> >>Signed-off-by: Alexey Kardashevskiy 
> >>---
> >>  arch/powerpc/include/asm/iommu.h  | 14 +-
> >>  arch/powerpc/platforms/powernv/pci-ioda.c | 30 
> >> ++
> >>  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 
> >> +
> >>  3 files changed, 56 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/arch/powerpc/include/asm/iommu.h 
> >>b/arch/powerpc/include/asm/iommu.h
> >>index b9e50d3..d1f8c6c 100644
> >>--- a/arch/powerpc/include/asm/iommu.h
> >>+++ b/arch/powerpc/include/asm/iommu.h
> >>@@ -92,7 +92,6 @@ struct iommu_table {
> >>unsigned long  it_page_shift;/* table iommu page size */
> >>struct iommu_table_group *it_group;
> >>struct iommu_table_ops *it_ops;
> >>-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
> >>  };
> >>
> >>  /* Pure 2^n version of get_order */
> >>@@ -127,11 +126,24 @@ extern struct iommu_table 
> >>*iommu_init_table(struct iommu_table * tbl,
> >>
> >>  #define IOMMU_TABLE_GROUP_MAX_TABLES  1
> >>
> >>+struct iommu_table_group;
> >>+
> >>+struct iommu_table_group_ops {
> >>+   /*
> >>+* Switches ownership from the kernel itself to an external
> >>+* user. While ownership is enabled, the kernel cannot use IOMMU
> >>+* for itself.
> >>+*/
> >>+   void (*set_ownership)(struct iommu_table_group *table_group,
> >>+   bool enable);
> >
> >The meaning of "enable" in a function called "set_ownership" is
> >entirely obscure.
> 
> Suggest something better please :) I have nothing better...
> >>>
> >>>Well, given it's "set_ownership" you could have "owner" - that would
> >>>want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
> >>>rather than a bool.
> >>
> >>
> >>It is iommu_take_ownership() in upstream and it is assumed that the owner is
> >>anything but the platform code (for now and probably for ever - VFIO). I am
> >>not changing this now, just using same naming approach when adding a
> >>callback with a similar name.
> >
> >So "enabled" is actually that non kernel ownership is enabled.  That
> >is totally non-obvious.
> >
> >>>Or you could leave it a bool but call it "allow_bypass".
> >>
> >>Commented below.
> >>
> >>+};
> >>+
> >>  struct iommu_table_group {
> >>  #ifdef CONFIG_IOMMU_API
> >>struct iommu_group *group;
> >>  #endif
> >>struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> >>+   struct iommu_table_group_ops *ops;
> >>  };
> >>
> >>  #ifdef CONFIG_IOMMU_API
> >>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> >>b/arch/powerpc/platforms/powernv/pci-ioda.c
> >>index a964c50..9687731 100644
> >>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
> >>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread Alexey Kardashevskiy

On 04/21/2015 07:43 PM, David Gibson wrote:

On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:

On 04/20/2015 12:44 PM, David Gibson wrote:

On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table
(or it is in bypass mode) and does not care about table groups. It looked
more clean for myself to keep them separated. Should I still merge
those?


Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.


This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy 
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While ownership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...


Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.



It is iommu_take_ownership() in upstream and it is assumed that the owner is
anything but the platform code (for now and probably for ever - VFIO). I am
not changing this now, just using same naming approach when adding a
callback with a similar name.


So "enabled" is actually that non kernel ownership is enabled.  That
is totally non-obvious.


Or you could leave it a bool but call it "allow_bypass".


Commented below.


+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+  

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread David Gibson
On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:
> On 04/20/2015 12:44 PM, David Gibson wrote:
> >On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
> >>On 04/16/2015 04:07 PM, David Gibson wrote:
> >>>On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
> At the moment the iommu_table struct has a set_bypass() which enables/
> disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
> which calls this callback when external IOMMU users such as VFIO are
> about to get over a PHB.
> 
> The set_bypass() callback is not really an iommu_table function but
> IOMMU/PE function. This introduces a iommu_table_group_ops struct and
> adds a set_ownership() callback to it which is called when an external
> user takes control over the IOMMU.
> >>>
> >>>Do you really need separate ops structures at both the single table
> >>>and table group level?  The different tables in a group will all
> >>>belong to the same basic iommu won't they?
> >>
> >>
> >>IOMMU tables exist alone in VIO. Also, the platform code uses just a table
> >>(or it is in bypass mode) and does not care about table groups. It looked
> >>more clean for myself to keep them separated. Should I still merge
> >>those?
> >
> >Ok, that sounds like a reasonable argument for keeping them separate,
> >at least for now.
> >
> This renames set_bypass() to set_ownership() as it is not necessarily
> just enabling bypassing, it can be something else/more so let's give it
> more generic name. The bool parameter is inverted.
> 
> The callback is implemented for IODA2 only. Other platforms (P5IOC2,
> IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>   arch/powerpc/include/asm/iommu.h  | 14 +-
>   arch/powerpc/platforms/powernv/pci-ioda.c | 30 
>  ++
>   drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
>   3 files changed, 56 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h 
> b/arch/powerpc/include/asm/iommu.h
> index b9e50d3..d1f8c6c 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -92,7 +92,6 @@ struct iommu_table {
>   unsigned long  it_page_shift;/* table iommu page size */
>   struct iommu_table_group *it_group;
>   struct iommu_table_ops *it_ops;
> - void (*set_bypass)(struct iommu_table *tbl, bool enable);
>   };
> 
>   /* Pure 2^n version of get_order */
> @@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
> iommu_table * tbl,
> 
>   #define IOMMU_TABLE_GROUP_MAX_TABLES1
> 
> +struct iommu_table_group;
> +
> +struct iommu_table_group_ops {
> + /*
> +  * Switches ownership from the kernel itself to an external
> +  * user. While ownership is enabled, the kernel cannot use IOMMU
> +  * for itself.
> +  */
> + void (*set_ownership)(struct iommu_table_group *table_group,
> + bool enable);
> >>>
> >>>The meaning of "enable" in a function called "set_ownership" is
> >>>entirely obscure.
> >>
> >>Suggest something better please :) I have nothing better...
> >
> >Well, given it's "set_ownership" you could have "owner" - that would
> >want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
> >rather than a bool.
> 
> 
> It is iommu_take_ownership() in upstream and it is assumed that the owner is
> anything but the platform code (for now and probably for ever - VFIO). I am
> not changing this now, just using same naming approach when adding a
> callback with a similar name.

So "enabled" is actually that non kernel ownership is enabled.  That
is totally non-obvious.

> >Or you could leave it a bool but call it "allow_bypass".
> 
> Commented below.
> 
> +};
> +
>   struct iommu_table_group {
>   #ifdef CONFIG_IOMMU_API
>   struct iommu_group *group;
>   #endif
>   struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> + struct iommu_table_group_ops *ops;
>   };
> 
>   #ifdef CONFIG_IOMMU_API
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index a964c50..9687731 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct 
> pnv_phb *phb,
>   __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * 
>  segs));
>   }
> 
> -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool 
> enable)
> +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
>   {
> - struct 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread David Gibson
On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:
 On 04/20/2015 12:44 PM, David Gibson wrote:
 On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
 On 04/16/2015 04:07 PM, David Gibson wrote:
 On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
 At the moment the iommu_table struct has a set_bypass() which enables/
 disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
 which calls this callback when external IOMMU users such as VFIO are
 about to get over a PHB.
 
 The set_bypass() callback is not really an iommu_table function but
 IOMMU/PE function. This introduces a iommu_table_group_ops struct and
 adds a set_ownership() callback to it which is called when an external
 user takes control over the IOMMU.
 
 Do you really need separate ops structures at both the single table
 and table group level?  The different tables in a group will all
 belong to the same basic iommu won't they?
 
 
 IOMMU tables exist alone in VIO. Also, the platform code uses just a table
 (or it is in bypass mode) and does not care about table groups. It looked
 more clean for myself to keep them separated. Should I still merge
 those?
 
 Ok, that sounds like a reasonable argument for keeping them separate,
 at least for now.
 
 This renames set_bypass() to set_ownership() as it is not necessarily
 just enabling bypassing, it can be something else/more so let's give it
 more generic name. The bool parameter is inverted.
 
 The callback is implemented for IODA2 only. Other platforms (P5IOC2,
 IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
   arch/powerpc/include/asm/iommu.h  | 14 +-
   arch/powerpc/platforms/powernv/pci-ioda.c | 30 
  ++
   drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
   3 files changed, 56 insertions(+), 13 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index b9e50d3..d1f8c6c 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -92,7 +92,6 @@ struct iommu_table {
   unsigned long  it_page_shift;/* table iommu page size */
   struct iommu_table_group *it_group;
   struct iommu_table_ops *it_ops;
 - void (*set_bypass)(struct iommu_table *tbl, bool enable);
   };
 
   /* Pure 2^n version of get_order */
 @@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
 iommu_table * tbl,
 
   #define IOMMU_TABLE_GROUP_MAX_TABLES1
 
 +struct iommu_table_group;
 +
 +struct iommu_table_group_ops {
 + /*
 +  * Switches ownership from the kernel itself to an external
 +  * user. While ownership is enabled, the kernel cannot use IOMMU
 +  * for itself.
 +  */
 + void (*set_ownership)(struct iommu_table_group *table_group,
 + bool enable);
 
 The meaning of "enable" in a function called "set_ownership" is
 entirely obscure.
 
 Suggest something better please :) I have nothing better...
 
 Well, given it's "set_ownership" you could have "owner" - that would
 want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
 rather than a bool.
 
 
 It is iommu_take_ownership() in upstream and it is assumed that the owner is
 anything but the platform code (for now and probably for ever - VFIO). I am
 not changing this now, just using same naming approach when adding a
 callback with a similar name.

So "enabled" is actually that non kernel ownership is enabled.  That
is totally non-obvious.

 Or you could leave it a bool but call it "allow_bypass".
 
 Commented below.
 
 +};
 +
   struct iommu_table_group {
   #ifdef CONFIG_IOMMU_API
   struct iommu_group *group;
   #endif
   struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 + struct iommu_table_group_ops *ops;
   };
 
   #ifdef CONFIG_IOMMU_API
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a964c50..9687731 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct 
 pnv_phb *phb,
   __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * 
  segs));
   }
 
 -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool 
 enable)
 +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
   {
 - struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
 -   table_group);
   uint16_t window_id = (pe->pe_number << 1 ) + 1;
   int64_t rc;
 
 @@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct 
 iommu_table *tbl, bool enable)
* host side.
*/
   if (pe->pdev)
 - set_iommu_table_base(&pe->pdev->dev, tbl);
 + 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread Alexey Kardashevskiy

On 04/21/2015 07:43 PM, David Gibson wrote:

On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:

On 04/20/2015 12:44 PM, David Gibson wrote:

On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table
(or it is in bypass mode) and does not care about table groups. It looked
more clean for myself to keep them separated. Should I still merge
those?


Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.


This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While ownership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...


Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.



It is iommu_take_ownership() in upstream and it is assumed that the owner is
anything but the platform code (for now and probably for ever - VFIO). I am
not changing this now, just using same naming approach when adding a
callback with a similar name.


So "enabled" is actually that non kernel ownership is enabled.  That
is totally non-obvious.


Or you could leave it a bool but call it "allow_bypass".


Commented below.


+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+   

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-21 Thread David Gibson
On Tue, Apr 21, 2015 at 09:47:54PM +1000, Alexey Kardashevskiy wrote:
 On 04/21/2015 07:43 PM, David Gibson wrote:
 On Mon, Apr 20, 2015 at 04:55:32PM +1000, Alexey Kardashevskiy wrote:
 On 04/20/2015 12:44 PM, David Gibson wrote:
 On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
 On 04/16/2015 04:07 PM, David Gibson wrote:
 On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
 At the moment the iommu_table struct has a set_bypass() which enables/
 disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
 which calls this callback when external IOMMU users such as VFIO are
 about to get over a PHB.
 
 The set_bypass() callback is not really an iommu_table function but
 IOMMU/PE function. This introduces a iommu_table_group_ops struct and
 adds a set_ownership() callback to it which is called when an external
 user takes control over the IOMMU.
 
 Do you really need separate ops structures at both the single table
 and table group level?  The different tables in a group will all
 belong to the same basic iommu won't they?
 
 
 IOMMU tables exist alone in VIO. Also, the platform code uses just a table
 (or it is in bypass mode) and does not care about table groups. It looked
 more clean for myself to keep them separated. Should I still merge
 those?
 
 Ok, that sounds like a reasonable argument for keeping them separate,
 at least for now.
 
 This renames set_bypass() to set_ownership() as it is not necessarily
 just enabling bypassing, it can be something else/more so let's give it
 more generic name. The bool parameter is inverted.
 
 The callback is implemented for IODA2 only. Other platforms (P5IOC2,
 IODA1) will use the old iommu_take_ownership/iommu_release_ownership 
 API.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
   arch/powerpc/include/asm/iommu.h  | 14 +-
   arch/powerpc/platforms/powernv/pci-ioda.c | 30 
  ++
   drivers/vfio/vfio_iommu_spapr_tce.c   | 25 
  +
   3 files changed, 56 insertions(+), 13 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index b9e50d3..d1f8c6c 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -92,7 +92,6 @@ struct iommu_table {
 unsigned long  it_page_shift;/* table iommu page size */
 struct iommu_table_group *it_group;
 struct iommu_table_ops *it_ops;
 -   void (*set_bypass)(struct iommu_table *tbl, bool enable);
   };
 
   /* Pure 2^n version of get_order */
 @@ -127,11 +126,24 @@ extern struct iommu_table 
 *iommu_init_table(struct iommu_table * tbl,
 
   #define IOMMU_TABLE_GROUP_MAX_TABLES  1
 
 +struct iommu_table_group;
 +
 +struct iommu_table_group_ops {
 +   /*
 +* Switches ownership from the kernel itself to an external
 +* user. While ownership is enabled, the kernel cannot use IOMMU
 +* for itself.
 +*/
 +   void (*set_ownership)(struct iommu_table_group *table_group,
 +   bool enable);
 
 The meaning of "enable" in a function called "set_ownership" is
 entirely obscure.
 
 Suggest something better please :) I have nothing better...
 
 Well, given it's "set_ownership" you could have "owner" - that would
 want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
 rather than a bool.
 
 
 It is iommu_take_ownership() in upstream and it is assumed that the owner is
 anything but the platform code (for now and probably for ever - VFIO). I am
 not changing this now, just using same naming approach when adding a
 callback with a similar name.
 
 So "enabled" is actually that non kernel ownership is enabled.  That
 is totally non-obvious.
 
 Or you could leave it a bool but call it "allow_bypass".
 
 Commented below.
 
 +};
 +
   struct iommu_table_group {
   #ifdef CONFIG_IOMMU_API
 struct iommu_group *group;
   #endif
 struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 +   struct iommu_table_group_ops *ops;
   };
 
   #ifdef CONFIG_IOMMU_API
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a964c50..9687731 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct 
 pnv_phb *phb,
 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * 
  segs));
   }
 
 -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool 
 enable)
 +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool 
 enable)
   {
 -   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct 
 pnv_ioda_pe,
 - table_group);
 uint16_t window_id = (pe->pe_number << 1 ) + 1;
 int64_t rc;
 
 @@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct 
 iommu_table *tbl, bool enable)
   

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-20 Thread Alexey Kardashevskiy

On 04/20/2015 12:44 PM, David Gibson wrote:

On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table
(or it is in bypass mode) and does not care about table groups. It looked
more clean for myself to keep them separated. Should I still merge
those?


Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.


This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy 
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While ownership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...


Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.



It is iommu_take_ownership() in upstream and it is assumed that the owner 
is anything but the platform code (for now and probably for ever - VFIO). I 
am not changing this now, just using same naming approach when adding a 
callback with a similar name.




Or you could leave it a bool but call it "allow_bypass".


Commented below.









+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+   &pe->table_group.tables[0]);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
@@ -1302,13 +1301,27 @@ static void 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-20 Thread David Gibson
On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
> On 04/16/2015 04:07 PM, David Gibson wrote:
> >On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
> >>At the moment the iommu_table struct has a set_bypass() which enables/
> >>disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
> >>which calls this callback when external IOMMU users such as VFIO are
> >>about to get over a PHB.
> >>
> >>The set_bypass() callback is not really an iommu_table function but
> >>IOMMU/PE function. This introduces a iommu_table_group_ops struct and
> >>adds a set_ownership() callback to it which is called when an external
> >>user takes control over the IOMMU.
> >
> >Do you really need separate ops structures at both the single table
> >and table group level?  The different tables in a group will all
> >belong to the same basic iommu won't they?
> 
> 
> IOMMU tables exist alone in VIO. Also, the platform code uses just a table
> (or it is in bypass mode) and does not care about table groups. It looked
> more clean for myself to keep them separated. Should I still merge
> those?

Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.

> >>This renames set_bypass() to set_ownership() as it is not necessarily
> >>just enabling bypassing, it can be something else/more so let's give it
> >>more generic name. The bool parameter is inverted.
> >>
> >>The callback is implemented for IODA2 only. Other platforms (P5IOC2,
> >>IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
> >>
> >>Signed-off-by: Alexey Kardashevskiy 
> >>---
> >>  arch/powerpc/include/asm/iommu.h  | 14 +-
> >>  arch/powerpc/platforms/powernv/pci-ioda.c | 30 
> >> ++
> >>  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
> >>  3 files changed, 56 insertions(+), 13 deletions(-)
> >>
> >>diff --git a/arch/powerpc/include/asm/iommu.h 
> >>b/arch/powerpc/include/asm/iommu.h
> >>index b9e50d3..d1f8c6c 100644
> >>--- a/arch/powerpc/include/asm/iommu.h
> >>+++ b/arch/powerpc/include/asm/iommu.h
> >>@@ -92,7 +92,6 @@ struct iommu_table {
> >>unsigned long  it_page_shift;/* table iommu page size */
> >>struct iommu_table_group *it_group;
> >>struct iommu_table_ops *it_ops;
> >>-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
> >>  };
> >>
> >>  /* Pure 2^n version of get_order */
> >>@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
> >>iommu_table * tbl,
> >>
> >>  #define IOMMU_TABLE_GROUP_MAX_TABLES  1
> >>
> >>+struct iommu_table_group;
> >>+
> >>+struct iommu_table_group_ops {
> >>+   /*
> >>+* Switches ownership from the kernel itself to an external
> >>+* user. While onwership is enabled, the kernel cannot use IOMMU
> >>+* for itself.
> >>+*/
> >>+   void (*set_ownership)(struct iommu_table_group *table_group,
> >>+   bool enable);
> >
> >The meaning of "enable" in a function called "set_ownership" is
> >entirely obscure.
> 
> Suggest something better please :) I have nothing better...

Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.

Or you could leave it a bool but call it "allow_bypass".

> 
> 
> >
> >>+};
> >>+
> >>  struct iommu_table_group {
> >>  #ifdef CONFIG_IOMMU_API
> >>struct iommu_group *group;
> >>  #endif
> >>struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> >>+   struct iommu_table_group_ops *ops;
> >>  };
> >>
> >>  #ifdef CONFIG_IOMMU_API
> >>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> >>b/arch/powerpc/platforms/powernv/pci-ioda.c
> >>index a964c50..9687731 100644
> >>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
> >>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> >>@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
> >>*phb,
> >>__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
> >>  }
> >>
> >>-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
> >>+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
> >>  {
> >>-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
> >>- table_group);
> >>uint16_t window_id = (pe->pe_number << 1 ) + 1;
> >>int64_t rc;
> >>
> >>@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct 
> >>iommu_table *tbl, bool enable)
> >> * host side.
> >> */
> >>if (pe->pdev)
> >>-   set_iommu_table_base(&pe->pdev->dev, tbl);
> >>+   set_iommu_table_base(&pe->pdev->dev,
> >>+   &pe->table_group.tables[0]);
> >>else
> >>pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
> >>}
> >>@@ -1302,13 +1301,27 @@ static void 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-20 Thread David Gibson
On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:
 On 04/16/2015 04:07 PM, David Gibson wrote:
 On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
 At the moment the iommu_table struct has a set_bypass() which enables/
 disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
 which calls this callback when external IOMMU users such as VFIO are
 about to get over a PHB.
 
 The set_bypass() callback is not really an iommu_table function but
 IOMMU/PE function. This introduces a iommu_table_group_ops struct and
 adds a set_ownership() callback to it which is called when an external
 user takes control over the IOMMU.
 
 Do you really need separate ops structures at both the single table
 and table group level?  The different tables in a group will all
 belong to the same basic iommu won't they?
 
 
 IOMMU tables exist alone in VIO. Also, the platform code uses just a table
 (or it is in bypass mode) and does not care about table groups. It looked
 more clean for myself to keep them separated. Should I still merge
 those?

Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.

 This renames set_bypass() to set_ownership() as it is not necessarily
 just enabling bypassing, it can be something else/more so let's give it
 more generic name. The bool parameter is inverted.
 
 The callback is implemented for IODA2 only. Other platforms (P5IOC2,
 IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
   arch/powerpc/include/asm/iommu.h  | 14 +-
   arch/powerpc/platforms/powernv/pci-ioda.c | 30 
  ++
   drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
   3 files changed, 56 insertions(+), 13 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index b9e50d3..d1f8c6c 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -92,7 +92,6 @@ struct iommu_table {
 unsigned long  it_page_shift;/* table iommu page size */
 struct iommu_table_group *it_group;
 struct iommu_table_ops *it_ops;
 -   void (*set_bypass)(struct iommu_table *tbl, bool enable);
   };
 
   /* Pure 2^n version of get_order */
 @@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
 iommu_table * tbl,
 
   #define IOMMU_TABLE_GROUP_MAX_TABLES  1
 
 +struct iommu_table_group;
 +
 +struct iommu_table_group_ops {
 +   /*
 +* Switches ownership from the kernel itself to an external
 +* user. While onwership is enabled, the kernel cannot use IOMMU
 +* for itself.
 +*/
 +   void (*set_ownership)(struct iommu_table_group *table_group,
 +   bool enable);
 
 The meaning of "enable" in a function called "set_ownership" is
 entirely obscure.
 
 Suggest something better please :) I have nothing better...

Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.

Or you could leave it a bool but call it "allow_bypass".

 
 
 
 +};
 +
   struct iommu_table_group {
   #ifdef CONFIG_IOMMU_API
 struct iommu_group *group;
   #endif
 struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 +   struct iommu_table_group_ops *ops;
   };
 
   #ifdef CONFIG_IOMMU_API
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a964c50..9687731 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
 *phb,
 __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
   }
 
 -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
 +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
   {
 -   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
 - table_group);
 uint16_t window_id = (pe->pe_number << 1 ) + 1;
 int64_t rc;
 
 @@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct 
 iommu_table *tbl, bool enable)
  * host side.
  */
 if (pe->pdev)
 -   set_iommu_table_base(&pe->pdev->dev, tbl);
 +   set_iommu_table_base(&pe->pdev->dev,
 +   &pe->table_group.tables[0]);
 else
 pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
 }
 @@ -1302,13 +1301,27 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct 
 pnv_phb *phb,
 /* TVE #1 is selected by PCI address bit 59 */
 pe->tce_bypass_base = 1ull << 59;
 
 -   /* Install set_bypass callback for VFIO */
 -   pe->table_group.tables[0].set_bypass = pnv_pci_ioda2_set_bypass;
 -
 /* Enable bypass by default */
 -   

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-20 Thread Alexey Kardashevskiy

On 04/20/2015 12:44 PM, David Gibson wrote:

On Fri, Apr 17, 2015 at 08:09:29PM +1000, Alexey Kardashevskiy wrote:

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table
(or it is in bypass mode) and does not care about table groups. It looked
more clean for myself to keep them separated. Should I still merge
those?


Ok, that sounds like a reasonable argument for keeping them separate,
at least for now.


This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While onwership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...


Well, given it's "set_ownership" you could have "owner" - that would
want to be an enum with OWNER_KERNEL and OWNER_VFIO or something
rather than a bool.



It is iommu_take_ownership() in upstream and it is assumed that the owner 
is anything but the platform code (for now and probably for ever - VFIO). I 
am not changing this now, just using same naming approach when adding a 
callback with a similar name.




Or you could leave it a bool but call it "allow_bypass".


Commented below.









+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+   &pe->table_group.tables[0]);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
@@ -1302,13 +1301,27 @@ static 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-17 Thread Alexey Kardashevskiy

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table 
(or it is in bypass mode) and does not care about table groups. It looked 
more clean for myself to keep them separated. Should I still merge those?






This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy 
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While onwership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...





+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+   &pe->table_group.tables[0]);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
@@ -1302,13 +1301,27 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct 
pnv_phb *phb,
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;

-   /* Install set_bypass callback for VFIO */
-   pe->table_group.tables[0].set_bypass = pnv_pci_ioda2_set_bypass;
-
/* Enable bypass by default */
-   pnv_pci_ioda2_set_bypass(&pe->table_group.tables[0], true);
+   pnv_pci_ioda2_set_bypass(pe, true);
  }

+static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
+bool enable)
+{
+   struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+   table_group);
+ 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-17 Thread Alexey Kardashevskiy

On 04/16/2015 04:07 PM, David Gibson wrote:

On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:

At the moment the iommu_table struct has a set_bypass() which enables/
disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
which calls this callback when external IOMMU users such as VFIO are
about to get over a PHB.

The set_bypass() callback is not really an iommu_table function but
IOMMU/PE function. This introduces a iommu_table_group_ops struct and
adds a set_ownership() callback to it which is called when an external
user takes control over the IOMMU.


Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?



IOMMU tables exist alone in VIO. Also, the platform code uses just a table 
(or it is in bypass mode) and does not care about table groups. It looked 
more clean for myself to keep them separated. Should I still merge those?






This renames set_bypass() to set_ownership() as it is not necessarily
just enabling bypassing, it can be something else/more so let's give it
more generic name. The bool parameter is inverted.

The callback is implemented for IODA2 only. Other platforms (P5IOC2,
IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index b9e50d3..d1f8c6c 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -92,7 +92,6 @@ struct iommu_table {
unsigned long  it_page_shift;/* table iommu page size */
struct iommu_table_group *it_group;
struct iommu_table_ops *it_ops;
-   void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };

  /* Pure 2^n version of get_order */
@@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
iommu_table * tbl,

  #define IOMMU_TABLE_GROUP_MAX_TABLES  1

+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+   /*
+* Switches ownership from the kernel itself to an external
+* user. While onwership is enabled, the kernel cannot use IOMMU
+* for itself.
+*/
+   void (*set_ownership)(struct iommu_table_group *table_group,
+   bool enable);


The meaning of "enable" in a function called "set_ownership" is
entirely obscure.


Suggest something better please :) I have nothing better...





+};
+
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
struct iommu_group *group;
  #endif
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+   struct iommu_table_group_ops *ops;
  };

  #ifdef CONFIG_IOMMU_API
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a964c50..9687731 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
*phb,
__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }

-static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
-   struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
- table_group);
uint16_t window_id = (pe->pe_number << 1 ) + 1;
int64_t rc;

@@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
*tbl, bool enable)
 * host side.
 */
if (pe->pdev)
-   set_iommu_table_base(&pe->pdev->dev, tbl);
+   set_iommu_table_base(&pe->pdev->dev,
+   &pe->table_group.tables[0]);
else
pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
}
@@ -1302,13 +1301,27 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct 
pnv_phb *phb,
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;

-   /* Install set_bypass callback for VFIO */
-   pe->table_group.tables[0].set_bypass = pnv_pci_ioda2_set_bypass;
-
/* Enable bypass by default */
-   pnv_pci_ioda2_set_bypass(&pe->table_group.tables[0], true);
+   pnv_pci_ioda2_set_bypass(pe, true);
  }

+static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
+bool enable)
+{
+   struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+   

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-16 Thread David Gibson
On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
> At the moment the iommu_table struct has a set_bypass() which enables/
> disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
> which calls this callback when external IOMMU users such as VFIO are
> about to get over a PHB.
> 
> The set_bypass() callback is not really an iommu_table function but
> IOMMU/PE function. This introduces a iommu_table_group_ops struct and
> adds a set_ownership() callback to it which is called when an external
> user takes control over the IOMMU.

Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?

> This renames set_bypass() to set_ownership() as it is not necessarily
> just enabling bypassing, it can be something else/more so let's give it
> more generic name. The bool parameter is inverted.
> 
> The callback is implemented for IODA2 only. Other platforms (P5IOC2,
> IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  arch/powerpc/include/asm/iommu.h  | 14 +-
>  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
>  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
>  3 files changed, 56 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h 
> b/arch/powerpc/include/asm/iommu.h
> index b9e50d3..d1f8c6c 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -92,7 +92,6 @@ struct iommu_table {
>   unsigned long  it_page_shift;/* table iommu page size */
>   struct iommu_table_group *it_group;
>   struct iommu_table_ops *it_ops;
> - void (*set_bypass)(struct iommu_table *tbl, bool enable);
>  };
>  
>  /* Pure 2^n version of get_order */
> @@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
> iommu_table * tbl,
>  
>  #define IOMMU_TABLE_GROUP_MAX_TABLES 1
>  
> +struct iommu_table_group;
> +
> +struct iommu_table_group_ops {
> + /*
> +  * Switches ownership from the kernel itself to an external
> +  * user. While onwership is enabled, the kernel cannot use IOMMU
> +  * for itself.
> +  */
> + void (*set_ownership)(struct iommu_table_group *table_group,
> + bool enable);

The meaning of "enable" in a function called "set_ownership" is
entirely obscure.

> +};
> +
>  struct iommu_table_group {
>  #ifdef CONFIG_IOMMU_API
>   struct iommu_group *group;
>  #endif
>   struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
> + struct iommu_table_group_ops *ops;
>  };
>  
>  #ifdef CONFIG_IOMMU_API
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index a964c50..9687731 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
> *phb,
>   __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
>  }
>  
> -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
> +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
>  {
> - struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
> -   table_group);
>   uint16_t window_id = (pe->pe_number << 1 ) + 1;
>   int64_t rc;
>  
> @@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
> *tbl, bool enable)
>* host side.
>*/
>   if (pe->pdev)
> - set_iommu_table_base(&pe->pdev->dev, tbl);
> + set_iommu_table_base(&pe->pdev->dev,
> + &pe->table_group.tables[0]);
>   else
>   pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
>   }
> @@ -1302,13 +1301,27 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct 
> pnv_phb *phb,
>   /* TVE #1 is selected by PCI address bit 59 */
>   pe->tce_bypass_base = 1ull << 59;
>  
> - /* Install set_bypass callback for VFIO */
> - pe->table_group.tables[0].set_bypass = pnv_pci_ioda2_set_bypass;
> -
>   /* Enable bypass by default */
> - pnv_pci_ioda2_set_bypass(&pe->table_group.tables[0], true);
> + pnv_pci_ioda2_set_bypass(pe, true);
>  }
>  
> +static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
> +  bool enable)
> +{
> + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
> + table_group);
> + if (enable)
> + iommu_take_ownership(table_group);
> + else
> + iommu_release_ownership(table_group);
> +
> + pnv_pci_ioda2_set_bypass(pe, !enable);
> +}
> +
> +static struct 

Re: [PATCH kernel v8 14/31] vfio: powerpc/spapr: powerpc/powernv/ioda2: Rework IOMMU ownership control

2015-04-16 Thread David Gibson
On Fri, Apr 10, 2015 at 04:30:56PM +1000, Alexey Kardashevskiy wrote:
 At the moment the iommu_table struct has a set_bypass() which enables/
 disables DMA bypass on IODA2 PHB. This is exposed to POWERPC IOMMU code
 which calls this callback when external IOMMU users such as VFIO are
 about to get over a PHB.
 
 The set_bypass() callback is not really an iommu_table function but
 IOMMU/PE function. This introduces a iommu_table_group_ops struct and
 adds a set_ownership() callback to it which is called when an external
 user takes control over the IOMMU.

Do you really need separate ops structures at both the single table
and table group level?  The different tables in a group will all
belong to the same basic iommu won't they?

 This renames set_bypass() to set_ownership() as it is not necessarily
 just enabling bypassing, it can be something else/more so let's give it
 more generic name. The bool parameter is inverted.
 
 The callback is implemented for IODA2 only. Other platforms (P5IOC2,
 IODA1) will use the old iommu_take_ownership/iommu_release_ownership API.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  arch/powerpc/include/asm/iommu.h  | 14 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 30 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   | 25 +
  3 files changed, 56 insertions(+), 13 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index b9e50d3..d1f8c6c 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -92,7 +92,6 @@ struct iommu_table {
   unsigned long  it_page_shift;/* table iommu page size */
   struct iommu_table_group *it_group;
   struct iommu_table_ops *it_ops;
 - void (*set_bypass)(struct iommu_table *tbl, bool enable);
  };
  
  /* Pure 2^n version of get_order */
 @@ -127,11 +126,24 @@ extern struct iommu_table *iommu_init_table(struct 
 iommu_table * tbl,
  
  #define IOMMU_TABLE_GROUP_MAX_TABLES 1
  
 +struct iommu_table_group;
 +
 +struct iommu_table_group_ops {
 + /*
 +  * Switches ownership from the kernel itself to an external
 +  * user. While onwership is enabled, the kernel cannot use IOMMU
 +  * for itself.
 +  */
 + void (*set_ownership)(struct iommu_table_group *table_group,
 + bool enable);

The meaning of "enable" in a function called "set_ownership" is
entirely obscure.

 +};
 +
  struct iommu_table_group {
  #ifdef CONFIG_IOMMU_API
   struct iommu_group *group;
  #endif
   struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
 + struct iommu_table_group_ops *ops;
  };
  
  #ifdef CONFIG_IOMMU_API
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a964c50..9687731 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1255,10 +1255,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb 
 *phb,
   __free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
  }
  
 -static void pnv_pci_ioda2_set_bypass(struct iommu_table *tbl, bool enable)
 +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
  {
 - struct pnv_ioda_pe *pe = container_of(tbl->it_group, struct pnv_ioda_pe,
 -   table_group);
   uint16_t window_id = (pe->pe_number << 1 ) + 1;
   int64_t rc;
  
 @@ -1286,7 +1284,8 @@ static void pnv_pci_ioda2_set_bypass(struct iommu_table 
 *tbl, bool enable)
* host side.
*/
   if (pe->pdev)
 - set_iommu_table_base(&pe->pdev->dev, tbl);
 + set_iommu_table_base(&pe->pdev->dev,
 + &pe->table_group.tables[0]);
   else
   pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
   }
 @@ -1302,13 +1301,27 @@ static void pnv_pci_ioda2_setup_bypass_pe(struct 
 pnv_phb *phb,
   /* TVE #1 is selected by PCI address bit 59 */
   pe->tce_bypass_base = 1ull << 59;
  
 - /* Install set_bypass callback for VFIO */
 - pe->table_group.tables[0].set_bypass = pnv_pci_ioda2_set_bypass;
 -
   /* Enable bypass by default */
 - pnv_pci_ioda2_set_bypass(&pe->table_group.tables[0], true);
 + pnv_pci_ioda2_set_bypass(pe, true);
  }
  
 +static void pnv_ioda2_set_ownership(struct iommu_table_group *table_group,
 +  bool enable)
 +{
 + struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
 + table_group);
 + if (enable)
 + iommu_take_ownership(table_group);
 + else
 + iommu_release_ownership(table_group);
 +
 + pnv_pci_ioda2_set_bypass(pe, !enable);
 +}
 +
 +static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
 + .set_ownership = pnv_ioda2_set_ownership,
 +};
 +
  static