Re: [PATCH kernel v11 04/10] powerpc/vfio_spapr_tce: Add reference counting to iommu_table

2017-03-24 Thread Alex Williamson
On Wed, 22 Mar 2017 15:21:50 +1100
Alexey Kardashevskiy  wrote:

> So far iommu_table objects were only used in virtual mode and had
> a single owner. We are going to change this by implementing in-kernel
> acceleration of DMA mapping requests. The proposed acceleration
> will handle requests in real mode and KVM will keep references to tables.
> 
> This adds a kref to iommu_table and defines new helpers to update it.
> This replaces iommu_free_table() with iommu_tce_table_put() and makes
> iommu_free_table() static. iommu_tce_table_get() is not used in this patch
> but it will be in the following patch.
> 
> Since this touches prototypes, this also removes @node_name parameter as
> it has never been really useful on powernv and carrying it for
> the pseries platform code to iommu_free_table() seems to be quite
> useless as well.
> 
> This should cause no behavioral change.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Reviewed-by: David Gibson 
> ---
> Changes:
> v10:
> * iommu_tce_table_get() can fail now if a table is being destroyed, will be
> used in 10/10
> * iommu_tce_table_put() returns what kref_put() returned
> * iommu_tce_table_put() got WARN_ON(!tbl) as the callers already check
> for it and do not call _put() when tbl==NULL
> 
> v9:
> * s/iommu_table_get/iommu_tce_table_get/ and
> s/iommu_table_put/iommu_tce_table_put/ -- so I removed r-b/a-b
> ---
>  arch/powerpc/include/asm/iommu.h  |  5 +++--
>  arch/powerpc/kernel/iommu.c   | 27 ++-
>  arch/powerpc/platforms/powernv/pci-ioda.c | 14 +++---
>  arch/powerpc/platforms/powernv/pci.c  |  1 +
>  arch/powerpc/platforms/pseries/iommu.c|  3 ++-
>  arch/powerpc/platforms/pseries/vio.c  |  2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c   |  2 +-
>  7 files changed, 37 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
> index 4554699aec02..d96142572e6d 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -119,6 +119,7 @@ struct iommu_table {
>   struct list_head it_group_list;/* List of iommu_table_group_link */
>   unsigned long *it_userspace; /* userspace view of the table */
>   struct iommu_table_ops *it_ops;
> + struct kref it_kref;
>  };
>  
>  #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
> @@ -151,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev)
>  
>  extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
>  
> -/* Frees table for an individual device node */
> -extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
> +extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
> +extern int iommu_tce_table_put(struct iommu_table *tbl);
>  
>  /* Initializes an iommu_table based in values set in the passed-in
>   * structure
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index bc142d87130f..af915da5e03a 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -711,13 +711,13 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
>   return tbl;
>  }
>  
> -void iommu_free_table(struct iommu_table *tbl, const char *node_name)
> +static void iommu_table_free(struct kref *kref)
>  {
>   unsigned long bitmap_sz;
>   unsigned int order;
> + struct iommu_table *tbl;
>  
> - if (!tbl)
> - return;
> + tbl = container_of(kref, struct iommu_table, it_kref);
>  
>   if (tbl->it_ops->free)
>   tbl->it_ops->free(tbl);
> @@ -736,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
>  
>   /* verify that table contains no entries */
>   if (!bitmap_empty(tbl->it_map, tbl->it_size))
> - pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
> + pr_warn("%s: Unexpected TCEs\n", __func__);
>  
>   /* calculate bitmap size in bytes */
>   bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
> @@ -748,7 +748,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
>   /* free table */
>   kfree(tbl);
>  }
> -EXPORT_SYMBOL_GPL(iommu_free_table);
> +
> +struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
> +{
> + if (kref_get_unless_zero(&tbl->it_kref))
> + return tbl;
> +
> + return NULL;
> +}
> +EXPORT_SYMBOL_GPL(iommu_tce_table_get);
> +
> +int iommu_tce_table_put(struct iommu_table *tbl)
> +{
> + if (WARN_ON(!tbl))
> + return 0;
> +
> + return kref_put(&tbl->it_kref, iommu_table_free);
> +}
> +EXPORT_SYMBOL_GPL(iommu_tce_table_put);
>  
>  /* Creates TCEs for a user provided buffer.  The user buffer must be
>   * contiguous real kernel storage (not vmalloc).  The address passed here
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> 

[PATCH kernel v11 04/10] powerpc/vfio_spapr_tce: Add reference counting to iommu_table

2017-03-21 Thread Alexey Kardashevskiy
So far iommu_table objects were only used in virtual mode and had
a single owner. We are going to change this by implementing in-kernel
acceleration of DMA mapping requests. The proposed acceleration
will handle requests in real mode and KVM will keep references to tables.

This adds a kref to iommu_table and defines new helpers to update it.
This replaces iommu_free_table() with iommu_tce_table_put() and makes
iommu_free_table() static. iommu_tce_table_get() is not used in this patch
but it will be in the following patch.

Since this touches prototypes, this also removes @node_name parameter as
it has never been really useful on powernv and carrying it for
the pseries platform code to iommu_free_table() seems to be quite
useless as well.

This should cause no behavioral change.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
Changes:
v10:
* iommu_tce_table_get() can fail now if a table is being destroyed, will be
used in 10/10
* iommu_tce_table_put() returns what kref_put() returned
* iommu_tce_table_put() got WARN_ON(!tbl) as the callers already check
for it and do not call _put() when tbl==NULL

v9:
* s/iommu_table_get/iommu_tce_table_get/ and
s/iommu_table_put/iommu_tce_table_put/ -- so I removed r-b/a-b
---
 arch/powerpc/include/asm/iommu.h  |  5 +++--
 arch/powerpc/kernel/iommu.c   | 27 ++-
 arch/powerpc/platforms/powernv/pci-ioda.c | 14 +++---
 arch/powerpc/platforms/powernv/pci.c  |  1 +
 arch/powerpc/platforms/pseries/iommu.c|  3 ++-
 arch/powerpc/platforms/pseries/vio.c  |  2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c   |  2 +-
 7 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 4554699aec02..d96142572e6d 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -119,6 +119,7 @@ struct iommu_table {
struct list_head it_group_list;/* List of iommu_table_group_link */
unsigned long *it_userspace; /* userspace view of the table */
struct iommu_table_ops *it_ops;
+   struct kref it_kref;
 };
 
 #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
@@ -151,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev)
 
 extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
 
-/* Frees table for an individual device node */
-extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
 
 /* Initializes an iommu_table based in values set in the passed-in
  * structure
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index bc142d87130f..af915da5e03a 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,13 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
return tbl;
 }
 
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
 {
unsigned long bitmap_sz;
unsigned int order;
+   struct iommu_table *tbl;
 
-   if (!tbl)
-   return;
+   tbl = container_of(kref, struct iommu_table, it_kref);
 
if (tbl->it_ops->free)
tbl->it_ops->free(tbl);
@@ -736,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
 
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
-   pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+   pr_warn("%s: Unexpected TCEs\n", __func__);
 
/* calculate bitmap size in bytes */
bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -748,7 +748,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
/* free table */
kfree(tbl);
 }
-EXPORT_SYMBOL_GPL(iommu_free_table);
+
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+   if (kref_get_unless_zero(&tbl->it_kref))
+   return tbl;
+
+   return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+   if (WARN_ON(!tbl))
+   return 0;
+
+   return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
 
 /* Creates TCEs for a user provided buffer.  The user buffer must be
  * contiguous real kernel storage (not vmalloc).  The address passed here
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5dae54cb11e3..ee4cdb5b893f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1424,7 +1424,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev, struct pnv_ioda_pe