Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-15 Thread Thomas Huth
On Thu, 14 May 2015 13:53:57 +1000
Alexey Kardashevskiy  wrote:

> On 05/14/2015 01:00 AM, Thomas Huth wrote:
> > On Tue, 12 May 2015 01:39:12 +1000
> > Alexey Kardashevskiy  wrote:
...
> >> -/*
> >> - * hwaddr is a kernel virtual address here (0xc... bazillion),
> >> - * tce_build converts it to a physical address.
> >> - */
> >> -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> >> -  unsigned long hwaddr, enum dma_data_direction direction)
> >> -{
> >> -  int ret = -EBUSY;
> >> -  unsigned long oldtce;
> >> -  struct iommu_pool *pool = get_pool(tbl, entry);
> >> -
> >> -  spin_lock(&(pool->lock));
> >> -
> >> -  oldtce = tbl->it_ops->get(tbl, entry);
> >> -  /* Add new entry if it is not busy */
> >> -  if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
> >> -  ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL);
> >> -
> >> -  spin_unlock(&(pool->lock));
> >> +  if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> >> +  (*direction == DMA_BIDIRECTIONAL)))
> >
> > You could drop some of the parentheses:
> >
> > if (!ret && (*direction == DMA_FROM_DEVICE ||
> > *direction == DMA_BIDIRECTIONAL))
> 
> I really (really) like braces. Is there any kernel code design rule against 
> it?

I don't think so ... but for me it's rather the other way round: If I
see too many braces, I always wonder whether there is a reason for it in
the sense that I did not understand the statement right at the first
glance. Additionally, this is something that Pascal programmers like to
do, so IMHO this just looks ugly in C.

> >> @@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
> >>return -EINVAL;
> >>
> >>/* iova is checked by the IOMMU API */
> >> -  tce = param.vaddr;
> >>if (param.flags & VFIO_DMA_MAP_FLAG_READ)
> >> -  tce |= TCE_PCI_READ;
> >> -  if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
> >> -  tce |= TCE_PCI_WRITE;
> >> +  if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
> >> +  direction = DMA_BIDIRECTIONAL;
> >> +  else
> >> +  direction = DMA_TO_DEVICE;
> >> +  else
> >> +  if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
> >> +  direction = DMA_FROM_DEVICE;
> >> +  else
> >> +  return -EINVAL;
> >
> > IMHO some curly braces for the outer if-statement would be really fine
> > here.
> 
> I believe checkpatch.pl won't like it. There is a check against single 
> lines having braces after "if" statements.

If you write your code like this (I was only talking about the outer
braces!):

if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
direction = DMA_BIDIRECTIONAL;
else
direction = DMA_TO_DEVICE;
} else {
if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
direction = DMA_FROM_DEVICE;
else
return -EINVAL;
}

... then checkpatch should not complain, as far as I know - in this
case, the braces include three lines, don't they?

 Thomas
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-15 Thread Thomas Huth
On Thu, 14 May 2015 13:53:57 +1000
Alexey Kardashevskiy a...@ozlabs.ru wrote:

 On 05/14/2015 01:00 AM, Thomas Huth wrote:
  On Tue, 12 May 2015 01:39:12 +1000
  Alexey Kardashevskiy a...@ozlabs.ru wrote:
...
  -/*
  - * hwaddr is a kernel virtual address here (0xc... bazillion),
  - * tce_build converts it to a physical address.
  - */
  -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
  -  unsigned long hwaddr, enum dma_data_direction direction)
  -{
  -  int ret = -EBUSY;
  -  unsigned long oldtce;
  -  struct iommu_pool *pool = get_pool(tbl, entry);
  -
  -  spin_lock((pool-lock));
  -
  -  oldtce = tbl-it_ops-get(tbl, entry);
  -  /* Add new entry if it is not busy */
  -  if (!(oldtce  (TCE_PCI_WRITE | TCE_PCI_READ)))
  -  ret = tbl-it_ops-set(tbl, entry, 1, hwaddr, direction, NULL);
  -
  -  spin_unlock((pool-lock));
  +  if (!ret  ((*direction == DMA_FROM_DEVICE) ||
  +  (*direction == DMA_BIDIRECTIONAL)))
 
  You could drop some of the parentheses:
 
  if (!ret  (*direction == DMA_FROM_DEVICE ||
  *direction == DMA_BIDIRECTIONAL))
 
 I really (really) like braces. Is there any kernel code design rule against 
 it?

I don't think so ... but for me it's rather the other way round: If I
see too many braces, I always wonder whether there is a reason for it in
the sense that I did not understand the statement right at the first
glance. Additionally, this is something that Pascal programmers like to
do, so IMHO this just looks ugly in C.

  @@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
 return -EINVAL;
 
 /* iova is checked by the IOMMU API */
  -  tce = param.vaddr;
 if (param.flags  VFIO_DMA_MAP_FLAG_READ)
  -  tce |= TCE_PCI_READ;
  -  if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
  -  tce |= TCE_PCI_WRITE;
  +  if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
  +  direction = DMA_BIDIRECTIONAL;
  +  else
  +  direction = DMA_TO_DEVICE;
  +  else
  +  if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
  +  direction = DMA_FROM_DEVICE;
  +  else
  +  return -EINVAL;
 
  IMHO some curly braces for the outer if-statement would be really fine
  here.
 
 I believe checkpatch.pl won't like it. There is a check against single 
 lines having braces after if statements.

If you write your code like this (I was only talking about the outer
braces!):

if (param.flags  VFIO_DMA_MAP_FLAG_READ) {
if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
direction = DMA_BIDIRECTIONAL;
else
direction = DMA_TO_DEVICE;
} else {
if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
direction = DMA_FROM_DEVICE;
else
return -EINVAL;
}

... then checkpatch should not complain, as far as I know - in this
case, the braces include three lines, don't they?

 Thomas
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-13 Thread Alexey Kardashevskiy

On 05/14/2015 01:00 AM, Thomas Huth wrote:

On Tue, 12 May 2015 01:39:12 +1000
Alexey Kardashevskiy  wrote:


At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.

This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.

This moves SetPageDirty() to the IOMMU code to make it work for both
VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
available later).

Signed-off-by: Alexey Kardashevskiy 
[aw: for the vfio related changes]
Acked-by: Alex Williamson 

[...]

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 6275164..1287d49 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -962,10 +962,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
  int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce)
  {
-   if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
-   return -EINVAL;
-
-   if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+   if (tce & ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;

if (ioba & ~IOMMU_PAGE_MASK(tbl))
@@ -982,44 +979,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
  }
  EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);

-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long *hpa, enum dma_data_direction *direction)
  {
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   long ret;

-   spin_lock(&(pool->lock));
+   ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);

-   oldtce = tbl->it_ops->get(tbl, entry);
-   if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
-   tbl->it_ops->clear(tbl, entry, 1);
-   else
-   oldtce = 0;
-
-   spin_unlock(&(pool->lock));
-
-   return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction)
-{
-   int ret = -EBUSY;
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
-
-   spin_lock(&(pool->lock));
-
-   oldtce = tbl->it_ops->get(tbl, entry);
-   /* Add new entry if it is not busy */
-   if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
-   ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL);
-
-   spin_unlock(&(pool->lock));
+   if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+   (*direction == DMA_BIDIRECTIONAL)))


You could drop some of the parentheses:

if (!ret && (*direction == DMA_FROM_DEVICE ||
*direction == DMA_BIDIRECTIONAL))


I really (really) like braces. Is there any kernel code design rule against it?





+   SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

/* if (unlikely(ret))
pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx 
ret=%d\n",

[...]

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index 2ead291..0724ec8 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data)

[...]

@@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
return -EINVAL;

/* iova is checked by the IOMMU API */
-   tce = param.vaddr;
if (param.flags & VFIO_DMA_MAP_FLAG_READ)
-   tce |= TCE_PCI_READ;
-   if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
-

Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-13 Thread Thomas Huth
On Tue, 12 May 2015 01:39:12 +1000
Alexey Kardashevskiy  wrote:

> At the moment writing new TCE value to the IOMMU table fails with EBUSY
> if there is a valid entry already. However PAPR specification allows
> the guest to write new TCE value without clearing it first.
> 
> Another problem this patch is addressing is the use of pool locks for
> external IOMMU users such as VFIO. The pool locks are to protect
> DMA page allocator rather than entries and since the host kernel does
> not control what pages are in use, there is no point in pool locks and
> exchange()+put_page(oldtce) is sufficient to avoid possible races.
> 
> This adds an exchange() callback to iommu_table_ops which does the same
> thing as set() plus it returns replaced TCE and DMA direction so
> the caller can release the pages afterwards. The exchange() receives
> a physical address unlike set() which receives linear mapping address;
> and returns a physical address as the clear() does.
> 
> This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
> for a platform to have exchange() implemented in order to support VFIO.
> 
> This replaces iommu_tce_build() and iommu_clear_tce() with
> a single iommu_tce_xchg().
> 
> This makes sure that TCE permission bits are not set in TCE passed to
> IOMMU API as those are to be calculated by platform code from
> DMA direction.
> 
> This moves SetPageDirty() to the IOMMU code to make it work for both
> VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
> available later).
> 
> Signed-off-by: Alexey Kardashevskiy 
> [aw: for the vfio related changes]
> Acked-by: Alex Williamson 
[...]
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index 6275164..1287d49 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -962,10 +962,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
>  int iommu_tce_put_param_check(struct iommu_table *tbl,
>   unsigned long ioba, unsigned long tce)
>  {
> - if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
> - return -EINVAL;
> -
> - if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
> + if (tce & ~IOMMU_PAGE_MASK(tbl))
>   return -EINVAL;
>  
>   if (ioba & ~IOMMU_PAGE_MASK(tbl))
> @@ -982,44 +979,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
>  }
>  EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
>  
> -unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
> +long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
> + unsigned long *hpa, enum dma_data_direction *direction)
>  {
> - unsigned long oldtce;
> - struct iommu_pool *pool = get_pool(tbl, entry);
> + long ret;
>  
> - spin_lock(&(pool->lock));
> + ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
>  
> - oldtce = tbl->it_ops->get(tbl, entry);
> - if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
> - tbl->it_ops->clear(tbl, entry, 1);
> - else
> - oldtce = 0;
> -
> - spin_unlock(&(pool->lock));
> -
> - return oldtce;
> -}
> -EXPORT_SYMBOL_GPL(iommu_clear_tce);
> -
> -/*
> - * hwaddr is a kernel virtual address here (0xc... bazillion),
> - * tce_build converts it to a physical address.
> - */
> -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
> - unsigned long hwaddr, enum dma_data_direction direction)
> -{
> - int ret = -EBUSY;
> - unsigned long oldtce;
> - struct iommu_pool *pool = get_pool(tbl, entry);
> -
> - spin_lock(&(pool->lock));
> -
> - oldtce = tbl->it_ops->get(tbl, entry);
> - /* Add new entry if it is not busy */
> - if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
> - ret = tbl->it_ops->set(tbl, entry, 1, hwaddr, direction, NULL);
> -
> - spin_unlock(&(pool->lock));
> + if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> + (*direction == DMA_BIDIRECTIONAL)))

You could drop some of the parentheses:

if (!ret && (*direction == DMA_FROM_DEVICE ||
*direction == DMA_BIDIRECTIONAL))

> + SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
>  
>   /* if (unlikely(ret))
>   pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx 
> ret=%d\n",
[...]
> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
> b/drivers/vfio/vfio_iommu_spapr_tce.c
> index 2ead291..0724ec8 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data)
[...]
> @@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
>   return -EINVAL;
>  
>   /* iova is checked by the IOMMU API */
> - tce = param.vaddr;
>   if (param.flags & VFIO_DMA_MAP_FLAG_READ)
> - tce |= TCE_PCI_READ;
> - if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)

Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-13 Thread Thomas Huth
On Tue, 12 May 2015 01:39:12 +1000
Alexey Kardashevskiy a...@ozlabs.ru wrote:

 At the moment writing new TCE value to the IOMMU table fails with EBUSY
 if there is a valid entry already. However PAPR specification allows
 the guest to write new TCE value without clearing it first.
 
 Another problem this patch is addressing is the use of pool locks for
 external IOMMU users such as VFIO. The pool locks are to protect
 DMA page allocator rather than entries and since the host kernel does
 not control what pages are in use, there is no point in pool locks and
 exchange()+put_page(oldtce) is sufficient to avoid possible races.
 
 This adds an exchange() callback to iommu_table_ops which does the same
 thing as set() plus it returns replaced TCE and DMA direction so
 the caller can release the pages afterwards. The exchange() receives
 a physical address unlike set() which receives linear mapping address;
 and returns a physical address as the clear() does.
 
 This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
 for a platform to have exchange() implemented in order to support VFIO.
 
 This replaces iommu_tce_build() and iommu_clear_tce() with
 a single iommu_tce_xchg().
 
 This makes sure that TCE permission bits are not set in TCE passed to
 IOMMU API as those are to be calculated by platform code from
 DMA direction.
 
 This moves SetPageDirty() to the IOMMU code to make it work for both
 VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
 available later).
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 [aw: for the vfio related changes]
 Acked-by: Alex Williamson alex.william...@redhat.com
[...]
 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
 index 6275164..1287d49 100644
 --- a/arch/powerpc/kernel/iommu.c
 +++ b/arch/powerpc/kernel/iommu.c
 @@ -962,10 +962,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
  int iommu_tce_put_param_check(struct iommu_table *tbl,
   unsigned long ioba, unsigned long tce)
  {
 - if (!(tce  (TCE_PCI_WRITE | TCE_PCI_READ)))
 - return -EINVAL;
 -
 - if (tce  ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
 + if (tce  ~IOMMU_PAGE_MASK(tbl))
   return -EINVAL;
  
   if (ioba  ~IOMMU_PAGE_MASK(tbl))
 @@ -982,44 +979,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
  }
  EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
  
 -unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
 +long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
 + unsigned long *hpa, enum dma_data_direction *direction)
  {
 - unsigned long oldtce;
 - struct iommu_pool *pool = get_pool(tbl, entry);
 + long ret;
  
 - spin_lock((pool-lock));
 + ret = tbl-it_ops-exchange(tbl, entry, hpa, direction);
  
 - oldtce = tbl-it_ops-get(tbl, entry);
 - if (oldtce  (TCE_PCI_WRITE | TCE_PCI_READ))
 - tbl-it_ops-clear(tbl, entry, 1);
 - else
 - oldtce = 0;
 -
 - spin_unlock((pool-lock));
 -
 - return oldtce;
 -}
 -EXPORT_SYMBOL_GPL(iommu_clear_tce);
 -
 -/*
 - * hwaddr is a kernel virtual address here (0xc... bazillion),
 - * tce_build converts it to a physical address.
 - */
 -int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
 - unsigned long hwaddr, enum dma_data_direction direction)
 -{
 - int ret = -EBUSY;
 - unsigned long oldtce;
 - struct iommu_pool *pool = get_pool(tbl, entry);
 -
 - spin_lock((pool-lock));
 -
 - oldtce = tbl-it_ops-get(tbl, entry);
 - /* Add new entry if it is not busy */
 - if (!(oldtce  (TCE_PCI_WRITE | TCE_PCI_READ)))
 - ret = tbl-it_ops-set(tbl, entry, 1, hwaddr, direction, NULL);
 -
 - spin_unlock((pool-lock));
 + if (!ret  ((*direction == DMA_FROM_DEVICE) ||
 + (*direction == DMA_BIDIRECTIONAL)))

You could drop some of the parentheses:

if (!ret  (*direction == DMA_FROM_DEVICE ||
*direction == DMA_BIDIRECTIONAL))

 + SetPageDirty(pfn_to_page(*hpa  PAGE_SHIFT));
  
   /* if (unlikely(ret))
   pr_err(iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx 
 ret=%d\n,
[...]
 diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
 b/drivers/vfio/vfio_iommu_spapr_tce.c
 index 2ead291..0724ec8 100644
 --- a/drivers/vfio/vfio_iommu_spapr_tce.c
 +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
 @@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data)
[...]
 @@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
   return -EINVAL;
  
   /* iova is checked by the IOMMU API */
 - tce = param.vaddr;
   if (param.flags  VFIO_DMA_MAP_FLAG_READ)
 - tce |= TCE_PCI_READ;
 - if (param.flags  VFIO_DMA_MAP_FLAG_WRITE)
 - tce |= TCE_PCI_WRITE;
 + if (param.flags  

Re: [PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-13 Thread Alexey Kardashevskiy

On 05/14/2015 01:00 AM, Thomas Huth wrote:

On Tue, 12 May 2015 01:39:12 +1000
Alexey Kardashevskiy a...@ozlabs.ru wrote:


At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.

This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.

This moves SetPageDirty() to the IOMMU code to make it work for both
VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
available later).

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
[aw: for the vfio related changes]
Acked-by: Alex Williamson alex.william...@redhat.com

[...]

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 6275164..1287d49 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -962,10 +962,7 @@ EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
  int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce)
  {
-   if (!(tce  (TCE_PCI_WRITE | TCE_PCI_READ)))
-   return -EINVAL;
-
-   if (tce  ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
+   if (tce  ~IOMMU_PAGE_MASK(tbl))
return -EINVAL;

if (ioba  ~IOMMU_PAGE_MASK(tbl))
@@ -982,44 +979,16 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
  }
  EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);

-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long *hpa, enum dma_data_direction *direction)
  {
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
+   long ret;

-   spin_lock((pool-lock));
+   ret = tbl-it_ops-exchange(tbl, entry, hpa, direction);

-   oldtce = tbl-it_ops-get(tbl, entry);
-   if (oldtce  (TCE_PCI_WRITE | TCE_PCI_READ))
-   tbl-it_ops-clear(tbl, entry, 1);
-   else
-   oldtce = 0;
-
-   spin_unlock((pool-lock));
-
-   return oldtce;
-}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
-
-/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
- */
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction)
-{
-   int ret = -EBUSY;
-   unsigned long oldtce;
-   struct iommu_pool *pool = get_pool(tbl, entry);
-
-   spin_lock((pool-lock));
-
-   oldtce = tbl-it_ops-get(tbl, entry);
-   /* Add new entry if it is not busy */
-   if (!(oldtce  (TCE_PCI_WRITE | TCE_PCI_READ)))
-   ret = tbl-it_ops-set(tbl, entry, 1, hwaddr, direction, NULL);
-
-   spin_unlock((pool-lock));
+   if (!ret  ((*direction == DMA_FROM_DEVICE) ||
+   (*direction == DMA_BIDIRECTIONAL)))


You could drop some of the parentheses:

if (!ret  (*direction == DMA_FROM_DEVICE ||
*direction == DMA_BIDIRECTIONAL))


I really (really) like braces. Is there any kernel code design rule against it?





+   SetPageDirty(pfn_to_page(*hpa  PAGE_SHIFT));

/* if (unlikely(ret))
pr_err(iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx 
ret=%d\n,

[...]

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index 2ead291..0724ec8 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -236,18 +236,11 @@ static void tce_iommu_release(void *iommu_data)

[...]

@@ -405,19 +410,26 @@ static long tce_iommu_ioctl(void *iommu_data,
return -EINVAL;

/* iova is checked by the IOMMU API */
-   tce = param.vaddr;
if (param.flags  VFIO_DMA_MAP_FLAG_READ)
-   tce |= TCE_PCI_READ;
-   if (param.flags  

[PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-11 Thread Alexey Kardashevskiy
At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.

This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.

This moves SetPageDirty() to the IOMMU code to make it work for both
VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
available later).

Signed-off-by: Alexey Kardashevskiy 
[aw: for the vfio related changes]
Acked-by: Alex Williamson 
---
Changes:
v10:
* did s/tce/hpa/ in iommu_table_ops::exchange and tce_iommu_unuse_page()
* removed permission bits check from iommu_tce_put_param_check as
permission bits are not allowed in the address
* added BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)) to pnv_tce_xchg()

v9:
* changed exchange() to work with physical addresses as these addresses
are never accessed by the code and physical addresses are actual values
we put into the IOMMU table
---
 arch/powerpc/include/asm/iommu.h| 22 +--
 arch/powerpc/kernel/iommu.c | 59 +---
 arch/powerpc/platforms/powernv/pci-ioda.c   | 34 
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |  3 ++
 arch/powerpc/platforms/powernv/pci.c| 18 +
 arch/powerpc/platforms/powernv/pci.h|  2 +
 drivers/vfio/vfio_iommu_spapr_tce.c | 60 +
 7 files changed, 130 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c5375c5..d4ad118 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -45,13 +45,29 @@ extern int iommu_is_off;
 extern int iommu_force_on;
 
 struct iommu_table_ops {
+   /*
+* When called with direction==DMA_NONE, it is equal to clear().
+* uaddr is a linear map address.
+*/
int (*set)(struct iommu_table *tbl,
long index, long npages,
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs);
+#ifdef CONFIG_IOMMU_API
+   /*
+* Exchanges existing TCE with new TCE plus direction bits;
+* returns old TCE and DMA direction mask.
+* @tce is a physical address.
+*/
+   int (*exchange)(struct iommu_table *tbl,
+   long index,
+   unsigned long *hpa,
+   enum dma_data_direction *direction);
+#endif
void (*clear)(struct iommu_table *tbl,
long index, long npages);
+   /* get() returns a physical address */
unsigned long (*get)(struct iommu_table *tbl, long index);
void (*flush)(struct iommu_table *tbl);
 };
@@ -155,6 +171,8 @@ extern void iommu_register_group(struct iommu_table_group 
*table_group,
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
@@ -227,10 +245,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table 
*tbl,
unsigned long npages);
 extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
-   unsigned long entry);
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct iommu_table *tbl);
diff --git 

[PATCH kernel v10 23/34] powerpc/iommu/powernv: Release replaced TCE

2015-05-11 Thread Alexey Kardashevskiy
At the moment writing new TCE value to the IOMMU table fails with EBUSY
if there is a valid entry already. However PAPR specification allows
the guest to write new TCE value without clearing it first.

Another problem this patch is addressing is the use of pool locks for
external IOMMU users such as VFIO. The pool locks are to protect
DMA page allocator rather than entries and since the host kernel does
not control what pages are in use, there is no point in pool locks and
exchange()+put_page(oldtce) is sufficient to avoid possible races.

This adds an exchange() callback to iommu_table_ops which does the same
thing as set() plus it returns replaced TCE and DMA direction so
the caller can release the pages afterwards. The exchange() receives
a physical address unlike set() which receives linear mapping address;
and returns a physical address as the clear() does.

This implements exchange() for P5IOC2/IODA/IODA2. This adds a requirement
for a platform to have exchange() implemented in order to support VFIO.

This replaces iommu_tce_build() and iommu_clear_tce() with
a single iommu_tce_xchg().

This makes sure that TCE permission bits are not set in TCE passed to
IOMMU API as those are to be calculated by platform code from
DMA direction.

This moves SetPageDirty() to the IOMMU code to make it work for both
VFIO ioctl interface in in-kernel TCE acceleration (when it becomes
available later).

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
[aw: for the vfio related changes]
Acked-by: Alex Williamson alex.william...@redhat.com
---
Changes:
v10:
* did s/tce/hpa/ in iommu_table_ops::exchange and tce_iommu_unuse_page()
* removed permission bits check from iommu_tce_put_param_check as
permission bits are not allowed in the address
* added BUG_ON(*hpa  ~IOMMU_PAGE_MASK(tbl)) to pnv_tce_xchg()

v9:
* changed exchange() to work with physical addresses as these addresses
are never accessed by the code and physical addresses are actual values
we put into the IOMMU table
---
 arch/powerpc/include/asm/iommu.h| 22 +--
 arch/powerpc/kernel/iommu.c | 59 +---
 arch/powerpc/platforms/powernv/pci-ioda.c   | 34 
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |  3 ++
 arch/powerpc/platforms/powernv/pci.c| 18 +
 arch/powerpc/platforms/powernv/pci.h|  2 +
 drivers/vfio/vfio_iommu_spapr_tce.c | 60 +
 7 files changed, 130 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c5375c5..d4ad118 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -45,13 +45,29 @@ extern int iommu_is_off;
 extern int iommu_force_on;
 
 struct iommu_table_ops {
+   /*
+* When called with direction==DMA_NONE, it is equal to clear().
+* uaddr is a linear map address.
+*/
int (*set)(struct iommu_table *tbl,
long index, long npages,
unsigned long uaddr,
enum dma_data_direction direction,
struct dma_attrs *attrs);
+#ifdef CONFIG_IOMMU_API
+   /*
+* Exchanges existing TCE with new TCE plus direction bits;
+* returns old TCE and DMA direction mask.
+* @tce is a physical address.
+*/
+   int (*exchange)(struct iommu_table *tbl,
+   long index,
+   unsigned long *hpa,
+   enum dma_data_direction *direction);
+#endif
void (*clear)(struct iommu_table *tbl,
long index, long npages);
+   /* get() returns a physical address */
unsigned long (*get)(struct iommu_table *tbl, long index);
void (*flush)(struct iommu_table *tbl);
 };
@@ -155,6 +171,8 @@ extern void iommu_register_group(struct iommu_table_group 
*table_group,
 extern int iommu_add_device(struct device *dev);
 extern void iommu_del_device(struct device *dev);
 extern int __init tce_iommu_bus_notifier_init(void);
+extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
+   unsigned long *hpa, enum dma_data_direction *direction);
 #else
 static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
@@ -227,10 +245,6 @@ extern int iommu_tce_clear_param_check(struct iommu_table 
*tbl,
unsigned long npages);
 extern int iommu_tce_put_param_check(struct iommu_table *tbl,
unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
-   unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
-   unsigned long entry);
 
 extern void iommu_flush_tce(struct iommu_table *tbl);
 extern int iommu_take_ownership(struct iommu_table