Re: [PATCH kernel 2/3] powerpc/pseries: Allow not having ibm,hypertas-functions::hcall-multi-tce for DDW

2019-12-13 Thread Ram Pai
On Fri, Dec 13, 2019 at 07:45:36PM +1100, Alexey Kardashevskiy wrote:
> By default a pseries guest supports a H_PUT_TCE hypercall which maps
> a single IOMMU page in a DMA window. Additionally the hypervisor may
> support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
> this is advertised via the device tree /rtas/ibm,hypertas-functions
> property which Linux converts to FW_FEATURE_MULTITCE.

Thanks Alexey for the patches!

> 
> FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
> the code managing the huge DMA window (DDW) ignores it and calls
> H_PUT_TCE_INDIRECT even if it is explicitly disabled via
> the "multitce=off" kernel command line parameter.

Also H_PUT_TCE_INDIRECT should not be called in secure VM, even when
"multitce=on".  right? Or does it get disabled somehow in the 
secure guest?


> 
> This adds FW_FEATURE_MULTITCE checking to the DDW code path.
> 
> This changes tce_build_pSeriesLP to take liobn and page size as
> the huge window does not have iommu_table descriptor which usually
> the place to store these numbers.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> 
> The idea is then set FW_FEATURE_MULTITCE in init_svm() and have the guest

I think, you mean unset FW_FEATURE_MULTITCE.

> use H_PUT_TCE without sharing a (temporary) page for H_PUT_TCE_INDIRECT.
> ---
>  arch/powerpc/platforms/pseries/iommu.c | 44 ++
>  1 file changed, 30 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/iommu.c 
> b/arch/powerpc/platforms/pseries/iommu.c
> index df7db33ca93b..f6e9b87c82fc 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table 
> *tbl, long index)
>   return be64_to_cpu(*tcep);
>  }
> 
> -static void tce_free_pSeriesLP(struct iommu_table*, long, long);
> +static void tce_free_pSeriesLP(unsigned long liobn, long, long);
>  static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
> 
> -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
> +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long 
> tceshift,
>   long npages, unsigned long uaddr,
>   enum dma_data_direction direction,
>   unsigned long attrs)
> @@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, 
> long tcenum,
>   int ret = 0;
>   long tcenum_start = tcenum, npages_start = npages;
> 
> - rpn = __pa(uaddr) >> TCE_SHIFT;
> + rpn = __pa(uaddr) >> tceshift;
>   proto_tce = TCE_PCI_READ;
>   if (direction != DMA_TO_DEVICE)
>   proto_tce |= TCE_PCI_WRITE;
> 
>   while (npages--) {
> - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
> - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
> + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
> + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
> 
>   if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
>   ret = (int)rc;
> - tce_free_pSeriesLP(tbl, tcenum_start,
> + tce_free_pSeriesLP(liobn, tcenum_start,
>  (npages_start - (npages + 1)));
>   break;
>   }
> 
>   if (rc && printk_ratelimit()) {
>   printk("tce_build_pSeriesLP: plpar_tce_put failed. 
> rc=%lld\n", rc);
> - printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
> + printk("\tindex   = 0x%llx\n", (u64)liobn);
>   printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
>   printk("\ttce val = 0x%llx\n", tce );
>   dump_stack();
> @@ -193,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table 
> *tbl, long tcenum,
>   unsigned long flags;
> 
>   if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
> - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> + return tce_build_pSeriesLP(tbl->it_index, tcenum,
> +tbl->it_page_shift, npages, uaddr,
>  direction, attrs);
>   }
> 
> @@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table 
> *tbl, long tcenum,
>   /* If allocation fails, fall back to the loop implementation */
>   if (!tcep) {
>   local_irq_restore(flags);
> - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
> - direction, attrs);
> + return tce_build_pSeriesLP(tbl->it_index, tcenum,
> + tbl->it_page_shift,
> + 

[PATCH kernel 2/3] powerpc/pseries: Allow not having ibm, hypertas-functions::hcall-multi-tce for DDW

2019-12-13 Thread Alexey Kardashevskiy
By default a pseries guest supports a H_PUT_TCE hypercall which maps
a single IOMMU page in a DMA window. Additionally the hypervisor may
support H_PUT_TCE_INDIRECT/H_STUFF_TCE which update multiple TCEs at once;
this is advertised via the device tree /rtas/ibm,hypertas-functions
property which Linux converts to FW_FEATURE_MULTITCE.

FW_FEATURE_MULTITCE is checked when dma_iommu_ops is used; however
the code managing the huge DMA window (DDW) ignores it and calls
H_PUT_TCE_INDIRECT even if it is explicitly disabled via
the "multitce=off" kernel command line parameter.

This adds FW_FEATURE_MULTITCE checking to the DDW code path.

This changes tce_build_pSeriesLP to take liobn and page size as
the huge window does not have iommu_table descriptor which usually
the place to store these numbers.

Signed-off-by: Alexey Kardashevskiy 
---

The idea is then set FW_FEATURE_MULTITCE in init_svm() and have the guest
use H_PUT_TCE without sharing a (temporary) page for H_PUT_TCE_INDIRECT.
---
 arch/powerpc/platforms/pseries/iommu.c | 44 ++
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index df7db33ca93b..f6e9b87c82fc 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -132,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table 
*tbl, long index)
return be64_to_cpu(*tcep);
 }
 
-static void tce_free_pSeriesLP(struct iommu_table*, long, long);
+static void tce_free_pSeriesLP(unsigned long liobn, long, long);
 static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
 
-static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
unsigned long attrs)
@@ -146,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, 
long tcenum,
int ret = 0;
long tcenum_start = tcenum, npages_start = npages;
 
-   rpn = __pa(uaddr) >> TCE_SHIFT;
+   rpn = __pa(uaddr) >> tceshift;
proto_tce = TCE_PCI_READ;
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
 
while (npages--) {
-   tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT;
-   rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce);
+   tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift;
+   rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
 
if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
ret = (int)rc;
-   tce_free_pSeriesLP(tbl, tcenum_start,
+   tce_free_pSeriesLP(liobn, tcenum_start,
   (npages_start - (npages + 1)));
break;
}
 
if (rc && printk_ratelimit()) {
printk("tce_build_pSeriesLP: plpar_tce_put failed. 
rc=%lld\n", rc);
-   printk("\tindex   = 0x%llx\n", (u64)tbl->it_index);
+   printk("\tindex   = 0x%llx\n", (u64)liobn);
printk("\ttcenum  = 0x%llx\n", (u64)tcenum);
printk("\ttce val = 0x%llx\n", tce );
dump_stack();
@@ -193,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table 
*tbl, long tcenum,
unsigned long flags;
 
if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) {
-   return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
+   return tce_build_pSeriesLP(tbl->it_index, tcenum,
+  tbl->it_page_shift, npages, uaddr,
   direction, attrs);
}
 
@@ -209,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table 
*tbl, long tcenum,
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
local_irq_restore(flags);
-   return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
-   direction, attrs);
+   return tce_build_pSeriesLP(tbl->it_index, tcenum,
+   tbl->it_page_shift,
+   npages, uaddr, direction, attrs);
}
__this_cpu_write(tce_page, tcep);
}
@@ -261,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table 
*tbl, long tcenum,
return ret;
 }
 
-static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long 
npages)
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages)
 {