Re: [PATCH v2 2/6] powerpc/pseries/iommu: Update call to ibm,query-pe-dma-windows

2020-07-01 Thread Leonardo Bras
On Thu, 2020-07-02 at 10:18 +1000, Alexey Kardashevskiy wrote:
> 
> On 02/07/2020 00:04, Leonardo Bras wrote:
> > On Wed, 2020-07-01 at 18:17 +1000, Alexey Kardashevskiy wrote:
> > > > +#define DDW_EXT_SIZE   0
> > > > +#define DDW_EXT_RESET_DMA_WIN  1
> > > > +#define DDW_EXT_QUERY_OUT_SIZE 2
> > > 
> > > #define DDW_EXT_LAST (DDW_EXT_QUERY_OUT_SIZE + 1)
> > > ...
> > > 
> > > 
> > > > +
> > > >  static struct iommu_table_group *iommu_pseries_alloc_group(int node)
> > > >  {
> > > > struct iommu_table_group *table_group;
> > > > @@ -339,7 +343,7 @@ struct direct_window {
> > > >  /* Dynamic DMA Window support */
> > > >  struct ddw_query_response {
> > > > u32 windows_available;
> > > > -   u32 largest_available_block;
> > > > +   u64 largest_available_block;
> > > > u32 page_size;
> > > > u32 migration_capable;
> > > >  };
> > > > @@ -875,13 +879,29 @@ static int find_existing_ddw_windows(void)
> > > >  machine_arch_initcall(pseries, find_existing_ddw_windows);
> > > >  
> > > >  static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
> > > > -   struct ddw_query_response *query)
> > > > +struct ddw_query_response *query,
> > > > +struct device_node *parent)
> > > >  {
> > > > struct device_node *dn;
> > > > struct pci_dn *pdn;
> > > > -   u32 cfg_addr;
> > > > +   u32 cfg_addr, query_out[5], ddw_ext[DDW_EXT_QUERY_OUT_SIZE + 1];
> > > 
> > > ... and use DDW_EXT_LAST here.
> > 
> > Because of the growing nature of ddw-extensions, I intentionally let
> > this be (DDW_EXT_QUERY_OUT_SIZE + 1). If we create a DDW_EXT_LAST, it
> > will be incremented in the future if more extensions come to exist.
> > 
> > I mean, I previously saw no reason for allocating space for extensions
> > after the desired one, as they won't be used here.
> 
> Ah, my bad, you're right.
> 
> 
> > > 
> > > > u64 buid;
> > > > -   int ret;
> > > > +   int ret, out_sz;
> > > > +
> > > > +   /*
> > > > +* From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule 
> > > > how many
> > > > +* output parameters ibm,query-pe-dma-windows will have, 
> > > > ranging from
> > > > +* 5 to 6.
> > > > +*/
> > > > +
> > > > +   ret = of_property_read_u32_array(parent, "ibm,ddw-extensions",
> > > > +_ext[0],
> > > > +DDW_EXT_QUERY_OUT_SIZE + 1);
> > 
> > In this case, I made sure not to cross (DDW_EXT_QUERY_OUT_SIZE + 1)
> > while reading the extensions from the property.
> > 
> > What do you think about it? 
> 
> I think you want something like:
> 
> static inline int ddw_read_ext(const struct device_node *np, int extnum,
> u32 *ret)
> {
> retun of_property_read_u32_index(np, "ibm,ddw-extensions", extnum + 1, ret);
> }
> 
> These "+1"'s all over the place are confusing.

That's a great idea!

I was not aware it was possible to read a single value[index] directly
from the property, but it makes total sense to use it.

Thank you!



Re: [PATCH v2 2/6] powerpc/pseries/iommu: Update call to ibm,query-pe-dma-windows

2020-07-01 Thread Alexey Kardashevskiy



On 02/07/2020 00:04, Leonardo Bras wrote:
> On Wed, 2020-07-01 at 18:17 +1000, Alexey Kardashevskiy wrote:
>>
>>> +#define DDW_EXT_SIZE   0
>>> +#define DDW_EXT_RESET_DMA_WIN  1
>>> +#define DDW_EXT_QUERY_OUT_SIZE 2
>>
>> #define DDW_EXT_LAST (DDW_EXT_QUERY_OUT_SIZE + 1)
>> ...
>>
>>
>>> +
>>>  static struct iommu_table_group *iommu_pseries_alloc_group(int node)
>>>  {
>>> struct iommu_table_group *table_group;
>>> @@ -339,7 +343,7 @@ struct direct_window {
>>>  /* Dynamic DMA Window support */
>>>  struct ddw_query_response {
>>> u32 windows_available;
>>> -   u32 largest_available_block;
>>> +   u64 largest_available_block;
>>> u32 page_size;
>>> u32 migration_capable;
>>>  };
>>> @@ -875,13 +879,29 @@ static int find_existing_ddw_windows(void)
>>>  machine_arch_initcall(pseries, find_existing_ddw_windows);
>>>  
>>>  static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
>>> -   struct ddw_query_response *query)
>>> +struct ddw_query_response *query,
>>> +struct device_node *parent)
>>>  {
>>> struct device_node *dn;
>>> struct pci_dn *pdn;
>>> -   u32 cfg_addr;
>>> +   u32 cfg_addr, query_out[5], ddw_ext[DDW_EXT_QUERY_OUT_SIZE + 1];
>>
>> ... and use DDW_EXT_LAST here.
> 
> Because of the growing nature of ddw-extensions, I intentionally let
> this be (DDW_EXT_QUERY_OUT_SIZE + 1). If we create a DDW_EXT_LAST, it
> will be incremented in the future if more extensions come to exist.
> 
> I mean, I previously saw no reason for allocating space for extensions
> after the desired one, as they won't be used here.

Ah, my bad, you're right.


> 
>>
>>
>>> u64 buid;
>>> -   int ret;
>>> +   int ret, out_sz;
>>> +
>>> +   /*
>>> +* From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
>>> +* output parameters ibm,query-pe-dma-windows will have, ranging from
>>> +* 5 to 6.
>>> +*/
>>> +
>>> +   ret = of_property_read_u32_array(parent, "ibm,ddw-extensions",
>>> +_ext[0],
>>> +DDW_EXT_QUERY_OUT_SIZE + 1);
> 
> In this case, I made sure not to cross (DDW_EXT_QUERY_OUT_SIZE + 1)
> while reading the extensions from the property.
> 
> What do you think about it? 

I think you want something like:

static inline int ddw_read_ext(const struct device_node *np, int extnum,
u32 *ret)
{
retun of_property_read_u32_index(np, "ibm,ddw-extensions", extnum + 1, ret);
}

These "+1"'s all over the place are confusing.


-- 
Alexey


Re: [PATCH v2 2/6] powerpc/pseries/iommu: Update call to ibm,query-pe-dma-windows

2020-07-01 Thread Leonardo Bras
On Wed, 2020-07-01 at 18:17 +1000, Alexey Kardashevskiy wrote:
> 
> > +#define DDW_EXT_SIZE   0
> > +#define DDW_EXT_RESET_DMA_WIN  1
> > +#define DDW_EXT_QUERY_OUT_SIZE 2
> 
> #define DDW_EXT_LAST (DDW_EXT_QUERY_OUT_SIZE + 1)
> ...
> 
> 
> > +
> >  static struct iommu_table_group *iommu_pseries_alloc_group(int node)
> >  {
> > struct iommu_table_group *table_group;
> > @@ -339,7 +343,7 @@ struct direct_window {
> >  /* Dynamic DMA Window support */
> >  struct ddw_query_response {
> > u32 windows_available;
> > -   u32 largest_available_block;
> > +   u64 largest_available_block;
> > u32 page_size;
> > u32 migration_capable;
> >  };
> > @@ -875,13 +879,29 @@ static int find_existing_ddw_windows(void)
> >  machine_arch_initcall(pseries, find_existing_ddw_windows);
> >  
> >  static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
> > -   struct ddw_query_response *query)
> > +struct ddw_query_response *query,
> > +struct device_node *parent)
> >  {
> > struct device_node *dn;
> > struct pci_dn *pdn;
> > -   u32 cfg_addr;
> > +   u32 cfg_addr, query_out[5], ddw_ext[DDW_EXT_QUERY_OUT_SIZE + 1];
> 
> ... and use DDW_EXT_LAST here.

Because of the growing nature of ddw-extensions, I intentionally let
this be (DDW_EXT_QUERY_OUT_SIZE + 1). If we create a DDW_EXT_LAST, it
will be incremented in the future if more extensions come to exist.

I mean, I previously saw no reason for allocating space for extensions
after the desired one, as they won't be used here.

> 
> 
> > u64 buid;
> > -   int ret;
> > +   int ret, out_sz;
> > +
> > +   /*
> > +* From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
> > +* output parameters ibm,query-pe-dma-windows will have, ranging from
> > +* 5 to 6.
> > +*/
> > +
> > +   ret = of_property_read_u32_array(parent, "ibm,ddw-extensions",
> > +_ext[0],
> > +DDW_EXT_QUERY_OUT_SIZE + 1);

In this case, I made sure not to cross (DDW_EXT_QUERY_OUT_SIZE + 1)
while reading the extensions from the property.

What do you think about it? 

Best regards,
Leonardo 



Re: [PATCH v2 2/6] powerpc/pseries/iommu: Update call to ibm,query-pe-dma-windows

2020-07-01 Thread Alexey Kardashevskiy



On 24/06/2020 16:24, Leonardo Bras wrote:
> From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can make the number of
> outputs from "ibm,query-pe-dma-windows" go from 5 to 6.
> 
> This change of output size is meant to expand the address size of
> largest_available_block PE TCE from 32-bit to 64-bit, which ends up
> shifting page_size and migration_capable.
> 
> This ends up requiring the update of
> ddw_query_response->largest_available_block from u32 to u64, and manually
> assigning the values from the buffer into this struct, according to
> output size.
> 
> Signed-off-by: Leonardo Bras 
> ---
>  arch/powerpc/platforms/pseries/iommu.c | 57 +-
>  1 file changed, 47 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/iommu.c 
> b/arch/powerpc/platforms/pseries/iommu.c
> index 68d2aa9c71a8..558e5441c355 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -44,6 +44,10 @@
>  #define DDW_REMOVE_PE_DMA_WIN2
>  #define DDW_APPLICABLE_SIZE  3
>  
> +#define DDW_EXT_SIZE 0
> +#define DDW_EXT_RESET_DMA_WIN1
> +#define DDW_EXT_QUERY_OUT_SIZE   2


#define DDW_EXT_LAST (DDW_EXT_QUERY_OUT_SIZE + 1)
...


> +
>  static struct iommu_table_group *iommu_pseries_alloc_group(int node)
>  {
>   struct iommu_table_group *table_group;
> @@ -339,7 +343,7 @@ struct direct_window {
>  /* Dynamic DMA Window support */
>  struct ddw_query_response {
>   u32 windows_available;
> - u32 largest_available_block;
> + u64 largest_available_block;
>   u32 page_size;
>   u32 migration_capable;
>  };
> @@ -875,13 +879,29 @@ static int find_existing_ddw_windows(void)
>  machine_arch_initcall(pseries, find_existing_ddw_windows);
>  
>  static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
> - struct ddw_query_response *query)
> +  struct ddw_query_response *query,
> +  struct device_node *parent)
>  {
>   struct device_node *dn;
>   struct pci_dn *pdn;
> - u32 cfg_addr;
> + u32 cfg_addr, query_out[5], ddw_ext[DDW_EXT_QUERY_OUT_SIZE + 1];


... and use DDW_EXT_LAST here.


>   u64 buid;
> - int ret;
> + int ret, out_sz;
> +
> + /*
> +  * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
> +  * output parameters ibm,query-pe-dma-windows will have, ranging from
> +  * 5 to 6.
> +  */
> +
> + ret = of_property_read_u32_array(parent, "ibm,ddw-extensions",
> +  _ext[0],
> +  DDW_EXT_QUERY_OUT_SIZE + 1);
> + if (ret && ddw_ext[DDW_EXT_SIZE] > 1 &&

>= DDW_EXT_QUERY_OUT_SIZE ?  Thanks,


> + ddw_ext[DDW_EXT_QUERY_OUT_SIZE] == 1)
> + out_sz = 6;
> + else
> + out_sz = 5;
>  
>   /*
>* Get the config address and phb buid of the PE window.
> @@ -894,11 +914,28 @@ static int query_ddw(struct pci_dev *dev, const u32 
> *ddw_avail,
>   buid = pdn->phb->buid;
>   cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
>  
> - ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query,
> + ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
>   cfg_addr, BUID_HI(buid), BUID_LO(buid));
> - dev_info(>dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
> - " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr,
> -  BUID_HI(buid), BUID_LO(buid), ret);
> + dev_info(>dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned 
> %d\n",
> +  ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
> +  BUID_LO(buid), ret);
> +
> + switch (out_sz) {
> + case 5:
> + query->windows_available = query_out[0];
> + query->largest_available_block = query_out[1];
> + query->page_size = query_out[2];
> + query->migration_capable = query_out[3];
> + break;
> + case 6:
> + query->windows_available = query_out[0];
> + query->largest_available_block = ((u64)query_out[1] << 32) |
> +  query_out[2];
> + query->page_size = query_out[3];
> + query->migration_capable = query_out[4];
> + break;
> + }
> +
>   return ret;
>  }
>  
> @@ -1046,7 +1083,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
> device_node *pdn)
>* of page sizes: supported and supported for migrate-dma.
>*/
>   dn = pci_device_to_OF_node(dev);
> - ret = query_ddw(dev, ddw_avail, );
> + ret = query_ddw(dev, ddw_avail, , pdn);
>   if (ret != 0)
>   goto out_failed;
>  
> @@ -1074,7 +,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
> device_node *pdn)
>   /* check largest block * page size > max memory hotplug addr */
>   max_addr =