On 02/07/2020 05:48, Leonardo Bras wrote:
> On Wed, 2020-07-01 at 18:17 +1000, Alexey Kardashevskiy wrote:
>>
>> On 24/06/2020 16:24, Leonardo Bras wrote:
>>> On LoPAR "DMA Window Manipulation Calls", it's recommended to remove the
>>> default DMA window for the device, before attempting to configure a DDW,
>>> in order to make the maximum resources available for the next DDW to be
>>> created.
>>>
>>> This is a requirement for some devices to use DDW, given they only
>>> allow one DMA window.
>>
>> Devices never know about these windows, it is purely PHB's side of
>> things. A device can access any address on the bus, the bus can generate
>> an exception if there is no window behind the address OR some other
>> device's MMIO. We could actually create a second window in addition to
>> the first one and allocate bus addresses from both, we just simplifying
>> this by merging two separate non-adjacent windows into one.
> 
> That's interesting, I was not aware of this. 
> I will try to improve this commit message with this info.
> Thanks for sharing!
> 
>>>>> If setting up a new DDW fails anywhere after the removal of this
>>> default DMA window, it's needed to restore the default DMA window.
>>> For this, an implementation of ibm,reset-pe-dma-windows rtas call is
>>> needed:
>>>
>>> Platforms supporting the DDW option starting with LoPAR level 2.7 implement
>>> ibm,ddw-extensions. The first extension available (index 2) carries the
>>> token for ibm,reset-pe-dma-windows rtas call, which is used to restore
>>> the default DMA window for a device, if it has been deleted.
>>>
>>> It does so by resetting the TCE table allocation for the PE to it's
>>> boot time value, available in "ibm,dma-window" device tree node.
>>>
>>> Signed-off-by: Leonardo Bras <leobra...@gmail.com>
>>> ---
>>>  arch/powerpc/platforms/pseries/iommu.c | 70 ++++++++++++++++++++++----
>>>  1 file changed, 61 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/powerpc/platforms/pseries/iommu.c 
>>> b/arch/powerpc/platforms/pseries/iommu.c
>>> index a8840d9e1c35..4fcf00016fb1 100644
>>> --- a/arch/powerpc/platforms/pseries/iommu.c
>>> +++ b/arch/powerpc/platforms/pseries/iommu.c
>>> @@ -1029,6 +1029,39 @@ static phys_addr_t ddw_memory_hotplug_max(void)
>>>     return max_addr;
>>>  }
>>>  
>>> +/*
>>> + * Platforms supporting the DDW option starting with LoPAR level 2.7 
>>> implement
>>> + * ibm,ddw-extensions, which carries the rtas token for
>>> + * ibm,reset-pe-dma-windows.
>>> + * That rtas-call can be used to restore the default DMA window for the 
>>> device.
>>> + */
>>> +static void reset_dma_window(struct pci_dev *dev, struct device_node 
>>> *par_dn)
>>> +{
>>> +   int ret;
>>> +   u32 cfg_addr, ddw_ext[DDW_EXT_RESET_DMA_WIN + 1];
>>> +   u64 buid;
>>> +   struct device_node *dn;
>>> +   struct pci_dn *pdn;
>>> +
>>> +   ret = of_property_read_u32_array(par_dn, "ibm,ddw-extensions",
>>> +                                    &ddw_ext[0], DDW_EXT_RESET_DMA_WIN + 
>>> 1);
>>> +   if (ret)
>>> +           return;
>>> +
>>> +   dn = pci_device_to_OF_node(dev);
>>> +   pdn = PCI_DN(dn);
>>> +   buid = pdn->phb->buid;
>>> +   cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
>>> +
>>> +   ret = rtas_call(ddw_ext[DDW_EXT_RESET_DMA_WIN], 3, 1, NULL, cfg_addr,
>>> +                   BUID_HI(buid), BUID_LO(buid));
>>> +   if (ret)
>>> +           dev_info(&dev->dev,
>>> +                    "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d ",
>>> +                    ddw_ext[1], cfg_addr, BUID_HI(buid), BUID_LO(buid),
>>
>> s/ddw_ext[1]/ddw_ext[DDW_EXT_RESET_DMA_WIN]/
> 
> Good catch! I missed this one.
> 
>>
>>
>>> +                    ret);
>>> +}
>>> +
>>>  /*
>>>   * If the PE supports dynamic dma windows, and there is space for a table
>>>   * that can map all pages in a linear offset, then setup such a table,
>>> @@ -1049,8 +1082,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>>     u64 dma_addr, max_addr;
>>>     struct device_node *dn;
>>>     u32 ddw_avail[DDW_APPLICABLE_SIZE];
>>> +
>>
>> Unrelated new empty line.
> 
> Fixed!
> 
>>
>>
>>>     struct direct_window *window;
>>> -   struct property *win64;
>>> +   struct property *win64, *default_win = NULL, *ddw_ext = NULL;
>>>     struct dynamic_dma_window_prop *ddwprop;
>>>     struct failed_ddw_pdn *fpdn;
>>>  
>>> @@ -1085,7 +1119,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>>     if (ret)
>>>             goto out_failed;
>>>  
>>> -       /*
>>> +   /*
>>>      * Query if there is a second window of size to map the
>>>      * whole partition.  Query returns number of windows, largest
>>>      * block assigned to PE (partition endpoint), and two bitmasks
>>> @@ -1096,15 +1130,31 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>>     if (ret != 0)
>>>             goto out_failed;
>>>  
>>> +   /*
>>> +    * If there is no window available, remove the default DMA window,
>>> +    * if it's present. This will make all the resources available to the
>>> +    * new DDW window.
>>> +    * If anything fails after this, we need to restore it, so also check
>>> +    * for extensions presence.
>>> +    */
>>>     if (query.windows_available == 0) {
>>
>> Does phyp really always advertise 0 windows for these VFs? What is in
>> the largest_available_block when windows_available==0?
> 
> For this VF, it always advertise 0 windows before removing the default
> DMA window. The largest available block size is the same as after the
> removal (256GB). The only value that changes after removal is the
> number of available windows. Here some debug prints:

> 
> [    3.473149] mlx5_core 4005:01:00.0: ibm,query-pe-dma-windows(53)
> 10000 8000000 29004005 returned 0
> [    3.473162] mlx5_core 4005:01:00.0: windows_available = 0,
> largest_block = 400000, page_size = 3, migration_capable = 3
> [    3.473332] mlx5_core 4005:01:00.0: ibm,query-pe-dma-windows(53)
> 10000 8000000 29004005 returned 0
> [    3.473345] mlx5_core 4005:01:00.0: windows_available = 1,
> largest_block = 400000, page_size = 3, migration_capable = 3



Ah, I see, thanks for the info. Ok, they really do not want us to have 2
windows. Oh well.



> 
>>
>>
>>> -           /*
>>> -            * no additional windows are available for this device.
>>> -            * We might be able to reallocate the existing window,
>>> -            * trading in for a larger page size.
>>> -            */
>>> -           dev_dbg(&dev->dev, "no free dynamic windows");
>>> -           goto out_failed;
>>> +           default_win = of_find_property(pdn, "ibm,dma-window", NULL);
>>> +           ddw_ext = of_find_property(pdn, "ibm,ddw-extensions", NULL);
>>> +           if (default_win && ddw_ext)
>>> +                   remove_dma_window(pdn, ddw_avail, default_win);
>>> +
>>> +           /* Query again, to check if the window is available */
>>> +           ret = query_ddw(dev, ddw_avail, &query, pdn);
>>> +           if (ret != 0)
>>> +                   goto out_failed;
>>> +
>>> +           if (query.windows_available == 0) {
>>> +                   /* no windows are available for this device. */
>>> +                   dev_dbg(&dev->dev, "no free dynamic windows");
>>> +                   goto out_failed;
>>> +           }
>>>     }
>>> +
>>
>> Unrelated new empty line. Thanks,
> Fixed!
> Thank you!
> 
>>
>>>     if (query.page_size & 4) {
>>>             page_shift = 24; /* 16MB */
>>>     } else if (query.page_size & 2) {
>>> @@ -1194,6 +1244,8 @@ static u64 enable_ddw(struct pci_dev *dev, struct 
>>> device_node *pdn)
>>>     kfree(win64);
>>>  
>>>  out_failed:
>>> +   if (default_win && ddw_ext)
>>> +           reset_dma_window(dev, pdn);
>>>  
>>>     fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
>>>     if (!fpdn)
>>>
> 

-- 
Alexey

Reply via email to