Re: [PATCH kernel v3 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-29 Thread Michael Ellerman
Alexey Kardashevskiy writes:
> On 29/10/2020 11:40, Michael Ellerman wrote:
>> Alexey Kardashevskiy writes:
>>> @@ -1126,7 +1129,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>  
>>> 	mutex_lock(&direct_window_init_mutex);
>>>  
>>> -	dma_addr = find_existing_ddw(pdn);
>>> +	dma_addr = find_existing_ddw(pdn, &len);
>> 
>> I don't see len used anywhere?
>> 
>>> 	if (dma_addr != 0)
>>> 		goto out_unlock;
>>>  
>>> @@ -1212,14 +1215,26 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>> 	}
>>> 	/* verify the window * number of ptes will map the partition */
>>> 	/* check largest block * page size > max memory hotplug addr */
>>> -	max_addr = ddw_memory_hotplug_max();
>>> -	if (query.largest_available_block < (max_addr >> page_shift)) {
>>> -		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
>>> -			  "%llu-sized pages\n", max_addr, query.largest_available_block,
>>> -			  1ULL << page_shift);
>>> +	/*
>>> +	 * The "ibm,pmemory" can appear anywhere in the address space.
>>> +	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
>>> +	 * for the upper limit and fallback to max RAM otherwise but this
>>> +	 * disables device::dma_ops_bypass.
>>> +	 */
>>> +	len = max_ram_len;
>> 
>> Here you override whatever find_existing_ddw() wrote to len?
>
> Not always, there is a bunch of gotos before this line to the end of the 
> function and one (which returns the existing window) is legit. Thanks,

Ah yep I see it.

Gotos considered confusing ;)

cheers


Re: [PATCH kernel v3 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-28 Thread Alexey Kardashevskiy
On 29/10/2020 11:40, Michael Ellerman wrote:
> Alexey Kardashevskiy writes:
>> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
>> index e4198700ed1a..91112e748491 100644
>> --- a/arch/powerpc/platforms/pseries/iommu.c
>> +++ b/arch/powerpc/platforms/pseries/iommu.c
>> @@ -1111,11 +1112,13 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
>>   */
>>  static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>  {
>> -	int len, ret;
>> +	int len = 0, ret;
>> +	bool pmem_present = of_find_node_by_type(NULL, "ibm,pmemory") != NULL;
> 
> That leaks a reference on the returned node.
> 
> 	dn = of_find_node_by_type(NULL, "ibm,pmemory");
> 	pmem_present = dn != NULL;
> 	of_node_put(dn);

ah, true. v4 then.

>> @@ -1126,7 +1129,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>  
>>  	mutex_lock(&direct_window_init_mutex);
>>  
>> -	dma_addr = find_existing_ddw(pdn);
>> +	dma_addr = find_existing_ddw(pdn, &len);
> 
> I don't see len used anywhere?
> 
>> 	if (dma_addr != 0)
>> 		goto out_unlock;
>>  
>> @@ -1212,14 +1215,26 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>> 	}
>> 	/* verify the window * number of ptes will map the partition */
>> 	/* check largest block * page size > max memory hotplug addr */
>> -	max_addr = ddw_memory_hotplug_max();
>> -	if (query.largest_available_block < (max_addr >> page_shift)) {
>> -		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
>> -			  "%llu-sized pages\n", max_addr, query.largest_available_block,
>> -			  1ULL << page_shift);
>> +	/*
>> +	 * The "ibm,pmemory" can appear anywhere in the address space.
>> +	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
>> +	 * for the upper limit and fallback to max RAM otherwise but this
>> +	 * disables device::dma_ops_bypass.
>> +	 */
>> +	len = max_ram_len;
> 
> Here you override whatever find_existing_ddw() wrote to len?

Not always, there is a bunch of gotos before this line to the end of the
function and one (which returns the existing window) is legit. Thanks,

>> +	if (pmem_present) {
>> +		if (query.largest_available_block >=
>> +		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
>> +			len = MAX_PHYSMEM_BITS - page_shift;
>> +		else
>> +			dev_info(&dev->dev, "Skipping ibm,pmemory");
>> +	}
>> +
>> +	if (query.largest_available_block < (1ULL << (len - page_shift))) {
>> +		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n",
>> +			1ULL << len, query.largest_available_block, 1ULL << page_shift);
>> 		goto out_failed;
>> 	}
>> -	len = order_base_2(max_addr);
>> 	win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
>> 	if (!win64) {
>> 		dev_info(&dev->dev,
> 
> cheers

--
Alexey


Re: [PATCH kernel v3 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-28 Thread Michael Ellerman
Alexey Kardashevskiy writes:
> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> index e4198700ed1a..91112e748491 100644
> --- a/arch/powerpc/platforms/pseries/iommu.c
> +++ b/arch/powerpc/platforms/pseries/iommu.c
> @@ -1111,11 +1112,13 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
>   */
>  static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>  {
> -	int len, ret;
> +	int len = 0, ret;
> +	bool pmem_present = of_find_node_by_type(NULL, "ibm,pmemory") != NULL;

That leaks a reference on the returned node.

dn = of_find_node_by_type(NULL, "ibm,pmemory");
pmem_present = dn != NULL;
of_node_put(dn);


> @@ -1126,7 +1129,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>  
> 	mutex_lock(&direct_window_init_mutex);
>  
> -	dma_addr = find_existing_ddw(pdn);
> +	dma_addr = find_existing_ddw(pdn, &len);

I don't see len used anywhere?

> 	if (dma_addr != 0)
> 		goto out_unlock;
>  
> @@ -1212,14 +1215,26 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
> 	}
> 	/* verify the window * number of ptes will map the partition */
> 	/* check largest block * page size > max memory hotplug addr */
> -	max_addr = ddw_memory_hotplug_max();
> -	if (query.largest_available_block < (max_addr >> page_shift)) {
> -		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
> -			  "%llu-sized pages\n", max_addr, query.largest_available_block,
> -			  1ULL << page_shift);
> +	/*
> +	 * The "ibm,pmemory" can appear anywhere in the address space.
> +	 * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
> +	 * for the upper limit and fallback to max RAM otherwise but this
> +	 * disables device::dma_ops_bypass.
> +	 */
> +	len = max_ram_len;

Here you override whatever find_existing_ddw() wrote to len?

> +	if (pmem_present) {
> +		if (query.largest_available_block >=
> +		    (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
> +			len = MAX_PHYSMEM_BITS - page_shift;
> +		else
> +			dev_info(&dev->dev, "Skipping ibm,pmemory");
> +	}
> +
> +	if (query.largest_available_block < (1ULL << (len - page_shift))) {
> +		dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n",
> +			1ULL << len, query.largest_available_block, 1ULL << page_shift);
>   goto out_failed;
>   }
> - len = order_base_2(max_addr);
>   win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
>   if (!win64) {
> 		dev_info(&dev->dev,


cheers


[PATCH kernel v3 2/2] powerpc/dma: Fallback to dma_ops when persistent memory present

2020-10-28 Thread Alexey Kardashevskiy
So far we have been using huge DMA windows to map all the RAM available.
The RAM is normally mapped to the VM address space contiguously, and
there is always a reasonable upper limit for possible future hot-plugged
RAM, which makes it easy to map all RAM via the IOMMU.

Now there is persistent memory ("ibm,pmemory" in the FDT) which (unlike
normal RAM) can be mapped anywhere in the VM space beyond the maximum RAM
size, and since it can be used for DMA, it requires extending the huge
window up to MAX_PHYSMEM_BITS, which requires hypervisor support for:
1. huge TCE tables;
2. multilevel TCE tables;
3. huge IOMMU pages.

Certain hypervisors cannot do any of these, so the only option left is
restricting the huge DMA window to include only RAM and falling back to
the default DMA window for persistent memory.
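
To put rough numbers on this (an illustration with assumed values, not
figures from this patch): with 64K IOMMU pages (page_shift == 16) and a
51-bit physical address space (MAX_PHYSMEM_BITS == 51), the TCE counts
work out as:

	/* Assumed values, for illustration only. */
	unsigned long long tces_whole_space = 1ULL << (51 - 16); /* 2^35, ~34 billion TCEs */
	unsigned long long tces_32gb_ram    = 1ULL << (35 - 16); /* 2^19 = 524288 TCEs     */

A window over the whole space is only feasible with huge/multilevel TCE
tables or huge IOMMU pages, while a RAM-only window stays small.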

This defines arch_dma_map_direct/etc to allow the generic DMA code to
perform additional checks on whether direct DMA is still possible.

This checks if the system has persistent memory. If it does not,
the DMA bypass mode is selected, i.e.
* dev->bus_dma_limit = 0
* dev->dma_ops_bypass = true <- this avoids calling dma_ops for mapping
  (see the sketch below).
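
The resulting decision on the generic side can be sketched like this (a
simplified illustration, not the exact generic-code change; goes_direct()
is a made-up helper name):

	/* Sketch: does a mapping at phys addr bypass the IOMMU ops? */
	static bool goes_direct(struct device *dev, phys_addr_t addr)
	{
		if (dev->dma_ops_bypass)	/* no pmem: always direct */
			return true;
		/* pmem present: direct only below the RAM-only window */
		if (dev->bus_dma_limit &&
		    phys_to_dma(dev, addr) <= dev->bus_dma_limit)
			return true;
		return false;			/* above the window: use dma_ops */
	}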

If there is such memory, this creates an identity mapping only for RAM and
sets dev->bus_dma_limit to let the generic code decide whether to call into
the direct DMA or the indirect DMA ops.
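
Concretely (invented numbers): with 32GB of RAM the RAM-only window gives
dev->bus_dma_limit = 0x800000000, and a driver's mappings split like:

	/* ram_buf at phys 0x123450000: below the limit, served by dma-direct */
	dma_addr_t a1 = dma_map_single(dev, ram_buf, SZ_4K, DMA_TO_DEVICE);
	/* pmem_buf at phys 0x10000000000: above the limit, served by iommu ops */
	dma_addr_t a2 = dma_map_single(dev, pmem_buf, SZ_4K, DMA_TO_DEVICE);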

This should not change the existing behaviour when there is no persistent
memory, as dev->dma_ops_bypass is expected to be set in that case.

Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/kernel/dma-iommu.c        | 70 +++++++++++++++++++++++++-
 arch/powerpc/platforms/pseries/iommu.c | 44 +++++++++++++------
 arch/powerpc/Kconfig                   |  1 +
 3 files changed, 103 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a1c744194018..21e2d9f059a9 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -10,6 +10,63 @@
 #include <linux/pci.h>
 #include <asm/iommu.h>
 
+#define can_map_direct(dev, addr) \
+   ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
+
+bool arch_dma_map_page_direct(struct device *dev, phys_addr_t addr)
+{
+   if (likely(!dev->bus_dma_limit))
+   return false;
+
+   return can_map_direct(dev, addr);
+}
+EXPORT_SYMBOL_GPL(arch_dma_map_page_direct);
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+bool arch_dma_unmap_page_direct(struct device *dev, dma_addr_t dma_handle)
+{
+   if (likely(!dev->bus_dma_limit))
+   return false;
+
+   return is_direct_handle(dev, dma_handle);
+}
+EXPORT_SYMBOL_GPL(arch_dma_unmap_page_direct);
+
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents)
+{
+   struct scatterlist *s;
+   int i;
+
+   if (likely(!dev->bus_dma_limit))
+   return false;
+
+   for_each_sg(sg, s, nents, i) {
+   if (!can_map_direct(dev, sg_phys(s) + s->offset + s->length))
+   return false;
+   }
+
+   return true;
+}
+EXPORT_SYMBOL(arch_dma_map_sg_direct);
+
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents)
+{
+   struct scatterlist *s;
+   int i;
+
+   if (likely(!dev->bus_dma_limit))
+   return false;
+
+   for_each_sg(sg, s, nents, i) {
+   if (!is_direct_handle(dev, s->dma_address + s->length))
+   return false;
+   }
+
+   return true;
+}
+EXPORT_SYMBOL(arch_dma_unmap_sg_direct);
+
 /*
  * Generic iommu implementation
  */
@@ -90,8 +147,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
struct iommu_table *tbl = get_iommu_table_base(dev);
 
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
-   dev->dma_ops_bypass = true;
-   dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+   /*
+* dma_iommu_bypass_supported() sets dma_max when there is
+* 1:1 mapping but it is somehow limited.
+* ibm,pmemory is one example.
+*/
+   dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+   if (!dev->dma_ops_bypass)
+			dev_warn(dev, "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+				 dev->bus_dma_limit);
+   else
+   dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
 
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index e4198700ed1a..91112e748491 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
np, ret);
 }
 
-static u64 find_existing_ddw(struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
 {
struct direct_window *window;