[PATCH 1/7] vfio: iommu_type1: Clear added dirty bit when unwind pin

2020-12-09 Thread Keqian Zhu
Currently we do not clear the dirty bits added to the bitmap when we
unwind a pin, so if a pin fails halfway through, unnecessary dirty bits
are left set in the bitmap. By clearing the added dirty bits during
unwind, userspace sees fewer dirty pages, which saves it considerable
time handling them.

Note that we must distinguish between the bits added by this pin and
the bits already set before the pin, so introduce bitmap_added to
record this.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 33 ++---
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 67e827638995..f129d24a6ec3 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -637,7 +637,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_group *group;
int i, j, ret;
+   unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
unsigned long remote_vaddr;
+   unsigned long bitmap_offset;
+   unsigned long *bitmap_added;
+   dma_addr_t iova;
struct vfio_dma *dma;
bool do_accounting;
 
@@ -650,6 +654,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 
mutex_lock(&iommu->lock);
 
+   bitmap_added = bitmap_zalloc(npage, GFP_KERNEL);
+   if (!bitmap_added) {
+   ret = -ENOMEM;
+   goto pin_done;
+   }
+
/* Fail if notifier list is empty */
if (!iommu->notifier.head) {
ret = -EINVAL;
@@ -664,7 +674,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
 
for (i = 0; i < npage; i++) {
-   dma_addr_t iova;
struct vfio_pfn *vpfn;
 
iova = user_pfn[i] << PAGE_SHIFT;
@@ -699,14 +708,10 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
}
 
if (iommu->dirty_page_tracking) {
-   unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
-
-   /*
-* Bitmap populated with the smallest supported page
-* size
-*/
-   bitmap_set(dma->bitmap,
-  (iova - dma->iova) >> pgshift, 1);
+   /* Populated with the smallest supported page size */
+   bitmap_offset = (iova - dma->iova) >> pgshift;
+   if (!test_and_set_bit(bitmap_offset, dma->bitmap))
+   set_bit(i, bitmap_added);
}
}
ret = i;
@@ -722,14 +727,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 pin_unwind:
phys_pfn[i] = 0;
for (j = 0; j < i; j++) {
-   dma_addr_t iova;
-
iova = user_pfn[j] << PAGE_SHIFT;
dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
vfio_unpin_page_external(dma, iova, do_accounting);
phys_pfn[j] = 0;
+
+   if (test_bit(j, bitmap_added)) {
+   bitmap_offset = (iova - dma->iova) >> pgshift;
+   clear_bit(bitmap_offset, dma->bitmap);
+   }
}
 pin_done:
+   if (bitmap_added)
+   bitmap_free(bitmap_added);
+
mutex_unlock(&iommu->lock);
return ret;
 }
-- 
2.23.0



[PATCH 6/7] vfio: iommu_type1: Drop parameter "pgsize" of vfio_iova_dirty_bitmap.

2020-12-09 Thread Keqian Zhu
We always use the smallest supported page size of the vfio_iommu as
pgsize. Remove the "pgsize" parameter of vfio_iova_dirty_bitmap.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 32ab889c8193..2d7a5cd9b916 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1026,11 +1026,12 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
 }
 
 static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
- dma_addr_t iova, size_t size, size_t pgsize)
+ dma_addr_t iova, size_t size)
 {
struct vfio_dma *dma;
struct rb_node *n;
-   unsigned long pgshift = __ffs(pgsize);
+   unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+   size_t pgsize = (size_t)1 << pgshift;
int ret;
 
/*
@@ -2861,8 +2862,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
if (iommu->dirty_page_tracking)
ret = vfio_iova_dirty_bitmap(range.bitmap.data,
 iommu, range.iova,
-range.size,
-range.bitmap.pgsize);
+range.size);
else
ret = -EINVAL;
 out_unlock:
-- 
2.23.0



[PATCH 2/7] vfio: iommu_type1: Initially set the pinned_page_dirty_scope

2020-12-09 Thread Keqian Zhu
Currently there are three ways to promote the pinned_page_dirty_scope
status of a vfio_iommu:

1. Through the pin interface.
2. Detaching a group without dirty tracking.
3. Attaching a group with dirty tracking.

For point 3, the only case in which the pinned status can change is
when the vfio_iommu is newly created.

Given that we can safely set the pinned status when creating a new
vfio_iommu, do it there; point 3 can then be removed to reduce
operations.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f129d24a6ec3..c52bcefba96b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2064,12 +2064,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 * Non-iommu backed group cannot dirty memory directly,
 * it can only use interfaces that provide dirty
 * tracking.
-* The iommu scope can only be promoted with the
-* addition of a dirty tracking group.
 */
group->pinned_page_dirty_scope = true;
-   if (!iommu->pinned_page_dirty_scope)
-   update_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
 
return 0;
@@ -2457,6 +2453,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
INIT_LIST_HEAD(&iommu->iova_list);
iommu->dma_list = RB_ROOT;
iommu->dma_avail = dma_entry_limit;
+   iommu->pinned_page_dirty_scope = true;
mutex_init(&iommu->lock);
BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
 
-- 
2.23.0



[PATCH 4/7] vfio: iommu_type1: Fix missing dirty page when promote pinned_scope

2020-12-09 Thread Keqian Zhu
When we pin or detach a group which is not dirty-tracking capable,
we try to promote the pinned_scope of the vfio_iommu.

If we succeed, vfio only reports the pinned scope as dirty to
userspace from then on, but memory written before the pin or detach
is missed.

The solution is to mark the entire DMA range as dirty before
promoting the pinned_scope of the vfio_iommu.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index bd9a94590ebc..00684597b098 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1633,6 +1633,20 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
return group;
 }
 
+static void vfio_populate_bitmap_all(struct vfio_iommu *iommu)
+{
+   struct rb_node *n;
+   unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+
+   for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+   struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+   unsigned long nbits = dma->size >> pgshift;
+
+   if (dma->iommu_mapped)
+   bitmap_set(dma->bitmap, 0, nbits);
+   }
+}
+
 static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu)
 {
struct vfio_domain *domain;
@@ -1657,6 +1671,10 @@ static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu)
}
 
iommu->pinned_page_dirty_scope = true;
+
+   /* Set all bitmap to avoid missing dirty page */
+   if (iommu->dirty_page_tracking)
+   vfio_populate_bitmap_all(iommu);
 }
 
 static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
-- 
2.23.0



[PATCH 3/7] vfio: iommu_type1: Make an explicit "promote" semantic

2020-12-09 Thread Keqian Zhu
When we want to promote the pinned_page_dirty_scope of a vfio_iommu,
we must call the "update" function to visit all vfio_groups, but when
we want to downgrade it, we can set the flag directly.

Given the above, we can give that function an explicit "promote"
semantic. As a bonus, if the vfio_iommu has already been promoted,
it can return early.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 27 +--
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c52bcefba96b..bd9a94590ebc 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -148,7 +148,7 @@ static int put_pfn(unsigned long pfn, int prot);
 static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
   struct iommu_group *iommu_group);
 
-static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
+static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu);
 /*
  * This code handles mapping and unmapping of user data buffers
  * into DMA'ble space using the IOMMU
@@ -719,7 +719,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
group = vfio_iommu_find_iommu_group(iommu, iommu_group);
if (!group->pinned_page_dirty_scope) {
group->pinned_page_dirty_scope = true;
-   update_pinned_page_dirty_scope(iommu);
+   promote_pinned_page_dirty_scope(iommu);
}
 
goto pin_done;
@@ -1633,27 +1633,26 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
return group;
 }
 
-static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
+static void promote_pinned_page_dirty_scope(struct vfio_iommu *iommu)
 {
struct vfio_domain *domain;
struct vfio_group *group;
 
+   if (iommu->pinned_page_dirty_scope)
+   return;
+
list_for_each_entry(domain, &iommu->domain_list, next) {
list_for_each_entry(group, &domain->group_list, next) {
-   if (!group->pinned_page_dirty_scope) {
-   iommu->pinned_page_dirty_scope = false;
+   if (!group->pinned_page_dirty_scope)
return;
-   }
}
}
 
if (iommu->external_domain) {
domain = iommu->external_domain;
list_for_each_entry(group, &domain->group_list, next) {
-   if (!group->pinned_page_dirty_scope) {
-   iommu->pinned_page_dirty_scope = false;
+   if (!group->pinned_page_dirty_scope)
return;
-   }
}
}
 
@@ -2348,7 +2347,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_domain *domain;
struct vfio_group *group;
-   bool update_dirty_scope = false;
+   bool promote_dirty_scope = false;
LIST_HEAD(iova_copy);
 
mutex_lock(&iommu->lock);
@@ -2356,7 +2355,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
if (iommu->external_domain) {
group = find_iommu_group(iommu->external_domain, iommu_group);
if (group) {
-   update_dirty_scope = !group->pinned_page_dirty_scope;
+   promote_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
 
@@ -2386,7 +2385,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
continue;
 
vfio_iommu_detach_group(domain, group);
-   update_dirty_scope = !group->pinned_page_dirty_scope;
+   promote_dirty_scope = !group->pinned_page_dirty_scope;
list_del(&group->next);
kfree(group);
/*
@@ -2422,8 +2421,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 * Removal of a group without dirty tracking may allow the iommu scope
 * to be promoted.
 */
-   if (update_dirty_scope)
-   update_pinned_page_dirty_scope(iommu);
+   if (promote_dirty_scope)
+   promote_pinned_page_dirty_scope(iommu);
mutex_unlock(&iommu->lock);
 }
 
-- 
2.23.0



[PATCH 5/7] vfio: iommu_type1: Drop parameter "pgsize" of vfio_dma_bitmap_alloc_all

2020-12-09 Thread Keqian Zhu
We always use the smallest supported page size of the vfio_iommu as
pgsize. Remove the "pgsize" parameter of vfio_dma_bitmap_alloc_all.

Signed-off-by: Keqian Zhu 
---
 drivers/vfio/vfio_iommu_type1.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 00684597b098..32ab889c8193 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -236,9 +236,10 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
}
 }
 
-static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
+static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu)
 {
struct rb_node *n;
+   size_t pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
 
for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
@@ -2798,12 +2799,9 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
return -EINVAL;
 
if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
-   size_t pgsize;
-
mutex_lock(&iommu->lock);
-   pgsize = 1 << __ffs(iommu->pgsize_bitmap);
if (!iommu->dirty_page_tracking) {
-   ret = vfio_dma_bitmap_alloc_all(iommu, pgsize);
+   ret = vfio_dma_bitmap_alloc_all(iommu);
if (!ret)
iommu->dirty_page_tracking = true;
}
-- 
2.23.0



Re: [RFC PATCH 04/15] lib/scatterlist: Add flag for indicating P2PDMA segments in an SGL

2020-12-09 Thread Dan Williams
On Wed, Dec 9, 2020 at 6:07 PM Logan Gunthorpe  wrote:
>
>
>
> On 2020-12-09 6:22 p.m., Dan Williams wrote:
> > On Mon, Nov 9, 2020 at 8:47 AM Logan Gunthorpe  wrote:
> >>
> >>
> >>
> >> On 2020-11-09 2:12 a.m., Christoph Hellwig wrote:
> >>> On Fri, Nov 06, 2020 at 10:00:25AM -0700, Logan Gunthorpe wrote:
>  We make use of the top bit of the dma_length to indicate a P2PDMA
>  segment.
> >>>
> >>> I don't think "we" can.  There is nothing limiting the size of a SGL
> >>> segment.
> >>
> >> Yes, I expected this would be the unacceptable part. Any alternative ideas?
> >
> > Why is the SG_P2PDMA_FLAG needed as compared to checking the SGL
> > segment-pages for is_pci_p2pdma_page()?
>
> Because the DMA and page segments in the SGL aren't necessarily aligned...
>
> The IOMMU implementations can coalesce multiple pages into fewer DMA
> address ranges, so the page pointed to by sg->page_link may not be the
> one that corresponds to the address in sg->dma_address for a given segment.
>
> If that makes sense -- it's not the easiest thing to explain.

It does...

Did someone already grab, or did you already consider the 3rd
available bit in page_link? AFAICS only SG_CHAIN and SG_END are
reserved. However, if you have a CONFIG_64BIT dependency for
user-directed p2pdma that would seem to allow SG_P2PDMA_FLAG to be
(0x4) in page_link.
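
A minimal sketch of that encoding (an illustration only: it assumes the
CONFIG_64BIT dependency mentioned above so that bit 2 of page_link is
genuinely free, the helper names are hypothetical, and the real
sg_page()/sg_assign_page() accessors would also need to mask the new bit):

#include <linux/scatterlist.h>

/* Hypothetical third low bit, next to SG_CHAIN (0x1) and SG_END (0x2). */
#define SG_P2PDMA_FLAG	0x04UL

static inline void sg_mark_p2pdma(struct scatterlist *sg)
{
	/* Only safe while struct page pointers are >= 8-byte aligned. */
	sg->page_link |= SG_P2PDMA_FLAG;
}

static inline bool sg_is_p2pdma(struct scatterlist *sg)
{
	return sg->page_link & SG_P2PDMA_FLAG;
}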


[RESEND PATCH 1/1] iommu/amd: Remove unnecessary assignment

2020-12-09 Thread Adrian Huang
From: Adrian Huang 

The values of these local variables are assigned after the variables
are declared, so there is no need to assign initial values at
declaration time.

Also, there is no need to set the local variable 'ivrs_base' to NULL
after invoking acpi_put_table().

Signed-off-by: Adrian Huang 
---
 drivers/iommu/amd/init.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 23a790f8f550..103cbf8fc2d9 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1916,7 +1916,7 @@ static void print_iommu_info(void)
 static int __init amd_iommu_init_pci(void)
 {
struct amd_iommu *iommu;
-   int ret = 0;
+   int ret;
 
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
@@ -2555,8 +2555,8 @@ static void __init free_dma_resources(void)
 static int __init early_amd_iommu_init(void)
 {
struct acpi_table_header *ivrs_base;
+   int i, remap_cache_sz, ret;
acpi_status status;
-   int i, remap_cache_sz, ret = 0;
u32 pci_id;
 
if (!amd_iommu_detected)
@@ -2698,7 +2698,6 @@ static int __init early_amd_iommu_init(void)
 out:
/* Don't leak any ACPI memory */
acpi_put_table(ivrs_base);
-   ivrs_base = NULL;
 
return ret;
 }
-- 
2.17.1



Re: [RFC PATCH 04/15] lib/scatterlist: Add flag for indicating P2PDMA segments in an SGL

2020-12-09 Thread Logan Gunthorpe



On 2020-12-09 6:22 p.m., Dan Williams wrote:
> On Mon, Nov 9, 2020 at 8:47 AM Logan Gunthorpe  wrote:
>>
>>
>>
>> On 2020-11-09 2:12 a.m., Christoph Hellwig wrote:
>>> On Fri, Nov 06, 2020 at 10:00:25AM -0700, Logan Gunthorpe wrote:
 We make use of the top bit of the dma_length to indicate a P2PDMA
 segment.
>>>
>>> I don't think "we" can.  There is nothing limiting the size of a SGL
>>> segment.
>>
>> Yes, I expected this would be the unacceptable part. Any alternative ideas?
> 
> Why is the SG_P2PDMA_FLAG needed as compared to checking the SGL
> segment-pages for is_pci_p2pdma_page()?

Because the DMA and page segments in the SGL aren't necessarily aligned...

The IOMMU implementations can coalesce multiple pages into fewer DMA
address ranges, so the page pointed to by sg->page_link may not be the
one that corresponds to the address in sg->dma_address for a given segment.

If that makes sense -- it's not the easiest thing to explain.

Logan
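
To illustrate the mismatch with a hedged sketch (not code from this patch
set; use_segment() is a stand-in for the driver's real per-segment work):

#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/scatterlist.h>

extern void use_segment(dma_addr_t addr, unsigned int len);

static void walk_mapped_sgl(struct device *dev, struct scatterlist *sgl,
			    int nents)
{
	struct scatterlist *sg;
	int i, mapped;

	mapped = dma_map_sg(dev, sgl, nents, DMA_BIDIRECTIONAL);
	if (!mapped)
		return;

	/* CPU/page side: nents entries, one per page range. */
	for_each_sg(sgl, sg, nents, i)
		if (is_pci_p2pdma_page(sg_page(sg)))
			pr_debug("entry %d is P2P memory\n", i);

	/*
	 * DMA side: only 'mapped' entries remain after IOMMU coalescing,
	 * and the sg_page() of entry i need not back sg_dma_address(sg);
	 * that is why a per-DMA-segment page check is unreliable.
	 */
	for_each_sg(sgl, sg, mapped, i)
		use_segment(sg_dma_address(sg), sg_dma_len(sg));
}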




[PATCH v9] x86, swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests

2020-12-09 Thread Ashish Kalra
From: Ashish Kalra 

For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
SEV uses SWIOTLB to make this happen without requiring changes to device
drivers.  However, depending on the workload being run, the default 64MB
of it might not be enough and it may run out of buffers to use for DMA,
resulting in I/O errors and/or performance degradation for high 
I/O workloads.

Adjust the default size of SWIOTLB for SEV guests using a
percentage of the total memory available to guest for the SWIOTLB buffers.

Adds a new sev_setup_arch() function which is invoked from setup_arch()
and it calls into a new swiotlb generic code function swiotlb_adjust_size()
to do the SWIOTLB buffer adjustment.
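
As a rough worked example of that sizing (a back-of-the-envelope check,
not from the patch): with the 64MB floor and 1GB ceiling used below, a
1GB guest stays at the 64MB floor (6% is only ~61MB), a 4GB guest gets
~246MB, and guests above roughly 17GB are capped at 1GB.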

v5 fixed build errors and warnings as
Reported-by: kbuild test robot 

Signed-off-by: Ashish Kalra 
Co-developed-by: Borislav Petkov 
Signed-off-by: Borislav Petkov 
---
 arch/x86/include/asm/mem_encrypt.h |  2 ++
 arch/x86/kernel/setup.c|  6 ++
 arch/x86/mm/mem_encrypt.c  | 31 ++
 include/linux/swiotlb.h|  8 
 kernel/dma/swiotlb.c   | 20 +--
 5 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 83012af1660c..39e84aa162c7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -38,6 +38,7 @@ void __init sme_map_bootdata(char *real_mode_data);
 void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
+void __init sev_setup_arch(void);
 
 void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
@@ -73,6 +74,7 @@ static inline void __init sme_map_bootdata(char *real_mode_data) { }
 static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
+static inline void __init sev_setup_arch(void) { }
 
 static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 84f581c91db4..874b2c17af41 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1054,6 +1054,12 @@ void __init setup_arch(char **cmdline_p)
memblock_set_current_limit(ISA_END_ADDRESS);
e820__memblock_setup();
 
+   /*
+* Needs to run after memblock setup because it needs the physical
+* memory size.
+*/
+   sev_setup_arch();
+
reserve_bios_regions();
 
efi_fake_memmap();
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index cc1a4c762149..96c31b3f215b 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -246,6 +246,37 @@ static void set_memory_enc_dec_hypercall(unsigned long vaddr, int npages,
}
 }
 
+void __init sev_setup_arch(void)
+{
+   phys_addr_t total_mem = memblock_phys_mem_size();
+   unsigned long size;
+
+   if (!sev_active())
+   return;
+
+   /*
+* For SEV, all DMA has to occur via shared/unencrypted pages.
+* SEV uses SWIOTLB to make this happen without changing device
+* drivers. However, depending on the workload being run, the
+* default 64MB of SWIOTLB may not be enough and SWIOTLB may
+* run out of buffers for DMA, resulting in I/O errors and/or
+* performance degradation especially with high I/O workloads.
+*
+* Adjust the default size of SWIOTLB for SEV guests using
+* a percentage of guest memory for SWIOTLB buffers.
+* Also, as the SWIOTLB bounce buffer memory is allocated
+* from low memory, ensure that the adjusted size is within
+* the limits of low available memory.
+*
+* The percentage of guest memory used here for SWIOTLB buffers
+* is more of an approximation of the static adjustment, which
+* is 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., roughly 6%.
+*/
+   size = total_mem * 6 / 100;
+   size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
+   swiotlb_adjust_size(size);
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
pgprot_t old_prot, new_prot;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 3bb72266a75a..075748f367ea 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,6 +29,9 @@ enum swiotlb_force {
  */
 #define IO_TLB_SHIFT 11
 
+/* default to 64MB */
+#define IO_TLB_DEFAULT_SIZE (64UL<<20)
+
 extern void swiotlb_init(int verbose);
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 extern unsigned long swiotlb_nr_tbl(void);
@@ -77,6 +80,7 @@ void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
+void __init swiotlb_adjust_size(unsigned long new_size);

Re: [RFC PATCH 04/15] lib/scatterlist: Add flag for indicating P2PDMA segments in an SGL

2020-12-09 Thread Dan Williams
On Mon, Nov 9, 2020 at 8:47 AM Logan Gunthorpe  wrote:
>
>
>
> On 2020-11-09 2:12 a.m., Christoph Hellwig wrote:
> > On Fri, Nov 06, 2020 at 10:00:25AM -0700, Logan Gunthorpe wrote:
> >> We make use of the top bit of the dma_length to indicate a P2PDMA
> >> segment.
> >
> > I don't think "we" can.  There is nothing limiting the size of a SGL
> > segment.
>
> Yes, I expected this would be the unacceptable part. Any alternative ideas?

Why is the SG_P2PDMA_FLAG needed as compared to checking the SGL
segment-pages for is_pci_p2pdma_page()?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [PATCH v10 10/13] iommu/arm-smmu-v3: Check for SVA features

2020-12-09 Thread Krishna Reddy
> > The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support 
> > for BTM.
> > Do you have a plan to get your earlier patch to handle invalidate
> > notifications into upstream sometime soon?

>Is that a limitation of the SMMU implementation, the interconnect or the 
>integration?

It is a limitation of the interconnect. The DVM messages don't reach the SMMU.
The BTM bit in the SMMU IDR does indicate that it doesn't support BTM.

-KR


Re: [PATCH v10 10/13] iommu/arm-smmu-v3: Check for SVA features

2020-12-09 Thread Will Deacon
On Wed, Dec 09, 2020 at 07:49:09PM +, Krishna Reddy wrote:
> > > Why is BTM mandated for SVA? I couldn't find this requirement in 
> > > SMMU spec (Sorry if I missed it or this got discussed earlier). But 
> > > if performance is the
> > only concern here,
> > > is it better just to allow it with a warning rather than limiting 
> > > SMMUs without
> > BTM?
> >
> > It's a performance concern and requires to support multiple 
> > configurations, but the spec allows it. Are there SMMUs without BTM 
> > that need it?
> 
> The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support 
> for BTM.
> Do you have a plan to get your earlier patch to handle invalidate
> notifications into upstream sometime soon?

Is that a limitation of the SMMU implementation, the interconnect or the
integration?

Will


RE: [PATCH v10 10/13] iommu/arm-smmu-v3: Check for SVA features

2020-12-09 Thread Krishna Reddy
Hi Jean,
> > Why is BTM mandated for SVA? I couldn't find this requirement in 
> > SMMU spec (Sorry if I missed it or this got discussed earlier). But 
> > if performance is the
> only concern here,
> > is it better just to allow it with a warning rather than limiting 
> > SMMUs without
> BTM?
>
> It's a performance concern and requires to support multiple 
> configurations, but the spec allows it. Are there SMMUs without BTM 
> that need it?

The Tegra Next Generation SOC uses arm-smmu-v3, but it doesn't have support for 
BTM.
Do you have a plan to get your earlier patch to handle invalidate
notifications into upstream sometime soon?
Can the dependency on BTM be relaxed with the patch?

[PATCH v9 13/13] iommu/arm-smmu-v3: Hook up ATC invalidation to mm ops
https://www.spinics.net/lists/arm-kernel/msg825099.html

-KR


Re: [PATCH v8] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.

2020-12-09 Thread Borislav Petkov
On Wed, Dec 09, 2020 at 07:34:16PM +, Ashish Kalra wrote:
> This should work, but I am concerned about making IO_TLB_DEFAULT_SIZE
> (which is pretty much private to generic swiotlb code) visible
> externally; I don't know if there are any concerns with that?

Meh, it's just a define and it is not a secret that swiotlb size by
default is 64M.

Btw, pls trim your reply by removing quoted text you're not responding
to.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH v8] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.

2020-12-09 Thread Ashish Kalra
On Wed, Dec 09, 2020 at 06:51:05PM +0100, Borislav Petkov wrote:
> On Wed, Dec 09, 2020 at 01:19:46PM +, Ashish Kalra wrote:
> > reserve_crashkernel() calls swiotlb_size_or_default() to get SWIOTLB
> ...
> 
> Thanks for explaining.
> 
> > There is a need to introduce an architecture specific callback
> > for swiotlb_adjust() because of the following reason :
> 
> So what your version currently does is:
> 
> 1. from arch code, call generic code - swiotlb_adjust
> 
> 2. in generic code, call back into arch code - arch_swiotlb_adjust
> 
> But that's twice the work needed to get you where you wanna go.
> 
> What you wanna do is, from arch code, call into swiotlb generic code.
> That's it, no more.
> 
> Just like mem_encrypt.c calls swiotlb_update_mem_attributes(), for
> example.
> 
> And other architectures can simply do the same thing and you have it all
> solved and other architectures don't even need to refactor - they simply
> copy what x86 does.
> 
> IOW, something like this:
> 

This should work, but I am concerned about making IO_TLB_DEFAULT_SIZE
(which is pretty much private to generic swiotlb code) visible
externally; I don't know if there are any concerns with that?

Thanks,
Ashish

> ---
> diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
> index 2f62bbdd9d12..31c4df123aa0 100644
> --- a/arch/x86/include/asm/mem_encrypt.h
> +++ b/arch/x86/include/asm/mem_encrypt.h
> @@ -37,6 +37,7 @@ void __init sme_map_bootdata(char *real_mode_data);
>  void __init sme_unmap_bootdata(char *real_mode_data);
>  
>  void __init sme_early_init(void);
> +void __init sev_setup_arch(void);
>  
>  void __init sme_encrypt_kernel(struct boot_params *bp);
>  void __init sme_enable(struct boot_params *bp);
> @@ -69,6 +70,7 @@ static inline void __init sme_map_bootdata(char *real_mode_data) { }
>  static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
>  
>  static inline void __init sme_early_init(void) { }
> +static inline void __init sev_setup_arch(void) { }
>  
>  static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
>  static inline void __init sme_enable(struct boot_params *bp) { }
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index a23130c86bdd..740f3bdb3f61 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1049,6 +1049,12 @@ void __init setup_arch(char **cmdline_p)
>   memblock_set_current_limit(ISA_END_ADDRESS);
>   e820__memblock_setup();
>  
> + /*
> +  * Needs to run after memblock setup because it needs the physical
> +  * memory size.
> +  */
> + sev_setup_arch();
> +
>   reserve_bios_regions();
>  
>   efi_fake_memmap();
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index bc0833713be9..f3db85673eae 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -198,6 +198,37 @@ void __init sme_early_init(void)
>   swiotlb_force = SWIOTLB_FORCE;
>  }
>  
> +void __init sev_setup_arch(void)
> +{
> + phys_addr_t total_mem = memblock_phys_mem_size();
> + unsigned long size;
> +
> + if (!sev_active())
> + return;
> +
> + /*
> +  * For SEV, all DMA has to occur via shared/unencrypted pages.
> +  * SEV uses SWIOTLB to make this happen without changing device
> +  * drivers. However, depending on the workload being run, the
> +  * default 64MB of SWIOTLB may not be enough and SWIOTLB may
> +  * run out of buffers for DMA, resulting in I/O errors and/or
> +  * performance degradation especially with high I/O workloads.
> +  *
> +  * Adjust the default size of SWIOTLB for SEV guests using
> +  * a percentage of guest memory for SWIOTLB buffers.
> +  * Also as the SWIOTLB bounce buffer memory is allocated
> +  * from low memory, ensure that the adjusted size is within
> +  * the limits of low available memory.
> +  *
> +  * The percentage of guest memory used here for SWIOTLB buffers
> +  * is more of an approximation of the static adjustment, which
> +  * is 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., roughly 6%.
> +  */
> + size = total_mem * 6 / 100;
> + size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
> + swiotlb_adjust_size(size);
> +}
> +
>  static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
>  {
>   pgprot_t old_prot, new_prot;
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index fbdc65782195..7aa94e2f99c6 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -30,6 +30,9 @@ enum swiotlb_force {
>   */
>  #define IO_TLB_SHIFT 11
>  
> +/* default to 64MB */
> +#define IO_TLB_DEFAULT_SIZE (64UL<<20)
> +
>  extern void swiotlb_init(int verbose);
>  int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
>  extern unsigned long swiotlb_nr_tbl(void);
> @@ -78,6 +81,7 @@ void __init swiotlb_exit(void);
>  

Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Jerry Snitselaar
On Wed, Dec 9, 2020 at 12:18 PM Linus Torvalds  wrote:
>
> On Wed, Dec 9, 2020 at 11:12 AM Jerry Snitselaar  wrote:
> >
> > Since the field in the device table entry format expects it to be n
> > where there are 2^n entries in the table I guess it should be:
> >
> > #define DTE_IRQ_TABLE_LEN 9
> > #define MAX_IRQS_PER_TABLE (1 << DTE_IRQ_TABLE_LEN)
>
> No, that "DTE_IRQ_TABLE_LEN" is not the size shift - it's the size
> shift value in that DTE field, which is shifted up by 1.
>
> That's why the current code does that
>
>#define DTE_IRQ_TABLE_LEN   (9ULL << 1)
>
> there..
>
> Which was why I suggested that new #define that is the *actual* shift
> value, and then the DTE thing and the MAX_IRQS_PER_TABLE values would
> depend on that.
>
>Linus
>

Yes, when I read it my head was translating it as setting them both to
512, and then I forgot that it gets shifted over by 1. Which, considering
I was the one who noticed the original problem of it still being 8, was
a nice brain fart. This should be fixed like you suggest.



Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Linus Torvalds
On Wed, Dec 9, 2020 at 11:12 AM Jerry Snitselaar  wrote:
>
> Since the field in the device table entry format expects it to be n
> where there are 2^n entries in the table I guess it should be:
>
> #define DTE_IRQ_TABLE_LEN 9
> #define MAX_IRQS_PER_TABLE (1 << DTE_IRQ_TABLE_LEN)

No, that "DTE_IRQ_TABLE_LEN" is not the size shift - it's the size
shift value in that DTE field, which is shifted up by 1.

That's why the current code does that

   #define DTE_IRQ_TABLE_LEN   (9ULL << 1)

there..

Which was why I suggested that new #define that is the *actual* shift
value, and then the DTE thing and the MAX_IRQS_PER_TABLE values would
depend on that.

   Linus
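
Spelled out, the suggestion amounts to something like this (a sketch of
the naming proposed in this thread, not the eventual upstream patch; the
DTE field, IntTabLen per the spec, encodes n for 2^n entries and sits one
bit up in the DTE, hence the extra shift):

#define AMD_IOMMU_IRQ_TABLE_SHIFT	9

/* 2^9 = 512 interrupt remapping entries per table. */
#define MAX_IRQS_PER_TABLE	(1 << AMD_IOMMU_IRQ_TABLE_SHIFT)

/* IntTabLen field value, shifted into place within the DTE. */
#define DTE_IRQ_TABLE_LEN	((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1)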


Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Jerry Snitselaar
On Wed, Dec 9, 2020 at 12:12 PM Jerry Snitselaar  wrote:
>
>
> Will Deacon @ 2020-12-09 11:50 MST:
>
> > On Wed, Dec 09, 2020 at 10:07:46AM -0800, Linus Torvalds wrote:
> >> On Wed, Dec 9, 2020 at 6:12 AM Will Deacon  wrote:
> >> >
> >> > Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix
> >> > for a fix, where the size of the interrupt remapping table was increased
> >> > but a related constant for the size of the interrupt table was forgotten.
> >>
> >> Pulled.
> >
> > Thanks.
> >
> >> However, why didn't this then add some sanity checking for the two
> >> different #defines to be in sync?
> >>
> >> IOW, something like
> >>
> >>#define AMD_IOMMU_IRQ_TABLE_SHIFT 9
> >>
> >>#define MAX_IRQS_PER_TABLE (1 << AMD_IOMMU_IRQ_TABLE_SHIFT)
> >>#define DTE_IRQ_TABLE_LEN ((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1)
>
> Since the field in the device table entry format expects it to be n
> where there are 2^n entries in the table I guess it should be:
>
> #define DTE_IRQ_TABLE_LEN 9
> #define MAX_IRQS_PER_TABLE (1 << DTE_IRQ_TABLE_LEN)
>
No, ignore that. I'm being stupid.


> >>
> >> or whatever. Hmm?
> >
> > This looks like a worthwhile change to me, but I don't have any hardware
> > so I've been very reluctant to make even "obvious" driver changes here.
> >
> > Suravee -- please can you post a patch implementing the above?
> >
> >> That way this won't happen again, but perhaps equally importantly the
> >> linkage will be more clear, and there won't be those random constants.
> >>
> >> Naming above is probably garbage - I assume there's some actual
> >> architectural name for that irq table length field in the DTE?
> >
> > The one in the spec is even better: "IntTabLen".
> >
> > Will
>



Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Jerry Snitselaar


Will Deacon @ 2020-12-09 11:50 MST:

> On Wed, Dec 09, 2020 at 10:07:46AM -0800, Linus Torvalds wrote:
>> On Wed, Dec 9, 2020 at 6:12 AM Will Deacon  wrote:
>> >
>> > Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix
>> > for a fix, where the size of the interrupt remapping table was increased
>> > but a related constant for the size of the interrupt table was forgotten.
>> 
>> Pulled.
>
> Thanks.
>
>> However, why didn't this then add some sanity checking for the two
>> different #defines to be in sync?
>> 
>> IOW, something like
>> 
>>#define AMD_IOMMU_IRQ_TABLE_SHIFT 9
>> 
>>#define MAX_IRQS_PER_TABLE (1 << AMD_IOMMU_IRQ_TABLE_SHIFT)
>>#define DTE_IRQ_TABLE_LEN ((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1)

Since the field in the device table entry format expects it to be n
where there are 2^n entries in the table I guess it should be:

#define DTE_IRQ_TABLE_LEN 9
#define MAX_IRQS_PER_TABLE (1 << DTE_IRQ_TABLE_LEN)

>> 
>> or whatever. Hmm?
>
> This looks like a worthwhile change to me, but I don't have any hardware
> so I've been very reluctant to make even "obvious" driver changes here.
>
> Suravee -- please can you post a patch implementing the above?
>
>> That way this won't happen again, but perhaps equally importantly the
>> linkage will be more clear, and there won't be those random constants.
>> 
>> Naming above is probably garbage - I assume there's some actual
>> architectural name for that irq table length field in the DTE?
>
> The one in the spec is even better: "IntTabLen".
>
> Will



Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Will Deacon
On Wed, Dec 09, 2020 at 10:07:46AM -0800, Linus Torvalds wrote:
> On Wed, Dec 9, 2020 at 6:12 AM Will Deacon  wrote:
> >
> > Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix
> > for a fix, where the size of the interrupt remapping table was increased
> > but a related constant for the size of the interrupt table was forgotten.
> 
> Pulled.

Thanks.

> However, why didn't this then add some sanity checking for the two
> different #defines to be in sync?
> 
> IOW, something like
> 
>#define AMD_IOMMU_IRQ_TABLE_SHIFT 9
> 
>#define MAX_IRQS_PER_TABLE (1 << AMD_IOMMU_IRQ_TABLE_SHIFT)
>#define DTE_IRQ_TABLE_LEN ((u64)AMD_IOMMU_IRQ_TABLE_SHIFT << 1)
> 
> or whatever. Hmm?

This looks like a worthwhile change to me, but I don't have any hardware
so I've been very reluctant to make even "obvious" driver changes here.

Suravee -- please can you post a patch implementing the above?

> That way this won't happen again, but perhaps equally importantly the
> linkage will be more clear, and there won't be those random constants.
> 
> Naming above is probably garbage - I assume there's some actual
> architectural name for that irq table length field in the DTE?

The one in the spec is even better: "IntTabLen".

Will


[PATCH v4 2/3] iommu/iova: Avoid double-negatives in magazine helpers

2020-12-09 Thread John Garry
A similar crash to the following could be observed if initial CPU rcache
magazine allocations fail in init_iova_rcaches():

Unable to handle kernel NULL pointer dereference at virtual address 

Mem abort info:

  free_iova_fast+0xfc/0x280
  iommu_dma_free_iova+0x64/0x70
  __iommu_dma_unmap+0x9c/0xf8
  iommu_dma_unmap_sg+0xa8/0xc8
  dma_unmap_sg_attrs+0x28/0x50
  cq_thread_v3_hw+0x2dc/0x528
  irq_thread_fn+0x2c/0xa0
  irq_thread+0x130/0x1e0
  kthread+0x154/0x158
  ret_from_fork+0x10/0x34

The issue is that the expression !iova_magazine_full(NULL) evaluates to true; this
falls over in __iova_rcache_insert() when we attempt to cache a mag and
cpu_rcache->loaded == NULL:

if (!iova_magazine_full(cpu_rcache->loaded)) {
can_insert = true;
...

if (can_insert)
iova_magazine_push(cpu_rcache->loaded, iova_pfn);

As above, can_insert is evaluated true, which it shouldn't be, and we try
to insert pfns in a NULL mag, which is not safe.

To avoid this, stop using double-negatives, like !iova_magazine_full() and
!iova_magazine_empty(), and use positive tests, like
iova_magazine_has_space() and iova_magazine_has_pfns(), respectively; these
can safely deal with cpu_rcache->{loaded, prev} = NULL.

Signed-off-by: John Garry 
Tested-by: Xiang Chen 
Reviewed-by: Zhen Lei 
---
 drivers/iommu/iova.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index cf1aacda2fe4..732ee687e0e2 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -767,14 +767,18 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
mag->size = 0;
 }
 
-static bool iova_magazine_full(struct iova_magazine *mag)
+static bool iova_magazine_has_space(struct iova_magazine *mag)
 {
-   return (mag && mag->size == IOVA_MAG_SIZE);
+   if (!mag)
+   return false;
+   return mag->size < IOVA_MAG_SIZE;
 }
 
-static bool iova_magazine_empty(struct iova_magazine *mag)
+static bool iova_magazine_has_pfns(struct iova_magazine *mag)
 {
-   return (!mag || mag->size == 0);
+   if (!mag)
+   return false;
+   return mag->size;
 }
 
 static unsigned long iova_magazine_pop(struct iova_magazine *mag,
@@ -783,7 +787,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag,
int i;
unsigned long pfn;
 
-   BUG_ON(iova_magazine_empty(mag));
+   BUG_ON(!iova_magazine_has_pfns(mag));
 
/* Only fall back to the rbtree if we have no suitable pfns at all */
for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
@@ -799,7 +803,7 @@ static unsigned long iova_magazine_pop(struct iova_magazine *mag,
 
 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
 {
-   BUG_ON(iova_magazine_full(mag));
+   BUG_ON(!iova_magazine_has_space(mag));
 
mag->pfns[mag->size++] = pfn;
 }
@@ -845,9 +849,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
spin_lock_irqsave(&cpu_rcache->lock, flags);
 
-   if (!iova_magazine_full(cpu_rcache->loaded)) {
+   if (iova_magazine_has_space(cpu_rcache->loaded)) {
can_insert = true;
-   } else if (!iova_magazine_full(cpu_rcache->prev)) {
+   } else if (iova_magazine_has_space(cpu_rcache->prev)) {
swap(cpu_rcache->prev, cpu_rcache->loaded);
can_insert = true;
} else {
@@ -856,8 +860,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
if (new_mag) {
spin_lock(&rcache->lock);
if (rcache->depot_size < MAX_GLOBAL_MAGS) {
-   rcache->depot[rcache->depot_size++] =
-   cpu_rcache->loaded;
+   if (cpu_rcache->loaded)
+   rcache->depot[rcache->depot_size++] =
+   cpu_rcache->loaded;
} else {
mag_to_free = cpu_rcache->loaded;
}
@@ -908,9 +913,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
spin_lock_irqsave(&cpu_rcache->lock, flags);
 
-   if (!iova_magazine_empty(cpu_rcache->loaded)) {
+   if (iova_magazine_has_pfns(cpu_rcache->loaded)) {
has_pfn = true;
-   } else if (!iova_magazine_empty(cpu_rcache->prev)) {
+   } else if (iova_magazine_has_pfns(cpu_rcache->prev)) {
swap(cpu_rcache->prev, cpu_rcache->loaded);
has_pfn = true;
} else {
-- 
2.26.2



[PATCH v4 1/3] iommu/iova: Add free_all_cpu_cached_iovas()

2020-12-09 Thread John Garry
Add a helper function to free the CPU rcache for all online CPUs.

There also exists a function of the same name in
drivers/iommu/intel/iommu.c, but the parameters are different, and there
should be no conflict.

Signed-off-by: John Garry 
Tested-by: Xiang Chen 
Reviewed-by: Zhen Lei 
---
 drivers/iommu/iova.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index f9c35852018d..cf1aacda2fe4 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -238,6 +238,14 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
return -ENOMEM;
 }
 
+static void free_all_cpu_cached_iovas(struct iova_domain *iovad)
+{
+   unsigned int cpu;
+
+   for_each_online_cpu(cpu)
+   free_cpu_cached_iovas(cpu, iovad);
+}
+
 static struct kmem_cache *iova_cache;
 static unsigned int iova_cache_users;
 static DEFINE_MUTEX(iova_cache_mutex);
@@ -435,15 +443,12 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
 retry:
new_iova = alloc_iova(iovad, size, limit_pfn, true);
if (!new_iova) {
-   unsigned int cpu;
-
if (!flush_rcache)
return 0;
 
/* Try replenishing IOVAs by flushing rcache. */
flush_rcache = false;
-   for_each_online_cpu(cpu)
-   free_cpu_cached_iovas(cpu, iovad);
+   free_all_cpu_cached_iovas(iovad);
free_global_cached_iovas(iovad);
goto retry;
}
-- 
2.26.2



Re: [GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread pr-tracker-bot
The pull request you sent on Wed, 9 Dec 2020 14:12:38 +:

> git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git tags/iommu-fixes

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/ca4bbdaf171604841f77648a2877e2e43db69b71

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


Re: [PATCH v8] swiotlb: Adjust SWIOTLB bounce buffer size for SEV guests.

2020-12-09 Thread Borislav Petkov
On Wed, Dec 09, 2020 at 01:19:46PM +, Ashish Kalra wrote:
> reserve_crashkernel() calls swiotlb_size_or_default() to get SWIOTLB
...

Thanks for explaining.

> There is a need to introduce an architecture specific callback
> for swiotlb_adjust() because of the following reason :

So what your version currently does is:

1. from arch code, call generic code - swiotlb_adjust

2. in generic code, call back into arch code - arch_swiotlb_adjust

But that's twice the work needed to get you where you wanna go.

What you wanna do is, from arch code, call into swiotlb generic code.
That's it, no more.

Just like mem_encrypt.c calls swiotlb_update_mem_attributes(), for
example.

And other architectures can simply do the same thing and you have it all
solved and other architectures don't even need to refactor - they simply
copy what x86 does.

IOW, something like this:

---
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 2f62bbdd9d12..31c4df123aa0 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -37,6 +37,7 @@ void __init sme_map_bootdata(char *real_mode_data);
 void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
+void __init sev_setup_arch(void);
 
 void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
@@ -69,6 +70,7 @@ static inline void __init sme_map_bootdata(char *real_mode_data) { }
 static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
+static inline void __init sev_setup_arch(void) { }
 
 static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a23130c86bdd..740f3bdb3f61 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1049,6 +1049,12 @@ void __init setup_arch(char **cmdline_p)
memblock_set_current_limit(ISA_END_ADDRESS);
e820__memblock_setup();
 
+   /*
+* Needs to run after memblock setup because it needs the physical
+* memory size.
+*/
+   sev_setup_arch();
+
reserve_bios_regions();
 
efi_fake_memmap();
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index bc0833713be9..f3db85673eae 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -198,6 +198,37 @@ void __init sme_early_init(void)
swiotlb_force = SWIOTLB_FORCE;
 }
 
+void __init sev_setup_arch(void)
+{
+   phys_addr_t total_mem = memblock_phys_mem_size();
+   unsigned long size;
+
+   if (!sev_active())
+   return;
+
+   /*
+* For SEV, all DMA has to occur via shared/unencrypted pages.
+* SEV uses SWIOTLB to make this happen without changing device
+* drivers. However, depending on the workload being run, the
+* default 64MB of SWIOTLB may not be enough and SWIOTLB may
+* run out of buffers for DMA, resulting in I/O errors and/or
+* performance degradation especially with high I/O workloads.
+*
+* Adjust the default size of SWIOTLB for SEV guests using
+* a percentage of guest memory for SWIOTLB buffers.
+* Also as the SWIOTLB bounce buffer memory is allocated
+* from low memory, ensure that the adjusted size is within
+* the limits of low available memory.
+*
+* The percentage of guest memory used here for SWIOTLB buffers
+* is more of an approximation of the static adjustment, which
+* is 64MB for <1G, and ~128M to 256M for 1G-to-4G, i.e., roughly 6%.
+*/
+   size = total_mem * 6 / 100;
+   size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
+   swiotlb_adjust_size(size);
+}
+
 static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 {
pgprot_t old_prot, new_prot;
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index fbdc65782195..7aa94e2f99c6 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -30,6 +30,9 @@ enum swiotlb_force {
  */
 #define IO_TLB_SHIFT 11
 
+/* default to 64MB */
+#define IO_TLB_DEFAULT_SIZE (64UL<<20)
+
 extern void swiotlb_init(int verbose);
 int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
 extern unsigned long swiotlb_nr_tbl(void);
@@ -78,6 +81,7 @@ void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(void);
+void __init swiotlb_adjust_size(unsigned long new_size);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
@@ -100,6 +104,10 @@ static inline bool is_swiotlb_active(void)
 {
return false;
 }
+
+static void swiotlb_adjust_size(unsigned long new_size)
+{
+}
 #endif /* CONFIG_SWIOTLB */
 

Re: [PATCH 1/1] iommu/amd: Remove unnecessary assignment

2020-12-09 Thread Will Deacon
On Wed, Dec 09, 2020 at 03:32:50PM +, Adrian Huang12 wrote:
> Gentle ping.

Sorry, I hadn't noticed this patch.

However, I haven't been able to apply this successfully as b4 doesn't seem
to identify it as a patch and I only have this reply in my mailbox. Please
can you send it again, with me on cc?

Cheers,

Will


[PATCH 0/3] IOMMU: Some more IOVA code tidy-up

2020-12-09 Thread John Garry
Based on arm64 for-next/iommu/core

I'll try to bunch this sort of stuff more in future. Thanks,

John Garry (3):
  iova: Make has_iova_flush_queue() private
  iova: Delete copy_reserved_iova()
  iova: Stop exporting some more functions

 drivers/iommu/iova.c | 36 +---
 include/linux/iova.h | 12 
 2 files changed, 1 insertion(+), 47 deletions(-)

-- 
2.26.2



[PATCH 2/3] iova: Delete copy_reserved_iova()

2020-12-09 Thread John Garry
Since commit c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the
iommu ops"), function copy_reserved_iova() is not referenced, so delete
it.

Signed-off-by: John Garry 
---
 drivers/iommu/iova.c | 30 --
 include/linux/iova.h |  6 --
 2 files changed, 36 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 0a758ec2a1c4..04f0a3ae1c63 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -710,36 +710,6 @@ reserve_iova(struct iova_domain *iovad,
 }
 EXPORT_SYMBOL_GPL(reserve_iova);
 
-/**
- * copy_reserved_iova - copies the reserved between domains
- * @from: - source doamin from where to copy
- * @to: - destination domin where to copy
- * This function copies reserved iova's from one doamin to
- * other.
- */
-void
-copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
-{
-   unsigned long flags;
-   struct rb_node *node;
-
-   spin_lock_irqsave(&from->iova_rbtree_lock, flags);
-   for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
-   struct iova *iova = rb_entry(node, struct iova, node);
-   struct iova *new_iova;
-
-   if (iova->pfn_lo == IOVA_ANCHOR)
-   continue;
-
-   new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
-   if (!new_iova)
-   pr_err("Reserve iova range %lx@%lx failed\n",
-  iova->pfn_lo, iova->pfn_lo);
-   }
-   spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
-}
-EXPORT_SYMBOL_GPL(copy_reserved_iova);
-
 /*
  * Magazine caches for IOVA ranges.  For an introduction to magazines,
  * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 2b76e0be1c5b..c834c01c0a5b 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -150,7 +150,6 @@ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
  unsigned long limit_pfn, bool flush_rcache);
 struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
unsigned long pfn_hi);
-void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn);
 int init_iova_flush_queue(struct iova_domain *iovad,
@@ -211,11 +210,6 @@ static inline struct iova *reserve_iova(struct iova_domain *iovad,
return NULL;
 }
 
-static inline void copy_reserved_iova(struct iova_domain *from,
- struct iova_domain *to)
-{
-}
-
 static inline void init_iova_domain(struct iova_domain *iovad,
unsigned long granule,
unsigned long start_pfn)
-- 
2.26.2



[PATCH 1/3] iova: Make has_iova_flush_queue() private

2020-12-09 Thread John Garry
Function has_iova_flush_queue() has no users outside iova.c, so make it
private.

Signed-off-by: John Garry 
---
 drivers/iommu/iova.c | 2 +-
 include/linux/iova.h | 6 --
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 4bb3293ae4d7..0a758ec2a1c4 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -55,7 +55,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
-bool has_iova_flush_queue(struct iova_domain *iovad)
+static bool has_iova_flush_queue(struct iova_domain *iovad)
 {
return !!iovad->fq;
 }
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 76e16ae20729..2b76e0be1c5b 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -153,7 +153,6 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn);
-bool has_iova_flush_queue(struct iova_domain *iovad);
 int init_iova_flush_queue(struct iova_domain *iovad,
  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -223,11 +222,6 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
-static inline bool has_iova_flush_queue(struct iova_domain *iovad)
-{
-   return false;
-}
-
 static inline int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb,
iova_entry_dtor entry_dtor)
-- 
2.26.2



RE: [PATCH 1/1] iommu/amd: Remove unnecessary assignment

2020-12-09 Thread Adrian Huang12
Gentle ping.

-- Adrian

> -Original Message-
> From: Adrian Huang 
> Sent: Monday, October 12, 2020 3:01 PM
> To: Joerg Roedel 
> Cc: iommu@lists.linux-foundation.org; Adrian Huang
> ; Adrian Huang12 
> Subject: [External] [PATCH 1/1] iommu/amd: Remove unnecessary assignment
> 
> From: Adrian Huang 
> 
> The values of these local variables are assigned after the variables are
> declared, so there is no need to assign initial values at declaration
> time.
> 
> Also, there is no need to set the local variable 'ivrs_base' to NULL
> after invoking acpi_put_table().
> 
> Signed-off-by: Adrian Huang 
> ---
>  drivers/iommu/amd/init.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
> index 1ba6b4cc56e8..f171078f7ea0 100644
> --- a/drivers/iommu/amd/init.c
> +++ b/drivers/iommu/amd/init.c
> @@ -1858,7 +1858,7 @@ static void print_iommu_info(void)
>  static int __init amd_iommu_init_pci(void)
>  {
>   struct amd_iommu *iommu;
> - int ret = 0;
> + int ret;
> 
>   for_each_iommu(iommu) {
>   ret = iommu_init_pci(iommu);
> @@ -2494,8 +2494,8 @@ static void __init free_dma_resources(void)
>  static int __init early_amd_iommu_init(void)
>  {
>   struct acpi_table_header *ivrs_base;
> + int i, remap_cache_sz, ret;
>   acpi_status status;
> - int i, remap_cache_sz, ret = 0;
>   u32 pci_id;
> 
>   if (!amd_iommu_detected)
> @@ -2637,7 +2637,6 @@ static int __init early_amd_iommu_init(void)
>  out:
>   /* Don't leak any ACPI memory */
>   acpi_put_table(ivrs_base);
> - ivrs_base = NULL;
> 
>   return ret;
>  }
> --
> 2.17.1



Re: [PATCH v3 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-12-09 Thread Sergey Senozhatsky via iommu
On Tue, Dec 8, 2020 at 1:54 PM Tomasz Figa  wrote:

>
> In any case, Sergey is going to share a preliminary patch on how the
> current API would be used in the V4L2 videobuf2 framework. That should
> give us more input on how such a helper could look.
>


My current WIP (deep WIP) series can be found at [0]. The patch that adds
new DMA API support is at the head of the series [1]. The new DMA API
requires us to have several internal videobuf2 API changes before we can
proceed [2]: v4l2 and videobuf2 core do not pass enough information to the
vb2 allocators now. Previously, if user space requested a non-coherent
allocation, v4l2 would set the queue dma_attr bit, videobuf2 core would pass
queue->dma_attrs to the vb2 allocator, which would use those dma_attrs for
dma_alloc(). So everything was transparent (sort of). Since we don't have
that dma_attr flag anymore, there is no way for v4l2 to pass the request
information (coherent or non-coherent) to the vb2 allocator. Hence we need
to rework the vb2 allocator API. I currently pass a vb2 pointer, but we
decided to rework it again and to pass dedicated VB2_ALLOC_FLAGS from the
videobuf2 core to the allocator. This is still in my private tree and not
completely ready; I will push those patches to github later.

Another thing to notice is that the new API requires us to have two
execution branches in allocators - one for the current API, and one for the
new API (if it's supported and if user-space requested a non-coherent
allocation).
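Very roughly, each vb2 allocator then grows a split of this shape (a sketch
only - the names here are invented for illustration, since the real flags
and entry points are exactly what is still being reworked):

	/* sketch: "new_api" would come from the VB2_ALLOC_FLAGS mentioned
	 * above; the old path keeps using dma_alloc_attrs() as today */
	if (new_api)
		vaddr = vb2_dc_alloc_noncontiguous(dev, size);	/* hypothetical */
	else
		vaddr = dma_alloc_attrs(dev, size, &dma_addr,
					GFP_KERNEL, attrs);	/* current path */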

[0] https://github.com/sergey-senozhatsky/linux-next-ss/commits/master
[1]
https://github.com/sergey-senozhatsky/linux-next-ss/commit/a45f48b483daee59594c62e4aaf01790aab960c8
[2]
https://github.com/sergey-senozhatsky/linux-next-ss/commit/b784145101c398da7fe9e2608b6001e58e05a9b5

-ss

RE: [PATCH v13 05/15] iommu/smmuv3: Get prepared for nested stage support

2020-12-09 Thread Shameerali Kolothum Thodi
Hi Eric,

> -Original Message-
> From: Eric Auger [mailto:eric.au...@redhat.com]
> Sent: 18 November 2020 11:22
> To: eric.auger@gmail.com; eric.au...@redhat.com;
> iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
> k...@vger.kernel.org; kvm...@lists.cs.columbia.edu; w...@kernel.org;
> j...@8bytes.org; m...@kernel.org; robin.mur...@arm.com;
> alex.william...@redhat.com
> Cc: jean-phili...@linaro.org; zhangfei@linaro.org;
> zhangfei@gmail.com; vivek.gau...@arm.com; Shameerali Kolothum
> Thodi ;
> jacob.jun@linux.intel.com; yi.l@intel.com; t...@semihalf.com;
> nicoleots...@gmail.com; yuzenghui 
> Subject: [PATCH v13 05/15] iommu/smmuv3: Get prepared for nested stage
> support
> 
> When nested stage translation is setup, both s1_cfg and
> s2_cfg are set.
> 
> We introduce a new smmu domain abort field that will be set
> upon guest stage1 configuration passing.
> 
> arm_smmu_write_strtab_ent() is modified to write both stage
> fields in the STE and deal with the abort field.
> 
> In nested mode, only stage 2 is "finalized" as the host does
> not own/configure the stage 1 context descriptor; guest does.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> v10 -> v11:
> - Fix an issue reported by Shameer when switching from with vSMMU
>   to without vSMMU. Although the spec does not seem to mention it, it
>   seems to be needed to reset the 2 high 64b words when switching from
>   S1+S2 cfg to S1 only. Especially dst[3] needs to be reset (S2TTB).
>   On some implementations, if the S2TTB is not reset, this causes
>   a C_BAD_STE error
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 64
> +
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 +
>  2 files changed, 56 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 18ac5af1b284..412ea1bafa50 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1181,8 +1181,10 @@ static void arm_smmu_write_strtab_ent(struct
> arm_smmu_master *master, u32 sid,
>* three cases at the moment:
>*
>* 1. Invalid (all zero) -> bypass/fault (init)
> -  * 2. Bypass/fault -> translation/bypass (attach)
> -  * 3. Translation/bypass -> bypass/fault (detach)
> +  * 2. Bypass/fault -> single stage translation/bypass (attach)
> +  * 3. Single or nested stage Translation/bypass -> bypass/fault (detach)
> +  * 4. S2 -> S1 + S2 (attach_pasid_table)
> +  * 5. S1 + S2 -> S2 (detach_pasid_table)
>*
>* Given that we can't update the STE atomically and the SMMU
>* doesn't read the thing in a defined order, that leaves us
> @@ -1193,7 +1195,8 @@ static void arm_smmu_write_strtab_ent(struct
> arm_smmu_master *master, u32 sid,
>* 3. Update Config, sync
>*/
>   u64 val = le64_to_cpu(dst[0]);
> - bool ste_live = false;
> + bool s1_live = false, s2_live = false, ste_live;
> + bool abort, nested = false, translate = false;
>   struct arm_smmu_device *smmu = NULL;
>   struct arm_smmu_s1_cfg *s1_cfg;
>   struct arm_smmu_s2_cfg *s2_cfg;
> @@ -1233,6 +1236,8 @@ static void arm_smmu_write_strtab_ent(struct
> arm_smmu_master *master, u32 sid,
>   default:
>   break;
>   }
> + nested = s1_cfg->set && s2_cfg->set;

This is a problem when the Guest is booted with iommu.passthrough = 1 as we
set s1_cfg.set = false for IOMMU_PASID_CONFIG_BYPASS. 

Results in BUG_ON(ste_live && !nested).

Can we instead have nested = true set a bit above in the code, where we set
s2_cfg->set = true for the ARM_SMMU_DOMAIN_NESTED case?
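Roughly, the idea would be (an untested sketch of that suggestion, not a
patch):

	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * Derive "nested" from the domain stage rather than from
		 * s1_cfg->set, so that a guest booted with
		 * iommu.passthrough=1 (s1_cfg->set == false) still takes
		 * the nested path.
		 */
		s2_cfg->set = true;
		nested = true;
		break;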

Please take a look.

Thanks,
Shameer

> + translate = s1_cfg->set || s2_cfg->set;
>   }
> 
>   if (val & STRTAB_STE_0_V) {
> @@ -1240,23 +1245,36 @@ static void arm_smmu_write_strtab_ent(struct
> arm_smmu_master *master, u32 sid,
>   case STRTAB_STE_0_CFG_BYPASS:
>   break;
>   case STRTAB_STE_0_CFG_S1_TRANS:
> + s1_live = true;
> + break;
>   case STRTAB_STE_0_CFG_S2_TRANS:
> - ste_live = true;
> + s2_live = true;
> + break;
> + case STRTAB_STE_0_CFG_NESTED:
> + s1_live = true;
> + s2_live = true;
>   break;
>   case STRTAB_STE_0_CFG_ABORT:
> - BUG_ON(!disable_bypass);
>   break;
>   default:
>   BUG(); /* STE corruption */
>   }
>   }
> 
> + ste_live = s1_live || s2_live;
> +
>   /* Nuke the existing STE_0 value, as we're going to rewrite it */
>   val = STRTAB_STE_0_V;
> 
>   /* Bypass/fault */
> - if (!smmu_domain || !(s1_cfg->set || s2_cfg->set)) {
> - 

RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as broken

2020-12-09 Thread Deucher, Alexander
[AMD Public Use]

> -Original Message-
> From: Merger, Edgar [AUTOSOL/MAS/AUGS] 
> Sent: Wednesday, December 9, 2020 2:59 AM
> To: Deucher, Alexander ; Huang, Ray 
> ; Kuehling, Felix 
> Cc: Will Deacon ; linux-ker...@vger.kernel.org; 
> linux- p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn 
> Helgaas ; Joerg Roedel ; Zhu, 
> Changfeng 
> Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as 
> broken
> 
> Alex,
> 
> I had to revise the patch. Please see the attachment. There are actually two 
> more SSIDs affected by that.

Other than some minor whitespace issues, the patch looks fine to me.  Please 
align the subsystem_device lines and put the closing parenthesis on the same 
line as the last check.
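For reference, the check would then look something like this (a sketch using
the three SSIDs quoted in this thread - the wrapper name is made up here;
only quirk_amd_harvest_no_ats() and DECLARE_PCI_FIXUP_FINAL() are the
existing quirks.c machinery):

static void quirk_amd_raven_no_ats(struct pci_dev *pdev)
{
	/* only the listed Emerson boards are affected */
	if (pdev->subsystem_vendor == 0xea50 &&
	    (pdev->subsystem_device == 0xce19 ||
	     pdev->subsystem_device == 0xcc10 ||
	     pdev->subsystem_device == 0xcc08))
		quirk_amd_harvest_no_ats(pdev);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_raven_no_ats);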

Thanks!

Alex

> 
> Best regards,
> Edgar
> 
> -Original Message-
> From: Merger, Edgar [AUTOSOL/MAS/AUGS]
> Sent: Dienstag, 8. Dezember 2020 09:23
> To: 'Deucher, Alexander' ; 'Huang, Ray'
> ; 'Kuehling, Felix' 
> Cc: 'Will Deacon' ; 'linux-ker...@vger.kernel.org' 
> ; 'linux-...@vger.kernel.org'  p...@vger.kernel.org>; 'iommu@lists.linux-foundation.org'
> ; 'Bjorn Helgaas'
> ; 'Joerg Roedel' ; 'Zhu, 
> Changfeng' 
> Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as 
> broken
> 
> Applied the patch as in attachment. Verified that ATS for GPU-Device 
> had been disabled. See attachment "dmesg_ATS.log".
> 
> Was running that build over night successfully.
> 
> -Original Message-
> From: Merger, Edgar [AUTOSOL/MAS/AUGS]
> Sent: Montag, 7. Dezember 2020 05:53
> To: Deucher, Alexander ; Huang, Ray 
> ; Kuehling, Felix 
> Cc: Will Deacon ; linux-ker...@vger.kernel.org; 
> linux- p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn 
> Helgaas ; Joerg Roedel ; Zhu, 
> Changfeng 
> Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as 
> broken
> 
> Hi Alex,
> 
> I believe in the patch file, this
> + (pdev->subsystem_device == 0x0c19 ||
> +  pdev->subsystem_device == 0x0c10))
> 
> Has to be changed to:
> + (pdev->subsystem_device == 0xce19 ||
> +  pdev->subsystem_device == 0xcc10))
> 
> Because our SSIDs are "ea50:ce19" and "ea50:cc10" respectively and 
> another one would "ea50:cc08".
> 
> I will apply that patch and feedback the results soon plus the patch 
> file that I actually had applied.
> 
> 
> -Original Message-
> From: Deucher, Alexander 
> Sent: Montag, 30. November 2020 19:36
> To: Merger, Edgar [AUTOSOL/MAS/AUGS] ; 
> Huang, Ray ; Kuehling, Felix 
> 
> Cc: Will Deacon ; linux-ker...@vger.kernel.org; 
> linux- p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn 
> Helgaas ; Joerg Roedel ; Zhu, 
> Changfeng 
> Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as 
> broken
> 
> [AMD Public Use]
> 
> > -Original Message-
> > From: Merger, Edgar [AUTOSOL/MAS/AUGS]
> 
> > Sent: Thursday, November 26, 2020 4:24 AM
> > To: Deucher, Alexander ; Huang, Ray 
> > ; Kuehling, Felix 
> > Cc: Will Deacon ; linux-ker...@vger.kernel.org;
> > linux- p...@vger.kernel.org; iommu@lists.linux-foundation.org; Bjorn 
> > Helgaas ; Joerg Roedel ; Zhu, 
> > Changfeng 
> > Subject: RE: [EXTERNAL] Re: [PATCH] PCI: Mark AMD Raven iGPU ATS as 
> > broken
> >
> > Alex,
> >
> > This is pretty much the same patch as what I have received from 
> > Joerg previously, except that it is tied to the particular Emerson 
> > platform and its derivatives (listed with Subsystem IDs).
> 
> Right.  As per my original point, I don't want to disable ATS on all 
> Picasso chips because doing so would break GPU compute on them, so I'd 
> like to apply this quirk as narrowly as possible.
> 
> >
> > Below patch was what Joerg provided me and I successfully tested.
> >
> > This diff to the kernel should do that:
> >
> > diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 
> > f70692ac79c5..3911b0ec57ba 100644
> > --- a/drivers/pci/quirks.c
> > +++ b/drivers/pci/quirks.c
> > @@ -5176,6 +5176,8 @@
> DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI,
> > 0x6900, quirk_amd_harvest_no_ats);
> > DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, 
> > quirk_amd_harvest_no_ats);
> >  /* AMD Navi14 dGPU */
> >  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, 
> > quirk_amd_harvest_no_ats);
> > +/* AMD Raven platform iGPU */
> > +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, 
> > +quirk_amd_harvest_no_ats);
> >  #endif /* CONFIG_PCI_ATS */
> >
> >  /* Freescale PCIe doesn't support MSI in RC mode */
> >
> > So far I have seen this issue on two instances of this chip, but I 
> > must admit that I did test only two of them to this extent, so I 
> > guess it is not a bad chip in particular, but the chips we use are 
> > from the same production lot, so it might be a systematical problem 
> > of that
> production lot?
> >
> > UEFI-Setup shows:
> > Processor Family: 17h
> > Procossor Model: 20h - 2Fh
> > CPUID: 00820F01
> > Microcode Patch Level: 8200103
> >
> > Looking 

[GIT PULL] IOMMU fix for 5.10 (-final)

2020-12-09 Thread Will Deacon
Hi Linus,

Please pull this one-liner AMD IOMMU fix for 5.10. It's actually a fix
for a fix, where the size of the interrupt remapping table was increased
but a related constant for the size of the interrupt table was forgotten.

Cheers,

Will

--->8

The following changes since commit d76b42e92780c3587c1a998a3a943b501c137553:

  iommu/vt-d: Don't read VCCAP register unless it exists (2020-11-26 14:50:24 
+)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git tags/iommu-fixes

for you to fetch changes up to 4165bf015ba9454f45beaad621d16c516d5c5afe:

  iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs (2020-12-07 11:00:24 
+)


iommu fix for 5.10

- Fix interrupt table length definition for AMD IOMMU


Suravee Suthikulpanit (1):
  iommu/amd: Set DTE[IntTabLen] to represent 512 IRTEs

 drivers/iommu/amd/amd_iommu_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
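For context, the one-liner is of this shape (reconstructed here, not quoted
from the pull: IntTabLen encodes log2 of the IRTE count, so growing the
table to 512 entries means encoding 9 rather than 8):

-#define DTE_IRQ_TABLE_LEN	(8ULL << 1)
+#define DTE_IRQ_TABLE_LEN	(9ULL << 1)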



Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-12-09 Thread Ashish Kalra
On Wed, Dec 09, 2020 at 01:54:42PM +0100, Borislav Petkov wrote:
> On Wed, Dec 09, 2020 at 12:29:07PM +, Ashish Kalra wrote:
> > As i mentioned in the main comments above, this cannot be called in
> > mem_encrypt_init() as that breaks reserve_crashkernel() which depends
> > on SWIOTLB buffer size
> 
> Please elaborate how does it break.
> 

reserve_crashkernel() calls swiotlb_size_or_default() to get the SWIOTLB
buffer size and then accordingly allocates low memory for the crashkernel.
If SWIOTLB buffer size is adjusted after reserve_crashkernel() and
swiotlb_size_or_default(), then SWIOTLB buffers will overlap the memory
reserved for crashkernel. Hence any SWIOTLB buffer adjustment needs to
be done before or in swiotlb_size_or_default(), but Konrad is not in
favor of modifying swiotlb_size_or_default(), hence this separate
swiotlb_adjust() interface is introduced. 
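So the required ordering on x86 looks roughly like this (a sketch of the
call order described above, not actual code):

	setup_arch()
		...
		swiotlb_adjust();	/* the size decision must happen here */
		...
		reserve_crashkernel();	/* reads swiotlb_size_or_default() */
	...
	mem_encrypt_init();		/* too late to change the size */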

> > and is called before mem_encrypt_init(), therefore, it needs to be
> > called from setup_atch() before reserve_crashkernel().
> 
> I know you have your requirements what needs to be called when like all
> the other vendors who want to run stuff early in a particular order but
> our boot init order is a single fragile mess. So this better be done
> right!
> 
> Also,
> 
> [0.016630] software IO TLB: swiotlb_adjust:
> [0.017005] reserve_crashkernel:
> [0.050523] software IO TLB: swiotlb_init:
> 
> this looks strange - we're doing a swiotlb size adjust before init.
> 
> It probably makes sense as in: adjust the size before the SWIOTLB is
> initialized so that it uses the correct size but this better be spelled
> out.
> 

Yes, the adjustment is done before init. 

> > I believe that other memory encryption architectures such as s390 are
> > also looking for something similar to be available.
> 
> Until you have something more palpable than belief, "let the others
> extend it when they really need it." as I already mentioned.

There is a need to introduce an architecture specific callback
for swiotlb_adjust() because of the following reason :

The sev_active() function is only available to x86, so this will break
other archs if we use this function in generic swiotlb code.

Therefore, we need arch-specific callback/interface to be invoked from
generic swiotlb code to do the SEV specific actions such as SWIOTLB
buffer size adjustment.
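The glue would just be a __weak default that an architecture can override,
along these lines (a simplified sketch of that shape; the x86 override is
the SEV sizing code from the patch):

/* generic code (kernel/dma/swiotlb.c) */
unsigned long __init __weak arch_swiotlb_adjust(unsigned long size)
{
	return size;	/* default: keep the 64MB default */
}

void __init swiotlb_adjust(void)
{
	unsigned long size = arch_swiotlb_adjust(IO_TLB_DEFAULT_SIZE);

	/* ...stash "size" so that swiotlb_init() allocates that much... */
}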

Thanks,
Ashish



Re: [PATCH] dma-iommu: remove __iommu_dma_mmap

2020-12-09 Thread Will Deacon
On Wed, 9 Dec 2020 12:20:19 +0100, Christoph Hellwig wrote:
> The function has a single caller, so open code it there and take
> advantage of the precalculated page count variable.

Applied to arm64 (for-next/iommu/core), thanks!

[1/1] dma-iommu: remove __iommu_dma_mmap
  https://git.kernel.org/arm64/c/71fe89ceb55b

Cheers,
-- 
Will

https://fixes.arm64.dev
https://next.arm64.dev
https://will.arm64.dev


Re: [PATCH v3 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-12-09 Thread Robin Murphy

On 2020-12-09 11:12, Christoph Hellwig wrote:

On Tue, Dec 08, 2020 at 01:54:00PM +0900, Tomasz Figa wrote:

From the media perspective, it would be good to have the vmap
optional, similarly to the DMA_ATTR_NO_KERNEL_MAPPING attribute for
coherent allocations. Actually, in the media drivers, the need to have
a kernel mapping of the DMA buffers corresponds to a minority of the
drivers. Most of them only need to map them to the userspace.

Nevertheless, that minority actually happens to be quite widely used,
e.g. the uvcvideo driver, so we can't go to the other extreme and just
drop the vmap at all.


My main problem is that the DMA_ATTR_NO_KERNEL_MAPPING makes a mess
of an API.  I'd much rather have low-level API that returns the
discontiguous allocations and another one that vmaps them rather
than starting to overload arguments like in dma_alloc_attrs with
DMA_ATTR_NO_KERNEL_MAPPING.


Agreed - if iommu-dma's dma_alloc_coherent() ends up as little more than 
a thin wrapper around those two functions I think that would be a good 
sign. It also seems like it might be a good idea for this API to use 
scatterlists rather than page arrays as its fundamental format, to help 
reduce impedance with dma-buf - if we can end up with a wider redesign 
that also gets rid of dma_get_sgtable(), all the better!


Robin.


Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-12-09 Thread Borislav Petkov
On Wed, Dec 09, 2020 at 12:29:07PM +, Ashish Kalra wrote:
> As i mentioned in the main comments above, this cannot be called in
> mem_encrypt_init() as that breaks reserve_crashkernel() which depends
> on SWIOTLB buffer size

Please elaborate how does it break.

> and is called before mem_encrypt_init(), therefore, it needs to be
> called from setup_arch() before reserve_crashkernel().

I know you have your requirements what needs to be called when like all
the other vendors who want to run stuff early in a particular order but
our boot init order is a single fragile mess. So this better be done
right!

Also,

[0.016630] software IO TLB: swiotlb_adjust:
[0.017005] reserve_crashkernel:
[0.050523] software IO TLB: swiotlb_init:

this looks strange - we're doing a swiotlb size adjust before init.

It probably makes sense as in: adjust the size before the SWIOTLB is
initialized so that it uses the correct size but this better be spelled
out.

> I believe that other memory encryption architectures such as s390 are
> also looking for something similar to be available.

Until you have something more palpable than belief, "let the others
extend it when they really need it." as I already mentioned.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-12-09 Thread Ashish Kalra
On Wed, Dec 09, 2020 at 12:01:15PM +0100, Borislav Petkov wrote:
> > Subject: Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV 
> > guests.
> 
> Fix subject prefix to "x86, swiotlb: ... SWIOTLB ... for SEV guests
> 
> Fix typo and no fullstop at the end.
> 
> On Mon, Dec 07, 2020 at 11:10:57PM +, Ashish Kalra wrote:
> > From: Ashish Kalra 
> > 
> > For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> > SEV uses SWIOTLB to make this happen without requiring changes to device
> > drivers.  However, depending on workload being run, the default 64MB of
>^
>the
> 
> > SWIOTLB might not be enough and SWIOTLB may run out of buffers to use
> 
>   s/SWIOTLB/it/
> 
> > for DMA, resulting in I/O errors and/or performance degradation for
> > high I/O workloads.
> > 
> > Adjust the default size of SWIOTLB for SEV guests using a
> > percentage of the total memory available to guest for SWIOTLB buffers.
>^
>the
> 
> > 
> > Using late_initcall() interface to invoke swiotlb_adjust() does not
> > work as the size adjustment needs to be done before mem_encrypt_init()
> > and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> > hence call it explicitly from setup_arch().
> 
> So setup_arch() is x86-specific and already a dumping ground for all
> kinds of init stuff.
> 
> Why don't you call swiotlb_adjust() in mem_encrypt_init() where it
> already does swiotlb stuff - swiotlb_update_mem_attributes() - and avoid
> all the arch-agnostic function glue?
> 

As i mentioned in the main comments above, this cannot be called in
mem_encrypt_init() as that breaks reserve_crashkernel() which depends on
SWIOTLB buffer size and is called before mem_encrypt_init(), therefore,
it needs to be called from setup_arch() before reserve_crashkernel(). 

> That is, unless Konrad wants to do other swiotlb adjusting on !x86 too...
> 
> > The SWIOTLB default size adjustment needs to be added as an architecture
> > specific interface/callback to allow architectures such as those supporting
> > memory encryption to adjust/expand SWIOTLB size for their use.
> 
> So are other arches wanting this or is this just an assumption? If
> latter, you can do x86 only now and let the others extend it when they
> really need it.

I believe that other memory encryption architectures such as s390 are
also looking for something similar to be available. 

Thanks,
Ashish



[PATCH] drivers/iommu: fix a null-ptr-deref bug in fsl_pamu_domain.c

2020-12-09 Thread tangzhenhao
At line 362 in drivers/iommu/fsl_pamu_domain.c, the return value of 
kmem_cache_zalloc() should be checked to avoid a null-ptr-deref bug.

Signed-off-by: tangzhenhao 
---
 drivers/iommu/fsl_pamu_domain.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index b2110767caf4..9ebd5135f4a8 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -360,6 +360,10 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d
}
 
info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC);
+   if (!info) {
+   pr_debug("device_domain_info allocation failed\n");
+   return;
+   }
 
info->dev = dev;
info->liodn = liodn;
-- 
2.17.1



Re: [PATCH] dma-iommu: remove __iommu_dma_mmap

2020-12-09 Thread Robin Murphy

On 2020-12-09 11:20, Christoph Hellwig wrote:

The function has a single caller, so open code it there and take
advantage of the precalculated page count variable.


I can't shake the feeling that we've written this patch at least twice 
before through all the refactoring, so definitely no objection from me 
to an obvious cleanup:


Reviewed-by: Robin Murphy 


Signed-off-by: Christoph Hellwig 
---
  drivers/iommu/dma-iommu.c | 17 +
  1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0cbcd3fc3e7e8d..f6ea1dabc6a894 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -655,21 +655,6 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
return NULL;
  }
  
-/**
- * __iommu_dma_mmap - Map a buffer into provided user VMA
- * @pages: Array representing buffer from __iommu_dma_alloc()
- * @size: Size of buffer in bytes
- * @vma: VMA describing requested userspace mapping
- *
- * Maps the pages of the buffer in @pages into @vma. The caller is responsible
- * for verifying the correct size and protection of @vma beforehand.
- */
-static int __iommu_dma_mmap(struct page **pages, size_t size,
-   struct vm_area_struct *vma)
-{
-   return vm_map_pages(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
-}
-
  static void iommu_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
  {
@@ -1102,7 +1087,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
struct page **pages = dma_common_find_pages(cpu_addr);
  
  		if (pages)
-   return __iommu_dma_mmap(pages, size, vma);
+   return vm_map_pages(vma, pages, nr_pages);
pfn = vmalloc_to_pfn(cpu_addr);
} else {
pfn = page_to_pfn(virt_to_page(cpu_addr));




Re: [RESEND PATCH v3 2/4] iommu/iova: Avoid double-negatives in magazine helpers

2020-12-09 Thread Leizhen (ThunderTown)


On 2020/12/9 19:39, John Garry wrote:
> On 09/12/2020 09:03, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2020/11/17 18:25, John Garry wrote:
>>> A similar crash to the following could be observed if initial CPU rcache
>>> magazine allocations fail in init_iova_rcaches():
>>>
>>> Unable to handle kernel NULL pointer dereference at virtual address 
>>> 
>>> Mem abort info:
>>>     ESR = 0x9604
>>>     EC = 0x25: DABT (current EL), IL = 32 bits
>>>     SET = 0, FnV = 0
>>>     EA = 0, S1PTW = 0
>>> Data abort info:
>>>     ISV = 0, ISS = 0x0004
>>>     CM = 0, WnR = 0
>>> [] user address but active_mm is swapper
>>> Internal error: Oops: 9604 [#1] PREEMPT SMP
>>> Modules linked in:
>>> CPU: 11 PID: 696 Comm: irq/40-hisi_sas Not tainted 5.9.0-rc7-dirty #109
>>> Hardware name: Huawei D06 /D06, BIOS Hisilicon D06 UEFI RC0 - V1.16.01 
>>> 03/15/2019
>>> Call trace:
>>>    free_iova_fast+0xfc/0x280
>>>    iommu_dma_free_iova+0x64/0x70
>>>    __iommu_dma_unmap+0x9c/0xf8
>>>    iommu_dma_unmap_sg+0xa8/0xc8
>>>    dma_unmap_sg_attrs+0x28/0x50
>>>    cq_thread_v3_hw+0x2dc/0x528
>>>    irq_thread_fn+0x2c/0xa0
>>>    irq_thread+0x130/0x1e0
>>>    kthread+0x154/0x158
>>>    ret_from_fork+0x10/0x34
>>>
>>> Code: f9400060 f102001f 54000981 d421 (f9400043)
>>>
>>>   ---[ end trace 4afcbdfc61b60467 ]---
>>>
>>> The issue is that expression !iova_magazine_full(NULL) evaluates true; this
>>> falls over in in __iova_rcache_insert() when we attempt to cache a mag
>>> and cpu_rcache->loaded == NULL:
>>>
>>> if (!iova_magazine_full(cpu_rcache->loaded)) {
>>> can_insert = true;
>>> ...
>>>
>>> if (can_insert)
>>> iova_magazine_push(cpu_rcache->loaded, iova_pfn);
>>>
>>> As above, can_insert is evaluated true, which it shouldn't be, and we try
>>> to insert pfns in a NULL mag, which is not safe.
>>>
>>> To avoid this, stop using double-negatives, like !iova_magazine_full() and
>>> !iova_magazine_empty(), and use positive tests, like
>>> iova_magazine_has_space() and iova_magazine_has_pfns(), respectively; these
>>> can safely deal with cpu_rcache->{loaded, prev} = NULL.
>>>
>>> Signed-off-by: John Garry 
> 
> Thanks for checking here...
> 
>>> ---
>>>   drivers/iommu/iova.c | 29 +
>>>   1 file changed, 17 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>>> index 81b7399dd5e8..1f3f0f8b12e0 100644
>>> --- a/drivers/iommu/iova.c
>>> +++ b/drivers/iommu/iova.c
>>> @@ -827,14 +827,18 @@ iova_magazine_free_pfns(struct iova_magazine *mag, 
>>> struct iova_domain *iovad)
>>>   mag->size = 0;
>>>   }
>>>   -static bool iova_magazine_full(struct iova_magazine *mag)
>>> +static bool iova_magazine_has_space(struct iova_magazine *mag)
>>>   {
>>> -    return (mag && mag->size == IOVA_MAG_SIZE);
>>> +    if (!mag)
>>> +    return false;
>>> +    return mag->size < IOVA_MAG_SIZE;
>>>   }
>>>   -static bool iova_magazine_empty(struct iova_magazine *mag)
>>> +static bool iova_magazine_has_pfns(struct iova_magazine *mag)
>>>   {
>>> -    return (!mag || mag->size == 0);
>>> +    if (!mag)
>>> +    return false;
>>> +    return mag->size;
>>>   }
>>>     static unsigned long iova_magazine_pop(struct iova_magazine *mag,
>>> @@ -843,7 +847,7 @@ static unsigned long iova_magazine_pop(struct 
>>> iova_magazine *mag,
>>>   int i;
>>>   unsigned long pfn;
>>>   -    BUG_ON(iova_magazine_empty(mag));
>>> +    BUG_ON(!iova_magazine_has_pfns(mag));
>>>     /* Only fall back to the rbtree if we have no suitable pfns at all 
>>> */
>>>   for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
>>> @@ -859,7 +863,7 @@ static unsigned long iova_magazine_pop(struct 
>>> iova_magazine *mag,
>>>     static void iova_magazine_push(struct iova_magazine *mag, unsigned long 
>>> pfn)
>>>   {
>>> -    BUG_ON(iova_magazine_full(mag));
>>> +    BUG_ON(!iova_magazine_has_space(mag));
>>>     mag->pfns[mag->size++] = pfn;
>>>   }
>>> @@ -905,9 +909,9 @@ static bool __iova_rcache_insert(struct iova_domain 
>>> *iovad,
>>>   cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
>>>   spin_lock_irqsave(&cpu_rcache->lock, flags);
>>>   -    if (!iova_magazine_full(cpu_rcache->loaded)) {
> 
> *
> 
>>> +    if (iova_magazine_has_space(cpu_rcache->loaded)) {
>>>   can_insert = true;
>>> -    } else if (!iova_magazine_full(cpu_rcache->prev)) {
>>> +    } else if (iova_magazine_has_space(cpu_rcache->prev)) {
>>>   swap(cpu_rcache->prev, cpu_rcache->loaded);
>>>   can_insert = true;
>>>   } else {
>>> @@ -916,8 +920,9 @@ static bool __iova_rcache_insert(struct iova_domain 
>>> *iovad,
>>>   if (new_mag) {
>>>   spin_lock(&rcache->lock);
>>>   if (rcache->depot_size < MAX_GLOBAL_MAGS) {
>>> -    rcache->depot[rcache->depot_size++] =
>>> -    cpu_rcache->loaded;
>>> +    if (cpu_rcache->loaded)
>>
>> Looks like it just needs to change 

Re: [RESEND PATCH v3 3/4] iommu/iova: Flush CPU rcache for when a depot fills

2020-12-09 Thread Leizhen (ThunderTown)


On 2020/12/9 19:22, John Garry wrote:
> On 09/12/2020 09:13, Leizhen (ThunderTown) wrote:
>>
>>
>> On 2020/11/17 18:25, John Garry wrote:
>>> Leizhen reported some time ago that IOVA performance may degrade over time
>>> [0], but unfortunately his solution to fix this problem was not given
>>> attention.
>>>
>>> To summarize, the issue is that as time goes by, the CPU rcache and depot
>>> rcache continue to grow. As such, IOVA RB tree access time also continues
>>> to grow.
>>>
>>> At a certain point, a depot may become full, and also some CPU rcaches may
>>> also be full when inserting another IOVA is attempted. For this scenario,
>>> currently the "loaded" CPU rcache is freed and a new one is created. This
>>> freeing means that many IOVAs in the RB tree need to be freed, which
>>> makes IO throughput performance fall off a cliff in some storage scenarios:
>>>
>>> Jobs: 12 (f=12): [] [0.0% done] [6314MB/0KB/0KB /s] [1616K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [5669MB/0KB/0KB /s] [1451K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6031MB/0KB/0KB /s] [1544K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6673MB/0KB/0KB /s] [1708K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6705MB/0KB/0KB /s] [1717K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6031MB/0KB/0KB /s] [1544K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6761MB/0KB/0KB /s] [1731K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6705MB/0KB/0KB /s] [1717K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6685MB/0KB/0KB /s] [1711K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6178MB/0KB/0KB /s] [1582K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [6731MB/0KB/0KB /s] [1723K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [2387MB/0KB/0KB /s] [611K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [2689MB/0KB/0KB /s] [688K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [2278MB/0KB/0KB /s] [583K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [1288MB/0KB/0KB /s] [330K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [1632MB/0KB/0KB /s] [418K/0/0 
>>> iops]
>>> Jobs: 12 (f=12): [] [0.0% done] [1765MB/0KB/0KB /s] [452K/0/0 
>>> iops]
>>>
>>> And continue in this fashion, without recovering. Note that in this
>>> example it was required to wait 16 hours for this to occur. Also note that
>>> IO throughput also becomes gradually becomes more unstable leading up to
>>> this point.
>>>
>>> This problem is only seen for non-strict mode. For strict mode, the rcaches
>>> stay quite compact.
>>>
>>> As a solution to this issue, judge that the IOVA caches have grown too big
>>> when cached magazines need to be free, and just flush all the CPUs rcaches
>>> instead.
>>>
>>> The depot rcaches, however, are not flushed, as they can be used to
>>> immediately replenish active CPUs.
>>>
>>> In future, some IOVA compaction could be implemented to solve the
>>> instabilty issue, which I figure could be quite complex to implement.
>>>
>>> [0] 
>>> https://lore.kernel.org/linux-iommu/20190815121104.29140-3-thunder.leiz...@huawei.com/
>>>
>>> Analyzed-by: Zhen Lei 
>>> Reported-by: Xiang Chen 
>>> Signed-off-by: John Garry 
> 
> Thanks for having a look
> 
>>> ---
>>>   drivers/iommu/iova.c | 16 ++--
>>>   1 file changed, 6 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
>>> index 1f3f0f8b12e0..386005055aca 100644
>>> --- a/drivers/iommu/iova.c
>>> +++ b/drivers/iommu/iova.c
>>> @@ -901,7 +901,6 @@ static bool __iova_rcache_insert(struct iova_domain 
>>> *iovad,
>>>    struct iova_rcache *rcache,
>>>    unsigned long iova_pfn)
>>>   {
>>> -    struct iova_magazine *mag_to_free = NULL;
>>>   struct iova_cpu_rcache *cpu_rcache;
>>>   bool can_insert = false;
>>>   unsigned long flags;
>>> @@ -923,13 +922,12 @@ static bool __iova_rcache_insert(struct iova_domain 
>>> *iovad,
>>>   if (cpu_rcache->loaded)
>>>   rcache->depot[rcache->depot_size++] =
>>>   cpu_rcache->loaded;
>>> -    } else {
>>> -    mag_to_free = cpu_rcache->loaded;
>>> +    can_insert = true;
>>> +    cpu_rcache->loaded = new_mag;
>>>   }
>>>   spin_unlock(&rcache->lock);
>>> -
>>> -    cpu_rcache->loaded = new_mag;
>>> -    can_insert = true;
>>> +    if (!can_insert)
>>> +    iova_magazine_free(new_mag);
>>>   }
>>>   }
>>>   @@ -938,10 +936,8 @@ static bool __iova_rcache_insert(struct iova_domain 
>>> *iovad,
>>>     spin_unlock_irqrestore(&cpu_rcache->lock, flags);
>>>   -    if (mag_to_free) {
>>> -    iova_magazine_free_pfns(mag_to_free, iovad);
>>> -  

Re: [PATCH v3 5/6] media: uvcvideo: Use dma_alloc_noncontiguos API

2020-12-09 Thread Christoph Hellwig
On Tue, Dec 08, 2020 at 01:54:00PM +0900, Tomasz Figa wrote:
> From the media perspective, it would be good to have the vmap
> optional, similarly to the DMA_ATTR_NO_KERNEL_MAPPING attribute for
> coherent allocations. Actually, in the media drivers, the need to have
> a kernel mapping of the DMA buffers corresponds to a minority of the
> drivers. Most of them only need to map them to the userspace.
> 
> Nevertheless, that minority actually happens to be quite widely used,
> e.g. the uvcvideo driver, so we can't go to the other extreme and just
> drop the vmap at all.

My main problem is that the DMA_ATTR_NO_KERNEL_MAPPING makes a mess
of an API.  I'd much rather have low-level API that returns the
discontiguous allocations and another one that vmaps them rather
than starting to overload arguments like in dma_alloc_attrs with
DMA_ATTR_NO_KERNEL_MAPPING.
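i.e. something of this shape instead of one overloaded dma_alloc_attrs()
entry point (the names are invented here purely for illustration):

	/* low level: hand back the discontiguous pages themselves */
	struct page **dma_alloc_noncontiguous(struct device *dev, size_t size,
					      dma_addr_t *dma_handle, gfp_t gfp);
	/* optional second step, for the minority that wants a kernel mapping */
	void *dma_vmap_noncontiguous(struct page **pages, size_t size);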

> 
> In any case, Sergey is going to share a preliminary patch on how the
> current API would be used in the V4L2 videobuf2 framework. That should
> give us more input on how such a helper could look.

Awesome!


Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV guests.

2020-12-09 Thread Borislav Petkov
> Subject: Re: [PATCH v8] swiotlb: Adjust SWIOTBL bounce buffer size for SEV 
> guests.

Fix subject prefix to "x86, swiotlb: ... SWIOTLB ... for SEV guests

Fix typo and no fullstop at the end.

On Mon, Dec 07, 2020 at 11:10:57PM +, Ashish Kalra wrote:
> From: Ashish Kalra 
> 
> For SEV, all DMA to and from guest has to use shared (un-encrypted) pages.
> SEV uses SWIOTLB to make this happen without requiring changes to device
> drivers.  However, depending on workload being run, the default 64MB of
 ^
 the

> SWIOTLB might not be enough and SWIOTLB may run out of buffers to use

s/SWIOTLB/it/

> for DMA, resulting in I/O errors and/or performance degradation for
> high I/O workloads.
> 
> Adjust the default size of SWIOTLB for SEV guests using a
> percentage of the total memory available to guest for SWIOTLB buffers.
 ^
 the

> 
> Using late_initcall() interface to invoke swiotlb_adjust() does not
> work as the size adjustment needs to be done before mem_encrypt_init()
> and reserve_crashkernel() which use the allocated SWIOTLB buffer size,
> hence call it explicitly from setup_arch().

So setup_arch() is x86-specific and already a dumping ground for all
kinds of init stuff.

Why don't you call swiotlb_adjust() in mem_encrypt_init() where it
already does swiotlb stuff - swiotlb_update_mem_attributes() - and avoid
all the arch-agnostic function glue?

That is, unless Konrad wants to do other swiotlb adjusting on !x86 too...

> The SWIOTLB default size adjustment needs to be added as an architecture
> specific interface/callback to allow architectures such as those supporting
> memory encryption to adjust/expand SWIOTLB size for their use.

So are other arches wanting this or is this just an assumption? If
latter, you can do x86 only now and let the others extend it when they
really need it.

> v5 fixed build errors and warnings as
> Reported-by: kbuild test robot 
> 
> Signed-off-by: Ashish Kalra 
> ---
>  arch/x86/kernel/setup.c   |  2 ++
>  arch/x86/mm/mem_encrypt.c | 37 +
>  include/linux/swiotlb.h   |  6 ++
>  kernel/dma/swiotlb.c  | 22 ++
>  4 files changed, 67 insertions(+)
> 
> diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
> index 1bcfbcd2bfd7..d1b8d60040cf 100644
> --- a/arch/x86/mm/mem_encrypt.c
> +++ b/arch/x86/mm/mem_encrypt.c
> @@ -485,7 +485,44 @@ static void print_mem_encrypt_feature_info(void)
>   pr_cont("\n");
>  }
>  
> +/*
> + * The percentage of guest memory used here for SWIOTLB buffers
> + * is more of an approximation of the static adjustment which
> + * is 128M for <1G guests, 256M for 1G-4G guests and 512M for >4G guests.
> + */
> +#define SEV_ADJUST_SWIOTLB_SIZE_PERCENT  6
> +
>  /* Architecture __weak replacement functions */
> +unsigned long __init arch_swiotlb_adjust(unsigned long iotlb_default_size)
> +{
> + unsigned long size = iotlb_default_size;
> +
> + /*
> +  * For SEV, all DMA has to occur via shared/unencrypted pages.
> +  * SEV uses SWOTLB to make this happen without changing device
> +  * drivers. However, depending on the workload being run, the
> +  * default 64MB of SWIOTLB may not be enough and SWIOTLB may
> +  * run out of buffers for DMA, resulting in I/O errors and/or
> +  * performance degradation especially with high I/O workloads.

<--- newline in the comment here.

> +  * Adjust the default size of SWIOTLB for SEV guests using
> +  * a percentage of guest memory for SWIOTLB buffers.
> +  * Also as the SWIOTLB bounce buffer memory is allocated
   ^
   ,

> +  * from low memory, ensure that the adjusted size is within
> +  * the limits of low available memory.
> +  *
> +  */
> + if (sev_active()) {
> + phys_addr_t total_mem = memblock_phys_mem_size();
> +
> + size = total_mem * SEV_ADJUST_SWIOTLB_SIZE_PERCENT / 100;
> + size = clamp_val(size, iotlb_default_size, SZ_1G);
> + pr_info("SWIOTLB bounce buffer size adjusted to %luMB for SEV",
> + size >> 20);
> + }
> +
> + return size;
> +}
> +
>  void __init mem_encrypt_init(void)
>  {
>   if (!sme_me_mask)

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [bug report] dma-mapping: add benchmark support for streaming DMA APIs

2020-12-09 Thread Dan Carpenter
On Wed, Dec 09, 2020 at 10:01:49AM +, Song Bao Hua (Barry Song) wrote:
> 
> 
> > -Original Message-
> > From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> > Sent: Wednesday, December 9, 2020 8:00 PM
> > To: Song Bao Hua (Barry Song) 
> > Cc: iommu@lists.linux-foundation.org
> > Subject: [bug report] dma-mapping: add benchmark support for streaming DMA 
> > APIs
> > 
> > Hello Barry Song,
> > 
> > The patch 65789daa8087: "dma-mapping: add benchmark support for
> > streaming DMA APIs" from Nov 16, 2020, leads to the following static
> > checker warning:
> > 
> > kernel/dma/map_benchmark.c:241 map_benchmark_ioctl()
> > error: undefined (user controlled) shift '1 << (map->bparam.dma_bits)'
> > 
> > kernel/dma/map_benchmark.c
> >191  static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
> >192  unsigned long arg)
> >193  {
> >194  struct map_benchmark_data *map = file->private_data;
> >195  void __user *argp = (void __user *)arg;
> >196  u64 old_dma_mask;
> >197
> >198  int ret;
> >199
>200  if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
> >^
> > Comes from the user
> > 
> >201  return -EFAULT;
> >202
> >203  switch (cmd) {
> >204  case DMA_MAP_BENCHMARK:
> >205  if (map->bparam.threads == 0 ||
> >206  map->bparam.threads > DMA_MAP_MAX_THREADS) {
> >207  pr_err("invalid thread number\n");
> >208  return -EINVAL;
> >209  }
> >210
> >211  if (map->bparam.seconds == 0 ||
> >212  map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
> >213  pr_err("invalid duration seconds\n");
> >214  return -EINVAL;
> >215  }
> >216
> >217  if (map->bparam.node != NUMA_NO_NODE &&
> >218  !node_possible(map->bparam.node)) {
> >219  pr_err("invalid numa node\n");
> >220  return -EINVAL;
> >221  }
> >222
> >223  switch (map->bparam.dma_dir) {
> >224  case DMA_MAP_BIDIRECTIONAL:
> >225  map->dir = DMA_BIDIRECTIONAL;
> >226  break;
> >227  case DMA_MAP_FROM_DEVICE:
> >228  map->dir = DMA_FROM_DEVICE;
> >229  break;
> >230  case DMA_MAP_TO_DEVICE:
> >231  map->dir = DMA_TO_DEVICE;
> >232  break;
> >233  default:
> >234  pr_err("invalid DMA direction\n");
> >235  return -EINVAL;
> >236  }
> >237
> >238  old_dma_mask = dma_get_mask(map->dev);
> >239
> >240  ret = dma_set_mask(map->dev,
> >241 
> > DMA_BIT_MASK(map->bparam.dma_bits));
> >
> > ^^
> > If this is more than 31 then the behavior is undefined (but in real life
> > it will shift wrap).
> 
> Guess it should be less than 64?
> For 64, it would be ~0ULL, otherwise, it will be 1ULL<<bits.

Yeah, > 64 is undefined, not 31 as I said.

> 
> In test app,
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=7679325702
>  
> 
> I have some code like:
> + /* suppose the mininum DMA zone is 1MB in the world */
> + if (bits < 20 || bits > 64) {
> + fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
> + exit(1);
> + }
> 
> Maybe I should do the same thing in kernel as well.

Sounds good!
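For reference, DMA_BIT_MASK() already special-cases 64 for exactly this
reason (include/linux/dma-mapping.h):

#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))

so a kernel-side check mirroring the test app would be (a sketch):

	if (map->bparam.dma_bits < 20 || map->bparam.dma_bits > 64) {
		pr_err("invalid dma mask bit, must be in 20-64\n");
		return -EINVAL;
	}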

regards,
dan carpenter



RE: [bug report] dma-mapping: add benchmark support for streaming DMA APIs

2020-12-09 Thread Song Bao Hua (Barry Song)



> -Original Message-
> From: Dan Carpenter [mailto:dan.carpen...@oracle.com]
> Sent: Wednesday, December 9, 2020 8:00 PM
> To: Song Bao Hua (Barry Song) 
> Cc: iommu@lists.linux-foundation.org
> Subject: [bug report] dma-mapping: add benchmark support for streaming DMA 
> APIs
> 
> Hello Barry Song,
> 
> The patch 65789daa8087: "dma-mapping: add benchmark support for
> streaming DMA APIs" from Nov 16, 2020, leads to the following static
> checker warning:
> 
>   kernel/dma/map_benchmark.c:241 map_benchmark_ioctl()
>   error: undefined (user controlled) shift '1 << (map->bparam.dma_bits)'
> 
> kernel/dma/map_benchmark.c
>191  static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
>192  unsigned long arg)
>193  {
>194  struct map_benchmark_data *map = file->private_data;
>195  void __user *argp = (void __user *)arg;
>196  u64 old_dma_mask;
>197
>198  int ret;
>199
>200  if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
>^
> Comes from the user
> 
>201  return -EFAULT;
>202
>203  switch (cmd) {
>204  case DMA_MAP_BENCHMARK:
>205  if (map->bparam.threads == 0 ||
>206  map->bparam.threads > DMA_MAP_MAX_THREADS) {
>207  pr_err("invalid thread number\n");
>208  return -EINVAL;
>209  }
>210
>211  if (map->bparam.seconds == 0 ||
>212  map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
>213  pr_err("invalid duration seconds\n");
>214  return -EINVAL;
>215  }
>216
>217  if (map->bparam.node != NUMA_NO_NODE &&
>218  !node_possible(map->bparam.node)) {
>219  pr_err("invalid numa node\n");
>220  return -EINVAL;
>221  }
>222
>223  switch (map->bparam.dma_dir) {
>224  case DMA_MAP_BIDIRECTIONAL:
>225  map->dir = DMA_BIDIRECTIONAL;
>226  break;
>227  case DMA_MAP_FROM_DEVICE:
>228  map->dir = DMA_FROM_DEVICE;
>229  break;
>230  case DMA_MAP_TO_DEVICE:
>231  map->dir = DMA_TO_DEVICE;
>232  break;
>233  default:
>234  pr_err("invalid DMA direction\n");
>235  return -EINVAL;
>236  }
>237
>238  old_dma_mask = dma_get_mask(map->dev);
>239
>240  ret = dma_set_mask(map->dev,
>241 
> DMA_BIT_MASK(map->bparam.dma_bits));
>^^
> If this is more than 31 then the behavior is undefined (but in real life
> it will shift wrap).

Guess it should be less than 64?
For 64, it would be ~0ULL, otherwise, it will be 1ULL<<bits.

In test app,
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=7679325702

I have some code like:
+	/* suppose the mininum DMA zone is 1MB in the world */
+	if (bits < 20 || bits > 64) {
+   fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+   exit(1);
+   }

Maybe I should do the same thing in kernel as well.

> 
>242  if (ret) {
>243  pr_err("failed to set dma_mask on device 
> %s\n",
>244  dev_name(map->dev));
>245  return -EINVAL;
>246  }
>247
>248  ret = do_map_benchmark(map);
>249
>250  /*
>251   * restore the original dma_mask as many devices' 
> dma_mask
> are
>252   * set by architectures, acpi, busses. When we bind 
> them
> back
>253   * to their original drivers, those drivers shouldn't 
> see
>254   * dma_mask changed by benchmark
>255   */
>256  dma_set_mask(map->dev, old_dma_mask);
>257  break;
>258  default:
>259  return -EINVAL;
>260  }
>261
>262  if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
>263  return -EFAULT;
>264
>265  return ret;
>266  }
> 
> regards,
> dan carpenter

Thanks
Barry


Re: [RESEND PATCH v3 3/4] iommu/iova: Flush CPU rcache for when a depot fills

2020-12-09 Thread Leizhen (ThunderTown)



On 2020/11/17 18:25, John Garry wrote:
> Leizhen reported some time ago that IOVA performance may degrade over time
> [0], but unfortunately his solution to fix this problem was not given
> attention.
> 
> To summarize, the issue is that as time goes by, the CPU rcache and depot
> rcache continue to grow. As such, IOVA RB tree access time also continues
> to grow.
> 
> At a certain point, a depot may become full, and also some CPU rcaches may
> also be full when inserting another IOVA is attempted. For this scenario,
> currently the "loaded" CPU rcache is freed and a new one is created. This
> freeing means that many IOVAs in the RB tree need to be freed, which
> makes IO throughput performance fall off a cliff in some storage scenarios:
> 
> Jobs: 12 (f=12): [] [0.0% done] [6314MB/0KB/0KB /s] [1616K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [5669MB/0KB/0KB /s] [1451K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6031MB/0KB/0KB /s] [1544K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6673MB/0KB/0KB /s] [1708K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6705MB/0KB/0KB /s] [1717K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6031MB/0KB/0KB /s] [1544K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6761MB/0KB/0KB /s] [1731K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6705MB/0KB/0KB /s] [1717K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6685MB/0KB/0KB /s] [1711K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6178MB/0KB/0KB /s] [1582K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [6731MB/0KB/0KB /s] [1723K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [2387MB/0KB/0KB /s] [611K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [2689MB/0KB/0KB /s] [688K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [2278MB/0KB/0KB /s] [583K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [1288MB/0KB/0KB /s] [330K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [1632MB/0KB/0KB /s] [418K/0/0 
> iops]
> Jobs: 12 (f=12): [] [0.0% done] [1765MB/0KB/0KB /s] [452K/0/0 
> iops]
> 
> And continue in this fashion, without recovering. Note that in this
> example it was required to wait 16 hours for this to occur. Also note that
> IO throughput also becomes gradually becomes more unstable leading up to
> this point.
> 
> This problem is only seen for non-strict mode. For strict mode, the rcaches
> stay quite compact.
> 
> As a solution to this issue, judge that the IOVA caches have grown too big
> when cached magazines need to be free, and just flush all the CPUs rcaches
> instead.
> 
> The depot rcaches, however, are not flushed, as they can be used to
> immediately replenish active CPUs.
> 
> In future, some IOVA compaction could be implemented to solve the
> instabilty issue, which I figure could be quite complex to implement.
> 
> [0] 
> https://lore.kernel.org/linux-iommu/20190815121104.29140-3-thunder.leiz...@huawei.com/
> 
> Analyzed-by: Zhen Lei 
> Reported-by: Xiang Chen 
> Signed-off-by: John Garry 
> ---
>  drivers/iommu/iova.c | 16 ++--
>  1 file changed, 6 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index 1f3f0f8b12e0..386005055aca 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -901,7 +901,6 @@ static bool __iova_rcache_insert(struct iova_domain 
> *iovad,
>struct iova_rcache *rcache,
>unsigned long iova_pfn)
>  {
> - struct iova_magazine *mag_to_free = NULL;
>   struct iova_cpu_rcache *cpu_rcache;
>   bool can_insert = false;
>   unsigned long flags;
> @@ -923,13 +922,12 @@ static bool __iova_rcache_insert(struct iova_domain 
> *iovad,
>   if (cpu_rcache->loaded)
>   rcache->depot[rcache->depot_size++] =
>   cpu_rcache->loaded;
> - } else {
> - mag_to_free = cpu_rcache->loaded;
> + can_insert = true;
> + cpu_rcache->loaded = new_mag;
>   }
>   spin_unlock(&rcache->lock);
> -
> - cpu_rcache->loaded = new_mag;
> - can_insert = true;
> + if (!can_insert)
> + iova_magazine_free(new_mag);
>   }
>   }
>  
> @@ -938,10 +936,8 @@ static bool __iova_rcache_insert(struct iova_domain 
> *iovad,
>  
>   spin_unlock_irqrestore(&cpu_rcache->lock, flags);
>  
> - if (mag_to_free) {
> - iova_magazine_free_pfns(mag_to_free, iovad);
> - iova_magazine_free(mag_to_free);
mag_to_free has been stripped out, that's why lock protection is not required 
here.

> - }
> + if 

[PATCH v5 27/27] MAINTAINERS: Add entry for MediaTek IOMMU

2020-12-09 Thread Yong Wu
I am the author of the MediaTek iommu driver, and will maintain and
develop it further.
Add myself to cover these items.

Signed-off-by: Yong Wu 
Reviewed-by: Chun-Kuang Hu 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e73636b75f29..462a87ee19c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11056,6 +11056,15 @@ S: Maintained
 F: Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt
 F: drivers/i2c/busses/i2c-mt65xx.c
 
+MEDIATEK IOMMU DRIVER
+M: Yong Wu 
+L: iommu@lists.linux-foundation.org
+L: linux-media...@lists.infradead.org (moderated for non-subscribers)
+S: Supported
+F: Documentation/devicetree/bindings/iommu/mediatek*
+F: drivers/iommu/mtk-iommu*
+F: include/dt-bindings/memory/mt*-larb-port.h
+
 MEDIATEK JPEG DRIVER
 M: Rick Chang 
 M: Bin Liu 
-- 
2.18.0



[PATCH v5 26/27] iommu/mediatek: Add mt8192 support

2020-12-09 Thread Yong Wu
Add mt8192 iommu support.

For multi domain, add a 1M gap for the vdec domain size. That is because
the vdec HW has an end address register which requires (start_addr +
len) rather than (start_addr + len - 1). Take an example: if the start_addr
is 0xfff00000 and the size is 0x100000, then the end_address is 0xfff00000 +
0x100000 = 0x100000000. But the register is only 32bit, thus HW will get
the end address as 0. To avoid this issue, I add a 1M gap for this.
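The wrap is easy to see with 32-bit arithmetic (illustration only):

	u32 end = 0xfff00000U + 0x00100000U;	/* 0x100000000 truncated to 0 */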

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 22 ++
 drivers/iommu/mtk_iommu.h |  1 +
 2 files changed, 23 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 92c1e2f0af89..799adf7b39d3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -174,6 +174,16 @@ static const struct mtk_iommu_iova_region single_domain[] = {
{.iova_base = 0,.size = SZ_4G},
 };
 
+static const struct mtk_iommu_iova_region mt8192_multi_dom[] = {
+   { .iova_base = 0x0, .size = SZ_4G}, /* disp: 0 ~ 4G */
+   #if IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)
+   { .iova_base = SZ_4G,   .size = SZ_4G - SZ_1M}, /* vdec: 4G ~ 8G gap: 1M */
+   { .iova_base = SZ_4G * 2,   .size = SZ_4G - SZ_1M}, /* CAM/MDP: 8G ~ 12G */
+   { .iova_base = 0x24000ULL,  .size = 0x400}, /* CCU0 */
+   { .iova_base = 0x24400ULL,  .size = 0x400}, /* CCU1 */
+   #endif
+};
+
 /*
 * There may be 1 or 2 M4U HWs, But we always expect they are in the same domain
  * for the performance.
@@ -1035,12 +1045,24 @@ static const struct mtk_iommu_plat_data mt8183_data = {
.larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}},
 };
 
+static const struct mtk_iommu_plat_data mt8192_data = {
+   .m4u_plat   = M4U_MT8192,
+   .flags  = HAS_BCLK | HAS_SUB_COMM | OUT_ORDER_WR_EN |
+ WR_THROT_EN | IOVA_34_EN,
+   .inv_sel_reg= REG_MMU_INV_SEL_GEN2,
+   .iova_region= mt8192_multi_dom,
+   .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom),
+   .larbid_remap   = {{0}, {1}, {4, 5}, {7}, {2}, {9, 11, 19, 20},
+  {0, 14, 16}, {0, 13, 18, 17}},
+};
+
 static const struct of_device_id mtk_iommu_of_ids[] = {
{ .compatible = "mediatek,mt2712-m4u", .data = _data},
{ .compatible = "mediatek,mt6779-m4u", .data = _data},
{ .compatible = "mediatek,mt8167-m4u", .data = _data},
{ .compatible = "mediatek,mt8173-m4u", .data = _data},
{ .compatible = "mediatek,mt8183-m4u", .data = _data},
+   { .compatible = "mediatek,mt8192-m4u", .data = _data},
{}
 };
 
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index b54862307128..e96b1b8639f4 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -43,6 +43,7 @@ enum mtk_iommu_plat {
M4U_MT8167,
M4U_MT8173,
M4U_MT8183,
+   M4U_MT8192,
 };
 
 struct mtk_iommu_iova_region;
-- 
2.18.0



[PATCH v5 25/27] iommu/mediatek: Adjust the structure

2020-12-09 Thread Yong Wu
Add "struct mtk_iommu_data *" in the "struct mtk_iommu_domain",
reduce the call mtk_iommu_get_m4u_data().
No functional change.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 160690d56bd2..92c1e2f0af89 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -126,6 +126,7 @@ struct mtk_iommu_domain {
struct io_pgtable_cfg   cfg;
struct io_pgtable_ops   *iop;
 
+   struct mtk_iommu_data   *data;
struct iommu_domain domain;
 };
 
@@ -353,7 +354,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
 
 static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
 {
-   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+   struct mtk_iommu_data *data = dom->data;
 
/* Use the exist domain as there is only one m4u pgtable here. */
if (data->m4u_dom) {
@@ -402,6 +403,7 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
	if (iommu_get_dma_cookie(&dom->domain))
goto  free_dom;
 
+   dom->data = data;
if (mtk_iommu_domain_finalise(dom))
goto  put_dma_cookie;
 
@@ -482,10 +484,9 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
 {
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
 
/* The "4GB mode" M4U physically can not use the lower remap of Dram. */
-   if (data->enable_4GB)
+   if (dom->data->enable_4GB)
paddr |= BIT_ULL(32);
 
/* Synchronize with the tlb_lock */
@@ -503,31 +504,32 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
 
 static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
-   mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
+   struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+
+   mtk_iommu_tlb_flush_all(dom->data);
 }
 
 static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
 struct iommu_iotlb_gather *gather)
 {
-   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+   struct mtk_iommu_domain *dom = to_mtk_domain(domain);
size_t length = gather->end - gather->start;
 
if (gather->start == ULONG_MAX)
return;
 
mtk_iommu_tlb_flush_range_sync(gather->start, length, gather->pgsize,
-  data);
+  dom->data);
 }
 
 static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
  dma_addr_t iova)
 {
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
-   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
phys_addr_t pa;
 
pa = dom->iop->iova_to_phys(dom->iop, iova);
-   if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+   if (dom->data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
pa &= ~BIT_ULL(32);
 
return pa;
-- 
2.18.0



[PATCH v5 24/27] iommu/mediatek: Add support for multi domain

2020-12-09 Thread Yong Wu
Some HW IPs (e.g. CCU) require a special iova range. That means the
iova obtained from dma_alloc_attrs for those devices must be located in
their special range. In this patch, we allocate a special iova_range
for each such requirement and create an iommu domain for each
iova_range.

Meanwhile we still use one pagetable which supports 16GB iova.

After this patch, if the iova range of a master is over 4G, the master
should:
a) Declare its special dma-ranges in its dtsi node (the triple is
   decoded in the sketch after this list). For example, if we preassign
   the iova 4G-8G for vcodec, then the vcodec dtsi node should add this:
   /*
    * iova starts at 0x1_0000_0000, pa still starts at 0x4000_0000,
    * size is 0x1_0000_0000.
    */
   dma-ranges = <0x1 0x0 0x0 0x40000000 0x1 0x0>;  /* 4G ~ 8G */
 Note: we don't have an actual bus concept here. The master doesn't have
 its own special parent node, thus this dma-ranges can only be put in the
 master's node.

b) Update the dma_mask:
  dma_set_mask_and_coherent(dev, DMA_BIT_MASK(33));
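
For readers less familiar with dma-ranges, this is how the example
triple above decodes, assuming 2 address cells and 2 size cells (my
reading of the example, not text from the patch):

  /*
   * dma-ranges = <0x1 0x0   0x0 0x40000000   0x1 0x0>;
   *               \______/  \____________/   \______/
   *               child      parent           size
   *               (iova)     (pa)
   *
   * child  (iova base) = 0x1_0000_0000   (4G)
   * parent (pa base)   = 0x0_4000_0000
   * size               = 0x1_0000_0000   (4G, hence "4G ~ 8G")
   */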

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 47 +++
 drivers/iommu/mtk_iommu.h |  3 ++-
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index ed771133643d..160690d56bd2 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -355,6 +355,14 @@ static int mtk_iommu_domain_finalise(struct 
mtk_iommu_domain *dom)
 {
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
 
+   /* Use the exist domain as there is only one m4u pgtable here. */
+   if (data->m4u_dom) {
+   dom->iop = data->m4u_dom->iop;
+   dom->cfg = data->m4u_dom->cfg;
+   dom->domain.pgsize_bitmap = data->m4u_dom->cfg.pgsize_bitmap;
+   return 0;
+   }
+
dom->cfg = (struct io_pgtable_cfg) {
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
@@ -380,6 +388,8 @@ static int mtk_iommu_domain_finalise(struct 
mtk_iommu_domain *dom)
 
 static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
 {
+   struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+   const struct mtk_iommu_iova_region *region;
struct mtk_iommu_domain *dom;
 
if (type != IOMMU_DOMAIN_DMA)
@@ -395,8 +405,9 @@ static struct iommu_domain *mtk_iommu_domain_alloc(unsigned 
type)
if (mtk_iommu_domain_finalise(dom))
goto  put_dma_cookie;
 
-   dom->domain.geometry.aperture_start = 0;
-   dom->domain.geometry.aperture_end = DMA_BIT_MASK(32);
+   region = data->plat_data->iova_region + data->cur_domid;
+   dom->domain.geometry.aperture_start = region->iova_base;
+   dom->domain.geometry.aperture_end = region->iova_base + region->size - 
1;
dom->domain.geometry.force_aperture = true;
 
	return &dom->domain;
@@ -548,19 +559,31 @@ static void mtk_iommu_release_device(struct device *dev)
 static struct iommu_group *mtk_iommu_device_group(struct device *dev)
 {
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+   struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   struct iommu_group *group;
+   int domid;
 
if (!data)
return ERR_PTR(-ENODEV);
 
-   /* All the client devices are in the same m4u iommu-group */
-   if (!data->m4u_group) {
-   data->m4u_group = iommu_group_alloc();
-   if (IS_ERR(data->m4u_group))
+   domid = MTK_M4U_TO_DOM(fwspec->ids[0]);
+   if (domid >= data->plat_data->iova_region_nr) {
+   dev_err(dev, "iommu domain id(%d/%d) is error.\n", domid,
+   data->plat_data->iova_region_nr);
+   return ERR_PTR(-EINVAL);
+   }
+
+   group = data->m4u_group[domid];
+   if (!group) {
+   group = iommu_group_alloc();
+   if (IS_ERR(group))
dev_err(dev, "Failed to allocate M4U IOMMU group\n");
+   data->m4u_group[domid] = group;
} else {
-   iommu_group_ref_get(data->m4u_group);
+   iommu_group_ref_get(group);
}
-   return data->m4u_group;
+   data->cur_domid = domid;
+   return group;
 }
 
 static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
@@ -589,14 +612,20 @@ static void mtk_iommu_get_resv_regions(struct device *dev,
   struct list_head *head)
 {
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
-   const struct mtk_iommu_iova_region *resv;
+   const struct mtk_iommu_iova_region *resv, *curdom;
struct iommu_resv_region *region;
int prot = IOMMU_WRITE | IOMMU_READ;
unsigned int i;
 
+   curdom = data->plat_data->iova_region + data->cur_domid;
for (i = 0; i < data->plat_data->iova_region_nr; i++) {
resv = data->plat_data->iova_region + i;
 
+   /* Only 

[PATCH v5 23/27] iommu/mediatek: Support report iova 34bit translation fault in ISR

2020-12-09 Thread Yong Wu
If the iova is over 32 bits, the layout of the fault status register is
a little different.
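
As a rough sketch of the reassembly this patch performs in the ISR
(mtk_fault_iova34() is an assumed name for illustration; the masks
follow the defines added below):

  #include <stdint.h>

  /* VA[31:12] lives in reg[31:12] (F_MMU_INVAL_VA_31_12_MASK),
   * VA[34:32] lives in reg[11:9] (F_MMU_INVAL_VA_34_32_MASK).
   */
  static uint64_t mtk_fault_iova34(uint32_t reg)
  {
  	uint64_t va_lo = reg & 0xfffff000u;	/* GENMASK(31, 12) */
  	uint64_t va_hi = (reg >> 9) & 0x7;	/* GENMASK(11, 9) >> 9 */

  	return va_lo | (va_hi << 32);
  }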

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 17 +++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 164479e1f5c5..ed771133643d 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2015-2016 MediaTek Inc.
  * Author: Yong Wu 
  */
+#include 
 #include 
 #include 
 #include 
@@ -89,6 +90,9 @@
 #define F_REG_MMU1_FAULT_MASK  GENMASK(13, 7)
 
 #define REG_MMU0_FAULT_VA  0x13c
+#define F_MMU_INVAL_VA_31_12_MASK  GENMASK(31, 12)
+#define F_MMU_INVAL_VA_34_32_MASK  GENMASK(11, 9)
+#define F_MMU_INVAL_PA_34_32_MASK  GENMASK(8, 6)
 #define F_MMU_FAULT_VA_WRITE_BIT   BIT(1)
 #define F_MMU_FAULT_VA_LAYER_BIT   BIT(0)
 
@@ -264,8 +268,9 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
 {
struct mtk_iommu_data *data = dev_id;
struct mtk_iommu_domain *dom = data->m4u_dom;
-   u32 int_state, regval, fault_iova, fault_pa;
unsigned int fault_larb, fault_port, sub_comm = 0;
+   u32 int_state, regval, va34_32, pa34_32;
+   u64 fault_iova, fault_pa;
bool layer, write;
 
/* Read error info from registers */
@@ -281,6 +286,14 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
}
layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
+   if (MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN)) {
+   va34_32 = FIELD_GET(F_MMU_INVAL_VA_34_32_MASK, fault_iova);
+   pa34_32 = FIELD_GET(F_MMU_INVAL_PA_34_32_MASK, fault_iova);
+   fault_iova = fault_iova & F_MMU_INVAL_VA_31_12_MASK;
+   fault_iova |=  (u64)va34_32 << 32;
+   fault_pa |= (u64)pa34_32 << 32;
+   }
+
fault_port = F_MMU_INT_ID_PORT_ID(regval);
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_SUB_COMM)) {
fault_larb = F_MMU_INT_ID_COMM_ID(regval);
@@ -294,7 +307,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
   write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
dev_err_ratelimited(
data->dev,
-   "fault type=0x%x iova=0x%x pa=0x%x larb=%d port=%d 
layer=%d %s\n",
+   "fault type=0x%x iova=0x%llx pa=0x%llx larb=%d port=%d 
layer=%d %s\n",
int_state, fault_iova, fault_pa, fault_larb, fault_port,
layer, write ? "write" : "read");
}
-- 
2.18.0



[PATCH v5 20/27] iommu/mediatek: Add single domain

2020-12-09 Thread Yong Wu
By default the iova range is 0-4G. Here we add a single domain (0-4G)
for the previous SoCs. This also is a preparatory patch for supporting
multi-domains.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 6a909efc984f..c3a6712c497b 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -162,6 +162,10 @@ struct mtk_iommu_iova_region {
unsigned long long  size;
 };
 
+static const struct mtk_iommu_iova_region single_domain[] = {
+	{.iova_base = 0,	.size = SZ_4G},
+};
+
 /*
  * There may be 1 or 2 M4U HWs, But we always expect they are in the same 
domain
  * for the performance.
@@ -936,6 +940,8 @@ static const struct mtk_iommu_plat_data mt2712_data = {
.m4u_plat = M4U_MT2712,
.flags= HAS_4GB_MODE | HAS_BCLK | HAS_VLD_PA_RNG,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
+   .iova_region  = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}},
 };
 
@@ -943,6 +949,8 @@ static const struct mtk_iommu_plat_data mt6779_data = {
.m4u_plat  = M4U_MT6779,
.flags = HAS_SUB_COMM | OUT_ORDER_WR_EN | WR_THROT_EN,
.inv_sel_reg   = REG_MMU_INV_SEL_GEN2,
+   .iova_region   = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap  = {{0}, {1}, {2}, {3}, {5}, {7, 8}, {10}, {9}},
 };
 
@@ -950,6 +958,8 @@ static const struct mtk_iommu_plat_data mt8167_data = {
.m4u_plat = M4U_MT8167,
.flags= RESET_AXI | HAS_LEGACY_IVRP_PADDR,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
+   .iova_region  = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}}, /* Linear mapping. */
 };
 
@@ -958,6 +968,8 @@ static const struct mtk_iommu_plat_data mt8173_data = {
.flags= HAS_4GB_MODE | HAS_BCLK | RESET_AXI |
HAS_LEGACY_IVRP_PADDR,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
+   .iova_region  = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */
 };
 
@@ -965,6 +977,8 @@ static const struct mtk_iommu_plat_data mt8183_data = {
.m4u_plat = M4U_MT8183,
.flags= RESET_AXI,
.inv_sel_reg  = REG_MMU_INV_SEL_GEN1,
+   .iova_region  = single_domain,
+   .iova_region_nr = ARRAY_SIZE(single_domain),
.larbid_remap = {{0}, {4}, {5}, {6}, {7}, {2}, {3}, {1}},
 };
 
-- 
2.18.0



[PATCH v5 22/27] iommu/mediatek: Support up to 34bit iova in tlb flush

2020-12-09 Thread Yong Wu
If the iova is 34-bit, iova[33:32] maps to bit[1:0] of the tlb flush
range registers. Add a new macro for this.

There is a minor change not strictly related to this patch: the
protect-base register setup also uses the new macro.
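
A minimal standalone sketch of what the new macro computes
(mtk_iommu_addr() is just an illustrative function name, not from the
patch): iova[33:32] is folded into bit[1:0] of the 32-bit register
value by ORing the upper word into the lower one.

  #include <stdint.h>

  static uint32_t mtk_iommu_addr(uint64_t addr)
  {
  	/* lower_32_bits(addr) | upper_32_bits(addr) */
  	return (uint32_t)addr | (uint32_t)(addr >> 32);
  }

  /* e.g. mtk_iommu_addr(0x100000000ULL) == 0x00000001
   * (iova bit32 lands in register bit0)
   */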

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f206275230b3..164479e1f5c5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -129,6 +129,9 @@ static const struct iommu_ops mtk_iommu_ops;
 
 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data);
 
+#define MTK_IOMMU_ADDR(addr) ({unsigned long _addr = addr; \
+ (lower_32_bits(_addr) | upper_32_bits(_addr)); })
+
 /*
  * In M4U 4GB mode, the physical address is remapped as below:
  *
@@ -219,8 +222,9 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
iova, size_t size,
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
   data->base + data->plat_data->inv_sel_reg);
 
-   writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
-   writel_relaxed(iova + size - 1,
+   writel_relaxed(MTK_IOMMU_ADDR(iova),
+  data->base + REG_MMU_INVLD_START_A);
+   writel_relaxed(MTK_IOMMU_ADDR(iova + size - 1),
   data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
   data->base + REG_MMU_INVALIDATE);
@@ -648,8 +652,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data 
*data)
if (MTK_IOMMU_HAS_FLAG(data->plat_data, HAS_LEGACY_IVRP_PADDR))
regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
else
-   regval = lower_32_bits(data->protect_base) |
-upper_32_bits(data->protect_base);
+   regval = MTK_IOMMU_ADDR(data->protect_base);
writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
 
if (data->enable_4GB &&
-- 
2.18.0



[PATCH v5 21/27] iommu/mediatek: Support master use iova over 32bit

2020-12-09 Thread Yong Wu
After extending v7s, our pagetable already supports iova reaching
16GB (34-bit). The iova a master gets via dma_alloc_attrs may reach
34 bits, but its HW register still is 32-bit. Then how do we set
bit32/bit33 of the iova? This depends on a SMI larb setting (bank_sel).

We separate the whole 16GB iova into four banks:
bank 0: 0~4G; bank 1: 4~8G; bank 2: 8~12G; bank 3: 12~16G.
The bank number is (iova >> 32), as sketched after this message.

We will preassign which bank the larbs belong to. Currently we don't
have an interface for a master to adjust its bank number.

Each bank is an iova_region which is an independent iommu domain.
The iova range for each iommu domain can't cross 4G.
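
A minimal sketch of that bank computation (iova_to_bank() is an assumed
helper name for illustration, not something from the patch):

  static unsigned int iova_to_bank(unsigned long long iova)
  {
  	/* bank 0: 0~4G, bank 1: 4~8G, bank 2: 8~12G, bank 3: 12~16G */
  	return (unsigned int)(iova >> 32) & 0x3;
  }

In the SMI hunk below, BANK_SEL() then replicates this 2-bit bank value
into the four 2-bit fields of the per-port SMI_LARB_NONSEC_CON register.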

Signed-off-by: Yong Wu 
Acked-by: Krzysztof Kozlowski 
---
 drivers/iommu/mtk_iommu.c  | 12 +---
 drivers/memory/mtk-smi.c   |  7 +++
 include/soc/mediatek/smi.h |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c3a6712c497b..f206275230b3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -309,17 +309,23 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
 struct device *dev, bool enable)
 {
struct mtk_smi_larb_iommu*larb_mmu;
-   unsigned int larbid, portid;
+   unsigned int larbid, portid, domid;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+   const struct mtk_iommu_iova_region *region;
int i;
 
for (i = 0; i < fwspec->num_ids; ++i) {
larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
+   domid = MTK_M4U_TO_DOM(fwspec->ids[i]);
+
		larb_mmu = &data->larb_imu[larbid];
+   region = data->plat_data->iova_region + domid;
+   larb_mmu->bank[portid] = upper_32_bits(region->iova_base);
 
-   dev_dbg(dev, "%s iommu port: %d\n",
-   enable ? "enable" : "disable", portid);
+   dev_dbg(dev, "%s iommu for larb(%s) port %d dom %d bank %d.\n",
+   enable ? "enable" : "disable", dev_name(larb_mmu->dev),
+   portid, domid, larb_mmu->bank[portid]);
 
if (enable)
larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 2beb67908f3c..2094e4b4eb10 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -44,6 +44,10 @@
 /* mt2712 */
 #define SMI_LARB_NONSEC_CON(id)(0x380 + ((id) * 4))
 #define F_MMU_EN   BIT(0)
+#define BANK_SEL(id)   ({  \
+   u32 _id = (id) & 0x3;   \
+   (_id << 8 | _id << 10 | _id << 12 | _id << 14); \
+})
 
 /* SMI COMMON */
 #define SMI_BUS_SEL0x220
@@ -88,6 +92,7 @@ struct mtk_smi_larb { /* larb: local arbiter */
const struct mtk_smi_larb_gen   *larb_gen;
int larbid;
u32 *mmu;
+   unsigned char   *bank;
 };
 
 static int mtk_smi_clk_enable(const struct mtk_smi *smi)
@@ -154,6 +159,7 @@ mtk_smi_larb_bind(struct device *dev, struct device 
*master, void *data)
if (dev == larb_mmu[i].dev) {
larb->larbid = i;
			larb->mmu = &larb_mmu[i].mmu;
+   larb->bank = larb_mmu[i].bank;
return 0;
}
}
@@ -172,6 +178,7 @@ static void mtk_smi_larb_config_port_gen2_general(struct 
device *dev)
for_each_set_bit(i, (unsigned long *)larb->mmu, 32) {
reg = readl_relaxed(larb->base + SMI_LARB_NONSEC_CON(i));
reg |= F_MMU_EN;
+   reg |= BANK_SEL(larb->bank[i]);
writel(reg, larb->base + SMI_LARB_NONSEC_CON(i));
}
 }
diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h
index 9371bf572ab8..4cf445dbbdaa 100644
--- a/include/soc/mediatek/smi.h
+++ b/include/soc/mediatek/smi.h
@@ -16,6 +16,7 @@
 struct mtk_smi_larb_iommu {
struct device *dev;
unsigned int   mmu;
+   unsigned char  bank[32];
 };
 
 /*
-- 
2.18.0



[PATCH v5 19/27] iommu/mediatek: Add iova reserved function

2020-12-09 Thread Yong Wu
For multiple iommu domains, we need to reserve some iova regions. Take
an example: if the default iova region is 0 ~ 4G, but 0x4000_0000 ~
0x43ff_ffff is only for the special CCU0 domain, then we should exclude
this region from the default iova region.

This patch adds the iova reserved flow. It's a preparatory patch for
supporting multi-domain.

Signed-off-by: Anan sun 
Signed-off-by: Chao Hao 
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 28 
 drivers/iommu/mtk_iommu.h |  5 +
 2 files changed, 33 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 0e9c03cbab32..6a909efc984f 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -157,6 +157,11 @@ static LIST_HEAD(m4ulist); /* List all the M4U HWs */
 
 #define for_each_m4u(data)	list_for_each_entry(data, &m4ulist, list)
 
+struct mtk_iommu_iova_region {
+   dma_addr_t  iova_base;
+   unsigned long long  size;
+};
+
 /*
  * There may be 1 or 2 M4U HWs, But we always expect they are in the same 
domain
  * for the performance.
@@ -553,6 +558,27 @@ static int mtk_iommu_of_xlate(struct device *dev, struct 
of_phandle_args *args)
return iommu_fwspec_add_ids(dev, args->args, 1);
 }
 
+static void mtk_iommu_get_resv_regions(struct device *dev,
+  struct list_head *head)
+{
+   struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
+   const struct mtk_iommu_iova_region *resv;
+   struct iommu_resv_region *region;
+   int prot = IOMMU_WRITE | IOMMU_READ;
+   unsigned int i;
+
+   for (i = 0; i < data->plat_data->iova_region_nr; i++) {
+   resv = data->plat_data->iova_region + i;
+
+   region = iommu_alloc_resv_region(resv->iova_base, resv->size,
+prot, IOMMU_RESV_RESERVED);
+   if (!region)
+   return;
+
+		list_add_tail(&region->list, head);
+   }
+}
+
 static const struct iommu_ops mtk_iommu_ops = {
.domain_alloc   = mtk_iommu_domain_alloc,
.domain_free= mtk_iommu_domain_free,
@@ -567,6 +593,8 @@ static const struct iommu_ops mtk_iommu_ops = {
.release_device = mtk_iommu_release_device,
.device_group   = mtk_iommu_device_group,
.of_xlate   = mtk_iommu_of_xlate,
+   .get_resv_regions = mtk_iommu_get_resv_regions,
+   .put_resv_regions = generic_iommu_put_resv_regions,
.pgsize_bitmap  = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
 };
 
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 5e03a029c4dc..e867cd3aeeac 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -45,10 +45,15 @@ enum mtk_iommu_plat {
M4U_MT8183,
 };
 
+struct mtk_iommu_iova_region;
+
 struct mtk_iommu_plat_data {
enum mtk_iommu_plat m4u_plat;
u32 flags;
u32 inv_sel_reg;
+
+   unsigned intiova_region_nr;
+   const struct mtk_iommu_iova_region   *iova_region;
unsigned char   larbid_remap[MTK_LARB_COM_MAX][MTK_LARB_SUBCOM_MAX];
 };
 
-- 
2.18.0



[PATCH v5 17/27] iommu/mediatek: Add pm runtime callback

2020-12-09 Thread Yong Wu
This patch adds the pm runtime callbacks.

In the pm runtime case, all the register backup/restore and the bclk
are controlled in the pm_runtime callbacks, so pm_suspend is not needed
in this case.

Runtime PM is disabled during system suspend, thus we call
pm_runtime_status_suspended instead of pm_runtime_suspended.

Also, the m4u doesn't have its own pm runtime domain in the previous
SoCs; in that case dev->power.runtime_status is RPM_SUSPENDED by
default, thus add a "dev->pm_domain" check for the SoCs that have a pm
runtime domain.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5614015e5b96..6fe3ee2b2bf5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -808,7 +808,7 @@ static int mtk_iommu_remove(struct platform_device *pdev)
return 0;
 }
 
-static int __maybe_unused mtk_iommu_suspend(struct device *dev)
+static int __maybe_unused mtk_iommu_runtime_suspend(struct device *dev)
 {
struct mtk_iommu_data *data = dev_get_drvdata(dev);
	struct mtk_iommu_suspend_reg *reg = &data->reg;
@@ -826,7 +826,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device 
*dev)
return 0;
 }
 
-static int __maybe_unused mtk_iommu_resume(struct device *dev)
+static int __maybe_unused mtk_iommu_runtime_resume(struct device *dev)
 {
struct mtk_iommu_data *data = dev_get_drvdata(dev);
	struct mtk_iommu_suspend_reg *reg = &data->reg;
@@ -853,7 +853,25 @@ static int __maybe_unused mtk_iommu_resume(struct device 
*dev)
return 0;
 }
 
+static int __maybe_unused mtk_iommu_suspend(struct device *dev)
+{
+   /* runtime PM is disabled when suspend in pm_runtime case. */
+   if (dev->pm_domain && pm_runtime_status_suspended(dev))
+   return 0;
+
+   return mtk_iommu_runtime_suspend(dev);
+}
+
+static int __maybe_unused mtk_iommu_resume(struct device *dev)
+{
+   if (dev->pm_domain && pm_runtime_status_suspended(dev))
+   return 0;
+
+   return mtk_iommu_runtime_resume(dev);
+}
+
 static const struct dev_pm_ops mtk_iommu_pm_ops = {
+   SET_RUNTIME_PM_OPS(mtk_iommu_runtime_suspend, mtk_iommu_runtime_resume, 
NULL)
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
 };
 
-- 
2.18.0



[PATCH v5 18/27] iommu/mediatek: Add power-domain operation

2020-12-09 Thread Yong Wu
In the previous SoCs, the M4U HW is in the EMI power domain which is
always on. The latest M4U is in the display power domain which may be
turned on/off, thus we have to add the pm_runtime interface for it.

When an engine works, the engine always enables the power and clocks
for smi-larb/smi-common, then the M4U's power will always be powered
on automatically via the device link with smi-common.

Note: we don't enable the M4U power in iommu_map/unmap for tlb flush.
If its power is already on, of course that is ok. If the power is off,
the main tlb will be reset while the M4U powers on, thus the tlb flush
while the m4u power is off is unnecessary; just skip it.

There is one case where the pm runtime status is not as expected when
flushing the tlb: after boot, the display may call dma_alloc_attrs
before it calls pm_runtime_get(disp-dev), then the m4u's pm status is
not active inside that dma_alloc_attrs. Since this only happens right
after boot, the tlb is clean at that time, so I also think this is ok.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 41 +--
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 6fe3ee2b2bf5..0e9c03cbab32 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -184,6 +184,8 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
struct mtk_iommu_data *data = cookie;
 
for_each_m4u(data) {
+   if (!pm_runtime_active(data->dev))
+   continue;
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
   data->base + data->plat_data->inv_sel_reg);
writel_relaxed(F_ALL_INVLD, data->base + REG_MMU_INVALIDATE);
@@ -200,6 +202,10 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
iova, size_t size,
u32 tmp;
 
for_each_m4u(data) {
+   /* skip tlb flush when pm is not active. */
+   if (!pm_runtime_active(data->dev))
+   continue;
+
		spin_lock_irqsave(&data->tlb_lock, flags);
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
   data->base + data->plat_data->inv_sel_reg);
@@ -384,6 +390,8 @@ static int mtk_iommu_attach_device(struct iommu_domain 
*domain,
 {
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+   struct device *m4udev = data->dev;
+   bool pm_enabled = pm_runtime_enabled(m4udev);
int ret;
 
if (!data)
@@ -391,12 +399,25 @@ static int mtk_iommu_attach_device(struct iommu_domain 
*domain,
 
/* Update the pgtable base address register of the M4U HW */
if (!data->m4u_dom) {
+   if (pm_enabled) {
+   ret = pm_runtime_get_sync(m4udev);
+   if (ret < 0) {
+   pm_runtime_put_noidle(m4udev);
+   return ret;
+   }
+   }
ret = mtk_iommu_hw_init(data);
-   if (ret)
+   if (ret) {
+   if (pm_enabled)
+   pm_runtime_put(m4udev);
return ret;
+   }
data->m4u_dom = dom;
writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
   data->base + REG_MMU_PT_BASE_ADDR);
+
+   if (pm_enabled)
+   pm_runtime_put(m4udev);
}
 
mtk_iommu_config(data, dev, true);
@@ -747,10 +768,13 @@ static int mtk_iommu_probe(struct platform_device *pdev)
if (dev->pm_domain) {
struct device_link *link;
 
+   pm_runtime_enable(dev);
+
link = device_link_add(data->smicomm_dev, dev,
   DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
if (!link) {
dev_err(dev, "Unable link %s.\n", 
dev_name(data->smicomm_dev));
+   pm_runtime_disable(dev);
return -EINVAL;
}
}
@@ -785,8 +809,10 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 out_sysfs_remove:
	iommu_device_sysfs_remove(&data->iommu);
 out_link_remove:
-   if (dev->pm_domain)
+   if (dev->pm_domain) {
device_link_remove(data->smicomm_dev, dev);
+   pm_runtime_disable(dev);
+   }
return ret;
 }
 
@@ -801,8 +827,10 @@ static int mtk_iommu_remove(struct platform_device *pdev)
	bus_set_iommu(&platform_bus_type, NULL);
 
clk_disable_unprepare(data->bclk);
-   if (pdev->dev.pm_domain)
+   if (pdev->dev.pm_domain) {
	device_link_remove(data->smicomm_dev, &pdev->dev);
+		pm_runtime_disable(&pdev->dev);
+	}
	devm_free_irq(&pdev->dev, data->irq, data);
	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
return 0;
@@ -834,6 

[PATCH v5 16/27] iommu/mediatek: Add device link for smi-common and m4u

2020-12-09 Thread Yong Wu
In the latest SoCs, the M4U has its own special power domain. Thus, if
an engine begins to work, it should help enable the power for the M4U
first. Currently when an engine works, it always enables the
power/clocks for smi-larbs/smi-common. This patch adds a device_link
between smi-common and the M4U. Then, if the smi-common power is
enabled, the M4U power also is powered on automatically.

Normally the M4U connects with several smi-larbs and their smi-common
is always the same. In this patch we get the smi-common dev from the
first smi-larb device (i == 0), then add the device_link only while
the m4u has a power-domain.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 30 --
 drivers/iommu/mtk_iommu.h |  1 +
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 09c8c58feb78..5614015e5b96 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -706,7 +707,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
return larb_nr;
 
for (i = 0; i < larb_nr; i++) {
-   struct device_node *larbnode;
+   struct device_node *larbnode, *smicomm_node;
struct platform_device *plarbdev;
u32 id;
 
@@ -732,6 +733,26 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 
component_match_add_release(dev, , release_of,
compare_of, larbnode);
+   if (i != 0)
+   continue;
+   smicomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0);
+   if (!smicomm_node)
+   return -EINVAL;
+
+   plarbdev = of_find_device_by_node(smicomm_node);
+   of_node_put(smicomm_node);
+		data->smicomm_dev = &plarbdev->dev;
+   }
+
+   if (dev->pm_domain) {
+   struct device_link *link;
+
+   link = device_link_add(data->smicomm_dev, dev,
+  DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
+   if (!link) {
+   dev_err(dev, "Unable link %s.\n", 
dev_name(data->smicomm_dev));
+   return -EINVAL;
+   }
}
 
platform_set_drvdata(pdev, data);
@@ -739,7 +760,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
	ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
				     "mtk-iommu.%pa", &ioaddr);
if (ret)
-   return ret;
+   goto out_link_remove;
 
	iommu_device_set_ops(&data->iommu, &mtk_iommu_ops);
	iommu_device_set_fwnode(&data->iommu, &pdev->dev.of_node->fwnode);
@@ -763,6 +784,9 @@ static int mtk_iommu_probe(struct platform_device *pdev)
	iommu_device_unregister(&data->iommu);
 out_sysfs_remove:
	iommu_device_sysfs_remove(&data->iommu);
+out_link_remove:
+   if (dev->pm_domain)
+   device_link_remove(data->smicomm_dev, dev);
return ret;
 }
 
@@ -777,6 +801,8 @@ static int mtk_iommu_remove(struct platform_device *pdev)
	bus_set_iommu(&platform_bus_type, NULL);
 
clk_disable_unprepare(data->bclk);
+   if (pdev->dev.pm_domain)
+		device_link_remove(data->smicomm_dev, &pdev->dev);
	devm_free_irq(&pdev->dev, data->irq, data);
	component_master_del(&pdev->dev, &mtk_iommu_com_ops);
return 0;
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index d0c93652bdbe..5e03a029c4dc 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -68,6 +68,7 @@ struct mtk_iommu_data {
 
struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
+   struct device   *smicomm_dev;
 
struct dma_iommu_mapping*mapping; /* For mtk_iommu_v1.c */
 
-- 
2.18.0



[PATCH v5 15/27] iommu/mediatek: Add fail handle for sysfs_add and device_register

2020-12-09 Thread Yong Wu
Add failure handling for iommu_device_sysfs_add and iommu_device_register.

Fixes: b16c0170b53c ("iommu/mediatek: Make use of iommu_device_register 
interface")
Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 13 +++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 39478cfbe0f1..09c8c58feb78 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -746,7 +746,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 
	ret = iommu_device_register(&data->iommu);
if (ret)
-   return ret;
+   goto out_sysfs_remove;
 
	spin_lock_init(&data->tlb_lock);
	list_add_tail(&data->list, &m4ulist);
@@ -754,7 +754,16 @@ static int mtk_iommu_probe(struct platform_device *pdev)
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &mtk_iommu_ops);
 
-	return component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+	ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
+   if (ret)
+   goto out_dev_unreg;
+   return ret;
+
+out_dev_unreg:
+	iommu_device_unregister(&data->iommu);
+out_sysfs_remove:
+	iommu_device_sysfs_remove(&data->iommu);
+   return ret;
 }
 
 static int mtk_iommu_remove(struct platform_device *pdev)
-- 
2.18.0



[PATCH v5 12/27] iommu/io-pgtable-arm-v7s: Quad lvl1 pgtable for MediaTek

2020-12-09 Thread Yong Wu
The standard input iova width is 32 bits. MediaTek quadruples the lvl1
pagetable (4 * lvl1); there is no change for the lvl2 pagetable. Then
the iova width can reach 34 bits.
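
As a quick check of the table-size arithmetic implied by the new
formula (my arithmetic, not from the patch):

  /* _ARM_V7S_LVL_BITS(1, cfg) = cfg->ias - 20
   * ias = 32: 12 bits -> 4096 lvl1 PTEs * 4 bytes = 16KB lvl1 table
   * ias = 34: 14 bits -> 16384 lvl1 PTEs * 4 bytes = 64KB lvl1 table
   * lvl2 stays at 8 bits (256 PTEs, 1KB) in both cases.
   */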

Signed-off-by: Yong Wu 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 0b3c5b904ddc..5601dc8bf810 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -45,9 +45,10 @@
 /*
  * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
  * and 12 bits in a page.
+ * MediaTek extend 2 bits to reach 34bits, 14 bits at lvl1 and 8 bits at lvl2.
  */
 #define ARM_V7S_ADDR_BITS  32
-#define _ARM_V7S_LVL_BITS(lvl, cfg)((lvl) == 1 ? 12 : 8)
+#define _ARM_V7S_LVL_BITS(lvl, cfg)((lvl) == 1 ? ((cfg)->ias - 20) : 8)
 #define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12)
 #define ARM_V7S_TABLE_SHIFT10
 
@@ -61,7 +62,7 @@
 #define _ARM_V7S_IDX_MASK(lvl, cfg)(ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1)
 #define ARM_V7S_LVL_IDX(addr, lvl, cfg)({  
\
int _l = lvl;   \
-   ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \
+   ((addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \
 })
 
 /*
@@ -754,7 +755,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
 {
struct arm_v7s_io_pgtable *data;
 
-   if (cfg->ias > ARM_V7S_ADDR_BITS)
+   if (cfg->ias > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
return NULL;
 
if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS))
-- 
2.18.0



[PATCH v5 14/27] iommu/mediatek: Move hw_init into attach_device

2020-12-09 Thread Yong Wu
In attach_device, we update the pagetable base address register. Move
the hw_init function here as well. Then we only need to call
pm_runtime_get/put one time here if the m4u has a power domain.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 1bc5e881951c..39478cfbe0f1 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -126,6 +126,8 @@ struct mtk_iommu_domain {
 
 static const struct iommu_ops mtk_iommu_ops;
 
+static int mtk_iommu_hw_init(const struct mtk_iommu_data *data);
+
 /*
  * In M4U 4GB mode, the physical address is remapped as below:
  *
@@ -381,12 +383,16 @@ static int mtk_iommu_attach_device(struct iommu_domain 
*domain,
 {
struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+   int ret;
 
if (!data)
return -ENODEV;
 
/* Update the pgtable base address register of the M4U HW */
if (!data->m4u_dom) {
+   ret = mtk_iommu_hw_init(data);
+   if (ret)
+   return ret;
data->m4u_dom = dom;
writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
   data->base + REG_MMU_PT_BASE_ADDR);
@@ -730,10 +736,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 
platform_set_drvdata(pdev, data);
 
-   ret = mtk_iommu_hw_init(data);
-   if (ret)
-   return ret;
-
	ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
				     "mtk-iommu.%pa", &ioaddr);
if (ret)
-- 
2.18.0



[PATCH v5 13/27] iommu/mediatek: Add a flag for iova_34 bit case

2020-12-09 Thread Yong Wu
Add a HW flag for whether the HW supports 34-bit IOVA; the previous
SoCs still use 32-bit. Normally the lvl1 pgtable size is 16KB when
ias == 32; if ias == 34, the lvl1 pgtable size is 16KB * 4. The purpose
of this patch is to save the extra 16KB * 3 of contiguous memory on the
previous SoCs.

Signed-off-by: Yong Wu 
---
 drivers/iommu/mtk_iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index ec3c87d4b172..1bc5e881951c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -112,6 +112,7 @@
 #define HAS_SUB_COMM   BIT(5)
 #define WR_THROT_ENBIT(6)
 #define HAS_LEGACY_IVRP_PADDR  BIT(7)
+#define IOVA_34_EN BIT(8)
 
 #define MTK_IOMMU_HAS_FLAG(pdata, _x) \
	((((pdata)->flags) & (_x)) == (_x))
@@ -319,7 +320,7 @@ static int mtk_iommu_domain_finalise(struct 
mtk_iommu_domain *dom)
IO_PGTABLE_QUIRK_TLBI_ON_MAP |
IO_PGTABLE_QUIRK_ARM_MTK_EXT,
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
-   .ias = 32,
+   .ias = MTK_IOMMU_HAS_FLAG(data->plat_data, IOVA_34_EN) ? 34 : 
32,
.oas = 35,
.tlb = _iommu_flush_ops,
.iommu_dev = data->dev,
-- 
2.18.0



[PATCH v5 11/27] iommu/io-pgtable-arm-v7s: Add cfg as a param in some macros

2020-12-09 Thread Yong Wu
Add "cfg" as a parameter for some macros. This is a preparing patch for
mediatek extend the lvl1 pgtable. No functional change.

Signed-off-by: Yong Wu 
Acked-by: Will Deacon 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 36 +++---
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 58cc201c10a3..0b3c5b904ddc 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -47,21 +47,21 @@
  * and 12 bits in a page.
  */
 #define ARM_V7S_ADDR_BITS  32
-#define _ARM_V7S_LVL_BITS(lvl) ((lvl) == 1 ? 12 : 8)
+#define _ARM_V7S_LVL_BITS(lvl, cfg)((lvl) == 1 ? 12 : 8)
 #define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12)
 #define ARM_V7S_TABLE_SHIFT10
 
-#define ARM_V7S_PTES_PER_LVL(lvl)  (1 << _ARM_V7S_LVL_BITS(lvl))
-#define ARM_V7S_TABLE_SIZE(lvl)
\
-   (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))
+#define ARM_V7S_PTES_PER_LVL(lvl, cfg) (1 << _ARM_V7S_LVL_BITS(lvl, cfg))
+#define ARM_V7S_TABLE_SIZE(lvl, cfg)   
\
+   (ARM_V7S_PTES_PER_LVL(lvl, cfg) * sizeof(arm_v7s_iopte))
 
 #define ARM_V7S_BLOCK_SIZE(lvl)(1UL << ARM_V7S_LVL_SHIFT(lvl))
 #define ARM_V7S_LVL_MASK(lvl)  ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
 #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT))
-#define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1)
-#define ARM_V7S_LVL_IDX(addr, lvl) ({  \
+#define _ARM_V7S_IDX_MASK(lvl, cfg)(ARM_V7S_PTES_PER_LVL(lvl, cfg) - 1)
+#define ARM_V7S_LVL_IDX(addr, lvl, cfg)({  
\
int _l = lvl;   \
-   ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
+   ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l, cfg); \
 })
 
 /*
@@ -237,7 +237,7 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
struct device *dev = cfg->iommu_dev;
phys_addr_t phys;
dma_addr_t dma;
-   size_t size = ARM_V7S_TABLE_SIZE(lvl);
+   size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
void *table = NULL;
 
if (lvl == 1)
@@ -283,7 +283,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 {
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
struct device *dev = cfg->iommu_dev;
-   size_t size = ARM_V7S_TABLE_SIZE(lvl);
+   size_t size = ARM_V7S_TABLE_SIZE(lvl, cfg);
 
if (!cfg->coherent_walk)
dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
@@ -427,7 +427,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
arm_v7s_iopte *tblp;
size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
 
-   tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
+   tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl, cfg);
if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
sz, lvl, tblp) != sz))
return -EINVAL;
@@ -480,7 +480,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, 
unsigned long iova,
int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);
 
/* Find our entry at the current level */
-   ptep += ARM_V7S_LVL_IDX(iova, lvl);
+   ptep += ARM_V7S_LVL_IDX(iova, lvl, cfg);
 
/* If we can install a leaf entry at this level, then do so */
if (num_entries)
@@ -553,7 +553,7 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
int i;
 
-   for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
+	for (i = 0; i < ARM_V7S_PTES_PER_LVL(1, &data->iop.cfg); i++) {
arm_v7s_iopte pte = data->pgd[i];
 
if (ARM_V7S_PTE_IS_TABLE(pte, 1))
@@ -605,9 +605,9 @@ static size_t arm_v7s_split_blk_unmap(struct 
arm_v7s_io_pgtable *data,
if (!tablep)
return 0; /* Bytes unmapped */
 
-   num_ptes = ARM_V7S_PTES_PER_LVL(2);
+   num_ptes = ARM_V7S_PTES_PER_LVL(2, cfg);
num_entries = size >> ARM_V7S_LVL_SHIFT(2);
-   unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
+   unmap_idx = ARM_V7S_LVL_IDX(iova, 2, cfg);
 
pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
if (num_entries > 1)
@@ -649,7 +649,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable 
*data,
if (WARN_ON(lvl > 2))
return 0;
 
-   idx = ARM_V7S_LVL_IDX(iova, lvl);
+	idx = ARM_V7S_LVL_IDX(iova, lvl, &iop->cfg);
ptep += idx;
do {
pte[i] = READ_ONCE(ptep[i]);
@@ -735,7 +735,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct 
io_pgtable_ops 

[PATCH v5 10/27] iommu/io-pgtable-arm-v7s: Clarify LVL_SHIFT/BITS macro

2020-12-09 Thread Yong Wu
The current _ARM_V7S_LVL_BITS/ARM_V7S_LVL_SHIFT macros use a formula to
calculate the corresponding values for level 1 and level 2 so the code
can pretend to be sane. Actually their level-1 and level-2 values are
simply different from each other. This patch only clarifies the two
macros. No functional change.

Suggested-by: Robin Murphy 
Signed-off-by: Yong Wu 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index 4d0aa079470f..58cc201c10a3 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -44,13 +44,11 @@
 
 /*
  * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
- * and 12 bits in a page. With some carefully-chosen coefficients we can
- * hide the ugly inconsistencies behind these macros and at least let the
- * rest of the code pretend to be somewhat sane.
+ * and 12 bits in a page.
  */
 #define ARM_V7S_ADDR_BITS  32
-#define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4)
-#define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
+#define _ARM_V7S_LVL_BITS(lvl) ((lvl) == 1 ? 12 : 8)
+#define ARM_V7S_LVL_SHIFT(lvl) ((lvl) == 1 ? 20 : 12)
 #define ARM_V7S_TABLE_SHIFT10
 
 #define ARM_V7S_PTES_PER_LVL(lvl)  (1 << _ARM_V7S_LVL_BITS(lvl))
-- 
2.18.0



[PATCH v5 09/27] iommu/io-pgtable-arm-v7s: Extend PA34 for MediaTek

2020-12-09 Thread Yong Wu
MediaTek extends bit5 in the lvl1 and lvl2 descriptors as PA bit34.
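
A standalone sketch of the resulting PA-to-PTE folding (mtk_pte_pa_hi()
is an assumed name for illustration; the bit positions are those used
in the hunk below):

  #include <stdint.h>

  /* PA bit32 -> PTE bit9, PA bit33 -> PTE bit4, PA bit34 -> PTE bit5 */
  static uint32_t mtk_pte_pa_hi(uint64_t paddr)
  {
  	uint32_t pte = 0;

  	if (paddr & (1ULL << 32))
  		pte |= 1u << 9;
  	if (paddr & (1ULL << 33))
  		pte |= 1u << 4;
  	if (paddr & (1ULL << 34))
  		pte |= 1u << 5;
  	return pte;	/* e.g. paddr = 0x7_0000_0000 -> 0x230 */
  }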

Signed-off-by: Yong Wu 
Acked-by: Will Deacon 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 9 +++--
 drivers/iommu/mtk_iommu.c  | 2 +-
 include/linux/io-pgtable.h | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index e880745ab1e8..4d0aa079470f 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -112,9 +112,10 @@
 #define ARM_V7S_TEX_MASK   0x7
 #define ARM_V7S_ATTR_TEX(val)  (((val) & ARM_V7S_TEX_MASK) << 
ARM_V7S_TEX_SHIFT)
 
-/* MediaTek extend the two bits for PA 32bit/33bit */
+/* MediaTek extend the bits below for PA 32bit/33bit/34bit */
 #define ARM_V7S_ATTR_MTK_PA_BIT32  BIT(9)
 #define ARM_V7S_ATTR_MTK_PA_BIT33  BIT(4)
+#define ARM_V7S_ATTR_MTK_PA_BIT34  BIT(5)
 
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT6
@@ -194,6 +195,8 @@ static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int 
lvl,
pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
if (paddr & BIT_ULL(33))
pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+   if (paddr & BIT_ULL(34))
+   pte |= ARM_V7S_ATTR_MTK_PA_BIT34;
return pte;
 }
 
@@ -218,6 +221,8 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int 
lvl,
paddr |= BIT_ULL(32);
if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
paddr |= BIT_ULL(33);
+   if (pte & ARM_V7S_ATTR_MTK_PA_BIT34)
+   paddr |= BIT_ULL(34);
return paddr;
 }
 
@@ -754,7 +759,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct 
io_pgtable_cfg *cfg,
if (cfg->ias > ARM_V7S_ADDR_BITS)
return NULL;
 
-   if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
+   if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 35 : ARM_V7S_ADDR_BITS))
return NULL;
 
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 6451d83753e1..ec3c87d4b172 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -320,7 +320,7 @@ static int mtk_iommu_domain_finalise(struct 
mtk_iommu_domain *dom)
IO_PGTABLE_QUIRK_ARM_MTK_EXT,
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
.ias = 32,
-   .oas = 34,
+   .oas = 35,
.tlb = _iommu_flush_ops,
.iommu_dev = data->dev,
};
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4cde111e425b..1ae0757f4f94 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -77,8 +77,8 @@ struct io_pgtable_cfg {
 *  TLB maintenance when mapping as well as when unmapping.
 *
 * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend
-*  to support up to 34 bits PA where the bit32 and bit33 are
-*  encoded in the bit9 and bit4 of the PTE respectively.
+*  to support up to 35 bits PA where the bit32, bit33 and bit34 are
+*  encoded in the bit9, bit4 and bit5 of the PTE respectively.
 *
 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
 *  on unmap, for DMA domains using the flush queue mechanism for
-- 
2.18.0



[PATCH v5 08/27] iommu/io-pgtable-arm-v7s: Use ias to check the valid iova in unmap

2020-12-09 Thread Yong Wu
Use the ias for the valid iova check in arm_v7s_unmap. This is a
preparatory patch for supporting 34-bit iova for MediaTek.

Signed-off-by: Yong Wu 
Reviewed-by: Robin Murphy 
---
 drivers/iommu/io-pgtable-arm-v7s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c 
b/drivers/iommu/io-pgtable-arm-v7s.c
index a688f22cbe3b..e880745ab1e8 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -717,7 +717,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, 
unsigned long iova,
 {
struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
-   if (WARN_ON(upper_32_bits(iova)))
+   if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
return 0;
 
return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
-- 
2.18.0



[PATCH v5 07/27] iommu/mediatek: Use the common mtk-smi-larb-port.h

2020-12-09 Thread Yong Wu
Use the common larb-port header in the source code.

Signed-off-by: Yong Wu 
Acked-by: Krzysztof Kozlowski 
---
 drivers/iommu/mtk_iommu.c  | 7 ---
 drivers/iommu/mtk_iommu.h  | 1 +
 drivers/memory/mtk-smi.c   | 1 +
 include/soc/mediatek/smi.h | 2 --
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c072cee532c2..6451d83753e1 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -103,13 +103,6 @@
 
 #define MTK_PROTECT_PA_ALIGN   256
 
-/*
- * Get the local arbiter ID and the portid within the larb arbiter
- * from mtk_m4u_id which is defined by MTK_M4U_ID.
- */
-#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf)
-#define MTK_M4U_TO_PORT(id)((id) & 0x1f)
-
 #define HAS_4GB_MODE   BIT(0)
 /* HW will use the EMI clock if there isn't the "bclk". */
 #define HAS_BCLK   BIT(1)
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index df32b3e3408b..d0c93652bdbe 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MTK_LARB_COM_MAX   8
 #define MTK_LARB_SUBCOM_MAX4
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index ac350f8d1e20..2beb67908f3c 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /* mt8173 */
diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h
index 5a34b87d89e3..9371bf572ab8 100644
--- a/include/soc/mediatek/smi.h
+++ b/include/soc/mediatek/smi.h
@@ -11,8 +11,6 @@
 
 #ifdef CONFIG_MTK_SMI
 
-#define MTK_LARB_NR_MAX16
-
 #define MTK_SMI_MMU_EN(port)   BIT(port)
 
 struct mtk_smi_larb_iommu {
-- 
2.18.0



[PATCH v5 06/27] dt-bindings: mediatek: Add binding for mt8192 IOMMU

2020-12-09 Thread Yong Wu
This patch adds descriptions for the mt8192 IOMMU and SMI.

mt8192 also is MTK IOMMU gen2 which uses ARM Short-Descriptor translation
table format. The M4U-SMI HW diagram is as below:

   EMI
    |
   M4U
    |
   ------------
    SMI Common
   ------------
    |
   +-------+--------+--------+-------+-------+
   |       |        |        |  ..   |       |
   |       |        |        |       |       |
 larb0   larb1    larb2    larb4 .. larb19  larb20
 disp0   disp1     mdp     vdec     IPE     IPE

All the connections are fixed in HW; SW can NOT adjust them.

mt8192 M4U supports a 0~16GB iova range. We preassign different engines
into different iova ranges:

domain-id  module   iova-range                  larbs
   0       disp     0 ~ 4G                      larb0/1
   1       vcodec   4G ~ 8G                     larb4/5/7
   2       cam/mdp  8G ~ 12G                    larb2/9/11/13/14/16/17/18/19/20
   3       CCU0     0x4000_0000 ~ 0x43ff_ffff   larb13: port 9/10
   4       CCU1     0x4400_0000 ~ 0x47ff_ffff   larb14: port 4/5

The iova range for CCU0/1 (camera control unit) is a HW requirement.

Signed-off-by: Yong Wu 
Reviewed-by: Rob Herring 
---
 .../bindings/iommu/mediatek,iommu.yaml|  18 +-
 include/dt-bindings/memory/mt8192-larb-port.h | 240 ++
 2 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 include/dt-bindings/memory/mt8192-larb-port.h

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index ba6626347381..0f26fe14c8e2 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -76,6 +76,7 @@ properties:
   - mediatek,mt8167-m4u  # generation two
   - mediatek,mt8173-m4u  # generation two
   - mediatek,mt8183-m4u  # generation two
+  - mediatek,mt8192-m4u  # generation two
 
   - description: mt7623 generation one
 items:
@@ -115,7 +116,11 @@ properties:
   dt-binding/memory/mt6779-larb-port.h for mt6779,
   dt-binding/memory/mt8167-larb-port.h for mt8167,
   dt-binding/memory/mt8173-larb-port.h for mt8173,
-  dt-binding/memory/mt8183-larb-port.h for mt8183.
+  dt-binding/memory/mt8183-larb-port.h for mt8183,
+  dt-binding/memory/mt8192-larb-port.h for mt8192.
+
+  power-domains:
+maxItems: 1
 
 required:
   - compatible
@@ -133,11 +138,22 @@ allOf:
   - mediatek,mt2701-m4u
   - mediatek,mt2712-m4u
   - mediatek,mt8173-m4u
+  - mediatek,mt8192-m4u
 
 then:
   required:
 - clocks
 
+  - if:
+  properties:
+compatible:
+  enum:
+- mediatek,mt8192-m4u
+
+then:
+  required:
+- power-domains
+
 additionalProperties: false
 
 examples:
diff --git a/include/dt-bindings/memory/mt8192-larb-port.h 
b/include/dt-bindings/memory/mt8192-larb-port.h
new file mode 100644
index ..ec1ac2ba7094
--- /dev/null
+++ b/include/dt-bindings/memory/mt8192-larb-port.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ *
+ * Author: Chao Hao 
+ * Author: Yong Wu 
+ */
+#ifndef _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT8192_LARB_PORT_H_
+
+#include 
+
+/*
+ * MM IOMMU:
+ * domain 0: display: larb0, larb1.
+ * domain 1: vcodec: larb4, larb5, larb7.
+ * domain 2: CAM/MDP: larb2, larb9, larb11, larb13, larb14, larb16,
+ *   larb17, larb18, larb19, larb20,
+ * domain 3: CCU0: larb13 - port9/10.
+ * domain 4: CCU1: larb14 - port4/5.
+ *
+ * larb3/6/8/10/12/15 is null.
+ */
+
+/* larb0 */
+#define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_DOM_ID(0, 0, 0)
+#define M4U_PORT_L0_OVL_RDMA0_HDR  MTK_M4U_DOM_ID(0, 0, 1)
+#define M4U_PORT_L0_OVL_RDMA0  MTK_M4U_DOM_ID(0, 0, 2)
+#define M4U_PORT_L0_DISP_RDMA0 MTK_M4U_DOM_ID(0, 0, 3)
+#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_DOM_ID(0, 0, 4)
+#define M4U_PORT_L0_DISP_FAKE0 MTK_M4U_DOM_ID(0, 0, 5)
+
+/* larb1 */
+#define M4U_PORT_L1_OVL_2L_RDMA0_HDR   MTK_M4U_DOM_ID(0, 1, 0)
+#define M4U_PORT_L1_OVL_2L_RDMA2_HDR   MTK_M4U_DOM_ID(0, 1, 1)
+#define M4U_PORT_L1_OVL_2L_RDMA0   MTK_M4U_DOM_ID(0, 1, 2)
+#define M4U_PORT_L1_OVL_2L_RDMA2   MTK_M4U_DOM_ID(0, 1, 3)
+#define M4U_PORT_L1_DISP_MDP_RDMA4 MTK_M4U_DOM_ID(0, 1, 4)
+#define M4U_PORT_L1_DISP_RDMA4 MTK_M4U_DOM_ID(0, 1, 5)
+#define M4U_PORT_L1_DISP_UFBC_WDMA0MTK_M4U_DOM_ID(0, 1, 6)
+#define M4U_PORT_L1_DISP_FAKE1 MTK_M4U_DOM_ID(0, 1, 7)
+
+/* larb2 */
+#define 

[PATCH v5 05/27] dt-bindings: memory: mediatek: Rename header guard for SMI header file

2020-12-09 Thread Yong Wu
Only rename the header guards for all the SoC larb port header files.
No functional change.

Suggested-by: Krzysztof Kozlowski 
Signed-off-by: Yong Wu 
---
 include/dt-bindings/memory/mt2701-larb-port.h | 4 ++--
 include/dt-bindings/memory/mt2712-larb-port.h | 4 ++--
 include/dt-bindings/memory/mt6779-larb-port.h | 4 ++--
 include/dt-bindings/memory/mt8167-larb-port.h | 4 ++--
 include/dt-bindings/memory/mt8173-larb-port.h | 4 ++--
 include/dt-bindings/memory/mt8183-larb-port.h | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/dt-bindings/memory/mt2701-larb-port.h 
b/include/dt-bindings/memory/mt2701-larb-port.h
index 2d85c2ec6cfd..25d03526f142 100644
--- a/include/dt-bindings/memory/mt2701-larb-port.h
+++ b/include/dt-bindings/memory/mt2701-larb-port.h
@@ -4,8 +4,8 @@
  * Author: Honghui Zhang 
  */
 
-#ifndef _MT2701_LARB_PORT_H_
-#define _MT2701_LARB_PORT_H_
+#ifndef _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT2701_LARB_PORT_H_
 
 /*
  * Mediatek m4u generation 1 such as mt2701 has flat m4u port numbers,
diff --git a/include/dt-bindings/memory/mt2712-larb-port.h 
b/include/dt-bindings/memory/mt2712-larb-port.h
index b6b2c6bf4459..5c7f303f078c 100644
--- a/include/dt-bindings/memory/mt2712-larb-port.h
+++ b/include/dt-bindings/memory/mt2712-larb-port.h
@@ -3,8 +3,8 @@
  * Copyright (c) 2017 MediaTek Inc.
  * Author: Yong Wu 
  */
-#ifndef __DTS_IOMMU_PORT_MT2712_H
-#define __DTS_IOMMU_PORT_MT2712_H
+#ifndef _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT2712_LARB_PORT_H_
 
 #include 
 
diff --git a/include/dt-bindings/memory/mt6779-larb-port.h 
b/include/dt-bindings/memory/mt6779-larb-port.h
index 60f57f54393e..bc93757df2bf 100644
--- a/include/dt-bindings/memory/mt6779-larb-port.h
+++ b/include/dt-bindings/memory/mt6779-larb-port.h
@@ -4,8 +4,8 @@
  * Author: Chao Hao 
  */
 
-#ifndef _DTS_IOMMU_PORT_MT6779_H_
-#define _DTS_IOMMU_PORT_MT6779_H_
+#ifndef _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT6779_LARB_PORT_H_
 
 #include 
 
diff --git a/include/dt-bindings/memory/mt8167-larb-port.h 
b/include/dt-bindings/memory/mt8167-larb-port.h
index fcb9a49ec60e..8570aab09db8 100644
--- a/include/dt-bindings/memory/mt8167-larb-port.h
+++ b/include/dt-bindings/memory/mt8167-larb-port.h
@@ -5,8 +5,8 @@
  * Author: Honghui Zhang 
  * Author: Fabien Parent 
  */
-#ifndef __DTS_IOMMU_PORT_MT8167_H
-#define __DTS_IOMMU_PORT_MT8167_H
+#ifndef _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT8167_LARB_PORT_H_
 
 #include 
 
diff --git a/include/dt-bindings/memory/mt8173-larb-port.h 
b/include/dt-bindings/memory/mt8173-larb-port.h
index d8c99c946053..1b568973fc2d 100644
--- a/include/dt-bindings/memory/mt8173-larb-port.h
+++ b/include/dt-bindings/memory/mt8173-larb-port.h
@@ -3,8 +3,8 @@
  * Copyright (c) 2015-2016 MediaTek Inc.
  * Author: Yong Wu 
  */
-#ifndef __DTS_IOMMU_PORT_MT8173_H
-#define __DTS_IOMMU_PORT_MT8173_H
+#ifndef _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT8173_LARB_PORT_H_
 
 #include 
 
diff --git a/include/dt-bindings/memory/mt8183-larb-port.h 
b/include/dt-bindings/memory/mt8183-larb-port.h
index 275c095a6fd6..3095630bb190 100644
--- a/include/dt-bindings/memory/mt8183-larb-port.h
+++ b/include/dt-bindings/memory/mt8183-larb-port.h
@@ -3,8 +3,8 @@
  * Copyright (c) 2018 MediaTek Inc.
  * Author: Yong Wu 
  */
-#ifndef __DTS_IOMMU_PORT_MT8183_H
-#define __DTS_IOMMU_PORT_MT8183_H
+#ifndef _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT8183_LARB_PORT_H_
 
 #include 
 
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v5 04/27] dt-bindings: memory: mediatek: Add domain definition

2020-12-09 Thread Yong Wu
In the latest SoCs, there are several HW IPs that require a special
iova range; mainly CCU and VPU have this requirement. Take CCU as an
example: CCU requires its iova to be located in the range
0x4000_0000 ~ 0x43ff_ffff.

In this patch we add a domain definition for the special ports. In the
example of CCU, if we preassign the CCU port in domain1, then the iommu
driver will prepare an independent iommu domain with the special iova
range for it, and the iova obtained from dma_alloc_attrs(ccu-dev) will
be located in its special range.

This is a preparatory patch for multi-domain support.
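
A worked example of the encoding (illustrative values, using the CCU
case above: domid 1, larb 13, port 9):

  /*
   * MTK_M4U_DOM_ID(1, 13, 9) = (1 << 16) | (13 << 5) | 9 = 0x101a9
   * MTK_M4U_TO_DOM(0x101a9)  = 1
   * MTK_M4U_TO_LARB(0x101a9) = 13
   * MTK_M4U_TO_PORT(0x101a9) = 9
   */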

Signed-off-by: Yong Wu 
Acked-by: Krzysztof Kozlowski 
Acked-by: Rob Herring 
---
 include/dt-bindings/memory/mtk-smi-larb-port.h | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h 
b/include/dt-bindings/memory/mtk-smi-larb-port.h
index 7d64103209af..2d4c973c174f 100644
--- a/include/dt-bindings/memory/mtk-smi-larb-port.h
+++ b/include/dt-bindings/memory/mtk-smi-larb-port.h
@@ -7,9 +7,16 @@
 #define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_
 
 #define MTK_LARB_NR_MAX32
+#define MTK_M4U_DOM_NR_MAX 8
+
+#define MTK_M4U_DOM_ID(domid, larb, port)  \
+   (((domid) & 0x7) << 16 | (((larb) & 0x1f) << 5) | ((port) & 0x1f))
+
+/* The default dom id is 0. */
+#define MTK_M4U_ID(larb, port) MTK_M4U_DOM_ID(0, larb, port)
 
-#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
 #define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0x1f)
 #define MTK_M4U_TO_PORT(id)((id) & 0x1f)
+#define MTK_M4U_TO_DOM(id) (((id) >> 16) & 0x7)
 
 #endif
-- 
2.18.0
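
For illustration, here is a small standalone sketch (not kernel code) of how
the macros above pack and recover the dom-id: bits [18:16] carry the domain
id, bits [9:5] the larb and bits [4:0] the port. The larb/port numbers used
below are made up.

/* Standalone sketch of the dom-id encoding; larb/port values are made up. */
#include <stdio.h>

#define MTK_M4U_DOM_ID(domid, larb, port) \
	(((domid) & 0x7) << 16 | (((larb) & 0x1f) << 5) | ((port) & 0x1f))
#define MTK_M4U_TO_LARB(id)	(((id) >> 5) & 0x1f)
#define MTK_M4U_TO_PORT(id)	((id) & 0x1f)
#define MTK_M4U_TO_DOM(id)	(((id) >> 16) & 0x7)

int main(void)
{
	unsigned int id = MTK_M4U_DOM_ID(1, 13, 9);	/* hypothetical port */

	/* prints: dom=1 larb=13 port=9 */
	printf("dom=%u larb=%u port=%u\n",
	       MTK_M4U_TO_DOM(id), MTK_M4U_TO_LARB(id), MTK_M4U_TO_PORT(id));
	return 0;
}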



[PATCH v5 01/27] dt-bindings: iommu: mediatek: Convert IOMMU to DT schema

2020-12-09 Thread Yong Wu
Convert MediaTek IOMMU to DT schema.

Signed-off-by: Yong Wu 
Reviewed-by: Rob Herring 
---
 .../bindings/iommu/mediatek,iommu.txt | 105 ---
 .../bindings/iommu/mediatek,iommu.yaml| 167 ++
 2 files changed, 167 insertions(+), 105 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
 create mode 100644 Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
deleted file mode 100644
index ac949f7fe3d4..
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-* Mediatek IOMMU Architecture Implementation
-
-  Some Mediatek SOCs contain a Multimedia Memory Management Unit (M4U), and
-this M4U have two generations of HW architecture. Generation one uses flat
-pagetable, and only supports 4K size page mapping. Generation two uses the
-ARM Short-Descriptor translation table format for address translation.
-
-  About the M4U Hardware Block Diagram, please check below:
-
-  EMI (External Memory Interface)
-   |
-  m4u (Multimedia Memory Management Unit)
-   |
-  ++
-  ||
-  gals0-rx   gals1-rx(Global Async Local Sync rx)
-  ||
-  ||
-  gals0-tx   gals1-tx(Global Async Local Sync tx)
-  ||  Some SoCs may have GALS.
-  ++
-   |
-   SMI Common(Smart Multimedia Interface Common)
-   |
-   ++---
-   ||
-   | gals-rxThere may be GALS in some larbs.
-   ||
-   ||
-   | gals-tx
-   ||
-   SMI larb0SMI larb1   ... SoCs have several SMI local arbiter(larb).
-   (display) (vdec)
-   ||
-   ||
- +-+-+ +++
- | | | |||
- | | |...  |||  ... There are different ports in each larb.
- | | | |||
-OVL0 RDMA0 WDMA0  MC   PP   VLD
-
-  As above, The Multimedia HW will go through SMI and M4U while it
-access EMI. SMI is a bridge between m4u and the Multimedia HW. It contain
-smi local arbiter and smi common. It will control whether the Multimedia
-HW should go though the m4u for translation or bypass it and talk
-directly with EMI. And also SMI help control the power domain and clocks for
-each local arbiter.
-  Normally we specify a local arbiter(larb) for each multimedia HW
-like display, video decode, and camera. And there are different ports
-in each larb. Take a example, There are many ports like MC, PP, VLD in the
-video decode local arbiter, all these ports are according to the video HW.
-  In some SoCs, there may be a GALS(Global Async Local Sync) module between
-smi-common and m4u, and additional GALS module between smi-larb and
-smi-common. GALS can been seen as a "asynchronous fifo" which could help
-synchronize for the modules in different clock frequency.
-
-Required properties:
-- compatible : must be one of the following string:
-   "mediatek,mt2701-m4u" for mt2701 which uses generation one m4u HW.
-   "mediatek,mt2712-m4u" for mt2712 which uses generation two m4u HW.
-   "mediatek,mt6779-m4u" for mt6779 which uses generation two m4u HW.
-   "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
-generation one m4u HW.
-   "mediatek,mt8167-m4u" for mt8167 which uses generation two m4u HW.
-   "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
-   "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
-- reg : m4u register base and size.
-- interrupts : the interrupt of m4u.
-- clocks : must contain one entry for each clock-names.
-- clock-names : Only 1 optional clock:
-  - "bclk": the block clock of m4u.
-  Here is the list which require this "bclk":
-  - mt2701, mt2712, mt7623 and mt8173.
-  Note that m4u use the EMI clock which always has been enabled before kernel
-  if there is no this "bclk".
-- mediatek,larbs : List of phandle to the local arbiters in the current Socs.
-   Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
-   according to the local arbiter index, like larb0, larb1, larb2...
-- iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
-   Specifies the mtk_m4u_id as defined in
-   dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
-   dt-binding/memory/mt2712-larb-port.h for mt2712,
-   dt-binding/memory/mt6779-larb-port.h for mt6779,
-   dt-binding/memory/mt8167-larb-port.h for mt8167,
-   dt-binding/memory/mt8173-larb-port.h for mt8173, and
-   

[PATCH v5 03/27] dt-bindings: memory: mediatek: Extend LARB_NR_MAX to 32

2020-12-09 Thread Yong Wu
Extend the max larb number definition, as mt8192 has a larb number over 16.

Signed-off-by: Yong Wu 
Acked-by: Rob Herring 
Acked-by: Krzysztof Kozlowski 
---
 Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml | 2 +-
 include/dt-bindings/memory/mtk-smi-larb-port.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml 
b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index b9946809fc2b..ba6626347381 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -99,7 +99,7 @@ properties:
   mediatek,larbs:
 $ref: /schemas/types.yaml#/definitions/phandle-array
 minItems: 1
-maxItems: 16
+maxItems: 32
 description: |
   List of phandle to the local arbiters in the current Socs.
   Refer to bindings/memory-controllers/mediatek,smi-larb.yaml. It must sort
diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h 
b/include/dt-bindings/memory/mtk-smi-larb-port.h
index 53354cf4f6e3..7d64103209af 100644
--- a/include/dt-bindings/memory/mtk-smi-larb-port.h
+++ b/include/dt-bindings/memory/mtk-smi-larb-port.h
@@ -6,10 +6,10 @@
 #ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_
 #define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_
 
-#define MTK_LARB_NR_MAX16
+#define MTK_LARB_NR_MAX32
 
 #define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
-#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf)
+#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0x1f)
 #define MTK_M4U_TO_PORT(id)((id) & 0x1f)
 
 #endif
-- 
2.18.0
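
As a quick sanity check, here is a standalone sketch (not kernel code) of why
the extraction mask must grow with the field: with 32 larbs the larb field is
5 bits wide, so MTK_M4U_TO_LARB needs 0x1f, while the old 0xf mask would
truncate larb numbers above 15.

#include <assert.h>

#define MTK_M4U_ID(larb, port)	(((larb) << 5) | (port))
#define MTK_M4U_TO_LARB(id)	(((id) >> 5) & 0x1f)

int main(void)
{
	/* larb 19 (mt8192 has larbs up to 20) round-trips with the 5-bit mask... */
	assert(MTK_M4U_TO_LARB(MTK_M4U_ID(19, 3)) == 19);
	/* ...whereas the old 4-bit mask would have yielded 19 & 0xf == 3. */
	assert(((MTK_M4U_ID(19, 3) >> 5) & 0xf) == 3);
	return 0;
}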



[PATCH v5 02/27] dt-bindings: memory: mediatek: Add a common larb-port header file

2020-12-09 Thread Yong Wu
Put all the macros about smi larb/port together. This is a preparation
patch for extending LARB_NR and adding the new dom-id support.

Signed-off-by: Yong Wu 
Acked-by: Rob Herring 
Acked-by: Krzysztof Kozlowski 
---
 include/dt-bindings/memory/mt2712-larb-port.h  |  2 +-
 include/dt-bindings/memory/mt6779-larb-port.h  |  2 +-
 include/dt-bindings/memory/mt8167-larb-port.h  |  2 +-
 include/dt-bindings/memory/mt8173-larb-port.h  |  2 +-
 include/dt-bindings/memory/mt8183-larb-port.h  |  2 +-
 include/dt-bindings/memory/mtk-smi-larb-port.h | 15 +++
 6 files changed, 20 insertions(+), 5 deletions(-)
 create mode 100644 include/dt-bindings/memory/mtk-smi-larb-port.h

diff --git a/include/dt-bindings/memory/mt2712-larb-port.h 
b/include/dt-bindings/memory/mt2712-larb-port.h
index 6f9aa7349cef..b6b2c6bf4459 100644
--- a/include/dt-bindings/memory/mt2712-larb-port.h
+++ b/include/dt-bindings/memory/mt2712-larb-port.h
@@ -6,7 +6,7 @@
 #ifndef __DTS_IOMMU_PORT_MT2712_H
 #define __DTS_IOMMU_PORT_MT2712_H
 
-#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
+#include 
 
 #define M4U_LARB0_ID   0
 #define M4U_LARB1_ID   1
diff --git a/include/dt-bindings/memory/mt6779-larb-port.h 
b/include/dt-bindings/memory/mt6779-larb-port.h
index 2ad0899fbf2f..60f57f54393e 100644
--- a/include/dt-bindings/memory/mt6779-larb-port.h
+++ b/include/dt-bindings/memory/mt6779-larb-port.h
@@ -7,7 +7,7 @@
 #ifndef _DTS_IOMMU_PORT_MT6779_H_
 #define _DTS_IOMMU_PORT_MT6779_H_
 
-#define MTK_M4U_ID(larb, port)  (((larb) << 5) | (port))
+#include 
 
 #define M4U_LARB0_ID0
 #define M4U_LARB1_ID1
diff --git a/include/dt-bindings/memory/mt8167-larb-port.h 
b/include/dt-bindings/memory/mt8167-larb-port.h
index 000fb299a408..fcb9a49ec60e 100644
--- a/include/dt-bindings/memory/mt8167-larb-port.h
+++ b/include/dt-bindings/memory/mt8167-larb-port.h
@@ -8,7 +8,7 @@
 #ifndef __DTS_IOMMU_PORT_MT8167_H
 #define __DTS_IOMMU_PORT_MT8167_H
 
-#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
+#include 
 
 #define M4U_LARB0_ID   0
 #define M4U_LARB1_ID   1
diff --git a/include/dt-bindings/memory/mt8173-larb-port.h 
b/include/dt-bindings/memory/mt8173-larb-port.h
index 9f31ccfeca21..d8c99c946053 100644
--- a/include/dt-bindings/memory/mt8173-larb-port.h
+++ b/include/dt-bindings/memory/mt8173-larb-port.h
@@ -6,7 +6,7 @@
 #ifndef __DTS_IOMMU_PORT_MT8173_H
 #define __DTS_IOMMU_PORT_MT8173_H
 
-#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
+#include 
 
 #define M4U_LARB0_ID   0
 #define M4U_LARB1_ID   1
diff --git a/include/dt-bindings/memory/mt8183-larb-port.h 
b/include/dt-bindings/memory/mt8183-larb-port.h
index 2c579f305162..275c095a6fd6 100644
--- a/include/dt-bindings/memory/mt8183-larb-port.h
+++ b/include/dt-bindings/memory/mt8183-larb-port.h
@@ -6,7 +6,7 @@
 #ifndef __DTS_IOMMU_PORT_MT8183_H
 #define __DTS_IOMMU_PORT_MT8183_H
 
-#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
+#include 
 
 #define M4U_LARB0_ID   0
 #define M4U_LARB1_ID   1
diff --git a/include/dt-bindings/memory/mtk-smi-larb-port.h 
b/include/dt-bindings/memory/mtk-smi-larb-port.h
new file mode 100644
index ..53354cf4f6e3
--- /dev/null
+++ b/include/dt-bindings/memory/mtk-smi-larb-port.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020 MediaTek Inc.
+ * Author: Yong Wu 
+ */
+#ifndef __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_
+#define __DT_BINDINGS_MEMORY_MTK_MEMORY_PORT_H_
+
+#define MTK_LARB_NR_MAX16
+
+#define MTK_M4U_ID(larb, port) (((larb) << 5) | (port))
+#define MTK_M4U_TO_LARB(id)(((id) >> 5) & 0xf)
+#define MTK_M4U_TO_PORT(id)((id) & 0x1f)
+
+#endif
-- 
2.18.0
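
With the common header in place, a per-SoC header only needs to define its
larb ids and ports on top of the shared macros. A hypothetical fragment (the
port name below is made up, not taken from the series) would look like:

/* Hypothetical per-SoC header built on the common larb-port header. */
#include <dt-bindings/memory/mtk-smi-larb-port.h>

#define M4U_LARB0_ID			0

/* Port 0 of larb0, e.g. a display overlay; the name is illustrative. */
#define M4U_PORT_DISP_OVL0		MTK_M4U_ID(M4U_LARB0_ID, 0)

A device tree node would then reference such a port through its iommus
property, and the driver recovers the larb/port with MTK_M4U_TO_LARB and
MTK_M4U_TO_PORT.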



[PATCH v5 00/27] MT8192 IOMMU support

2020-12-09 Thread Yong Wu
This patchset mainly adds support for the mt8192 Multimedia IOMMU and SMI.

mt8192 is also MTK IOMMU gen2, which uses the ARM Short-Descriptor
translation table format. The M4U-SMI HW diagram is as below:

             EMI
              |
             M4U
              |
        --------------
        | SMI Common |
        --------------
              |
   +------+------+------+---- ... ----+-------+
   |      |      |      |             |       |
 larb0  larb1  larb2  larb4   ...   larb19  larb20
 disp0  disp1   mdp    vdec          IPE     IPE

All the connections are fixed in HW; SW can NOT adjust them.

Compared with the previous SoCs, this patchset mainly adds two new features:
a) iova 34-bit support (a short sketch of the lvl1 pagetable cost follows
below).
b) multi-domain support, since several HW blocks have special iova region
requirements.

change note:
v5: a) Add a new patch for the header guard of smi-larb-port.h in [5/27].
b) Add a new patch for error handling of iommu_device_sysfs_add and
 iommu_device_register [15/27].
c) Add a flag for the iova "ias == 34" case; the previous SoCs still keep
 32 bits to save 16KB*3 of lvl1 pgtable memory [13/27].
d) Add a missing include for a FIELD_GET build failure.
e) In the PM power-domain patch, add a pm_runtime_enabled check when calling
 pm_runtime_get_sync for the non-power-domain case, and add a
 pm_runtime_put_noidle for the pm_runtime_get_sync failure case.

v4: 
https://lore.kernel.org/linux-iommu/2020123838.15682-1-yong...@mediatek.com/
  a) Rebase on v5.10-rc1.
  b) Move the smi part to an independent patchset.
  c) Improve the v7s code per Robin and Will.
  d) Add a MediaTek IOMMU entry in MAINTAINERS.

v3: 
https://lore.kernel.org/linux-iommu/20200930070647.10188-1-yong...@mediatek.com/
  a) Fix the DT schema issues commented on by Rob.
  b) Fix a v7s issue: use "_lvl" instead of "_l" in the macro
ARM_V7S_PTES_PER_LVL, since it is called in ARM_V7S_LVL_IDX which already
uses "_l".
  c) Fix a PM suspend issue: avoid pm suspend in the pm runtime case.

v2: 
https://lore.kernel.org/linux-iommu/20200905080920.13396-1-yong...@mediatek.com/
  a) Convert the IOMMU/SMI dt-bindings to DT schema.
  b) Address comments from Pi-Hsun and Nicolas, e.g. use
generic_iommu_put_resv_regions.
  c) Reword some comments, e.g. add how to use the domain-id.

v1: 
https://lore.kernel.org/linux-iommu/20200711064846.16007-1-yong...@mediatek.com/

Yong Wu (27):
  dt-bindings: iommu: mediatek: Convert IOMMU to DT schema
  dt-bindings: memory: mediatek: Add a common larb-port header file
  dt-bindings: memory: mediatek: Extend LARB_NR_MAX to 32
  dt-bindings: memory: mediatek: Add domain definition
  dt-bindings: memory: mediatek: Rename header guard for SMI header file
  dt-bindings: mediatek: Add binding for mt8192 IOMMU
  iommu/mediatek: Use the common mtk-smi-larb-port.h
  iommu/io-pgtable-arm-v7s: Use ias to check the valid iova in unmap
  iommu/io-pgtable-arm-v7s: Extend PA34 for MediaTek
  iommu/io-pgtable-arm-v7s: Clarify LVL_SHIFT/BITS macro
  iommu/io-pgtable-arm-v7s: Add cfg as a param in some macros
  iommu/io-pgtable-arm-v7s: Quad lvl1 pgtable for MediaTek
  iommu/mediatek: Add a flag for iova_34 bit case
  iommu/mediatek: Move hw_init into attach_device
  iommu/mediatek: Add fail handle for sysfs_add and device_register
  iommu/mediatek: Add device link for smi-common and m4u
  iommu/mediatek: Add pm runtime callback
  iommu/mediatek: Add power-domain operation
  iommu/mediatek: Add iova reserved function
  iommu/mediatek: Add single domain
  iommu/mediatek: Support master use iova over 32bit
  iommu/mediatek: Support up to 34bit iova in tlb flush
  iommu/mediatek: Support report iova 34bit translation fault in ISR
  iommu/mediatek: Add support for multi domain
  iommu/mediatek: Adjust the structure
  iommu/mediatek: Add mt8192 support
  MAINTAINERS: Add entry for MediaTek IOMMU

 .../bindings/iommu/mediatek,iommu.txt | 105 ---
 .../bindings/iommu/mediatek,iommu.yaml| 183 +++
 MAINTAINERS   |   9 +
 drivers/iommu/io-pgtable-arm-v7s.c|  56 ++--
 drivers/iommu/mtk_iommu.c | 289 +++---
 drivers/iommu/mtk_iommu.h |  11 +-
 drivers/memory/mtk-smi.c  |   8 +
 include/dt-bindings/memory/mt2701-larb-port.h |   4 +-
 include/dt-bindings/memory/mt2712-larb-port.h |   6 +-
 include/dt-bindings/memory/mt6779-larb-port.h |   6 +-
 include/dt-bindings/memory/mt8167-larb-port.h |   6 +-
 include/dt-bindings/memory/mt8173-larb-port.h |   6 +-
 include/dt-bindings/memory/mt8183-larb-port.h |   6 +-
 include/dt-bindings/memory/mt8192-larb-port.h | 240 +++
 .../dt-bindings/memory/mtk-smi-larb-port.h|  22 ++
 include/linux/io-pgtable.h|   4 +-
 include/soc/mediatek/smi.h|   3 +-