Re: [Intel-gfx] [PATCH] drm/i915: Fix DMA mapped scatterlist walks

2020-09-10 Thread Tvrtko Ursulin



On 09/09/2020 20:29, Logan Gunthorpe wrote:

On 2020-09-09 6:44 a.m., Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

When walking DMA mapped scatterlists sg_dma_len has to be used since it
can be different (coalesced) from the backing store entry.

This also means we have to end the walk when encountering a zero length
DMA entry and cannot rely on the normal sg list end marker.

Both issues were there in theory for some time but were hidden by the fact
Intel IOMMU driver was never coalescing entries. As there are ongoing
efforts to change this we need to start handling it.

Signed-off-by: Tvrtko Ursulin 
References: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL 
iterators")
References: b31144c0daa8 ("drm/i915: Micro-optimise 
gen6_ppgtt_insert_entries()")
Reported-by: Tom Murphy 
Suggested-by: Tom Murphy  # __sgt_iter
Suggested-by: Logan Gunthorpe  # __sgt_iter
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Lu Baolu 
---
  drivers/gpu/drm/i915/gt/gen6_ppgtt.c|  6 +++---
  drivers/gpu/drm/i915/gt/gen8_ppgtt.c| 17 ++---
  drivers/gpu/drm/i915/gt/intel_gtt.h |  2 +-
  drivers/gpu/drm/i915/i915_scatterlist.h | 12 
  4 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index fd0d24d28763..c0d17f87b00f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct 
i915_address_space *vm,
  
  	vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));

do {
-   GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+   GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
  
  		iter.dma += I915_GTT_PAGE_SIZE;

if (iter.dma == iter.max) {
iter.sg = __sg_next(iter.sg);
-   if (!iter.sg)
+   if (!iter.sg || sg_dma_len(iter.sg) == 0)
break;
  
  			iter.dma = sg_dma_address(iter.sg);

-   iter.max = iter.dma + iter.sg->length;
+   iter.max = iter.dma + sg_dma_len(iter.sg);
}
  
  		if (++act_pte == GEN6_PTES) {

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index eb64f474a78c..0361b3dfdc72 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
-   GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+   GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
  
  		iter->dma += I915_GTT_PAGE_SIZE;

if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
-   if (!iter->sg) {
+   if (!iter->sg || sg_dma_len(iter->sg) == 0) {
idx = 0;
break;
}
  
  			iter->dma = sg_dma_address(iter->sg);

-   iter->max = iter->dma + iter->sg->length;
+   iter->max = iter->dma + sg_dma_len(iter->sg);
}
  
  		if (gen8_pd_index(++idx, 0) == 0) {

@@ -414,7 +414,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
  {
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
u64 start = vma->node.start;
-   dma_addr_t rem = iter->sg->length;
+   dma_addr_t rem = sg_dma_len(iter->sg);


Seems a little odd to me to be storing a length in a dma_addr_t. But
besides that small nit, this all makes sense to me.

Reviewed-by: Logan Gunthorpe 


I did not spot that, thanks. I'll improve that in v2 since I need to add 
a 2nd patch to completely prepare i915 for this.


Regards,

Tvrtko




___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix DMA mapped scatterlist walks

2020-09-09 Thread Logan Gunthorpe



On 2020-09-09 6:44 a.m., Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> When walking DMA mapped scatterlists sg_dma_len has to be used since it
> can be different (coalesced) from the backing store entry.
> 
> This also means we have to end the walk when encountering a zero length
> DMA entry and cannot rely on the normal sg list end marker.
> 
> Both issues were there in theory for some time but were hidden by the fact
> Intel IOMMU driver was never coalescing entries. As there are ongoing
> efforts to change this we need to start handling it.
> 
> Signed-off-by: Tvrtko Ursulin 
> References: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL 
> iterators")
> References: b31144c0daa8 ("drm/i915: Micro-optimise 
> gen6_ppgtt_insert_entries()")
> Reported-by: Tom Murphy 
> Suggested-by: Tom Murphy  # __sgt_iter
> Suggested-by: Logan Gunthorpe  # __sgt_iter
> Cc: Joonas Lahtinen 
> Cc: Chris Wilson 
> Cc: Matthew Auld 
> Cc: Lu Baolu 
> ---
>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c|  6 +++---
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c| 17 ++---
>  drivers/gpu/drm/i915/gt/intel_gtt.h |  2 +-
>  drivers/gpu/drm/i915/i915_scatterlist.h | 12 
>  4 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
> b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index fd0d24d28763..c0d17f87b00f 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct 
> i915_address_space *vm,
>  
>   vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
>   do {
> - GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
> + GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
>   vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
>  
>   iter.dma += I915_GTT_PAGE_SIZE;
>   if (iter.dma == iter.max) {
>   iter.sg = __sg_next(iter.sg);
> - if (!iter.sg)
> + if (!iter.sg || sg_dma_len(iter.sg) == 0)
>   break;
>  
>   iter.dma = sg_dma_address(iter.sg);
> - iter.max = iter.dma + iter.sg->length;
> + iter.max = iter.dma + sg_dma_len(iter.sg);
>   }
>  
>   if (++act_pte == GEN6_PTES) {
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
> b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index eb64f474a78c..0361b3dfdc72 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
>   pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
>   vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
>   do {
> - GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
> + GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
>   vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
>  
>   iter->dma += I915_GTT_PAGE_SIZE;
>   if (iter->dma >= iter->max) {
>   iter->sg = __sg_next(iter->sg);
> - if (!iter->sg) {
> + if (!iter->sg || sg_dma_len(iter->sg) == 0) {
>   idx = 0;
>   break;
>   }
>  
>   iter->dma = sg_dma_address(iter->sg);
> - iter->max = iter->dma + iter->sg->length;
> + iter->max = iter->dma + sg_dma_len(iter->sg);
>   }
>  
>   if (gen8_pd_index(++idx, 0) == 0) {
> @@ -414,7 +414,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
>  {
>   const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
>   u64 start = vma->node.start;
> - dma_addr_t rem = iter->sg->length;
> + dma_addr_t rem = sg_dma_len(iter->sg);

Seems a little odd to me to be storing a length in a dma_addr_t. But
besides that small nit, this all makes sense to me.

Reviewed-by: Logan Gunthorpe 

Thanks,

Logan
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix DMA mapped scatterlist walks

2020-09-09 Thread Tvrtko Ursulin



On 09/09/2020 14:03, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2020-09-09 13:44:57)

From: Tvrtko Ursulin 

When walking DMA mapped scatterlists sg_dma_len has to be used since it
can be different (coalesced) from the backing store entry.

This also means we have to end the walk when encountering a zero length
DMA entry and cannot rely on the normal sg list end marker.

Both issues were there in theory for some time but were hidden by the fact
Intel IOMMU driver was never coalescing entries. As there are ongoing
efforts to change this we need to start handling it.


Does this change anything if we were already feeding in coalesced
entries? I doubt we made all sg generators compact the lists though.


I don't think so. My takeaway from this was the realization that page and 
DMA coalescing can be completely different (more so once the IOMMU 
patches get in). I could not think of any other problems, as long as the 
DMA remapping fits into the number of sg table entries we allocated. 
It is just a bit weird, this duality of sg_table "views".



The change looks reasonable, it seems you have kept the dma and page
iterators distinct, which was the only worry that occurred to me.


It surprised me we have two iterators, well one and a half. The sgt_dma 
one being the half of an iterator. :)


Regards,

Tvrtko

P.S. Well, as I was writing this reply the full test results came in and 
something is still broken, so scratch that.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Fix DMA mapped scatterlist walks

2020-09-09 Thread Chris Wilson
Quoting Tvrtko Ursulin (2020-09-09 13:44:57)
> From: Tvrtko Ursulin 
> 
> When walking DMA mapped scatterlists sg_dma_len has to be used since it
> can be different (coalesced) from the backing store entry.
> 
> This also means we have to end the walk when encountering a zero length
> DMA entry and cannot rely on the normal sg list end marker.
> 
> Both issues were there in theory for some time but were hidden by the fact
> Intel IOMMU driver was never coalescing entries. As there are ongoing
> efforts to change this we need to start handling it.

Does this change anything if we were already feeding in coalesced
entries? I doubt we made all sg generators compact the lists though.

The change looks reasonable, it seems you have kept the dma and page
iterators distinct, which was the only worry that occurred to me.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] drm/i915: Fix DMA mapped scatterlist walks

2020-09-09 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

When walking DMA-mapped scatterlists, sg_dma_len() has to be used, since the
DMA length can differ from the backing store entry length (coalescing).

This also means we have to end the walk when encountering a zero length
DMA entry and cannot rely on the normal sg list end marker.

Both issues have existed in theory for some time but were hidden by the fact
that the Intel IOMMU driver never coalesced entries. As there are ongoing
efforts to change this, we need to start handling it.

Signed-off-by: Tvrtko Ursulin 
References: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL 
iterators")
References: b31144c0daa8 ("drm/i915: Micro-optimise 
gen6_ppgtt_insert_entries()")
Reported-by: Tom Murphy 
Suggested-by: Tom Murphy  # __sgt_iter
Suggested-by: Logan Gunthorpe  # __sgt_iter
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Matthew Auld 
Cc: Lu Baolu 
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c|  6 +++---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c| 17 ++---
 drivers/gpu/drm/i915/gt/intel_gtt.h |  2 +-
 drivers/gpu/drm/i915/i915_scatterlist.h | 12 
 4 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index fd0d24d28763..c0d17f87b00f 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -131,17 +131,17 @@ static void gen6_ppgtt_insert_entries(struct 
i915_address_space *vm,
 
vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
do {
-   GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+   GEM_BUG_ON(sg_dma_len(iter.sg) < I915_GTT_PAGE_SIZE);
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
 
iter.dma += I915_GTT_PAGE_SIZE;
if (iter.dma == iter.max) {
iter.sg = __sg_next(iter.sg);
-   if (!iter.sg)
+   if (!iter.sg || sg_dma_len(iter.sg) == 0)
break;
 
iter.dma = sg_dma_address(iter.sg);
-   iter.max = iter.dma + iter.sg->length;
+   iter.max = iter.dma + sg_dma_len(iter.sg);
}
 
if (++act_pte == GEN6_PTES) {
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index eb64f474a78c..0361b3dfdc72 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -372,19 +372,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
do {
-   GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+   GEM_BUG_ON(sg_dma_len(iter->sg) < I915_GTT_PAGE_SIZE);
vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
 
iter->dma += I915_GTT_PAGE_SIZE;
if (iter->dma >= iter->max) {
iter->sg = __sg_next(iter->sg);
-   if (!iter->sg) {
+   if (!iter->sg || sg_dma_len(iter->sg) == 0) {
idx = 0;
break;
}
 
iter->dma = sg_dma_address(iter->sg);
-   iter->max = iter->dma + iter->sg->length;
+   iter->max = iter->dma + sg_dma_len(iter->sg);
}
 
if (gen8_pd_index(++idx, 0) == 0) {
@@ -414,7 +414,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 {
const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
u64 start = vma->node.start;
-   dma_addr_t rem = iter->sg->length;
+   dma_addr_t rem = sg_dma_len(iter->sg);
 
GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
 
@@ -456,7 +456,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
 
do {
-   GEM_BUG_ON(iter->sg->length < page_size);
+   GEM_BUG_ON(sg_dma_len(iter->sg) < page_size);
vaddr[index++] = encode | iter->dma;
 
start += page_size;
@@ -467,7 +467,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
if (!iter->sg)
break;
 
-   rem = iter->sg->length;
+   rem = sg_dma_len(iter->sg);
+   if (!rem)
+   break;
+
iter->dma = sg_dma_address(iter->sg);
iter->max = iter->dma + rem;
 
@@ -525,7 +528,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
}
 
vma->page_sizes.gtt |= page_size;
-   } while (iter->sg);
+   } while (iter->sg &&