Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()

2020-10-10 Thread Ira Weiny
On Sat, Oct 10, 2020 at 12:03:49AM +0200, Daniel Vetter wrote:
> On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote:
> > From: Ira Weiny 
> > 
> > These kmap() calls in the gpu stack are localized to a single thread.
> > To avoid the overhead of global PKRS updates, use the new kmap_thread()
> > call.
> > 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Patrik Jakobsson 
> > Signed-off-by: Ira Weiny 
> 
> I'm guessing the entire pile goes in through some other tree.
>

Apologies for not realizing there were multiple maintainers here.

But, I was thinking it would land together through the mm tree once the core
support lands.  I've tried to split these out in a way they can be easily
reviewed/acked by the correct developers.

> If so:
> 
> Acked-by: Daniel Vetter 
> 
> If you want this to land through maintainer trees, then we need a
> per-driver split (since aside from amdgpu and radeon they're all different
> subtrees).

It is just an RFC for the moment.  I need to get the core support accepted first
then this can land.

> 
> btw the two kmap calls in drm you highlight in the cover letter should
> also be convertible to kmap_thread. We only hold vmalloc mappings for a
> longer time (or it'd be quite a driver bug). So if you want maybe throw
> those two as two additional patches on top, and we can do some careful
> review & testing for them.

Cool.  I'll add them in.

Ira

> -Daniel
> 
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 12 ++--
> >  drivers/gpu/drm/gma500/gma_display.c |  4 ++--
> >  drivers/gpu/drm/gma500/mmu.c | 10 +-
> >  drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  4 ++--
> >  .../gpu/drm/i915/gem/selftests/i915_gem_context.c|  4 ++--
> >  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c   |  8 
> >  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c |  4 ++--
> >  drivers/gpu/drm/i915/gt/intel_gtt.c  |  4 ++--
> >  drivers/gpu/drm/i915/gt/shmem_utils.c|  4 ++--
> >  drivers/gpu/drm/i915/i915_gem.c  |  8 
> >  drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
> >  drivers/gpu/drm/i915/selftests/i915_perf.c   |  4 ++--
> >  drivers/gpu/drm/radeon/radeon_ttm.c  |  4 ++--
> >  13 files changed, 37 insertions(+), 37 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > index 978bae731398..bd564bccb7a3 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> > @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, 
> > char __user *buf,
> >  
> > page = adev->gart.pages[p];
> > if (page) {
> > -   ptr = kmap(page);
> > +   ptr = kmap_thread(page);
> > ptr += off;
> >  
> > r = copy_to_user(buf, ptr, cur_size);
> > -   kunmap(adev->gart.pages[p]);
> > +   kunmap_thread(adev->gart.pages[p]);
> > } else
> > r = clear_user(buf, cur_size);
> >  
> > @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char 
> > __user *buf,
> > if (p->mapping != adev->mman.bdev.dev_mapping)
> > return -EPERM;
> >  
> > -   ptr = kmap(p);
> > +   ptr = kmap_thread(p);
> > r = copy_to_user(buf, ptr + off, bytes);
> > -   kunmap(p);
> > +   kunmap_thread(p);
> > if (r)
> > return -EFAULT;
> >  
> > @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, 
> > const char __user *buf,
> > if (p->mapping != adev->mman.bdev.dev_mapping)
> > return -EPERM;
> >  
> > -   ptr = kmap(p);
> > +   ptr = kmap_thread(p);
> > r = copy_from_user(ptr + off, buf, bytes);
> > -   kunmap(p);
> > +   kunmap_thread(p);
> > if (r)
> > return -EFAULT;
> >  
> > diff --git a/drivers/gpu/drm/gma500/gma_display.c 
> > b/drivers/gpu/drm/gma500/gma_display.c
> > index 3df6d6e850f5..35f4e55c941f 100644
> > --- a/drivers/gpu/drm/gma500/gma_display.c
> > +++ b/drivers/gpu/drm/gma500/gma_display.c
> > @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc,
> > /* Copy the cursor to cursor mem */
> > tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
> > for (i = 0; i < cursor_pages; i++) {
> > -   tmp_src = kmap(gt->pages[i]);
> > +   tmp_src = kmap_thread(gt->pages[i]);
> > memcpy(tmp_dst, tmp_src, PAGE_SIZE);
> > -   kunmap(gt->pages[i]);
> > +   kunmap_thread(gt->pages[i]);
> > tmp_dst += PAGE_SIZE;
> > }
> >  
> > diff --git 

Re: [PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()

2020-10-09 Thread Daniel Vetter
On Fri, Oct 09, 2020 at 12:49:44PM -0700, ira.we...@intel.com wrote:
> From: Ira Weiny 
> 
> These kmap() calls in the gpu stack are localized to a single thread.
> To avoid the overhead of global PKRS updates, use the new kmap_thread()
> call.
> 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Patrik Jakobsson 
> Signed-off-by: Ira Weiny 

I'm guessing the entire pile goes in through some other tree. If so:

Acked-by: Daniel Vetter 

If you want this to land through maintainer trees, then we need a
per-driver split (since aside from amdgpu and radeon they're all different
subtrees).

btw the two kmap calls in drm you highlight in the cover letter should
also be convertible to kmap_thread. We only hold vmalloc mappings for a
longer time (or it'd be quite a driver bug). So if you want maybe throw
those two as two additional patches on top, and we can do some careful
review & testing for them.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 12 ++--
>  drivers/gpu/drm/gma500/gma_display.c |  4 ++--
>  drivers/gpu/drm/gma500/mmu.c | 10 +-
>  drivers/gpu/drm/i915/gem/i915_gem_shmem.c|  4 ++--
>  .../gpu/drm/i915/gem/selftests/i915_gem_context.c|  4 ++--
>  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c   |  8 
>  drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c |  4 ++--
>  drivers/gpu/drm/i915/gt/intel_gtt.c  |  4 ++--
>  drivers/gpu/drm/i915/gt/shmem_utils.c|  4 ++--
>  drivers/gpu/drm/i915/i915_gem.c  |  8 
>  drivers/gpu/drm/i915/i915_gpu_error.c|  4 ++--
>  drivers/gpu/drm/i915/selftests/i915_perf.c   |  4 ++--
>  drivers/gpu/drm/radeon/radeon_ttm.c  |  4 ++--
>  13 files changed, 37 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 978bae731398..bd564bccb7a3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, 
> char __user *buf,
>  
>   page = adev->gart.pages[p];
>   if (page) {
> - ptr = kmap(page);
> + ptr = kmap_thread(page);
>   ptr += off;
>  
>   r = copy_to_user(buf, ptr, cur_size);
> - kunmap(adev->gart.pages[p]);
> + kunmap_thread(adev->gart.pages[p]);
>   } else
>   r = clear_user(buf, cur_size);
>  
> @@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char 
> __user *buf,
>   if (p->mapping != adev->mman.bdev.dev_mapping)
>   return -EPERM;
>  
> - ptr = kmap(p);
> + ptr = kmap_thread(p);
>   r = copy_to_user(buf, ptr + off, bytes);
> - kunmap(p);
> + kunmap_thread(p);
>   if (r)
>   return -EFAULT;
>  
> @@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const 
> char __user *buf,
>   if (p->mapping != adev->mman.bdev.dev_mapping)
>   return -EPERM;
>  
> - ptr = kmap(p);
> + ptr = kmap_thread(p);
>   r = copy_from_user(ptr + off, buf, bytes);
> - kunmap(p);
> + kunmap_thread(p);
>   if (r)
>   return -EFAULT;
>  
> diff --git a/drivers/gpu/drm/gma500/gma_display.c 
> b/drivers/gpu/drm/gma500/gma_display.c
> index 3df6d6e850f5..35f4e55c941f 100644
> --- a/drivers/gpu/drm/gma500/gma_display.c
> +++ b/drivers/gpu/drm/gma500/gma_display.c
> @@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc,
>   /* Copy the cursor to cursor mem */
>   tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
>   for (i = 0; i < cursor_pages; i++) {
> - tmp_src = kmap(gt->pages[i]);
> + tmp_src = kmap_thread(gt->pages[i]);
>   memcpy(tmp_dst, tmp_src, PAGE_SIZE);
> - kunmap(gt->pages[i]);
> + kunmap_thread(gt->pages[i]);
>   tmp_dst += PAGE_SIZE;
>   }
>  
> diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c
> index 505044c9a673..fba7a3a461fd 100644
> --- a/drivers/gpu/drm/gma500/mmu.c
> +++ b/drivers/gpu/drm/gma500/mmu.c
> @@ -192,20 +192,20 @@ struct psb_mmu_pd *psb_mmu_alloc_pd(struct 
> psb_mmu_driver *driver,
>   pd->invalid_pte = 0;
>   }
>  
> - v = kmap(pd->dummy_pt);
> + v = kmap_thread(pd->dummy_pt);
>   for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
>   v[i] = pd->invalid_pte;
>  
> - kunmap(pd->dummy_pt);
> + kunmap_thread(pd->dummy_pt);
>  
> - v = kmap(pd->p);
> +

[PATCH RFC PKS/PMEM 09/58] drivers/gpu: Utilize new kmap_thread()

2020-10-09 Thread ira . weiny
From: Ira Weiny 

These kmap() calls in the gpu stack are localized to a single thread.
To avoid the overhead of global PKRS updates, use the new kmap_thread()
call.

Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Patrik Jakobsson 
Signed-off-by: Ira Weiny 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c            | 12 ++++++------
 drivers/gpu/drm/gma500/gma_display.c               |  4 ++--
 drivers/gpu/drm/gma500/mmu.c                       | 10 +++++-----
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c          |  4 ++--
 .../gpu/drm/i915/gem/selftests/i915_gem_context.c  |  4 ++--
 drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c |  8 ++++----
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c       |  4 ++--
 drivers/gpu/drm/i915/gt/intel_gtt.c                |  4 ++--
 drivers/gpu/drm/i915/gt/shmem_utils.c              |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c                    |  8 ++++----
 drivers/gpu/drm/i915/i915_gpu_error.c              |  4 ++--
 drivers/gpu/drm/i915/selftests/i915_perf.c         |  4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c                |  4 ++--
 13 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 978bae731398..bd564bccb7a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2437,11 +2437,11 @@ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char 
__user *buf,
 
page = adev->gart.pages[p];
if (page) {
-   ptr = kmap(page);
+   ptr = kmap_thread(page);
ptr += off;
 
r = copy_to_user(buf, ptr, cur_size);
-   kunmap(adev->gart.pages[p]);
+   kunmap_thread(adev->gart.pages[p]);
} else
r = clear_user(buf, cur_size);
 
@@ -2507,9 +2507,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char 
__user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
 
-   ptr = kmap(p);
+   ptr = kmap_thread(p);
r = copy_to_user(buf, ptr + off, bytes);
-   kunmap(p);
+   kunmap_thread(p);
if (r)
return -EFAULT;
 
@@ -2558,9 +2558,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const 
char __user *buf,
if (p->mapping != adev->mman.bdev.dev_mapping)
return -EPERM;
 
-   ptr = kmap(p);
+   ptr = kmap_thread(p);
r = copy_from_user(ptr + off, buf, bytes);
-   kunmap(p);
+   kunmap_thread(p);
if (r)
return -EFAULT;
 
diff --git a/drivers/gpu/drm/gma500/gma_display.c 
b/drivers/gpu/drm/gma500/gma_display.c
index 3df6d6e850f5..35f4e55c941f 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -400,9 +400,9 @@ int gma_crtc_cursor_set(struct drm_crtc *crtc,
/* Copy the cursor to cursor mem */
tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
for (i = 0; i < cursor_pages; i++) {
-   tmp_src = kmap(gt->pages[i]);
+   tmp_src = kmap_thread(gt->pages[i]);
memcpy(tmp_dst, tmp_src, PAGE_SIZE);
-   kunmap(gt->pages[i]);
+   kunmap_thread(gt->pages[i]);
tmp_dst += PAGE_SIZE;
}
 
diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c
index 505044c9a673..fba7a3a461fd 100644
--- a/drivers/gpu/drm/gma500/mmu.c
+++ b/drivers/gpu/drm/gma500/mmu.c
@@ -192,20 +192,20 @@ struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver 
*driver,
pd->invalid_pte = 0;
}
 
-   v = kmap(pd->dummy_pt);
+   v = kmap_thread(pd->dummy_pt);
for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
v[i] = pd->invalid_pte;
 
-   kunmap(pd->dummy_pt);
+   kunmap_thread(pd->dummy_pt);
 
-   v = kmap(pd->p);
+   v = kmap_thread(pd->p);
for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i)
v[i] = pd->invalid_pde;
 
-   kunmap(pd->p);
+   kunmap_thread(pd->p);
 
clear_page(kmap(pd->dummy_page));
-   kunmap(pd->dummy_page);
+   kunmap_thread(pd->dummy_page);
 
pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024);
if (!pd->tables)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 38113d3c0138..274424795fb7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -566,9 +566,9 @@ i915_gem_object_create_shmem_from_data(struct 
drm_i915_private *dev_priv,
if (err < 0)