Re: [PATCH v8 7/7] drm/panfrost: Switch to generic memory shrinker

2022-11-14 Thread Dmitry Osipenko
Hello Steve,

On 11/14/22 19:54, Steven Price wrote:
> On 05/11/2022 23:27, Dmitry Osipenko wrote:
>> Replace Panfrost's custom memory shrinker with a common drm-shmem
>> memory shrinker.
>>
>> Signed-off-by: Dmitry Osipenko 
> 
> Sadly this triggers GPU faults under memory pressure - it looks
> suspiciously like mappings are being freed while the jobs are still running.
> 
> I'm not sure I understand how the generic shrinker replicates the
> "gpu_usecount" atomic that Panfrost currently has, and I'm wondering if
> that's the cause?
> 
> Also just reverting this commit (so just patches 1-6) I can't actually
> get Panfrost to purge any memory. So I don't think the changes (most
> likely in patch 4) are quite right either.
> 
> At the moment I don't have the time to investigate in detail. But if
> you've any ideas for something specific I should look at I can run more
> testing.

Thank you for the testing! It just occurred to me that the shrinker
callback lost the dma_resv_test_signaled() in comparison to the previous
versions of this patchset. It appeared to me that the drm_gem_lru now
checks whether reservation is busy, but it doesn't.

I saw a similar page faults once in a while when was testing the
Panfrost driver, but then couldn't reproduce the faults after applying
the IOMMU unmap range fix that Robin made recently.

I'll re-add the dma_resv_test_signaled() in v9, it was a luck that I
didn't hit it much during my testing.

-- 
Best regards,
Dmitry



Re: [PATCH v8 7/7] drm/panfrost: Switch to generic memory shrinker

2022-11-14 Thread Steven Price
On 05/11/2022 23:27, Dmitry Osipenko wrote:
> Replace Panfrost's custom memory shrinker with a common drm-shmem
> memory shrinker.
> 
> Signed-off-by: Dmitry Osipenko 

Sadly this triggers GPU faults under memory pressure - it looks
suspiciously like mappings are being freed while the jobs are still running.

I'm not sure I understand how the generic shrinker replicates the
"gpu_usecount" atomic that Panfrost currently has, and I'm wondering if
that's the cause?

Also just reverting this commit (so just patches 1-6) I can't actually
get Panfrost to purge any memory. So I don't think the changes (most
likely in patch 4) are quite right either.

At the moment I don't have the time to investigate in detail. But if
you've any ideas for something specific I should look at I can run more
testing.

Steve

> ---
>  drivers/gpu/drm/panfrost/Makefile |   1 -
>  drivers/gpu/drm/panfrost/panfrost_device.h|   4 -
>  drivers/gpu/drm/panfrost/panfrost_drv.c   |  19 +--
>  drivers/gpu/drm/panfrost/panfrost_gem.c   |  33 +++--
>  drivers/gpu/drm/panfrost/panfrost_gem.h   |   9 --
>  .../gpu/drm/panfrost/panfrost_gem_shrinker.c  | 129 --
>  drivers/gpu/drm/panfrost/panfrost_job.c   |  18 ++-
>  7 files changed, 42 insertions(+), 171 deletions(-)
>  delete mode 100644 drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c
> 
> diff --git a/drivers/gpu/drm/panfrost/Makefile 
> b/drivers/gpu/drm/panfrost/Makefile
> index 7da2b3f02ed9..11622e22cf15 100644
> --- a/drivers/gpu/drm/panfrost/Makefile
> +++ b/drivers/gpu/drm/panfrost/Makefile
> @@ -5,7 +5,6 @@ panfrost-y := \
>   panfrost_device.o \
>   panfrost_devfreq.o \
>   panfrost_gem.o \
> - panfrost_gem_shrinker.o \
>   panfrost_gpu.o \
>   panfrost_job.o \
>   panfrost_mmu.o \
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h 
> b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 8b25278f34c8..fe04b21fc044 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -115,10 +115,6 @@ struct panfrost_device {
>   atomic_t pending;
>   } reset;
>  
> - struct mutex shrinker_lock;
> - struct list_head shrinker_list;
> - struct shrinker shrinker;
> -
>   struct panfrost_devfreq pfdevfreq;
>  };
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
> b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 94b8e6de34b8..fe78d5c75abf 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -160,7 +160,6 @@ panfrost_lookup_bos(struct drm_device *dev,
>   break;
>   }
>  
> - atomic_inc(>gpu_usecount);
>   job->mappings[i] = mapping;
>   }
>  
> @@ -392,7 +391,6 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, 
> void *data,
>  {
>   struct panfrost_file_priv *priv = file_priv->driver_priv;
>   struct drm_panfrost_madvise *args = data;
> - struct panfrost_device *pfdev = dev->dev_private;
>   struct drm_gem_object *gem_obj;
>   struct panfrost_gem_object *bo;
>   int ret = 0;
> @@ -409,7 +407,6 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, 
> void *data,
>   if (ret)
>   goto out_put_object;
>  
> - mutex_lock(>shrinker_lock);
>   mutex_lock(>mappings.lock);
>   if (args->madv == PANFROST_MADV_DONTNEED) {
>   struct panfrost_gem_mapping *first;
> @@ -435,17 +432,8 @@ static int panfrost_ioctl_madvise(struct drm_device 
> *dev, void *data,
>  
>   args->retained = drm_gem_shmem_madvise(>base, args->madv);
>  
> - if (args->retained) {
> - if (args->madv == PANFROST_MADV_DONTNEED)
> - list_move_tail(>base.madv_list,
> ->shrinker_list);
> - else if (args->madv == PANFROST_MADV_WILLNEED)
> - list_del_init(>base.madv_list);
> - }
> -
>  out_unlock_mappings:
>   mutex_unlock(>mappings.lock);
> - mutex_unlock(>shrinker_lock);
>   dma_resv_unlock(bo->base.base.resv);
>  out_put_object:
>   drm_gem_object_put(gem_obj);
> @@ -577,9 +565,6 @@ static int panfrost_probe(struct platform_device *pdev)
>   ddev->dev_private = pfdev;
>   pfdev->ddev = ddev;
>  
> - mutex_init(>shrinker_lock);
> - INIT_LIST_HEAD(>shrinker_list);
> -
>   err = panfrost_device_init(pfdev);
>   if (err) {
>   if (err != -EPROBE_DEFER)
> @@ -601,7 +586,7 @@ static int panfrost_probe(struct platform_device *pdev)
>   if (err < 0)
>   goto err_out1;
>  
> - panfrost_gem_shrinker_init(ddev);
> + drm_gem_shmem_shrinker_register(ddev, "panfrost-shrinker");
>  
>   return 0;
>  
> @@ -619,8 +604,8 @@ static int panfrost_remove(struct platform_device *pdev)
>   struct panfrost_device *pfdev = platform_get_drvdata(pdev);
>   struct drm_device *ddev = pfdev->ddev;
>  
> +   

[PATCH v8 7/7] drm/panfrost: Switch to generic memory shrinker

2022-11-05 Thread Dmitry Osipenko
Replace Panfrost's custom memory shrinker with a common drm-shmem
memory shrinker.

Signed-off-by: Dmitry Osipenko 
---
 drivers/gpu/drm/panfrost/Makefile |   1 -
 drivers/gpu/drm/panfrost/panfrost_device.h|   4 -
 drivers/gpu/drm/panfrost/panfrost_drv.c   |  19 +--
 drivers/gpu/drm/panfrost/panfrost_gem.c   |  33 +++--
 drivers/gpu/drm/panfrost/panfrost_gem.h   |   9 --
 .../gpu/drm/panfrost/panfrost_gem_shrinker.c  | 129 --
 drivers/gpu/drm/panfrost/panfrost_job.c   |  18 ++-
 7 files changed, 42 insertions(+), 171 deletions(-)
 delete mode 100644 drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c

diff --git a/drivers/gpu/drm/panfrost/Makefile 
b/drivers/gpu/drm/panfrost/Makefile
index 7da2b3f02ed9..11622e22cf15 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -5,7 +5,6 @@ panfrost-y := \
panfrost_device.o \
panfrost_devfreq.o \
panfrost_gem.o \
-   panfrost_gem_shrinker.o \
panfrost_gpu.o \
panfrost_job.o \
panfrost_mmu.o \
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h 
b/drivers/gpu/drm/panfrost/panfrost_device.h
index 8b25278f34c8..fe04b21fc044 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -115,10 +115,6 @@ struct panfrost_device {
atomic_t pending;
} reset;
 
-   struct mutex shrinker_lock;
-   struct list_head shrinker_list;
-   struct shrinker shrinker;
-
struct panfrost_devfreq pfdevfreq;
 };
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c 
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 94b8e6de34b8..fe78d5c75abf 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -160,7 +160,6 @@ panfrost_lookup_bos(struct drm_device *dev,
break;
}
 
-   atomic_inc(>gpu_usecount);
job->mappings[i] = mapping;
}
 
@@ -392,7 +391,6 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, 
void *data,
 {
struct panfrost_file_priv *priv = file_priv->driver_priv;
struct drm_panfrost_madvise *args = data;
-   struct panfrost_device *pfdev = dev->dev_private;
struct drm_gem_object *gem_obj;
struct panfrost_gem_object *bo;
int ret = 0;
@@ -409,7 +407,6 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, 
void *data,
if (ret)
goto out_put_object;
 
-   mutex_lock(>shrinker_lock);
mutex_lock(>mappings.lock);
if (args->madv == PANFROST_MADV_DONTNEED) {
struct panfrost_gem_mapping *first;
@@ -435,17 +432,8 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, 
void *data,
 
args->retained = drm_gem_shmem_madvise(>base, args->madv);
 
-   if (args->retained) {
-   if (args->madv == PANFROST_MADV_DONTNEED)
-   list_move_tail(>base.madv_list,
-  >shrinker_list);
-   else if (args->madv == PANFROST_MADV_WILLNEED)
-   list_del_init(>base.madv_list);
-   }
-
 out_unlock_mappings:
mutex_unlock(>mappings.lock);
-   mutex_unlock(>shrinker_lock);
dma_resv_unlock(bo->base.base.resv);
 out_put_object:
drm_gem_object_put(gem_obj);
@@ -577,9 +565,6 @@ static int panfrost_probe(struct platform_device *pdev)
ddev->dev_private = pfdev;
pfdev->ddev = ddev;
 
-   mutex_init(>shrinker_lock);
-   INIT_LIST_HEAD(>shrinker_list);
-
err = panfrost_device_init(pfdev);
if (err) {
if (err != -EPROBE_DEFER)
@@ -601,7 +586,7 @@ static int panfrost_probe(struct platform_device *pdev)
if (err < 0)
goto err_out1;
 
-   panfrost_gem_shrinker_init(ddev);
+   drm_gem_shmem_shrinker_register(ddev, "panfrost-shrinker");
 
return 0;
 
@@ -619,8 +604,8 @@ static int panfrost_remove(struct platform_device *pdev)
struct panfrost_device *pfdev = platform_get_drvdata(pdev);
struct drm_device *ddev = pfdev->ddev;
 
+   drm_gem_shmem_shrinker_unregister(ddev);
drm_dev_unregister(ddev);
-   panfrost_gem_shrinker_cleanup(ddev);
 
pm_runtime_get_sync(pfdev->dev);
pm_runtime_disable(pfdev->dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c 
b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 293e799e2fe8..f1436405e3a0 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -19,16 +19,6 @@ static void panfrost_gem_free_object(struct drm_gem_object 
*obj)
struct panfrost_gem_object *bo = to_panfrost_bo(obj);
struct panfrost_device *pfdev = obj->dev->dev_private;
 
-   /*
-* Make sure the BO is no longer inserted in the shrinker list before
-* taking care of the destruction itself. If we don't do that we