Re: [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync

2020-06-17 Thread Navid Emamdoost
Hi Lucas,


On Wed, Jun 17, 2020 at 4:53 AM Lucas Stach  wrote:
>
> Hi Navid,
>
> Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
> > in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
> > and etnaviv_gpu_init the call to pm_runtime_get_sync increments the
> > counter even in case of failure, leading to incorrect ref count.
> > In case of failure, decrement the ref count before returning.
>
> While that change is correct with the current API, may I ask the
> question why the way this API works is considered reasonable? A API
> call that fails, but still changes internal state and expects the
> caller to clean up the mess it not really what I would consider fool-
> proof API design. Is there a specific reason why it is done this way
> and not handled internally?

I share the same concern with you on the way this API is working now.
To the best of my knowledge, there are ongoing discussions on this
issue:

https://lkml.org/lkml/2020/6/14/76
https://patchwork.ozlabs.org/project/linux-tegra/patch/20200520095148.10995-1-dinghao@zju.edu.cn/

>
> Regards,
> Lucas
>
> > Signed-off-by: Navid Emamdoost 
> > ---
> > Changes in v2:
> >   - replace pm_runtime_put with  pm_runtime_put_noidle
> > ---
> >  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++
> >  1 file changed, 10 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c 
> > b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > index a31eeff2b297..7c9f3f9ba123 100644
> > --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> > @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> >   ret = pm_runtime_get_sync(gpu->dev);
> >   if (ret < 0) {
> >   dev_err(gpu->dev, "Failed to enable GPU power domain\n");
> > - return ret;
> > + goto pm_put;
> >   }
> >
> >   etnaviv_hw_identify(gpu);
> > @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
> >
> >  fail:
> >   pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >   pm_runtime_put_autosuspend(gpu->dev);
> >
> >   return ret;
> > @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct 
> > seq_file *m)
> >
> >   ret = pm_runtime_get_sync(gpu->dev);
> >   if (ret < 0)
> > - return ret;
> > + goto pm_put;
> >
> >   dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
> >   dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
> > @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, 
> > struct seq_file *m)
> >   ret = 0;
> >
> >   pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >   pm_runtime_put_autosuspend(gpu->dev);
> >
> >   return ret;
> > @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
> >   dev_err(gpu->dev, "recover hung GPU!\n");
> >
> >   if (pm_runtime_get_sync(gpu->dev) < 0)
> > - return;
> > + goto pm_put;
> >
> >   mutex_lock(&gpu->lock);
> >
> > @@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
> >
> >   mutex_unlock(&gpu->lock);
> >   pm_runtime_mark_last_busy(gpu->dev);
> > +pm_put:
> >   pm_runtime_put_autosuspend(gpu->dev);
> >  }
> >
> > @@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> > etnaviv_gem_submit *submit)
> >
> >   if (!submit->runtime_resumed) {
> >   ret = pm_runtime_get_sync(gpu->dev);
> > - if (ret < 0)
> > + if (ret < 0) {
> > + pm_runtime_put_noidle(gpu->dev);
> >   return NULL;
> > + }
> >   submit->runtime_resumed = true;
> >   }
> >
> > @@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> > etnaviv_gem_submit *submit)
> >   ret = event_alloc(gpu, nr_events, event);
> >   if (ret) {
> >   DRM_ERROR("no free events\n");
> > + pm_runtime_put_noidle(gpu->dev);
> >   return NULL;
> >   }
> >
>


--
Navid.


Re: [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync

2020-06-17 Thread Lucas Stach
Hi Navid,

Am Montag, den 15.06.2020, 01:12 -0500 schrieb Navid Emamdoost:
> in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
> and etnaviv_gpu_init the call to pm_runtime_get_sync increments the
> counter even in case of failure, leading to incorrect ref count.
> In case of failure, decrement the ref count before returning.

While that change is correct with the current API, may I ask the
question why the way this API works is considered reasonable? A API
call that fails, but still changes internal state and expects the
caller to clean up the mess it not really what I would consider fool-
proof API design. Is there a specific reason why it is done this way
and not handled internally?

Regards,
Lucas

> Signed-off-by: Navid Emamdoost 
> ---
> Changes in v2:
>   - replace pm_runtime_put with  pm_runtime_put_noidle
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c 
> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> index a31eeff2b297..7c9f3f9ba123 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
> @@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>   ret = pm_runtime_get_sync(gpu->dev);
>   if (ret < 0) {
>   dev_err(gpu->dev, "Failed to enable GPU power domain\n");
> - return ret;
> + goto pm_put;
>   }
>  
>   etnaviv_hw_identify(gpu);
> @@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
>  
>  fail:
>   pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>   pm_runtime_put_autosuspend(gpu->dev);
>  
>   return ret;
> @@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct 
> seq_file *m)
>  
>   ret = pm_runtime_get_sync(gpu->dev);
>   if (ret < 0)
> - return ret;
> + goto pm_put;
>  
>   dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
>   dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
> @@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct 
> seq_file *m)
>   ret = 0;
>  
>   pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>   pm_runtime_put_autosuspend(gpu->dev);
>  
>   return ret;
> @@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>   dev_err(gpu->dev, "recover hung GPU!\n");
>  
>   if (pm_runtime_get_sync(gpu->dev) < 0)
> - return;
> + goto pm_put;
>  
>   mutex_lock(&gpu->lock);
>  
> @@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
>  
>   mutex_unlock(&gpu->lock);
>   pm_runtime_mark_last_busy(gpu->dev);
> +pm_put:
>   pm_runtime_put_autosuspend(gpu->dev);
>  }
>  
> @@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> etnaviv_gem_submit *submit)
>  
>   if (!submit->runtime_resumed) {
>   ret = pm_runtime_get_sync(gpu->dev);
> - if (ret < 0)
> + if (ret < 0) {
> + pm_runtime_put_noidle(gpu->dev);
>   return NULL;
> + }
>   submit->runtime_resumed = true;
>   }
>  
> @@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct 
> etnaviv_gem_submit *submit)
>   ret = event_alloc(gpu, nr_events, event);
>   if (ret) {
>   DRM_ERROR("no free events\n");
> + pm_runtime_put_noidle(gpu->dev);
>   return NULL;
>   }
>  



Re: [PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync

2020-06-15 Thread Markus Elfring
…
> In case of failure, decrement the ref count before returning.

Can it be nicer to use the term “reference count” here?

Will the tag “Fixes” become helpful for the commit message?


…
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
…
> @@ -1326,6 +1331,7 @@  struct dma_fence *etnaviv_gpu_submit(struct 
> etnaviv_gem_submit *submit)
>   ret = event_alloc(gpu, nr_events, event);
>   if (ret) {
>   DRM_ERROR("no free events\n");
> + pm_runtime_put_noidle(gpu->dev);
>   return NULL;
>   }

I suggest to move a bit of exception handling code to the end of
this function implementation so that it can be better reused after
the addition of a jump target like “put_runtime”.

Regards,
Markus


[PATCH v2] drm/etnaviv: fix ref count leak via pm_runtime_get_sync

2020-06-14 Thread Navid Emamdoost
in etnaviv_gpu_submit, etnaviv_gpu_recover_hang, etnaviv_gpu_debugfs,
and etnaviv_gpu_init the call to pm_runtime_get_sync increments the
counter even in case of failure, leading to incorrect ref count.
In case of failure, decrement the ref count before returning.

Signed-off-by: Navid Emamdoost 
---
Changes in v2:
- replace pm_runtime_put with  pm_runtime_put_noidle
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c 
b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index a31eeff2b297..7c9f3f9ba123 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -722,7 +722,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0) {
dev_err(gpu->dev, "Failed to enable GPU power domain\n");
-   return ret;
+   goto pm_put;
}
 
etnaviv_hw_identify(gpu);
@@ -819,6 +819,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 
 fail:
pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
pm_runtime_put_autosuspend(gpu->dev);
 
return ret;
@@ -859,7 +860,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct 
seq_file *m)
 
ret = pm_runtime_get_sync(gpu->dev);
if (ret < 0)
-   return ret;
+   goto pm_put;
 
dma_lo = gpu_read(gpu, VIVS_FE_DMA_LOW);
dma_hi = gpu_read(gpu, VIVS_FE_DMA_HIGH);
@@ -1003,6 +1004,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct 
seq_file *m)
ret = 0;
 
pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
pm_runtime_put_autosuspend(gpu->dev);
 
return ret;
@@ -1016,7 +1018,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
dev_err(gpu->dev, "recover hung GPU!\n");
 
if (pm_runtime_get_sync(gpu->dev) < 0)
-   return;
+   goto pm_put;
 
mutex_lock(&gpu->lock);
 
@@ -1035,6 +1037,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 
mutex_unlock(&gpu->lock);
pm_runtime_mark_last_busy(gpu->dev);
+pm_put:
pm_runtime_put_autosuspend(gpu->dev);
 }
 
@@ -1308,8 +1311,10 @@ struct dma_fence *etnaviv_gpu_submit(struct 
etnaviv_gem_submit *submit)
 
if (!submit->runtime_resumed) {
ret = pm_runtime_get_sync(gpu->dev);
-   if (ret < 0)
+   if (ret < 0) {
+   pm_runtime_put_noidle(gpu->dev);
return NULL;
+   }
submit->runtime_resumed = true;
}
 
@@ -1326,6 +1331,7 @@ struct dma_fence *etnaviv_gpu_submit(struct 
etnaviv_gem_submit *submit)
ret = event_alloc(gpu, nr_events, event);
if (ret) {
DRM_ERROR("no free events\n");
+   pm_runtime_put_noidle(gpu->dev);
return NULL;
}
 
-- 
2.17.1