Re: [PATCH 4/4] drm/panthor: Call panthor_sched_post_reset() even if the reset failed

2024-05-03 Thread Liviu Dudau
On Thu, May 02, 2024 at 08:38:12PM +0200, Boris Brezillon wrote:
> We need to undo what was done in panthor_sched_pre_reset() even if the
> reset failed. We just flag all previously running groups as terminated
> when that happens to unblock things.
> 
> Signed-off-by: Boris Brezillon 

Reviewed-by: Liviu Dudau 

> ---
>  drivers/gpu/drm/panthor/panthor_device.c |  7 +------
>  drivers/gpu/drm/panthor/panthor_sched.c  | 19 ++++++++++++++-----
>  drivers/gpu/drm/panthor/panthor_sched.h  |  2 +-
>  3 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c 
> b/drivers/gpu/drm/panthor/panthor_device.c
> index 4c5b54e7abb7..4082c8f2951d 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct 
> *work)
>   panthor_gpu_l2_power_on(ptdev);
>   panthor_mmu_post_reset(ptdev);
>   ret = panthor_fw_post_reset(ptdev);
> - if (ret)
> - goto out_dev_exit;
> -
>   atomic_set(&ptdev->reset.pending, 0);
> - panthor_sched_post_reset(ptdev);
> -
> -out_dev_exit:
> + panthor_sched_post_reset(ptdev, ret != 0);
>   drm_dev_exit(cookie);
>  
>   if (ret) {
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c 
> b/drivers/gpu/drm/panthor/panthor_sched.c
> index 6ea094b00cf9..fc43ff62c77d 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -2728,15 +2728,22 @@ void panthor_sched_pre_reset(struct panthor_device 
> *ptdev)
>   mutex_unlock(&sched->reset.lock);
>  }
>  
> -void panthor_sched_post_reset(struct panthor_device *ptdev)
> +void panthor_sched_post_reset(struct panthor_device *ptdev, bool 
> reset_failed)
>  {
>   struct panthor_scheduler *sched = ptdev->scheduler;
>   struct panthor_group *group, *group_tmp;
>  
>   mutex_lock(&sched->reset.lock);
>  
> - list_for_each_entry_safe(group, group_tmp, 
> &sched->reset.stopped_groups, run_node)
> + list_for_each_entry_safe(group, group_tmp, 
> &sched->reset.stopped_groups, run_node) {
> + /* Consider all previously running group as terminated if the
> +  * reset failed.
> +  */
> + if (reset_failed)
> + group->state = PANTHOR_CS_GROUP_TERMINATED;
> +
>   panthor_group_start(group);
> + }
>  
>   /* We're done resetting the GPU, clear the reset.in_progress bit so we 
> can
>* kick the scheduler.
> @@ -2744,9 +2751,11 @@ void panthor_sched_post_reset(struct panthor_device 
> *ptdev)
>   atomic_set(&sched->reset.in_progress, false);
>   mutex_unlock(&sched->reset.lock);
>  
> - sched_queue_delayed_work(sched, tick, 0);
> -
> - sched_queue_work(sched, sync_upd);
> + /* No need to queue a tick and update syncs if the reset failed. */
> + if (!reset_failed) {
> + sched_queue_delayed_work(sched, tick, 0);
> + sched_queue_work(sched, sync_upd);
> + }
>  }
>  
>  static void group_sync_upd_work(struct work_struct *work)
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.h 
> b/drivers/gpu/drm/panthor/panthor_sched.h
> index 66438b1f331f..3a30d2328b30 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.h
> +++ b/drivers/gpu/drm/panthor/panthor_sched.h
> @@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile);
>  int panthor_sched_init(struct panthor_device *ptdev);
>  void panthor_sched_unplug(struct panthor_device *ptdev);
>  void panthor_sched_pre_reset(struct panthor_device *ptdev);
> -void panthor_sched_post_reset(struct panthor_device *ptdev);
> +void panthor_sched_post_reset(struct panthor_device *ptdev, bool 
> reset_failed);
>  void panthor_sched_suspend(struct panthor_device *ptdev);
>  void panthor_sched_resume(struct panthor_device *ptdev);
>  
> -- 
> 2.44.0
> 

-- 

| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---
¯\_(ツ)_/¯


Re: [PATCH 4/4] drm/panthor: Call panthor_sched_post_reset() even if the reset failed

2024-05-03 Thread Steven Price
On 02/05/2024 19:38, Boris Brezillon wrote:
> We need to undo what was done in panthor_sched_pre_reset() even if the
> reset failed. We just flag all previously running groups as terminated
> when that happens to unblock things.
> 
> Signed-off-by: Boris Brezillon 

Seems reasonable, although I hope this case doesn't happen in practice ;)

Reviewed-by: Steven Price 

> ---
>  drivers/gpu/drm/panthor/panthor_device.c |  7 +------
>  drivers/gpu/drm/panthor/panthor_sched.c  | 19 ++++++++++++++-----
>  drivers/gpu/drm/panthor/panthor_sched.h  |  2 +-
>  3 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.c 
> b/drivers/gpu/drm/panthor/panthor_device.c
> index 4c5b54e7abb7..4082c8f2951d 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.c
> +++ b/drivers/gpu/drm/panthor/panthor_device.c
> @@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct 
> *work)
>   panthor_gpu_l2_power_on(ptdev);
>   panthor_mmu_post_reset(ptdev);
>   ret = panthor_fw_post_reset(ptdev);
> - if (ret)
> - goto out_dev_exit;
> -
>   atomic_set(&ptdev->reset.pending, 0);
> - panthor_sched_post_reset(ptdev);
> -
> -out_dev_exit:
> + panthor_sched_post_reset(ptdev, ret != 0);
>   drm_dev_exit(cookie);
>  
>   if (ret) {
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.c 
> b/drivers/gpu/drm/panthor/panthor_sched.c
> index 6ea094b00cf9..fc43ff62c77d 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.c
> +++ b/drivers/gpu/drm/panthor/panthor_sched.c
> @@ -2728,15 +2728,22 @@ void panthor_sched_pre_reset(struct panthor_device 
> *ptdev)
>   mutex_unlock(&sched->reset.lock);
>  }
>  
> -void panthor_sched_post_reset(struct panthor_device *ptdev)
> +void panthor_sched_post_reset(struct panthor_device *ptdev, bool 
> reset_failed)
>  {
>   struct panthor_scheduler *sched = ptdev->scheduler;
>   struct panthor_group *group, *group_tmp;
>  
>   mutex_lock(&sched->reset.lock);
>  
> - list_for_each_entry_safe(group, group_tmp, 
> &sched->reset.stopped_groups, run_node)
> + list_for_each_entry_safe(group, group_tmp, 
> &sched->reset.stopped_groups, run_node) {
> + /* Consider all previously running group as terminated if the
> +  * reset failed.
> +  */
> + if (reset_failed)
> + group->state = PANTHOR_CS_GROUP_TERMINATED;
> +
>   panthor_group_start(group);
> + }
>  
>   /* We're done resetting the GPU, clear the reset.in_progress bit so we 
> can
>* kick the scheduler.
> @@ -2744,9 +2751,11 @@ void panthor_sched_post_reset(struct panthor_device 
> *ptdev)
>   atomic_set(&sched->reset.in_progress, false);
>   mutex_unlock(&sched->reset.lock);
>  
> - sched_queue_delayed_work(sched, tick, 0);
> -
> - sched_queue_work(sched, sync_upd);
> + /* No need to queue a tick and update syncs if the reset failed. */
> + if (!reset_failed) {
> + sched_queue_delayed_work(sched, tick, 0);
> + sched_queue_work(sched, sync_upd);
> + }
>  }
>  
>  static void group_sync_upd_work(struct work_struct *work)
> diff --git a/drivers/gpu/drm/panthor/panthor_sched.h 
> b/drivers/gpu/drm/panthor/panthor_sched.h
> index 66438b1f331f..3a30d2328b30 100644
> --- a/drivers/gpu/drm/panthor/panthor_sched.h
> +++ b/drivers/gpu/drm/panthor/panthor_sched.h
> @@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile);
>  int panthor_sched_init(struct panthor_device *ptdev);
>  void panthor_sched_unplug(struct panthor_device *ptdev);
>  void panthor_sched_pre_reset(struct panthor_device *ptdev);
> -void panthor_sched_post_reset(struct panthor_device *ptdev);
> +void panthor_sched_post_reset(struct panthor_device *ptdev, bool 
> reset_failed);
>  void panthor_sched_suspend(struct panthor_device *ptdev);
>  void panthor_sched_resume(struct panthor_device *ptdev);
>  



[PATCH 4/4] drm/panthor: Call panthor_sched_post_reset() even if the reset failed

2024-05-02 Thread Boris Brezillon
We need to undo what was done in panthor_sched_pre_reset() even if the
reset failed. We just flag all previously running groups as terminated
when that happens to unblock things.

Signed-off-by: Boris Brezillon 
---
 drivers/gpu/drm/panthor/panthor_device.c |  7 +------
 drivers/gpu/drm/panthor/panthor_sched.c  | 19 ++++++++++++++-----
 drivers/gpu/drm/panthor/panthor_sched.h  |  2 +-
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 4c5b54e7abb7..4082c8f2951d 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -129,13 +129,8 @@ static void panthor_device_reset_work(struct work_struct *work)
 	panthor_gpu_l2_power_on(ptdev);
 	panthor_mmu_post_reset(ptdev);
 	ret = panthor_fw_post_reset(ptdev);
-	if (ret)
-		goto out_dev_exit;
-
 	atomic_set(&ptdev->reset.pending, 0);
-	panthor_sched_post_reset(ptdev);
-
-out_dev_exit:
+	panthor_sched_post_reset(ptdev, ret != 0);
 	drm_dev_exit(cookie);
 
 	if (ret) {
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 6ea094b00cf9..fc43ff62c77d 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -2728,15 +2728,22 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev)
 	mutex_unlock(&sched->reset.lock);
 }
 
-void panthor_sched_post_reset(struct panthor_device *ptdev)
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
 {
 	struct panthor_scheduler *sched = ptdev->scheduler;
 	struct panthor_group *group, *group_tmp;
 
 	mutex_lock(&sched->reset.lock);
 
-	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
+	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
+		/* Consider all previously running group as terminated if the
+		 * reset failed.
+		 */
+		if (reset_failed)
+			group->state = PANTHOR_CS_GROUP_TERMINATED;
+
 		panthor_group_start(group);
+	}
 
 	/* We're done resetting the GPU, clear the reset.in_progress bit so we can
 	 * kick the scheduler.
@@ -2744,9 +2751,11 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
 	atomic_set(&sched->reset.in_progress, false);
 	mutex_unlock(&sched->reset.lock);
 
-	sched_queue_delayed_work(sched, tick, 0);
-
-	sched_queue_work(sched, sync_upd);
+	/* No need to queue a tick and update syncs if the reset failed. */
+	if (!reset_failed) {
+		sched_queue_delayed_work(sched, tick, 0);
+		sched_queue_work(sched, sync_upd);
+	}
 }
 
 static void group_sync_upd_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 66438b1f331f..3a30d2328b30 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -40,7 +40,7 @@ void panthor_group_pool_destroy(struct panthor_file *pfile);
 int panthor_sched_init(struct panthor_device *ptdev);
 void panthor_sched_unplug(struct panthor_device *ptdev);
 void panthor_sched_pre_reset(struct panthor_device *ptdev);
-void panthor_sched_post_reset(struct panthor_device *ptdev);
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed);
 void panthor_sched_suspend(struct panthor_device *ptdev);
 void panthor_sched_resume(struct panthor_device *ptdev);
 
-- 
2.44.0