[RFC PATCH] drm/sched: Fix teardown leaks with refcounting

Philipp Stanner Tue, 03 Sep 2024 02:46:36 -0700

The GPU scheduler currently does not ensure that its pending_list is
empty before performing various other teardown tasks in
drm_sched_fini().


If there are still jobs in the pending_list, this is problematic because
after scheduler teardown, no one will call backend_ops.free_job()
anymore. This would, consequently, result in memory leaks.

One way to solves this is to implement reference counting for struct
drm_gpu_scheduler itself. Each job added to the pending_list takes a
reference, each one removed drops a reference.

This approach would keep the scheduler running even after users have
called drm_sched_fini(), and it would ultimately stop after the last job
has been removed from pending_list.

Add reference counting to struct drm_gpu_scheduler. Move the teardown
logic to __drm_sched_fini() and have drm_sched_fini() just also drop a
reference.

Suggested-by: Danilo Krummrich <[email protected]>
Signed-off-by: Philipp Stanner <[email protected]>
---
Hi all,

since the scheduler has many stake holders, I want this solution
discussed as an RFC first.

The advantage of this version would be that it does not block
drm_sched_fini(), but the price paid for that is that the scheduler
keeps running until all jobs are gone.

Cheers,
P.
---
 drivers/gpu/drm/scheduler/sched_main.c | 17 ++++++++++++++++-
 include/drm/gpu_scheduler.h            |  5 +++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 7e90c9f95611..62b453c8ed76 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -99,6 +99,8 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on 
a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " 
__stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
 module_param_named(sched_policy, drm_sched_policy, int, 0444);
 
+static void __drm_sched_fini(struct kref *jobs_remaining);
+
 static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
 {
        u32 credits;
@@ -540,6 +542,7 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 
        spin_lock(&sched->job_list_lock);
        list_add_tail(&s_job->list, &sched->pending_list);
+       kref_get(&sched->jobs_remaining);
        drm_sched_start_timeout(sched);
        spin_unlock(&sched->job_list_lock);
 }
@@ -564,6 +567,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
                 * is parked at which point it's safe.
                 */
                list_del_init(&job->list);
+               kref_put(&sched->jobs_remaining, __drm_sched_fini);
                spin_unlock(&sched->job_list_lock);
 
                status = job->sched->ops->timedout_job(job);
@@ -637,6 +641,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct 
drm_sched_job *bad)
                         */
                        spin_lock(&sched->job_list_lock);
                        list_del_init(&s_job->list);
+                       kref_put(&sched->jobs_remaining, __drm_sched_fini);
                        spin_unlock(&sched->job_list_lock);
 
                        /*
@@ -1084,6 +1089,7 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler 
*sched)
        if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
                /* remove job from pending_list */
                list_del_init(&job->list);
+               kref_put(&sched->jobs_remaining, __drm_sched_fini);
 
                /* cancel this job's TO timer */
                cancel_delayed_work(&sched->work_tdr);
@@ -1303,6 +1309,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
        init_waitqueue_head(&sched->job_scheduled);
        INIT_LIST_HEAD(&sched->pending_list);
        spin_lock_init(&sched->job_list_lock);
+       kref_init(&sched->jobs_remaining);
        atomic_set(&sched->credit_count, 0);
        INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
        INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
@@ -1334,11 +1341,14 @@ EXPORT_SYMBOL(drm_sched_init);
  *
  * Tears down and cleans up the scheduler.
  */
-void drm_sched_fini(struct drm_gpu_scheduler *sched)
+static void __drm_sched_fini(struct kref *jobs_remaining)
 {
+       struct drm_gpu_scheduler *sched;
        struct drm_sched_entity *s_entity;
        int i;
 
+       sched = container_of(jobs_remaining, struct drm_gpu_scheduler, 
jobs_remaining);
+
        drm_sched_wqueue_stop(sched);
 
        for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
@@ -1368,6 +1378,11 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
        kfree(sched->sched_rq);
        sched->sched_rq = NULL;
 }
+
+void drm_sched_fini(struct drm_gpu_scheduler *sched)
+{
+       kref_put(&sched->jobs_remaining, __drm_sched_fini);
+}
 EXPORT_SYMBOL(drm_sched_fini);
 
 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 5acc64954a88..b7fad8294861 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -29,6 +29,7 @@
 #include <linux/completion.h>
 #include <linux/xarray.h>
 #include <linux/workqueue.h>
+#include <linux/kref.h>
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -495,6 +496,9 @@ struct drm_sched_backend_ops {
  *                 waits on this wait queue until all the scheduled jobs are
  *                 finished.
  * @job_id_count: used to assign unique id to the each job.
+ * @jobs_remaining: submitted jobs take a refcount of the scheduler to prevent 
it
+ *                 from terminating before the last job has been removed from
+ *                 @pending_list.
  * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
  * @work_run_job: work which calls run_job op of each scheduler.
@@ -525,6 +529,7 @@ struct drm_gpu_scheduler {
        struct drm_sched_rq             **sched_rq;
        wait_queue_head_t               job_scheduled;
        atomic64_t                      job_id_count;
+       struct kref                     jobs_remaining;
        struct workqueue_struct         *submit_wq;
        struct workqueue_struct         *timeout_wq;
        struct work_struct              work_run_job;
-- 
2.46.0

[RFC PATCH] drm/sched: Fix teardown leaks with refcounting

Reply via email to