When a process forks, the child inherits the open DRM file descriptor
and, with it, the scheduler entity behind that descriptor. Track each
user process of an entity by TGID so that, when the parent is killed
(e.g., by SIGKILL), only the parent's jobs are canceled. The child can
then continue submitting jobs to the same entity through its own user
entry in the entity's user list.
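
For reference, a minimal userspace sketch of the scenario this patch
addresses (hypothetical render-node path; real job submission is
replaced by a sleep() stand-in):

  #include <fcntl.h>
  #include <sys/wait.h>
  #include <unistd.h>

  int main(void)
  {
      /* The DRM fd, and the scheduler entity behind it, is
       * inherited by the child across fork(). */
      int fd = open("/dev/dri/renderD128", O_RDWR);

      if (fd < 0)
          return 1;

      if (fork() == 0) {
          /* Child: submits through the same entity as the parent.
           * If the parent is SIGKILLed, only jobs whose owner_tgid
           * matches the parent are canceled. */
          sleep(1); /* stand-in for continued job submission */
          _exit(0);
      }

      wait(NULL);
      close(fd);
      return 0;
  }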

Signed-off-by: Emily Deng <[email protected]>
Signed-off-by: Bingxi Guo <[email protected]>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 133 +++++++++++++++++++----
 include/drm/gpu_scheduler.h              |  22 ++++
 2 files changed, 135 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 8867b95ab089..508a0629b839 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -110,6 +110,9 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
        atomic_set(&entity->fence_seq, 0);
        entity->fence_context = dma_fence_context_alloc(2);
 
+       INIT_LIST_HEAD(&entity->users);
+       spin_lock_init(&entity->users_lock);
+
        return 0;
 }
 EXPORT_SYMBOL(drm_sched_entity_init);
@@ -228,10 +231,24 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
 {
        struct drm_sched_job *job;
        struct dma_fence *prev;
+       struct drm_sched_entity_user *user;
+       struct spsc_queue temp_queue;
+       pid_t my_tgid = task_tgid_nr(current);
 
        if (!entity->rq)
                return;
 
+       /* Mark current process as exited */
+       spin_lock(&entity->users_lock);
+       list_for_each_entry(user, &entity->users, list) {
+               if (user->tgid == my_tgid) {
+                       atomic_set(&user->exited, 1);
+                       break;
+               }
+       }
+       spin_unlock(&entity->users_lock);
+
+       /* Temporarily stop entity to prevent new jobs */
        spin_lock(&entity->lock);
        entity->stopped = true;
        drm_sched_rq_remove_entity(entity->rq, entity);
@@ -240,27 +257,59 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
        /* Make sure this entity is not used by the scheduler at the moment */
        wait_for_completion(&entity->entity_idle);
 
-       /* The entity is guaranteed to not be used by the scheduler */
+       /*
+        * The entity is stopped and idle. No new jobs can be pushed.
+        * Scan the queue and separate jobs:
+        * - Jobs from this process: kill immediately
+        * - Jobs from other processes: keep in temp_queue
+        */
+       spsc_queue_init(&temp_queue);
        prev = rcu_dereference_check(entity->last_scheduled, true);
        dma_fence_get(prev);
+
        while ((job = drm_sched_entity_queue_pop(entity))) {
-               struct drm_sched_fence *s_fence = job->s_fence;
-
-               dma_fence_get(&s_fence->finished);
-               if (!prev ||
-                   dma_fence_add_callback(prev, &job->finish_cb,
-                                          drm_sched_entity_kill_jobs_cb)) {
-                       /*
-                        * Adding callback above failed.
-                        * dma_fence_put() checks for NULL.
-                        */
-                       dma_fence_put(prev);
-                       drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
+               if (job->owner_tgid == my_tgid) {
+                       /* Kill this job */
+                       struct drm_sched_fence *s_fence = job->s_fence;
+
+                       dma_fence_get(&s_fence->finished);
+                       if (!prev ||
+                           dma_fence_add_callback(prev, &job->finish_cb,
+                                                  drm_sched_entity_kill_jobs_cb)) {
+                               dma_fence_put(prev);
+                               drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
+                       }
+                       prev = &s_fence->finished;
+               } else {
+                       /* Keep jobs from other processes */
+                       spsc_queue_push(&temp_queue, &job->queue_node);
                }
+       }
 
-               prev = &s_fence->finished;
+       /* Put back jobs from other processes */
+       while (true) {
+               struct spsc_node *node = spsc_queue_pop(&temp_queue);
+               if (!node)
+                       break;
+               spsc_queue_push(&entity->job_queue, node);
        }
+
        dma_fence_put(prev);
+
+       /* Check if there are other active users and restore entity if needed */
+       spin_lock(&entity->users_lock);
+       list_for_each_entry(user, &entity->users, list) {
+               if (!atomic_read(&user->exited)) {
+                       /* Found active user, restore entity */
+                       spin_unlock(&entity->users_lock);
+                       spin_lock(&entity->lock);
+                       entity->stopped = false;
+                       drm_sched_rq_add_entity(entity->rq, entity);
+                       spin_unlock(&entity->lock);
+                       return;
+               }
+       }
+       spin_unlock(&entity->users_lock);
 }
 
 /**
@@ -323,6 +372,8 @@ EXPORT_SYMBOL(drm_sched_entity_flush);
  */
 void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
+       struct drm_sched_entity_user *user, *tmp;
+
        /*
         * If consumption of existing IBs wasn't completed. Forcefully remove
         * them here. Also makes sure that the scheduler won't touch this entity
@@ -338,6 +389,14 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 
        dma_fence_put(rcu_dereference_check(entity->last_scheduled, true));
        RCU_INIT_POINTER(entity->last_scheduled, NULL);
+
+       /* Clean up user list */
+       spin_lock(&entity->users_lock);
+       list_for_each_entry_safe(user, tmp, &entity->users, list) {
+               list_del_rcu(&user->list);
+               kfree_rcu(user, rcu);
+       }
+       spin_unlock(&entity->users_lock);
 }
 EXPORT_SYMBOL(drm_sched_entity_fini);
 
@@ -567,9 +626,40 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 {
        struct drm_sched_entity *entity = sched_job->entity;
+       struct drm_sched_entity_user *user, *found = NULL;
+       pid_t my_tgid = task_tgid_nr(current);
        bool first;
        ktime_t submit_ts;
 
+       /* Check if entity is stopped and reject directly */
+       if (entity->stopped)
+               goto error;
+
+       /* Entity is running, check user list */
+       spin_lock(&entity->users_lock);
+       list_for_each_entry(user, &entity->users, list) {
+               if (user->tgid == my_tgid) {
+                       found = user;
+                       /* Reject if this user has exited */
+                       if (atomic_read(&user->exited)) {
+                               spin_unlock(&entity->users_lock);
+                               goto error;
+                       }
+                       break;
+               }
+       }
+
+       /* If not found, create new user (fork case) */
+       if (!found) {
+               found = kzalloc(sizeof(*found), GFP_ATOMIC);
+               if (found) {
+                       found->tgid = my_tgid;
+                       atomic_set(&found->exited, 0);
+                       list_add_tail(&found->list, &entity->users);
+               }
+       }
+       spin_unlock(&entity->users_lock);
+
        trace_drm_sched_job_queue(sched_job, entity);
 
        if (trace_drm_sched_job_add_dep_enabled()) {
@@ -582,6 +672,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
        atomic_inc(entity->rq->sched->score);
        WRITE_ONCE(entity->last_user, current->group_leader);
 
+       /* Record owner TGID */
+       sched_job->owner_tgid = my_tgid;
+
        /*
         * After the sched_job is pushed into the entity queue, it may be
         * completed and freed up at any time. We can no longer access it.
@@ -597,12 +690,6 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 
                /* Add the entity to the run queue */
                spin_lock(&entity->lock);
-               if (entity->stopped) {
-                       spin_unlock(&entity->lock);
-
-                       DRM_ERROR("Trying to push to a killed entity\n");
-                       return;
-               }
 
                rq = entity->rq;
                sched = rq->sched;
@@ -618,5 +705,11 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 
                drm_sched_wakeup(sched);
        }
+       return;
+
+error:
+       dma_fence_set_error(&sched_job->s_fence->finished, -EPERM);
+       drm_sched_fence_scheduled(sched_job->s_fence, NULL);
+       drm_sched_fence_finished(sched_job->s_fence, -EPERM);
 }
 EXPORT_SYMBOL(drm_sched_entity_push_job);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index e62a7214e052..45e066596405 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -59,6 +59,16 @@ struct drm_sched_rq;
 
 struct drm_file;
 
+/**
+ * struct drm_sched_entity_user - Per-process entity user tracking
+ */
+struct drm_sched_entity_user {
+       struct list_head                list;
+       struct rcu_head                 rcu;
+       pid_t                           tgid;
+       atomic_t                        exited;
+};
+
 /* These are often used as an (initial) index
  * to an array, and as such should start at 0.
  */
@@ -233,6 +243,13 @@ struct drm_sched_entity {
         */
        struct rb_node                  rb_tree_node;
 
+       /**
+        * @users:
+        *
+        * List of processes using this entity (for fork support)
+        */
+       struct list_head                users;
+       spinlock_t                      users_lock;
 };
 
 /**
@@ -385,6 +402,11 @@ struct drm_sched_job {
         * drm_sched_job_add_implicit_dependencies().
         */
        struct xarray                   dependencies;
+
+       /**
+        * @owner_tgid: TGID of the process that submitted this job
+        */
+       pid_t                           owner_tgid;
 };
 
 /**
-- 
2.43.0
