Currently an unordered workqueue is used for the DRM scheduler which means
its concurrency is externally managed, and given there is one scheduler
instance per userspace queue, that means workqueue management logic is
within its rights to spawn many kernel threads to submit their respective
jobs.

Problem there is that all run job callbacks are serialized on the device
global mutex, making the potential thread storm just causing lock
contention.

If we add a separate ordered workqueue for the DRM scheduler integration
we can avoid this problem, since the ordered property directly expresses
the nature of the submission backend implementation.

And considering the other user of this workqueue, the free job callback,
which is not globally serialized in this manner so could be thought to
potentially regress with this change, it should not be the case since
commit
a58f317c1ca0 ("drm/sched: Free all finished jobs at once")
made the DRM scheduler handle the cleanup of finished jobs more promptly.

Signed-off-by: Tvrtko Ursulin <[email protected]>
Cc: Boris Brezillon <[email protected]>
Cc: Liviu Dudau <[email protected]>
Cc: Steven Price <[email protected]>
---
v2:
 * Actually create an unordered wq.
 * Put back WQ_MEM_RECLAIM to sched->wq.
---
 drivers/gpu/drm/panthor/panthor_sched.c | 26 +++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c 
b/drivers/gpu/drm/panthor/panthor_sched.c
index 2bee1c92fb9e..f4dfd82ad8a8 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -147,13 +147,11 @@ struct panthor_scheduler {
        struct panthor_device *ptdev;
 
        /**
-        * @wq: Workqueue used by our internal scheduler logic and
-        * drm_gpu_scheduler.
+        * @wq: Workqueue used by our internal scheduler logic.
         *
         * Used for the scheduler tick, group update or other kind of FW
         * event processing that can't be handled in the threaded interrupt
-        * path. Also passed to the drm_gpu_scheduler instances embedded
-        * in panthor_queue.
+        * path.
         */
        struct workqueue_struct *wq;
 
@@ -166,6 +164,14 @@ struct panthor_scheduler {
         */
        struct workqueue_struct *heap_alloc_wq;
 
+       /**
+        * @sched_wq: Workqueue used for the DRM scheduler.
+        *
+        * Workqueue used for drm_gpu_scheduler instances embedded in
+        * panthor_queue.
+        */
+       struct workqueue_struct *sched_wq;
+
        /** @tick_work: Work executed on a scheduling tick. */
        struct delayed_work tick_work;
 
@@ -3488,7 +3494,7 @@ group_create_queue(struct panthor_group *group,
 {
        struct drm_sched_init_args sched_args = {
                .ops = &panthor_queue_sched_ops,
-               .submit_wq = group->ptdev->scheduler->wq,
+               .submit_wq = group->ptdev->scheduler->sched_wq,
                /*
                 * The credit limit argument tells us the total number of
                 * instructions across all CS slots in the ringbuffer, with
@@ -4078,6 +4084,9 @@ static void panthor_sched_fini(struct drm_device *ddev, 
void *res)
        if (sched->heap_alloc_wq)
                destroy_workqueue(sched->heap_alloc_wq);
 
+       if (sched->sched_wq)
+               destroy_workqueue(sched->sched_wq);
+
        for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
                drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
                drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
@@ -4168,12 +4177,13 @@ int panthor_sched_init(struct panthor_device *ptdev)
         * allocate memory, and fail the tiling job if none of these
         * countermeasures worked.
         *
-        * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
-        * system is running out of memory.
+        * Set WQ_MEM_RECLAIM on sched->wq and wq->sched_wq to unblock the
+        * situation when the system is running out of memory.
         */
        sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", 
WQ_UNBOUND, 0);
        sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | 
WQ_UNBOUND, 0);
-       if (!sched->wq || !sched->heap_alloc_wq) {
+       sched->sched_wq = alloc_ordered_workqueue("panthor-drm-sched", 
WQ_MEM_RECLAIM);
+       if (!sched->wq || !sched->heap_alloc_wq || !sched->sched_wq) {
                panthor_sched_fini(&ptdev->base, sched);
                drm_err(&ptdev->base, "Failed to allocate the workqueues");
                return -ENOMEM;
-- 
2.54.0

Reply via email to