On Fri Dec 19, 2025 at 2:53 PM CET, Tvrtko Ursulin wrote:
> diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c
> index 2d1f579d8352..2fde309d02a6 100644
> --- a/drivers/gpu/drm/scheduler/sched_rq.c
> +++ b/drivers/gpu/drm/scheduler/sched_rq.c
> @@ -16,6 +16,35 @@ drm_sched_entity_compare_before(struct rb_node *a, const struct rb_node *b)
>       return ktime_before(ea->oldest_job_waiting, eb->oldest_job_waiting);
>  }
>  
> +static void drm_sched_rq_update_prio(struct drm_sched_rq *rq)
> +{
> +     enum drm_sched_priority prio = DRM_SCHED_PRIORITY_INVALID;
> +     struct rb_node *rb;
> +
> +     lockdep_assert_held(&rq->lock);
> +
> +     rb = rb_first_cached(&rq->rb_tree_root);
> +     if (rb) {
> +             struct drm_sched_entity *entity =
> +                     rb_entry(rb, typeof(*entity), rb_tree_node);
> +
> +             /*
> +              * The normal locking order is entity then run-queue, so taking
> +              * the entity lock here would be a locking inversion whenever
> +              * the current head of the run-queue differs from the entity we
> +              * already have locked. The unlocked read is fine though: if the
> +              * priority has just changed it is no big deal for our
> +              * algorithm, only a transient state reachable by drivers which
> +              * expose a userspace dynamic priority change API, equal in
> +              * effect to the priority change becoming visible a few
> +              * instructions later.
> +              */
> +             prio = READ_ONCE(entity->priority);

I still think that we should address the root cause of the lock inversion
problem instead.

I previously mentioned that I could take a look at this at the beginning of
this year, which I can do soon.

In the meantime, can you please explain what the problem is in this specific
case? This function is only ever called from drm_sched_rq_remove_fifo_locked()
and drm_sched_rq_update_fifo_locked(), which already seem to hold both locks.
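
For reference, a sketch of the locking I would expect on the paths reaching
these helpers (paraphrased, not verbatim from this series; the entity->lock
field name and the submit_ts argument are from my reading of the current
scheduler code and may differ here):

	/* Documented order: entity->lock first, then rq->lock. */
	spin_lock(&entity->lock);
	spin_lock(&entity->rq->lock);

	/*
	 * drm_sched_rq_update_fifo_locked() asserts both locks, so
	 * drm_sched_rq_update_prio() also runs with both held.
	 */
	drm_sched_rq_update_fifo_locked(entity, entity->rq, submit_ts);

	spin_unlock(&entity->rq->lock);
	spin_unlock(&entity->lock);

So at least for the entity being (re-)inserted, both locks are held as far as
I can tell, hence my question.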

> +     }
> +
> +     rq->head_prio = prio;
> +}
> +
>  static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
>                                           struct drm_sched_rq *rq)
>  {
> @@ -25,6 +54,7 @@ static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity,
>       if (!RB_EMPTY_NODE(&entity->rb_tree_node)) {
>               rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root);
>               RB_CLEAR_NODE(&entity->rb_tree_node);
> +             drm_sched_rq_update_prio(rq);
>       }
>  }
>  
> @@ -46,6 +76,7 @@ static void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity,
>  
>       rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root,
>                     drm_sched_entity_compare_before);
> +     drm_sched_rq_update_prio(rq);
>  }
