On Mon, Nov 17, 2025 at 10:41:52PM -0800, Niranjana Vishwanathapura wrote:
> On Thu, Oct 16, 2025 at 01:48:24PM -0700, Matthew Brost wrote:
> > Deregistering queues in the TDR introduces unnecessary complexity,
> > requiring reference counting tricks to function correctly. All that's
> > needed in the TDR is to kick the queue off the hardware, which is
> > achieved by disabling scheduling. Queue deregistration should be handled
> > in a single, well-defined point in the cleanup path, tied to the queue's
> > reference count.
> >
>
> Overall looks good to me.
> But it would help if the commit text described why this extra reference
> taking was there before for LR jobs and why it is not needed now.
>

This patch isn't related to LR jobs; the following patch is. Deregistering
queues in the TDR was never required, and this patch removes that flow.
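To make that concrete, the timeout handler after this patch boils down to
roughly the following (a hand-simplified sketch, not the literal code;
error handling, the wedged case, and the GT-reset fallback are elided):

	/* TDR after this patch: kick the queue off the hardware, ban it. */
	disable_scheduling(q, true);		/* remove queue from hardware */
	set_exec_queue_banned(q);		/* queue never runs again */
	xe_sched_job_set_error(job, err);	/* fail outstanding jobs */

	/*
	 * Deliberately absent: __deregister_exec_queue() and the
	 * set_exec_queue_extra_ref()/xe_exec_queue_get() pair that kept
	 * the queue alive across an in-flight deregister. Deregistration
	 * is now issued only from the refcount-driven cleanup path, so
	 * the TDR no longer needs to own an extra reference.
	 */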
Matt

> Niranjana
>
> > Signed-off-by: Matthew Brost <[email protected]>
> > ---
> >  drivers/gpu/drm/xe/xe_guc_submit.c | 57 +++---------------------------
> >  1 file changed, 5 insertions(+), 52 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
> > index 680696efc434..ab0f1a2d4871 100644
> > --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> > +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> > @@ -69,9 +69,8 @@ exec_queue_to_guc(struct xe_exec_queue *q)
> >  #define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
> >  #define EXEC_QUEUE_STATE_BANNED			(1 << 9)
> >  #define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
> > -#define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)
> > -#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 12)
> > -#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT	(1 << 13)
> > +#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 11)
> > +#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT	(1 << 12)
> >
> >  static bool exec_queue_registered(struct xe_exec_queue *q)
> >  {
> > @@ -218,21 +217,6 @@ static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
> >  	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
> >  }
> >
> > -static bool exec_queue_extra_ref(struct xe_exec_queue *q)
> > -{
> > -	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
> > -}
> > -
> > -static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
> > -{
> > -	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
> > -}
> > -
> > -static void clear_exec_queue_extra_ref(struct xe_exec_queue *q)
> > -{
> > -	atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
> > -}
> > -
> >  static bool exec_queue_pending_resume(struct xe_exec_queue *q)
> >  {
> >  	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
> > @@ -1190,25 +1174,6 @@ static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
> >  			   G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
> >  }
> >
> > -static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
> > -{
> > -	u32 action[] = {
> > -		XE_GUC_ACTION_DEREGISTER_CONTEXT,
> > -		q->guc->id,
> > -	};
> > -
> > -	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
> > -	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
> > -	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
> > -	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
> > -
> > -	set_exec_queue_destroyed(q);
> > -	trace_xe_exec_queue_deregister(q);
> > -
> > -	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
> > -		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
> > -}
> > -
> >  static enum drm_gpu_sched_stat
> >  guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
> >  {
> > @@ -1326,8 +1291,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
> >  			xe_devcoredump(q, job,
> >  				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
> >  				       q->guc->id, ret, xe_guc_read_stopped(guc));
> > -			set_exec_queue_extra_ref(q);
> > -			xe_exec_queue_get(q);	/* GT reset owns this */
> >  			set_exec_queue_banned(q);
> >  			xe_gt_reset_async(q->gt);
> >  			xe_sched_tdr_queue_imm(sched);
> > @@ -1380,13 +1343,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
> >  		}
> >  	}
> >
> > -	/* Finish cleaning up exec queue via deregister */
> >  	set_exec_queue_banned(q);
> > -	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
> > -		set_exec_queue_extra_ref(q);
> > -		xe_exec_queue_get(q);
> > -		__deregister_exec_queue(guc, q);
> > -	}
> >
> >  	/* Mark all outstanding jobs as bad, thus completing them */
> >  	xe_sched_job_set_error(job, err);
> > @@ -1928,7 +1885,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
> >
> >  	/* Clean up lost G2H + reset engine state */
> >  	if (exec_queue_registered(q)) {
> > -		if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
> > +		if (xe_exec_queue_is_lr(q))
> >  			xe_exec_queue_put(q);
> >  		else if (exec_queue_destroyed(q))
> >  			__guc_exec_queue_destroy(guc, q);
> > @@ -2062,11 +2019,7 @@ static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
> >
> >  	if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
> >  		clear_exec_queue_destroyed(q);
> > -		if (exec_queue_extra_ref(q))
> > -			xe_exec_queue_put(q);
> > -		else
> > -			q->guc->needs_cleanup = true;
> > -		clear_exec_queue_extra_ref(q);
> > +		q->guc->needs_cleanup = true;
> >  		xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
> >  			  q->guc->id);
> >  	}
> > @@ -2483,7 +2436,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
> >
> >  	clear_exec_queue_registered(q);
> >
> > -	if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
> > +	if (xe_exec_queue_is_lr(q))
> >  		xe_exec_queue_put(q);
> >  	else
> >  		__guc_exec_queue_destroy(guc, q);
> > --
> > 2.34.1
> >
