Re: [Intel-gfx] [PATCH] drm/i915/gt: Prune 'inline' from execlists
Chris Wilson writes: > Remove the extraneous inlines. The only split by the compiler that > looked dubious was execlists_schedule_out, so push the code around > slightly to move all the work into the out-of-line function. > > In a normal build, bloat-o-meter shows that only the > execlists_schedule_out is contentious: > > add/remove: 1/0 grow/shrink: 0/2 up/down: 803/-1532 (-729) > Function old new delta > __execlists_schedule_out - 803 +803 > execlists_submission_tasklet 6488 5766 -722 > execlists_reset_csb.constprop 1587 777 -810 > Total: Before=1605815, After=1605086, chg -0.05% > > Signed-off-by: Chris Wilson > Cc: Jani Nikula Reviewed-by: Mika Kuoppala > --- > .../drm/i915/gt/intel_execlists_submission.c | 63 +-- > 1 file changed, 29 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index d7d5a58990bb..33c7495b12b1 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -230,8 +230,7 @@ active_request(const struct intel_timeline * const tl, > struct i915_request *rq) > return __active_request(tl, rq, 0); > } > > -static inline void > -ring_set_paused(const struct intel_engine_cs *engine, int state) > +static void ring_set_paused(const struct intel_engine_cs *engine, int state) > { > /* >* We inspect HWS_PREEMPT with a semaphore inside > @@ -244,12 +243,12 @@ ring_set_paused(const struct intel_engine_cs *engine, > int state) > wmb(); > } > > -static inline struct i915_priolist *to_priolist(struct rb_node *rb) > +static struct i915_priolist *to_priolist(struct rb_node *rb) > { > return rb_entry(rb, struct i915_priolist, node); > } > > -static inline int rq_prio(const struct i915_request *rq) > +static int rq_prio(const struct i915_request *rq) > { > return READ_ONCE(rq->sched.attr.priority); > } > @@ -299,8 +298,8 @@ static int virtual_prio(const struct > intel_engine_execlists *el) 
> return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; > } > > -static inline bool need_preempt(const struct intel_engine_cs *engine, > - const struct i915_request *rq) > +static bool need_preempt(const struct intel_engine_cs *engine, > + const struct i915_request *rq) > { > int last_prio; > > @@ -351,7 +350,7 @@ static inline bool need_preempt(const struct > intel_engine_cs *engine, > queue_prio(&engine->execlists)) > last_prio; > } > > -__maybe_unused static inline bool > +__maybe_unused static bool > assert_priority_queue(const struct i915_request *prev, > const struct i915_request *next) > { > @@ -418,7 +417,7 @@ execlists_unwind_incomplete_requests(struct > intel_engine_execlists *execlists) > return __unwind_incomplete_requests(engine); > } > > -static inline void > +static void > execlists_context_status_change(struct i915_request *rq, unsigned long > status) > { > /* > @@ -503,7 +502,7 @@ static void reset_active(struct i915_request *rq, > ce->lrc.lrca = lrc_update_regs(ce, engine, head); > } > > -static inline struct intel_engine_cs * > +static struct intel_engine_cs * > __execlists_schedule_in(struct i915_request *rq) > { > struct intel_engine_cs * const engine = rq->engine; > @@ -549,7 +548,7 @@ __execlists_schedule_in(struct i915_request *rq) > return engine; > } > > -static inline void execlists_schedule_in(struct i915_request *rq, int idx) > +static void execlists_schedule_in(struct i915_request *rq, int idx) > { > struct intel_context * const ce = rq->context; > struct intel_engine_cs *old; > @@ -608,9 +607,9 @@ static void kick_siblings(struct i915_request *rq, struct > intel_context *ce) > tasklet_hi_schedule(&ve->base.execlists.tasklet); > } > > -static inline void __execlists_schedule_out(struct i915_request *rq) > +static void __execlists_schedule_out(struct i915_request * const rq, > + struct intel_context * const ce) > { > - struct intel_context * const ce = rq->context; > struct intel_engine_cs * const engine = rq->engine; > unsigned int ccid; > 
> @@ -621,6 +620,7 @@ static inline void __execlists_schedule_out(struct > i915_request *rq) >*/ > > CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); > + GEM_BUG_ON(ce->inflight != engine); > > if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) > lrc_check_regs(ce, engine, "after"); > @@ -660,10 +660,12 @@ static inline void __execlists_schedule_out(struct > i915_request *rq) >*/ > if (ce->engine != engine) > kick_siblings(rq, ce); > + > + WRITE_ONCE(ce->inflight, NULL); > +
[Intel-gfx] [PATCH] drm/i915/gt: Prune 'inline' from execlists
Remove the extraneous inlines. The only split by the compiler that looked dubious was execlists_schedule_out, so push the code around slightly to move all the work into the out-of-line function. In a normal build, bloat-o-meter shows that only the execlists_schedule_out is contentious: add/remove: 1/0 grow/shrink: 0/2 up/down: 803/-1532 (-729) Function old new delta __execlists_schedule_out - 803 +803 execlists_submission_tasklet 6488 5766 -722 execlists_reset_csb.constprop 1587 777 -810 Total: Before=1605815, After=1605086, chg -0.05% Signed-off-by: Chris Wilson Cc: Jani Nikula --- .../drm/i915/gt/intel_execlists_submission.c | 63 +-- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index d7d5a58990bb..33c7495b12b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -230,8 +230,7 @@ active_request(const struct intel_timeline * const tl, struct i915_request *rq) return __active_request(tl, rq, 0); } -static inline void -ring_set_paused(const struct intel_engine_cs *engine, int state) +static void ring_set_paused(const struct intel_engine_cs *engine, int state) { /* * We inspect HWS_PREEMPT with a semaphore inside @@ -244,12 +243,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int state) wmb(); } -static inline struct i915_priolist *to_priolist(struct rb_node *rb) +static struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); } -static inline int rq_prio(const struct i915_request *rq) +static int rq_prio(const struct i915_request *rq) { return READ_ONCE(rq->sched.attr.priority); } @@ -299,8 +298,8 @@ static int virtual_prio(const struct intel_engine_execlists *el) return rb ? 
rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; } -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq) +static bool need_preempt(const struct intel_engine_cs *engine, +const struct i915_request *rq) { int last_prio; @@ -351,7 +350,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, queue_prio(&engine->execlists)) > last_prio; } -__maybe_unused static inline bool +__maybe_unused static bool assert_priority_queue(const struct i915_request *prev, const struct i915_request *next) { @@ -418,7 +417,7 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) return __unwind_incomplete_requests(engine); } -static inline void +static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { /* @@ -503,7 +502,7 @@ static void reset_active(struct i915_request *rq, ce->lrc.lrca = lrc_update_regs(ce, engine, head); } -static inline struct intel_engine_cs * +static struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { struct intel_engine_cs * const engine = rq->engine; @@ -549,7 +548,7 @@ __execlists_schedule_in(struct i915_request *rq) return engine; } -static inline void execlists_schedule_in(struct i915_request *rq, int idx) +static void execlists_schedule_in(struct i915_request *rq, int idx) { struct intel_context * const ce = rq->context; struct intel_engine_cs *old; @@ -608,9 +607,9 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_hi_schedule(&ve->base.execlists.tasklet); } -static inline void __execlists_schedule_out(struct i915_request *rq) +static void __execlists_schedule_out(struct i915_request * const rq, +struct intel_context * const ce) { - struct intel_context * const ce = rq->context; struct intel_engine_cs * const engine = rq->engine; unsigned int ccid; @@ -621,6 +620,7 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ CE_TRACE(ce, "schedule-out, ccid:%x\n", 
ce->lrc.ccid); + GEM_BUG_ON(ce->inflight != engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) lrc_check_regs(ce, engine, "after"); @@ -660,10 +660,12 @@ static inline void __execlists_schedule_out(struct i915_request *rq) */ if (ce->engine != engine) kick_siblings(rq, ce); + + WRITE_ONCE(ce->inflight, NULL); + intel_context_put(ce); } -static inline void -execlists_schedule_out(struct i915_request *rq) +static inline void execlists_schedule_out(struct i915_request *rq) { struct intel_context * const ce = rq->context; @@ -671,12 +673,8