Re: [Intel-gfx] [PATCH] drm/i915/gt: Prune 'inline' from execlists

2021-01-14 Thread Mika Kuoppala
Chris Wilson  writes:

> Remove the extraneous inlines. The only split by the compiler that
> looked dubious was execlists_schedule_out, so push the code around
> slightly to move all the work into the out-of-line function.
>
> In a normal build, bloat-o-meter shows that only the
> execlists_schedule_out is contentious:
>
> add/remove: 1/0 grow/shrink: 0/2 up/down: 803/-1532 (-729)
> Function old new   delta
> __execlists_schedule_out   - 803+803
> execlists_submission_tasklet64885766-722
> execlists_reset_csb.constprop   1587 777-810
> Total: Before=1605815, After=1605086, chg -0.05%
>
> Signed-off-by: Chris Wilson 
> Cc: Jani Nikula 

Reviewed-by: Mika Kuoppala 

> ---
>  .../drm/i915/gt/intel_execlists_submission.c  | 63 +--
>  1 file changed, 29 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
> b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index d7d5a58990bb..33c7495b12b1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -230,8 +230,7 @@ active_request(const struct intel_timeline * const tl, 
> struct i915_request *rq)
>   return __active_request(tl, rq, 0);
>  }
>  
> -static inline void
> -ring_set_paused(const struct intel_engine_cs *engine, int state)
> +static void ring_set_paused(const struct intel_engine_cs *engine, int state)
>  {
>   /*
>* We inspect HWS_PREEMPT with a semaphore inside
> @@ -244,12 +243,12 @@ ring_set_paused(const struct intel_engine_cs *engine, 
> int state)
>   wmb();
>  }
>  
> -static inline struct i915_priolist *to_priolist(struct rb_node *rb)
> +static struct i915_priolist *to_priolist(struct rb_node *rb)
>  {
>   return rb_entry(rb, struct i915_priolist, node);
>  }
>  
> -static inline int rq_prio(const struct i915_request *rq)
> +static int rq_prio(const struct i915_request *rq)
>  {
>   return READ_ONCE(rq->sched.attr.priority);
>  }
> @@ -299,8 +298,8 @@ static int virtual_prio(const struct 
> intel_engine_execlists *el)
>   return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
>  }
>  
> -static inline bool need_preempt(const struct intel_engine_cs *engine,
> - const struct i915_request *rq)
> +static bool need_preempt(const struct intel_engine_cs *engine,
> +  const struct i915_request *rq)
>  {
>   int last_prio;
>  
> @@ -351,7 +350,7 @@ static inline bool need_preempt(const struct 
> intel_engine_cs *engine,
>  queue_prio(>execlists)) > last_prio;
>  }
>  
> -__maybe_unused static inline bool
> +__maybe_unused static bool
>  assert_priority_queue(const struct i915_request *prev,
> const struct i915_request *next)
>  {
> @@ -418,7 +417,7 @@ execlists_unwind_incomplete_requests(struct 
> intel_engine_execlists *execlists)
>   return __unwind_incomplete_requests(engine);
>  }
>  
> -static inline void
> +static void
>  execlists_context_status_change(struct i915_request *rq, unsigned long 
> status)
>  {
>   /*
> @@ -503,7 +502,7 @@ static void reset_active(struct i915_request *rq,
>   ce->lrc.lrca = lrc_update_regs(ce, engine, head);
>  }
>  
> -static inline struct intel_engine_cs *
> +static struct intel_engine_cs *
>  __execlists_schedule_in(struct i915_request *rq)
>  {
>   struct intel_engine_cs * const engine = rq->engine;
> @@ -549,7 +548,7 @@ __execlists_schedule_in(struct i915_request *rq)
>   return engine;
>  }
>  
> -static inline void execlists_schedule_in(struct i915_request *rq, int idx)
> +static void execlists_schedule_in(struct i915_request *rq, int idx)
>  {
>   struct intel_context * const ce = rq->context;
>   struct intel_engine_cs *old;
> @@ -608,9 +607,9 @@ static void kick_siblings(struct i915_request *rq, struct 
> intel_context *ce)
>   tasklet_hi_schedule(>base.execlists.tasklet);
>  }
>  
> -static inline void __execlists_schedule_out(struct i915_request *rq)
> +static void __execlists_schedule_out(struct i915_request * const rq,
> +  struct intel_context * const ce)
>  {
> - struct intel_context * const ce = rq->context;
>   struct intel_engine_cs * const engine = rq->engine;
>   unsigned int ccid;
>  
> @@ -621,6 +620,7 @@ static inline void __execlists_schedule_out(struct 
> i915_request *rq)
>*/
>  
>   CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
> + GEM_BUG_ON(ce->inflight != engine);
>  
>   if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
>   lrc_check_regs(ce, engine, "after");
> @@ -660,10 +660,12 @@ static inline void __execlists_schedule_out(struct 
> i915_request *rq)
>*/
>   if (ce->engine != engine)
>   kick_siblings(rq, ce);
> +
> + WRITE_ONCE(ce->inflight, NULL);
> + 

[Intel-gfx] [PATCH] drm/i915/gt: Prune 'inline' from execlists

2021-01-13 Thread Chris Wilson
Remove the extraneous inlines. The only split by the compiler that
looked dubious was execlists_schedule_out, so push the code around
slightly to move all the work into the out-of-line function.

In a normal build, bloat-o-meter shows that only the
execlists_schedule_out is contentious:

add/remove: 1/0 grow/shrink: 0/2 up/down: 803/-1532 (-729)
Function old new   delta
__execlists_schedule_out   - 803+803
execlists_submission_tasklet64885766-722
execlists_reset_csb.constprop   1587 777-810
Total: Before=1605815, After=1605086, chg -0.05%

Signed-off-by: Chris Wilson 
Cc: Jani Nikula 
---
 .../drm/i915/gt/intel_execlists_submission.c  | 63 +--
 1 file changed, 29 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c 
b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index d7d5a58990bb..33c7495b12b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -230,8 +230,7 @@ active_request(const struct intel_timeline * const tl, 
struct i915_request *rq)
return __active_request(tl, rq, 0);
 }
 
-static inline void
-ring_set_paused(const struct intel_engine_cs *engine, int state)
+static void ring_set_paused(const struct intel_engine_cs *engine, int state)
 {
/*
 * We inspect HWS_PREEMPT with a semaphore inside
@@ -244,12 +243,12 @@ ring_set_paused(const struct intel_engine_cs *engine, int 
state)
wmb();
 }
 
-static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+static struct i915_priolist *to_priolist(struct rb_node *rb)
 {
return rb_entry(rb, struct i915_priolist, node);
 }
 
-static inline int rq_prio(const struct i915_request *rq)
+static int rq_prio(const struct i915_request *rq)
 {
return READ_ONCE(rq->sched.attr.priority);
 }
@@ -299,8 +298,8 @@ static int virtual_prio(const struct intel_engine_execlists 
*el)
return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN;
 }
 
-static inline bool need_preempt(const struct intel_engine_cs *engine,
-   const struct i915_request *rq)
+static bool need_preempt(const struct intel_engine_cs *engine,
+const struct i915_request *rq)
 {
int last_prio;
 
@@ -351,7 +350,7 @@ static inline bool need_preempt(const struct 
intel_engine_cs *engine,
   queue_prio(>execlists)) > last_prio;
 }
 
-__maybe_unused static inline bool
+__maybe_unused static bool
 assert_priority_queue(const struct i915_request *prev,
  const struct i915_request *next)
 {
@@ -418,7 +417,7 @@ execlists_unwind_incomplete_requests(struct 
intel_engine_execlists *execlists)
return __unwind_incomplete_requests(engine);
 }
 
-static inline void
+static void
 execlists_context_status_change(struct i915_request *rq, unsigned long status)
 {
/*
@@ -503,7 +502,7 @@ static void reset_active(struct i915_request *rq,
ce->lrc.lrca = lrc_update_regs(ce, engine, head);
 }
 
-static inline struct intel_engine_cs *
+static struct intel_engine_cs *
 __execlists_schedule_in(struct i915_request *rq)
 {
struct intel_engine_cs * const engine = rq->engine;
@@ -549,7 +548,7 @@ __execlists_schedule_in(struct i915_request *rq)
return engine;
 }
 
-static inline void execlists_schedule_in(struct i915_request *rq, int idx)
+static void execlists_schedule_in(struct i915_request *rq, int idx)
 {
struct intel_context * const ce = rq->context;
struct intel_engine_cs *old;
@@ -608,9 +607,9 @@ static void kick_siblings(struct i915_request *rq, struct 
intel_context *ce)
tasklet_hi_schedule(>base.execlists.tasklet);
 }
 
-static inline void __execlists_schedule_out(struct i915_request *rq)
+static void __execlists_schedule_out(struct i915_request * const rq,
+struct intel_context * const ce)
 {
-   struct intel_context * const ce = rq->context;
struct intel_engine_cs * const engine = rq->engine;
unsigned int ccid;
 
@@ -621,6 +620,7 @@ static inline void __execlists_schedule_out(struct 
i915_request *rq)
 */
 
CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid);
+   GEM_BUG_ON(ce->inflight != engine);
 
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
lrc_check_regs(ce, engine, "after");
@@ -660,10 +660,12 @@ static inline void __execlists_schedule_out(struct 
i915_request *rq)
 */
if (ce->engine != engine)
kick_siblings(rq, ce);
+
+   WRITE_ONCE(ce->inflight, NULL);
+   intel_context_put(ce);
 }
 
-static inline void
-execlists_schedule_out(struct i915_request *rq)
+static inline void execlists_schedule_out(struct i915_request *rq)
 {
struct intel_context * const ce = rq->context;
 
@@ -671,12 +673,8