[Intel-gfx] [PATCH 6/6] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

2020-02-23 Thread Chris Wilson
If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Kenneth Graunke 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +
 drivers/gpu/drm/i915/gt/intel_gt_irq.c   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 40 +---
 drivers/gpu/drm/i915/i915_reg.h  |  1 +
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 119c9cb24fd4..eec6e3245a97 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1288,6 +1288,12 @@ static void intel_engine_print_registers(struct 
intel_engine_cs *engine,
 
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+   if (HAS_EXECLISTS(dev_priv)) {
+   drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+  ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+   drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+  ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+   }
drm_printf(m, "\tRING_START: 0x%08x\n",
   ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index b23366a81048..24cff658e6e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
 */
struct i915_priolist default_priolist;
 
+   /**
+* @yield: CCID at the time of the last semaphore-wait interrupt.
+*
+* Instead of leaving a semaphore busy-spinning on an engine, we would
+* like to switch to another ready context, i.e. yielding the semaphore
+* timeslice.
+*/
+   u32 yield;
+
/**
 * @error_interrupt: CS Master EIR
 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..875bd0392ffc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
 
+   if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+   WRITE_ONCE(engine->execlists.yield,
+  ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+   if (del_timer(>execlists.timer))
+   tasklet = true;
+   }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 

[Intel-gfx] [PATCH 6/6] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

2020-02-19 Thread Chris Wilson
If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing")
Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Cc: Kenneth Graunke 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +
 drivers/gpu/drm/i915/gt/intel_gt_irq.c   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 40 +---
 drivers/gpu/drm/i915/i915_reg.h  |  1 +
 5 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e46e55354e95..1a2e9610f1a8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1288,6 +1288,12 @@ static void intel_engine_print_registers(struct 
intel_engine_cs *engine,
 
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+   if (HAS_EXECLISTS(dev_priv)) {
+   drm_printf(m, "\tEL_STAT_HI: 0x%08x\n",
+  ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
+   drm_printf(m, "\tEL_STAT_LO: 0x%08x\n",
+  ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
+   }
drm_printf(m, "\tRING_START: 0x%08x\n",
   ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index b23366a81048..24cff658e6e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -156,6 +156,15 @@ struct intel_engine_execlists {
 */
struct i915_priolist default_priolist;
 
+   /**
+* @yield: CCID at the time of the last semaphore-wait interrupt.
+*
+* Instead of leaving a semaphore busy-spinning on an engine, we would
+* like to switch to another ready context, i.e. yielding the semaphore
+* timeslice.
+*/
+   u32 yield;
+
/**
 * @error_interrupt: CS Master EIR
 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..875bd0392ffc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
 
+   if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+   WRITE_ONCE(engine->execlists.yield,
+  ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI));
+   if (del_timer(>execlists.timer))
+   tasklet = true;
+   }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 

[Intel-gfx] [PATCH 6/6] drm/i915/gt: Yield the timeslice if caught waiting on a user semaphore

2020-02-03 Thread Chris Wilson
If we find ourselves waiting on a MI_SEMAPHORE_WAIT, either within the
user batch or in our own preamble, the engine raises a
GT_WAIT_ON_SEMAPHORE interrupt. We can unmask that interrupt and so
respond to a semaphore wait by yielding the timeslice, if we have
another context to yield to!

The only real complication is that the interrupt is only generated for
the start of the semaphore wait, and is asynchronous to our
process_csb() -- that is, we may not have registered the timeslice before
we see the interrupt. To ensure we don't miss a potential semaphore
blocking forward progress (e.g. selftests/live_timeslice_preempt) we mark
the interrupt and apply it to the next timeslice regardless of whether it
was active at the time.

v2: We use semaphores in preempt-to-busy, within the timeslicing
implementation itself! Ergo, when we do insert a preemption due to an
expired timeslice, the new context may start with the missed semaphore
flagged by the retired context and be yielded, ad infinitum. To avoid
this, read the context id at the time of the semaphore interrupt and
only yield if that context is still active.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c|  6 +++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 +
 drivers/gpu/drm/i915/gt/intel_gt_irq.c   | 13 ++-
 drivers/gpu/drm/i915/gt/intel_lrc.c  | 40 +---
 drivers/gpu/drm/i915/i915_reg.h  |  5 +++
 5 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index b1c7b1ed6149..f6873b0d9938 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1297,6 +1297,12 @@ static void intel_engine_print_registers(struct 
intel_engine_cs *engine,
 
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
+   if (HAS_EXECLISTS(dev_priv)) {
+   drm_printf(m, "\tEL_CCID:0x%08x\n",
+  ENGINE_READ(engine, EXECLIST_CCID));
+   drm_printf(m, "\tEL_STATUS:  0x%08x\n",
+  ENGINE_READ(engine, EXECLIST_STATUS));
+   }
drm_printf(m, "\tRING_START: 0x%08x\n",
   ENGINE_READ(engine, RING_START));
drm_printf(m, "\tRING_HEAD:  0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 45e36d963ea7..8eb7365b4230 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -157,6 +157,15 @@ struct intel_engine_execlists {
 */
struct i915_priolist default_priolist;
 
+   /**
+* @yield: CCID at the time of the last semaphore-wait interrupt.
+*
+* Instead of leaving a semaphore busy-spinning on an engine, we would
+* like to switch to another ready context, i.e. yielding the semaphore
+* timeslice.
+*/
+   u32 yield;
+
/**
 * @error_interrupt: CS Master EIR
 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index f0e7fd95165a..975064edd956 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -39,6 +39,13 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
}
}
 
+   if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
+   WRITE_ONCE(engine->execlists.yield,
+  ENGINE_READ_FW(engine, EXECLIST_CCID));
+   if (del_timer(>execlists.timer))
+   tasklet = true;
+   }
+
if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
tasklet = true;
 
@@ -228,7 +235,8 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
struct intel_uncore *uncore = gt->uncore;
const u32 dmask = irqs << 16 | irqs;
const u32 smask = irqs << 16;
@@ -366,7 +374,8 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
const u32 irqs =
GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
-   GT_CONTEXT_SWITCH_INTERRUPT;
+   GT_CONTEXT_SWITCH_INTERRUPT |
+   GT_WAIT_SEMAPHORE_INTERRUPT;
const u32 gt_interrupts[] = {
irqs << GEN8_RCS_IRQ_SHIFT | irqs << GEN8_BCS_IRQ_SHIFT,
irqs << GEN8_VCS0_IRQ_SHIFT | irqs << GEN8_VCS1_IRQ_SHIFT,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c196fb90c59f..dd21066d6987 100644
---