Chris Wilson <ch...@chris-wilson.co.uk> writes:

> Before we execute a batch, we must first issue any and all TLB
> invalidations so that the batch picks up the new page table entries.
> Tigerlake's preparser is weakening our post-sync CS_STALL inside the
> invalidate pipe-control and allowing the loading of the batch buffer
> before we have set up its page table (and so it loads the wrong page and
> executes indefinitely).
>
> The igt_cs_tlb indicates that this issue can only be observed on rcs,
> even though the preparser is common to all engines. Alternatively, we
> could do TLB shootdown via mmio on updating the GTT.
>
> By inserting the pre-parser disable inside EMIT_INVALIDATE, we will also
> accidentally fix up execution that writes into subsequent batches, such
> as gem_exec_whisper and even relocations performed on the GPU. We should
> be careful not to allow this disable to become baked into the uABI!
>
> Testcase: igt/i915_selftests/live_gtt/igt_cs_tlb
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
> Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 75 ++++++++++++++++++++++++++++-
>  1 file changed, 74 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a99166a2d2eb..60b7b163c3d0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2807,6 +2807,79 @@ static int gen11_emit_flush_render(struct i915_request *request,
>       return 0;
>  }
>  
> +static u32 preparser_disable(bool state)
> +{
> +     return MI_ARB_CHECK | 1 << 8 | state;
> +}

Descriptive enough, so no need to define the mask.

Acked-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>

> +
> +static int gen12_emit_flush_render(struct i915_request *request,
> +                                u32 mode)
> +{
> +     struct intel_engine_cs *engine = request->engine;
> +     const u32 scratch_addr =
> +             intel_gt_scratch_offset(engine->gt,
> +                                     INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
> +
> +     if (mode & EMIT_FLUSH) {
> +             u32 flags = 0;
> +             u32 *cs;
> +
> +             flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
> +             flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
> +             flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +             flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
> +             flags |= PIPE_CONTROL_FLUSH_ENABLE;
> +
> +             flags |= PIPE_CONTROL_QW_WRITE;
> +             flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +
> +             flags |= PIPE_CONTROL_CS_STALL;
> +
> +             cs = intel_ring_begin(request, 6);
> +             if (IS_ERR(cs))
> +                     return PTR_ERR(cs);
> +
> +             cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +             intel_ring_advance(request, cs);
> +     }
> +
> +     if (mode & EMIT_INVALIDATE) {
> +             u32 flags = 0;
> +             u32 *cs;
> +
> +             flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
> +             flags |= PIPE_CONTROL_TLB_INVALIDATE;
> +             flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
> +             flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> +             flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +             flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +             flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +
> +             flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +             flags |= PIPE_CONTROL_QW_WRITE;
> +
> +             flags |= PIPE_CONTROL_CS_STALL;
> +
> +             cs = intel_ring_begin(request, 8);
> +             if (IS_ERR(cs))
> +                     return PTR_ERR(cs);
> +
> +             /*
> +              * Prevent the pre-parser from skipping past the TLB
> +              * invalidate and loading a stale page for the batch
> +              * buffer / request payload.
> +              */
> +             *cs++ = preparser_disable(true);
> +
> +             cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
> +
> +             *cs++ = preparser_disable(false);
> +             intel_ring_advance(request, cs);
> +     }
> +
> +     return 0;
> +}
> +
>  /*
>   * Reserve space for 2 NOOPs at the end of each request to be
>   * used as a workaround for not being allowed to do lite
> @@ -3072,7 +3145,7 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
>  {
>       switch (INTEL_GEN(engine->i915)) {
>       case 12:
> -             engine->emit_flush = gen11_emit_flush_render;
> +             engine->emit_flush = gen12_emit_flush_render;
>               engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
>               break;
>       case 11:
> -- 
> 2.23.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to