Set the instruction and state cache invalidate bit using INDIRECT_CTX on
every gpu context switch. 
The goal of this workaround is to actually perform an explicit
invalidation of that cache (by re-writing the register) during every GPU
context switch, which is accomplished via a "workaround batchbuffer"
that's attached to the context via INDIRECT_CTX. (Matt)
BSpec: 11354

v2:
- Removed extra parentheses from the condition (Lucas)
- Fixed spacing and new line (Lucas)

v3:
- Fixed commit message.

v4:
- Only GEN12 changes are kept  (Matt Ropper)
- Fixed the commit message for r-b (Matt Ropper)
- Renamed the register bit in define

v5:
- Moved out the  from golden context init (Matt Roper)
- Use INDIRECT_CTX to set bit on each GPU context switch (Matt Roper)

Cc: Lucas De Marchi <[email protected]>
Cc: Matt Roper <[email protected]>
Signed-off-by: Dnyaneshwar Bhadane <[email protected]>

---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c     | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 0e4c638fcbbf..38f02ef8ed01 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -164,6 +164,8 @@
 #define GEN9_CSFE_CHICKEN1_RCS                 _MMIO(0x20d4)
 #define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE       (1 << 2)
 #define   GEN11_ENABLE_32_PLANE_MODE           (1 << 7)
+#define GEN12_CS_DEBUG_MODE2                   _MMIO(0x20d8)
+#define   INSTRUCTION_STATE_CACHE_INVALIDATE   REG_BIT(6)
 
 #define GEN7_FF_SLICE_CS_CHICKEN1              _MMIO(0x20e0)
 #define   GEN9_FFSC_PERCTX_PREEMPT_CTRL                (1 << 14)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 967fe4d77a87..fe98039499c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1332,6 +1332,15 @@ dg2_emit_draw_watermark_setting(u32 *cs)
        return cs;
 }
 
+static u32 *
+gen12_set_instruction_state_cache_invalid(u32 *cs)
+{
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
+       *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+       return cs;
+}
+
 static u32 *
 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 {
@@ -1345,6 +1354,12 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context 
*ce, u32 *cs)
 
        cs = gen12_emit_aux_table_inv(ce->engine, cs);
 
+       /* Wa_18022495364 */
+       if (IS_ALDERLAKE_P(ce->engine->i915) || 
IS_ALDERLAKE_P_N(ce->engine->i915) ||
+           IS_ALDERLAKE_S(ce->engine->i915) || IS_TIGERLAKE(ce->engine->i915) 
||
+               IS_ROCKETLAKE(ce->engine->i915) || IS_DG1(ce->engine->i915))
+               cs = gen12_set_instruction_state_cache_invalid(cs);
+
        /* Wa_16014892111 */
        if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) 
||
            IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) 
||
-- 
2.34.1

Reply via email to