Quoting Oscar Mateo (2018-02-15 22:46:41)
> There are different kind of workarounds (those that modify registers that
> live in the context image, those that modify global registers, those that
> whitelist registers, etc...) and they have different requirements in terms
> of where they are applied and how. Also, by splitting them apart, it should
> be easier to decide where a new workaround should go.
>
> v2:
> - Add multiple MISSING_CASE
> - Rebased
>
> v3:
> - Rename mmio_workarounds to gt_workarounds (Chris, Mika)
> - Create empty placeholders for BDW and CHV GT WAs
> - Rebased
>
> v4: Rebased
>
> Signed-off-by: Oscar Mateo <[email protected]>
> Cc: Chris Wilson <[email protected]>
> Cc: Mika Kuoppala <[email protected]>
> Cc: Ville Syrjälä <[email protected]>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 3 +
> drivers/gpu/drm/i915/i915_gem_context.c | 6 +
> drivers/gpu/drm/i915/intel_lrc.c | 10 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +-
> drivers/gpu/drm/i915/intel_workarounds.c | 627
> +++++++++++++++++++------------
> drivers/gpu/drm/i915/intel_workarounds.h | 8 +-
> 6 files changed, 414 insertions(+), 244 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fc68b35..94707c2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -35,6 +35,7 @@
> #include "intel_drv.h"
> #include "intel_frontbuffer.h"
> #include "intel_mocs.h"
> +#include "intel_workarounds.h"
> #include "i915_gemfs.h"
> #include <linux/dma-fence-array.h>
> #include <linux/kthread.h>
> @@ -5107,6 +5108,8 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv)
> }
> }
>
> + intel_gt_workarounds_apply(dev_priv);
> +
> i915_gem_init_swizzling(dev_priv);
>
> /*
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
> b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3d75f48..a5ada99 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -90,6 +90,7 @@
> #include <drm/i915_drm.h>
> #include "i915_drv.h"
> #include "i915_trace.h"
> +#include "intel_workarounds.h"
>
> #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
>
> @@ -452,11 +453,16 @@ static bool needs_preempt_context(struct
> drm_i915_private *i915)
> int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> {
> struct i915_gem_context *ctx;
> + int ret;
>
> /* Reassure ourselves we are only called once */
> GEM_BUG_ON(dev_priv->kernel_context);
> GEM_BUG_ON(dev_priv->preempt_context);
>
> + ret = intel_ctx_workarounds_init(dev_priv);
> + if (ret)
> + return ret;
> +
> INIT_LIST_HEAD(&dev_priv->contexts.list);
> INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
> init_llist_head(&dev_priv->contexts.free_list);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 62de613..39d43bb 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1544,7 +1544,7 @@ static int gen8_init_render_ring(struct intel_engine_cs
> *engine)
>
> I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
>
> - return init_workarounds_ring(engine);
> + return 0;
> }
>
> static int gen9_init_render_ring(struct intel_engine_cs *engine)
> @@ -1555,7 +1555,11 @@ static int gen9_init_render_ring(struct
> intel_engine_cs *engine)
> if (ret)
> return ret;
>
> - return init_workarounds_ring(engine);
> + ret = intel_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
This looks wrong: I expect the whitelist application to be universal, yet
gen8 appears absent. It should also be called from legacy submission,
even if the whitelist is always empty there.
> +
> + return 0;
> }
>
> static void reset_irq(struct intel_engine_cs *engine)
> @@ -1904,7 +1908,7 @@ static int gen8_init_rcs_context(struct
> drm_i915_gem_request *req)
> {
> int ret;
>
> - ret = intel_ring_workarounds_emit(req);
> + ret = intel_ctx_workarounds_emit(req);
> if (ret)
> return ret;
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ec580f5..0b6c20f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -600,7 +600,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request
> *req)
> {
> int ret;
>
> - ret = intel_ring_workarounds_emit(req);
> + ret = intel_ctx_workarounds_emit(req);
> if (ret != 0)
> return ret;
>
> @@ -659,7 +659,7 @@ static int init_render_ring(struct intel_engine_cs
> *engine)
> if (INTEL_GEN(dev_priv) >= 6)
> I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
>
> - return init_workarounds_ring(engine);
> + return 0;
> }
Otherwise, ok.
> static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs)
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.c
> b/drivers/gpu/drm/i915/intel_workarounds.c
> index 56a1152..9e8c6d4 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/intel_workarounds.c
> @@ -40,27 +40,8 @@ static int wa_add(struct drm_i915_private *dev_priv,
> #define WA_SET_FIELD_MASKED(addr, mask, value) \
> WA_REG(addr, mask, _MASKED_FIELD(mask, value))
>
> -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
> - i915_reg_t reg)
> -{
> - struct drm_i915_private *dev_priv = engine->i915;
> - struct i915_workarounds *wa = &dev_priv->workarounds;
> - const uint32_t index = wa->hw_whitelist_count[engine->id];
> -
> - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
> - return -EINVAL;
> -
> - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
> - i915_mmio_reg_offset(reg));
> - wa->hw_whitelist_count[engine->id]++;
> -
> - return 0;
> -}
> -
> -static int gen8_init_workarounds(struct intel_engine_cs *engine)
> +static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> -
> WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
>
> /* WaDisableAsyncFlipPerfMode:bdw,chv */
> @@ -108,12 +89,11 @@ static int gen8_init_workarounds(struct intel_engine_cs
> *engine)
> return 0;
> }
>
> -static int bdw_init_workarounds(struct intel_engine_cs *engine)
> +static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> int ret;
>
> - ret = gen8_init_workarounds(engine);
> + ret = gen8_ctx_workarounds_init(dev_priv);
> if (ret)
> return ret;
>
> @@ -140,12 +120,11 @@ static int bdw_init_workarounds(struct intel_engine_cs
> *engine)
> return 0;
> }
>
> -static int chv_init_workarounds(struct intel_engine_cs *engine)
> +static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> int ret;
>
> - ret = gen8_init_workarounds(engine);
> + ret = gen8_ctx_workarounds_init(dev_priv);
> if (ret)
> return ret;
>
> @@ -158,23 +137,8 @@ static int chv_init_workarounds(struct intel_engine_cs
> *engine)
> return 0;
> }
>
> -static int gen9_init_workarounds(struct intel_engine_cs *engine)
> +static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - int ret;
> -
> - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
> - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
> _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
> -
> - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
> - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
> - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
> -
> - /* WaDisableKillLogic:bxt,skl,kbl */
> - if (!IS_COFFEELAKE(dev_priv))
> - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> - ECOCHK_DIS_TLB);
> -
> if (HAS_LLC(dev_priv)) {
> /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
> *
> @@ -185,11 +149,6 @@ static int gen9_init_workarounds(struct intel_engine_cs
> *engine)
> GEN9_PBE_COMPRESSED_HASH_SELECTION);
> WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
> GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
> -
> - I915_WRITE(MMCD_MISC_CTRL,
> - I915_READ(MMCD_MISC_CTRL) |
> - MMCD_PCLA |
> - MMCD_HOTSPOT_EN);
> }
>
> /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
> @@ -240,10 +199,6 @@ static int gen9_init_workarounds(struct intel_engine_cs
> *engine)
> WA_SET_BIT_MASKED(HDC_CHICKEN0,
> HDC_FORCE_NON_COHERENT);
>
> - /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
> - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> - BDW_DISABLE_HDC_INVALIDATION);
> -
> /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
> if (IS_SKYLAKE(dev_priv) ||
> IS_KABYLAKE(dev_priv) ||
> @@ -254,19 +209,6 @@ static int gen9_init_workarounds(struct intel_engine_cs
> *engine)
> /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
> WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
>
> - /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
> - if (IS_GEN9_LP(dev_priv)) {
> - u32 val = I915_READ(GEN8_L3SQCREG1);
> -
> - val &= ~L3_PRIO_CREDITS_MASK;
> - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
> - I915_WRITE(GEN8_L3SQCREG1, val);
> - }
> -
> - /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
> - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
> - GEN8_LQSC_FLUSH_COHERENT_LINES));
> -
> /*
> * Supporting preemption with fine-granularity requires changes in the
> * batch buffer programming. Since we can't break old userspace, we
> @@ -285,29 +227,11 @@ static int gen9_init_workarounds(struct intel_engine_cs
> *engine)
> WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
> GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
>
> - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
> - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
> - if (ret)
> - return ret;
> -
> - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
> - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
> - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
> - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
> - if (ret)
> - return ret;
> -
> - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
> - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
> - if (ret)
> - return ret;
> -
> return 0;
> }
>
> -static int skl_tune_iz_hashing(struct intel_engine_cs *engine)
> +static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> u8 vals[3] = { 0, 0, 0 };
> unsigned int i;
>
> @@ -346,77 +270,99 @@ static int skl_tune_iz_hashing(struct intel_engine_cs
> *engine)
> return 0;
> }
>
> -static int skl_init_workarounds(struct intel_engine_cs *engine)
> +static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> int ret;
>
> - ret = gen9_init_workarounds(engine);
> + ret = gen9_ctx_workarounds_init(dev_priv);
> if (ret)
> return ret;
>
> - /* WaEnableGapsTsvCreditFix:skl */
> - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
> - GEN9_GAPS_TSV_CREDIT_DISABLE));
> -
> - /* WaDisableGafsUnitClkGating:skl */
> - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
> - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
> + return skl_tune_iz_hashing(dev_priv);
> +}
>
> - /* WaInPlaceDecompressionHang:skl */
> - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> +static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> +{
> + int ret;
>
> - /* WaDisableLSQCROPERFforOCL:skl */
> - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> + ret = gen9_ctx_workarounds_init(dev_priv);
> if (ret)
> return ret;
>
> - return skl_tune_iz_hashing(engine);
> + /* WaDisableThreadStallDopClockGating:bxt */
> + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> + STALL_DOP_GATING_DISABLE);
> +
> + /* WaToEnableHwFixForPushConstHWBug:bxt */
> + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
> +
> + return 0;
> }
>
> -static int bxt_init_workarounds(struct intel_engine_cs *engine)
> +static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> int ret;
>
> - ret = gen9_init_workarounds(engine);
> + ret = gen9_ctx_workarounds_init(dev_priv);
> if (ret)
> return ret;
>
> - /* WaDisableThreadStallDopClockGating:bxt */
> - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> - STALL_DOP_GATING_DISABLE);
> + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
> + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
> + WA_SET_BIT_MASKED(HDC_CHICKEN0,
> + HDC_FENCE_DEST_SLM_DISABLE);
>
> - /* WaDisablePooledEuLoadBalancingFix:bxt */
> - I915_WRITE(FF_SLICE_CS_CHICKEN2,
> -
> _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
> + /* WaToEnableHwFixForPushConstHWBug:kbl */
> + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
> + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
>
> - /* WaToEnableHwFixForPushConstHWBug:bxt */
> + /* WaDisableSbeCacheDispatchPortSharing:kbl */
> + WA_SET_BIT_MASKED(
> + GEN7_HALF_SLICE_CHICKEN1,
> + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> +
> + return 0;
> +}
> +
> +static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> +{
> + int ret;
> +
> + ret = gen9_ctx_workarounds_init(dev_priv);
> + if (ret)
> + return ret;
> +
> + /* WaToEnableHwFixForPushConstHWBug:glk */
> WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
>
> - /* WaInPlaceDecompressionHang:bxt */
> - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> -
> return 0;
> }
>
> -static int cnl_init_workarounds(struct intel_engine_cs *engine)
> +static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> int ret;
>
> - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
> - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
> - I915_WRITE(GAMT_CHKN_BIT_REG,
> - (I915_READ(GAMT_CHKN_BIT_REG) |
> - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
> + ret = gen9_ctx_workarounds_init(dev_priv);
> + if (ret)
> + return ret;
> +
> + /* WaToEnableHwFixForPushConstHWBug:cfl */
> + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
> +
> + /* WaDisableSbeCacheDispatchPortSharing:cfl */
> + WA_SET_BIT_MASKED(
> + GEN7_HALF_SLICE_CHICKEN1,
> + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> +
> + return 0;
> +}
>
> +static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> +{
> /* WaForceContextSaveRestoreNonCoherent:cnl */
> WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
> HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
> @@ -434,15 +380,10 @@ static int cnl_init_workarounds(struct intel_engine_cs
> *engine)
> WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>
> - /* WaInPlaceDecompressionHang:cnl */
> - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> -
> /* WaPushConstantDereferenceHoldDisable:cnl */
> WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
>
> - /* FtrEnableFastAnisoL1BankingFix: cnl */
> + /* FtrEnableFastAnisoL1BankingFix:cnl */
> WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
>
> /* WaDisable3DMidCmdPreemption:cnl */
> @@ -452,28 +393,175 @@ static int cnl_init_workarounds(struct intel_engine_cs
> *engine)
> WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
> GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
>
> - /* WaEnablePreemptionGranularityControlByUMD:cnl */
> - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
> - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
> - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
> - if (ret)
> - return ret;
> -
> /* WaDisableEarlyEOT:cnl */
> WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
>
> return 0;
> }
>
> -static int kbl_init_workarounds(struct intel_engine_cs *engine)
> +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - int ret;
> + int err;
> +
> + dev_priv->workarounds.count = 0;
> +
> + if (INTEL_GEN(dev_priv) < 8)
> + err = 0;
> + else if (IS_BROADWELL(dev_priv))
> + err = bdw_ctx_workarounds_init(dev_priv);
> + else if (IS_CHERRYVIEW(dev_priv))
> + err = chv_ctx_workarounds_init(dev_priv);
> + else if (IS_SKYLAKE(dev_priv))
> + err = skl_ctx_workarounds_init(dev_priv);
> + else if (IS_BROXTON(dev_priv))
> + err = bxt_ctx_workarounds_init(dev_priv);
> + else if (IS_KABYLAKE(dev_priv))
> + err = kbl_ctx_workarounds_init(dev_priv);
> + else if (IS_GEMINILAKE(dev_priv))
> + err = glk_ctx_workarounds_init(dev_priv);
> + else if (IS_COFFEELAKE(dev_priv))
> + err = cfl_ctx_workarounds_init(dev_priv);
> + else if (IS_CANNONLAKE(dev_priv))
> + err = cnl_ctx_workarounds_init(dev_priv);
> + else {
> + MISSING_CASE(INTEL_GEN(dev_priv));
> + err = 0;
> + }
> + if (err)
> + return err;
>
> - ret = gen9_init_workarounds(engine);
> + DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
> + dev_priv->workarounds.count);
\o/ Only once!
> + return 0;
> +}
> +
> +int intel_ctx_workarounds_emit(struct drm_i915_gem_request *req)
> +{
> + struct i915_workarounds *w = &req->i915->workarounds;
> + u32 *cs;
> + int ret, i;
> +
> + if (w->count == 0)
> + return 0;
> +
> + ret = req->engine->emit_flush(req, EMIT_BARRIER);
> if (ret)
> return ret;
>
> + cs = intel_ring_begin(req, (w->count * 2 + 2));
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + *cs++ = MI_LOAD_REGISTER_IMM(w->count);
> + for (i = 0; i < w->count; i++) {
> + *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
> + *cs++ = w->reg[i].value;
> + }
> + *cs++ = MI_NOOP;
> +
> + intel_ring_advance(req, cs);
> +
> + ret = req->engine->emit_flush(req, EMIT_BARRIER);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> +}
> +
> +static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> +}
> +
> +static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> + /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
> + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
> +
> _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
> +
> + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
> + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
> + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
> +
> + /* WaDisableKillLogic:bxt,skl,kbl */
> + if (!IS_COFFEELAKE(dev_priv))
> + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> + ECOCHK_DIS_TLB);
> +
> + if (HAS_LLC(dev_priv)) {
> + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
> + *
> + * Must match Display Engine. See
> + * WaCompressedResourceDisplayNewHashMode.
> + */
> + I915_WRITE(MMCD_MISC_CTRL,
> + I915_READ(MMCD_MISC_CTRL) |
> + MMCD_PCLA |
> + MMCD_HOTSPOT_EN);
> + }
> +
> + /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
> + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> + BDW_DISABLE_HDC_INVALIDATION);
> +
> + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
> + if (IS_GEN9_LP(dev_priv)) {
> + u32 val = I915_READ(GEN8_L3SQCREG1);
> +
> + val &= ~L3_PRIO_CREDITS_MASK;
> + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
> + I915_WRITE(GEN8_L3SQCREG1, val);
> + }
> +
> + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
> + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
> + GEN8_LQSC_FLUSH_COHERENT_LINES));
> +
> + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
> + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
> + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
> +}
> +
> +static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> + gen9_gt_workarounds_apply(dev_priv);
> +
> + /* WaEnableGapsTsvCreditFix:skl */
> + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
> + GEN9_GAPS_TSV_CREDIT_DISABLE));
> +
> + /* WaDisableGafsUnitClkGating:skl */
> + I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
> + GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
> +
> + /* WaInPlaceDecompressionHang:skl */
> + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> +}
> +
> +static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> + gen9_gt_workarounds_apply(dev_priv);
> +
> + /* WaDisablePooledEuLoadBalancingFix:bxt */
> + I915_WRITE(FF_SLICE_CS_CHICKEN2,
> +
> _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
> +
> + /* WaInPlaceDecompressionHang:bxt */
> + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> +}
> +
> +static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> + gen9_gt_workarounds_apply(dev_priv);
> +
> /* WaEnableGapsTsvCreditFix:kbl */
> I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
> GEN9_GAPS_TSV_CREDIT_DISABLE));
> @@ -484,159 +572,224 @@ static int kbl_init_workarounds(struct
> intel_engine_cs *engine)
> (I915_READ(GAMT_CHKN_BIT_REG) |
> GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING));
>
> - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
> - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
> - WA_SET_BIT_MASKED(HDC_CHICKEN0,
> - HDC_FENCE_DEST_SLM_DISABLE);
> -
> - /* WaToEnableHwFixForPushConstHWBug:kbl */
> - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
> - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
> -
> /* WaDisableGafsUnitClkGating:kbl */
> I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
> GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
>
> - /* WaDisableSbeCacheDispatchPortSharing:kbl */
> - WA_SET_BIT_MASKED(
> - GEN7_HALF_SLICE_CHICKEN1,
> - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> -
> /* WaInPlaceDecompressionHang:kbl */
> I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> -
> - /* WaDisableLSQCROPERFforOCL:kbl */
> - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> - if (ret)
> - return ret;
> -
> - return 0;
> }
>
> -static int glk_init_workarounds(struct intel_engine_cs *engine)
> +static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - int ret;
> -
> - ret = gen9_init_workarounds(engine);
> - if (ret)
> - return ret;
> -
> - /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
> - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
> - if (ret)
> - return ret;
> -
> - /* WaToEnableHwFixForPushConstHWBug:glk */
> - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
> -
> - return 0;
> + gen9_gt_workarounds_apply(dev_priv);
> }
>
> -static int cfl_init_workarounds(struct intel_engine_cs *engine)
> +static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - int ret;
> -
> - ret = gen9_init_workarounds(engine);
> - if (ret)
> - return ret;
> + gen9_gt_workarounds_apply(dev_priv);
>
> /* WaEnableGapsTsvCreditFix:cfl */
> I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
> GEN9_GAPS_TSV_CREDIT_DISABLE));
>
> - /* WaToEnableHwFixForPushConstHWBug:cfl */
> - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
> -
> /* WaDisableGafsUnitClkGating:cfl */
> I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) |
> GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE));
>
> - /* WaDisableSbeCacheDispatchPortSharing:cfl */
> - WA_SET_BIT_MASKED(
> - GEN7_HALF_SLICE_CHICKEN1,
> - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
> -
> /* WaInPlaceDecompressionHang:cfl */
> I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
> -
> - return 0;
> }
>
> -int init_workarounds_ring(struct intel_engine_cs *engine)
> +static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> {
> - struct drm_i915_private *dev_priv = engine->i915;
> - int err;
> + /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
> + if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
> + I915_WRITE(GAMT_CHKN_BIT_REG,
> + (I915_READ(GAMT_CHKN_BIT_REG) |
> + GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT));
>
> - if (GEM_WARN_ON(engine->id != RCS))
> - return -EINVAL;
> + /* WaInPlaceDecompressionHang:cnl */
> + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> + (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
>
> - dev_priv->workarounds.count = 0;
> - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
> + /* WaEnablePreemptionGranularityControlByUMD:cnl */
> + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
> + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
> +}
>
> - if (IS_BROADWELL(dev_priv))
> - err = bdw_init_workarounds(engine);
> +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
> +{
> + if (INTEL_GEN(dev_priv) < 8)
> + return;
> + else if (IS_BROADWELL(dev_priv))
> + bdw_gt_workarounds_apply(dev_priv);
> else if (IS_CHERRYVIEW(dev_priv))
> - err = chv_init_workarounds(engine);
> + chv_gt_workarounds_apply(dev_priv);
> else if (IS_SKYLAKE(dev_priv))
> - err = skl_init_workarounds(engine);
> + skl_gt_workarounds_apply(dev_priv);
> else if (IS_BROXTON(dev_priv))
> - err = bxt_init_workarounds(engine);
> + bxt_gt_workarounds_apply(dev_priv);
> else if (IS_KABYLAKE(dev_priv))
> - err = kbl_init_workarounds(engine);
> + kbl_gt_workarounds_apply(dev_priv);
> else if (IS_GEMINILAKE(dev_priv))
> - err = glk_init_workarounds(engine);
> + glk_gt_workarounds_apply(dev_priv);
> else if (IS_COFFEELAKE(dev_priv))
> - err = cfl_init_workarounds(engine);
> + cfl_gt_workarounds_apply(dev_priv);
> else if (IS_CANNONLAKE(dev_priv))
> - err = cnl_init_workarounds(engine);
> + cnl_gt_workarounds_apply(dev_priv);
> else
> - err = 0;
> - if (err)
> - return err;
> + MISSING_CASE(INTEL_GEN(dev_priv));
> +}
> +
> +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine,
> + i915_reg_t reg)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> + struct i915_workarounds *wa = &dev_priv->workarounds;
> + const uint32_t index = wa->hw_whitelist_count[engine->id];
> +
> + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
> + return -EINVAL;
> +
> + I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index),
> + i915_mmio_reg_offset(reg));
> + wa->hw_whitelist_count[engine->id]++;
>
> - DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n",
> - engine->name, dev_priv->workarounds.count);
> return 0;
> }
>
> -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
> +static int gen9_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> {
> - struct i915_workarounds *w = &req->i915->workarounds;
> - u32 *cs;
> - int ret, i;
> + int ret;
>
> - if (w->count == 0)
> - return 0;
> + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
> + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG);
> + if (ret)
> + return ret;
>
> - ret = req->engine->emit_flush(req, EMIT_BARRIER);
> + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
> + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
> if (ret)
> return ret;
>
> - cs = intel_ring_begin(req, (w->count * 2 + 2));
> - if (IS_ERR(cs))
> - return PTR_ERR(cs);
> + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
> + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1);
> + if (ret)
> + return ret;
>
> - *cs++ = MI_LOAD_REGISTER_IMM(w->count);
> - for (i = 0; i < w->count; i++) {
> - *cs++ = i915_mmio_reg_offset(w->reg[i].addr);
> - *cs++ = w->reg[i].value;
> - }
> - *cs++ = MI_NOOP;
> + return 0;
> +}
>
> - intel_ring_advance(req, cs);
> +static int skl_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret = gen9_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
>
> - ret = req->engine->emit_flush(req, EMIT_BARRIER);
> + /* WaDisableLSQCROPERFforOCL:skl */
> + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int bxt_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret = gen9_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int kbl_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret = gen9_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
> +
> + /* WaDisableLSQCROPERFforOCL:kbl */
> + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int glk_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret = gen9_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
> +
> + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
> + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int cfl_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret = gen9_whitelist_workarounds_apply(engine);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> +static int cnl_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + int ret;
> +
> + /* WaEnablePreemptionGranularityControlByUMD:cnl */
> + ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1);
> if (ret)
> return ret;
>
> return 0;
> }
> +
> +int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
> +{
> + struct drm_i915_private *dev_priv = engine->i915;
> + int err;
> +
> + WARN_ON(engine->id != RCS);
> +
> + dev_priv->workarounds.hw_whitelist_count[engine->id] = 0;
> +
> + if (INTEL_GEN(dev_priv) < 9) {
> + WARN(1, "No whitelisting in Gen%u\n", INTEL_GEN(dev_priv));
> + err = 0;
> + } else if (IS_SKYLAKE(dev_priv))
> + err = skl_whitelist_workarounds_apply(engine);
> + else if (IS_BROXTON(dev_priv))
> + err = bxt_whitelist_workarounds_apply(engine);
> + else if (IS_KABYLAKE(dev_priv))
> + err = kbl_whitelist_workarounds_apply(engine);
> + else if (IS_GEMINILAKE(dev_priv))
> + err = glk_whitelist_workarounds_apply(engine);
> + else if (IS_COFFEELAKE(dev_priv))
> + err = cfl_whitelist_workarounds_apply(engine);
> + else if (IS_CANNONLAKE(dev_priv))
> + err = cnl_whitelist_workarounds_apply(engine);
> + else {
> + MISSING_CASE(INTEL_GEN(dev_priv));
> + err = 0;
> + }
> + if (err)
> + return err;
> +
> + DRM_DEBUG_DRIVER("%s: Number of whitelist w/a: %d\n", engine->name,
> +
> dev_priv->workarounds.hw_whitelist_count[engine->id]);
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_workarounds.h
> b/drivers/gpu/drm/i915/intel_workarounds.h
> index 7681165..64f9599 100644
> --- a/drivers/gpu/drm/i915/intel_workarounds.h
> +++ b/drivers/gpu/drm/i915/intel_workarounds.h
> @@ -7,7 +7,11 @@
> #ifndef _I915_WORKAROUNDS_H_
> #define _I915_WORKAROUNDS_H_
>
> -int init_workarounds_ring(struct intel_engine_cs *engine);
> -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);
> +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv);
> +int intel_ctx_workarounds_emit(struct drm_i915_gem_request *req);
> +
> +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv);
> +
> +int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);
I'm comfortable with the rest, so if we can make the code flow identical
for each path (just some have less work to do than others), you can add my
Reviewed-by: Chris Wilson <[email protected]>
I think we should apply these patches slowly, so please refine and resubmit
this one by itself.
-Chris
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx