Re: [Intel-gfx] [PATCH 3/3] drm/i915/dg1: Move Wa_1806527549 to the right function

2023-03-06 Thread Matt Roper
On Mon, Mar 06, 2023 at 12:49:54PM -0800, Lucas De Marchi wrote:
> dg1_ctx_workarounds_init() is DG1-only, while
> gen12_ctx_workarounds_init() is shared with other platforms. Move the
> workaround to the former so there is no additional platform check
> needed.
> 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 389bfcd299af..f68fe64f63a6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -707,8 +707,6 @@ static void gen12_ctx_gt_tuning_init(struct 
> intel_engine_cs *engine,
>  static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
>  struct i915_wa_list *wal)
>  {
> - struct drm_i915_private *i915 = engine->i915;
> -
>   gen12_ctx_gt_tuning_init(engine, wal);
>  
>   /*
> @@ -742,10 +740,6 @@ static void gen12_ctx_workarounds_init(struct 
> intel_engine_cs *engine,
>  FF_MODE2_GS_TIMER_MASK,
>  FF_MODE2_GS_TIMER_224,
>  0, false);
> -
> - if (!IS_DG1(i915))

I think you missed the "!" here.  I.e., this workaround applies to all
the "gen12" platforms *except* DG1.


Matt

> - /* Wa_1806527549 */
> - wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
>  }
>  
>  static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
> @@ -760,6 +754,9 @@ static void dg1_ctx_workarounds_init(struct 
> intel_engine_cs *engine,
>   /* Wa_22010493298 */
>   wa_masked_en(wal, HIZ_CHICKEN,
>DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
> +
> + /* Wa_1806527549 */
> + wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
>  }
>  
>  static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
> -- 
> 2.39.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH 2/3] drm/i915: Move DG2 tuning to the right function

2023-03-06 Thread Matt Roper
On Mon, Mar 06, 2023 at 12:49:53PM -0800, Lucas De Marchi wrote:
> Use gt_tuning_settings() for the recommended tunings rather than the one
> for workarounds.
> 
> Signed-off-by: Lucas De Marchi 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 11 +++
>  1 file changed, 3 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index eb6cc4867d67..389bfcd299af 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1653,13 +1653,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   /* Wa_14014830051:dg2 */
>   wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
>  
> - /*
> -  * The following are not actually "workarounds" but rather
> -  * recommended tuning settings documented in the bspec's
> -  * performance guide section.
> -  */
> - wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
> -
>   /* Wa_14015795083 */
>   wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  
> @@ -1752,8 +1745,10 @@ static void gt_tuning_settings(struct intel_gt *gt, 
> struct i915_wa_list *wal)
>   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
>   }
>  
> - if (IS_DG2(gt->i915))
> + if (IS_DG2(gt->i915)) {
>   wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
> BLEND_FILL_CACHING_OPT_DIS);
> + wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
> + }
>  }
>  
>  static void
> -- 
> 2.39.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Remove redundant check for DG1

2023-03-06 Thread Matt Roper
On Mon, Mar 06, 2023 at 12:49:52PM -0800, Lucas De Marchi wrote:
> dg1_gt_workarounds_init() is only ever called for DG1, so there is no
> point checking it again.
> 
> Signed-off-by: Lucas De Marchi 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 +++-
>  1 file changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 32aa1647721a..eb6cc4867d67 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1472,21 +1472,15 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>  static void
>  dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
>  {
> - struct drm_i915_private *i915 = gt->i915;
> -
>   gen12_gt_workarounds_init(gt, wal);
>  
>   /* Wa_1409420604:dg1 */
> - if (IS_DG1(i915))
> - wa_mcr_write_or(wal,
> - SUBSLICE_UNIT_LEVEL_CLKGATE2,
> - CPSSUNIT_CLKGATE_DIS);
> + wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
> + CPSSUNIT_CLKGATE_DIS);
>  
>   /* Wa_1408615072:dg1 */
>   /* Empirical testing shows this register is unaffected by engine reset. 
> */
> - if (IS_DG1(i915))
> - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
> - VSUNIT_CLKGATE_DIS_TGL);
> + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
>  }
>  
>  static void
> -- 
> 2.39.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH v2] drm/i915/mtl: Apply Wa_14017073508 for MTL Media Step

2023-03-01 Thread Matt Roper
On Wed, Mar 01, 2023 at 03:42:51PM +0530, Badal Nilawar wrote:
> Apply Wa_14017073508 for MTL Media step instead of graphics step.
> 
> v2: Use Media stepping instead of SoC die stepping (Matt)
> 
> Bspec: 66623
> 
> Fixes: 8f70f1ec587d ("drm/i915/mtl: Add Wa_14017073508 for SAMedia")
> Signed-off-by: Badal Nilawar 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 2 +-
>  2 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index cef3d6f5c34e..a14f23b3355a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -29,7 +29,7 @@
>  static void mtl_media_busy(struct intel_gt *gt)
>  {
>   /* Wa_14017073508: mtl */
> - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
>   gt->type == GT_MEDIA)
>   snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> PCODE_MBOX_GT_STATE_MEDIA_BUSY,
> @@ -39,7 +39,7 @@ static void mtl_media_busy(struct intel_gt *gt)
>  static void mtl_media_idle(struct intel_gt *gt)
>  {
>   /* Wa_14017073508: mtl */
> - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
>   gt->type == GT_MEDIA)
>   snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> index fcf51614f9a4..a53a995c3950 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> @@ -19,7 +19,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
>* Do not enable gucrc to avoid additional interrupts which
>* may disrupt pcode wa.
>*/
> - if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> + if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
>   gt->type == GT_MEDIA)
>   return false;
>  
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH] drm/i915: Move MCR_REG define to i915_reg_defs.h

2023-02-24 Thread Matt Roper
On Fri, Feb 24, 2023 at 01:12:21PM -0800, Lucas De Marchi wrote:
> Define MCR_REG() in the same header where i915_mcr_reg_t is defined,
> like i915_reg_t and _MMIO(). It's a more natural place for such a
> definition so it's not mixed with the registers for the platforms.
> 
> Signed-off-by: Lucas De Marchi 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 --
>  drivers/gpu/drm/i915/i915_reg_defs.h| 2 ++
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 416976d396ba..de2e85fd2f93 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -9,8 +9,6 @@
>  #include "i915_reg_defs.h"
>  #include "display/intel_display_reg_defs.h"  /* VLV_DISPLAY_BASE */
>  
> -#define MCR_REG(offset)  ((const i915_mcr_reg_t){ .reg = (offset) })
> -
>  /*
>   * The perf control registers are technically multicast registers, but the
>   * driver never needs to read/write them directly; we only use them to build
> diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h 
> b/drivers/gpu/drm/i915/i915_reg_defs.h
> index 983c5aa3045b..db26de6b57bc 100644
> --- a/drivers/gpu/drm/i915/i915_reg_defs.h
> +++ b/drivers/gpu/drm/i915/i915_reg_defs.h
> @@ -165,6 +165,8 @@ typedef struct {
>   u32 reg;
>  } i915_mcr_reg_t;
>  
> +#define MCR_REG(offset)  ((const i915_mcr_reg_t){ .reg = (offset) })
> +
>  #define INVALID_MMIO_REG _MMIO(0)
>  
>  /*
> -- 
> 2.39.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Apply Wa_14017073508 for MTL SoC Step

2023-02-24 Thread Matt Roper
On Thu, Feb 23, 2023 at 03:20:28PM -0500, Rodrigo Vivi wrote:
> On Fri, Feb 24, 2023 at 12:11:40AM +0530, Badal Nilawar wrote:
> > Apply Wa_14017073508 for MTL SoC die A step instead of graphics step.
> > To get the SoC die stepping there is no direct interface so using
> > revid as revid 0 aligns with SoC die A step.
> > 
> > Bspec: 55420
> 
> This doesn't prove anything. It is just saying Die A0 with GT A0,
> die B0 with GT B0 and so on... Please help me to understand that
> better offline before we move forward...

The definition of the workaround doesn't say anything about SoC
steppings that I can see.  The workaround itself is tagged as being
tied to Xe_LPM+ (i.e., the media IP), not to MTL as a platform and
not to the Xe_LPG graphics IP.  In relation to the media IP
specifically, the bounds are listed as needed from A0, fixed in B0.  So
unless there's a belief that the workaround itself is incorrect, I think
the bounds should be

IS_MTL_MEDIA_STEP(i915, STEP_A0, STEP_B0)


Matt

> 
> > 
> > Fixes: 8f70f1ec587d ("drm/i915/mtl: Add Wa_14017073508 for SAMedia")
> > Signed-off-by: Badal Nilawar 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 4 ++--
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 2 +-
> >  2 files changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > index cef3d6f5c34e..4ba3c8c97ccc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > @@ -29,7 +29,7 @@
> >  static void mtl_media_busy(struct intel_gt *gt)
> >  {
> > /* Wa_14017073508: mtl */
> > -   if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> > +   if (IS_METEORLAKE(gt->i915) && INTEL_REVID(gt->i915) == 0 &&
> > gt->type == GT_MEDIA)
> > snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> >   PCODE_MBOX_GT_STATE_MEDIA_BUSY,
> > @@ -39,7 +39,7 @@ static void mtl_media_busy(struct intel_gt *gt)
> >  static void mtl_media_idle(struct intel_gt *gt)
> >  {
> > /* Wa_14017073508: mtl */
> > -   if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> > +   if (IS_METEORLAKE(gt->i915) && INTEL_REVID(gt->i915) == 0 &&
> > gt->type == GT_MEDIA)
> > snb_pcode_write_p(gt->uncore, PCODE_MBOX_GT_STATE,
> >   PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY,
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> > index fcf51614f9a4..7429c233ad45 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
> > @@ -19,7 +19,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
> >  * Do not enable gucrc to avoid additional interrupts which
> >  * may disrupt pcode wa.
> >  */
> > -   if (IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0) &&
> > +   if (IS_METEORLAKE(gt->i915) && INTEL_REVID(gt->i915) == 0 &&
> > gt->type == GT_MEDIA)
> > return false;
> >  
> > -- 
> > 2.25.1
> > 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Add engine TLB invalidation

2023-02-24 Thread Matt Roper
On Thu, Feb 23, 2023 at 10:08:51AM +0100, Andrzej Hajda wrote:
> On 17.02.2023 19:54, Matt Roper wrote:
> > MTL's primary GT can continue to use the same engine TLB invalidation
> > programming as past Xe_HP-based platforms.  However the media GT needs
> > some special handling:
> >   * Invalidation registers on the media GT are singleton registers
> > (unlike the primary GT where they are still MCR).
> >   * Since the GSC is now exposed as an engine, there's a new register to
> > use for TLB invalidation.  The offset is identical to the compute
> > engine offset, but this is expected --- compute engines only exist on
> > the primary GT while the GSC only exists on the media GT.
> >   * Although there's only a single GSC engine instance, it inexplicably
> > uses bit 1 to request invalidations rather than bit 0.
> > 
> > Cc: Tvrtko Ursulin 
> > Cc: Daniele Ceraolo Spurio 
> > Signed-off-by: Matt Roper 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_cs.c | 52 ---
> >   drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  1 +
> >   2 files changed, 38 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index f3a91e7f85f7..af8e158fbd84 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -1166,6 +1166,11 @@ static int intel_engine_init_tlb_invalidation(struct 
> > intel_engine_cs *engine)
> > [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> > [COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
> > };
> > +   static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
> > +   [VIDEO_DECODE_CLASS].reg  = GEN12_VD_TLB_INV_CR,
> > +   [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
> > +   [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
> > +   };
> > struct drm_i915_private *i915 = engine->i915;
> > const unsigned int instance = engine->instance;
> > const unsigned int class = engine->class;
> > @@ -1185,19 +1190,28 @@ static int 
> > intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> >  * 12.00 -> 12.50 transition multi cast handling is required too.
> >  */
> > -   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> > -   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> > -   regs = xehp_regs;
> > -   num = ARRAY_SIZE(xehp_regs);
> > -   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> > -  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> > -   regs = gen12_regs;
> > -   num = ARRAY_SIZE(gen12_regs);
> > -   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> > -   regs = gen8_regs;
> > -   num = ARRAY_SIZE(gen8_regs);
> > -   } else if (GRAPHICS_VER(i915) < 8) {
> > -   return 0;
> > +   if (engine->gt->type == GT_MEDIA) {
> > +   if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
> > +   regs = xelpmp_regs;
> > +   num = ARRAY_SIZE(xelpmp_regs);
> > +   }
> 
> As I understand currently only GEN13 of media can have GT_MEDIA, so fallback
> to gt_WARN_ONCE below is expected behavior.

"Gen" terminology isn't used anymore, but yes, standalone media is a new
feature starting from media version 13.

> 
> > +   } else {
> > +   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
> 
> 12.71 is not yet present in i915_pci.c, maybe they should be added together,
> up to you.

No, i915_pci.c isn't the source of IP versions anymore.  Starting with
MTL (and continuing with future platforms), the graphics, media, and
display IP versions are read out directly from the hardware itself (the
GMD_ID registers); they no longer get derived from PCI devid matching.
The vestigial 12.70 value in i915_pci.c shouldn't get used anywhere
except as a very basic sanity check that the GMD_ID registers are
correctly reporting a high enough version.


Matt

> 
> > +   GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
> > +   GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> > +   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> > +   regs = xehp_regs;
> > +   num = ARRAY_SIZE(xehp_regs);
> > +   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> > +  GRAPHICS_VER_FULL(i915) == IP_

[PATCH v2] drm/i915/mtl: Add engine TLB invalidation

2023-02-23 Thread Matt Roper
MTL's primary GT can continue to use the same engine TLB invalidation
programming as past Xe_HP-based platforms.  However the media GT needs
some special handling:
 * Invalidation registers on the media GT are singleton registers
   (unlike the primary GT where they are still MCR).
 * Since the GSC is now exposed as an engine, there's a new register to
   use for TLB invalidation.  The offset is identical to the compute
   engine offset, but this is expected --- compute engines only exist on
   the primary GT while the GSC only exists on the media GT.
 * Although there's only a single GSC engine instance, it inexplicably
   uses bit 1 to request invalidations rather than bit 0.

v2:
 - Add a 'regs == xelpmp_regs' condition to the GSC instance handling.
   If the registers change on a future platform, the GSC-specific
   handling is likely to change as well.  (Andrzej)

Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Cc: Andrzej Hajda 
Signed-off-by: Matt Roper 
Reviewed-by: Andrzej Hajda 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 52 ---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  1 +
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f3a91e7f85f7..4aa08fac1465 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1166,6 +1166,11 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
[COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
[COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
};
+   static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
+   [VIDEO_DECODE_CLASS].reg  = GEN12_VD_TLB_INV_CR,
+   [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+   [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
+   };
struct drm_i915_private *i915 = engine->i915;
const unsigned int instance = engine->instance;
const unsigned int class = engine->class;
@@ -1185,19 +1190,28 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 * 12.00 -> 12.50 transition multi cast handling is required too.
 */
 
-   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
-   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
-   regs = xehp_regs;
-   num = ARRAY_SIZE(xehp_regs);
-   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
-  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
-   regs = gen12_regs;
-   num = ARRAY_SIZE(gen12_regs);
-   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-   regs = gen8_regs;
-   num = ARRAY_SIZE(gen8_regs);
-   } else if (GRAPHICS_VER(i915) < 8) {
-   return 0;
+   if (engine->gt->type == GT_MEDIA) {
+   if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+   regs = xelpmp_regs;
+   num = ARRAY_SIZE(xelpmp_regs);
+   }
+   } else {
+   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+   regs = xehp_regs;
+   num = ARRAY_SIZE(xehp_regs);
+   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+   regs = gen12_regs;
+   num = ARRAY_SIZE(gen12_regs);
+   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) 
{
+   regs = gen8_regs;
+   num = ARRAY_SIZE(gen8_regs);
+   } else if (GRAPHICS_VER(i915) < 8) {
+   return 0;
+   }
}
 
if (gt_WARN_ONCE(engine->gt, !num,
@@ -1212,7 +1226,14 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 
reg = regs[class];
 
-   if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
+   if (regs == xelpmp_regs && class == OTHER_CLASS) {
+   /*
+* There's only a single GSC instance, but it uses register bit
+* 1 instead of either 0 or OTHER_GSC_INSTANCE.
+*/
+   GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
+   val = 1;
+   } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance 
== 1) {
reg.reg = GEN8_M2TCR;
val = 0;
} else {
@@ -1228,7 +1249,8 @@ static int intel_eng

Re: [PATCH] drm/i915/sseu: fix max_subslices array-index-out-of-bounds access

2023-02-21 Thread Matt Roper
On Tue, Feb 21, 2023 at 09:01:24AM +0000, Tvrtko Ursulin wrote:
> 
> 
> On 20/02/2023 17:18, Andrea Righi wrote:
> > It seems that commit bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU
> > mask internally in UAPI format") exposed a potential out-of-bounds
> > access, reported by UBSAN as following on a laptop with a gen 11 i915
> > card:
> > 
> >UBSAN: array-index-out-of-bounds in 
> > drivers/gpu/drm/i915/gt/intel_sseu.c:65:27
> >index 6 is out of range for type 'u16 [6]'
> >CPU: 2 PID: 165 Comm: systemd-udevd Not tainted 6.2.0-9-generic #9-Ubuntu
> >Hardware name: Dell Inc. XPS 13 9300/077Y9N, BIOS 1.11.0 03/22/2022
> >Call Trace:
> > 
> > show_stack+0x4e/0x61
> > dump_stack_lvl+0x4a/0x6f
> > dump_stack+0x10/0x18
> > ubsan_epilogue+0x9/0x3a
> > __ubsan_handle_out_of_bounds.cold+0x42/0x47
> > gen11_compute_sseu_info+0x121/0x130 [i915]
> > intel_sseu_info_init+0x15d/0x2b0 [i915]
> > intel_gt_init_mmio+0x23/0x40 [i915]
> > i915_driver_mmio_probe+0x129/0x400 [i915]
> > ? intel_gt_probe_all+0x91/0x2e0 [i915]
> > i915_driver_probe+0xe1/0x3f0 [i915]
> > ? drm_privacy_screen_get+0x16d/0x190 [drm]
> > ? acpi_dev_found+0x64/0x80
> > i915_pci_probe+0xac/0x1b0 [i915]
> > ...
> > 
> > According to the definition of sseu_dev_info, eu_mask->hsw is limited to
> > a maximum of GEN_MAX_SS_PER_HSW_SLICE (6) sub-slices, but
> > gen11_sseu_info_init() can potentially set 8 sub-slices, in the
> > !IS_JSL_EHL(gt->i915) case.
> > 
> > Fix this by reserving up to 8 slots for max_subslices in the eu_mask
> > struct.
> > 
> > Reported-by: Emil Renner Berthing 
> > Signed-off-by: Andrea Righi 
> 
> Looks like bug was probably introduced in:
> 
> Fixes: bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU mask internally in 
> UAPI format")
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc:  # v6.0+
> 
> Adding Matt to cross-check.

Yep, looks like there's one specific SKU of ICL that has 8 subslices
that we overlooked previously.

Reviewed-by: Matt Roper 

> 
> Regards,
> 
> Tvrtko
> 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h 
> > b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > index aa87d3832d60..d7e8c374f153 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > @@ -27,7 +27,7 @@ struct drm_printer;
> >* is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
> >* I915_MAX_SS_FUSE_BITS value below).
> >*/
> > -#define GEN_MAX_SS_PER_HSW_SLICE   6
> > +#define GEN_MAX_SS_PER_HSW_SLICE   8
> >   /*
> >* Maximum number of 32-bit registers used by hardware to express the

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


[PATCH] drm/i915/mtl: Add engine TLB invalidation

2023-02-17 Thread Matt Roper
MTL's primary GT can continue to use the same engine TLB invalidation
programming as past Xe_HP-based platforms.  However the media GT needs
some special handling:
 * Invalidation registers on the media GT are singleton registers
   (unlike the primary GT where they are still MCR).
 * Since the GSC is now exposed as an engine, there's a new register to
   use for TLB invalidation.  The offset is identical to the compute
   engine offset, but this is expected --- compute engines only exist on
   the primary GT while the GSC only exists on the media GT.
 * Although there's only a single GSC engine instance, it inexplicably
   uses bit 1 to request invalidations rather than bit 0.

Cc: Tvrtko Ursulin 
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 52 ---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  1 +
 2 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f3a91e7f85f7..af8e158fbd84 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1166,6 +1166,11 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
[COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
[COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
};
+   static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
+   [VIDEO_DECODE_CLASS].reg  = GEN12_VD_TLB_INV_CR,
+   [VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
+   [OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
+   };
struct drm_i915_private *i915 = engine->i915;
const unsigned int instance = engine->instance;
const unsigned int class = engine->class;
@@ -1185,19 +1190,28 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 * 12.00 -> 12.50 transition multi cast handling is required too.
 */
 
-   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
-   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
-   regs = xehp_regs;
-   num = ARRAY_SIZE(xehp_regs);
-   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
-  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
-   regs = gen12_regs;
-   num = ARRAY_SIZE(gen12_regs);
-   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
-   regs = gen8_regs;
-   num = ARRAY_SIZE(gen8_regs);
-   } else if (GRAPHICS_VER(i915) < 8) {
-   return 0;
+   if (engine->gt->type == GT_MEDIA) {
+   if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) {
+   regs = xelpmp_regs;
+   num = ARRAY_SIZE(xelpmp_regs);
+   }
+   } else {
+   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 71) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 70) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
+   GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
+   regs = xehp_regs;
+   num = ARRAY_SIZE(xehp_regs);
+   } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
+  GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
+   regs = gen12_regs;
+   num = ARRAY_SIZE(gen12_regs);
+   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) 
{
+   regs = gen8_regs;
+   num = ARRAY_SIZE(gen8_regs);
+   } else if (GRAPHICS_VER(i915) < 8) {
+   return 0;
+   }
}
 
if (gt_WARN_ONCE(engine->gt, !num,
@@ -1212,7 +1226,14 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
 
reg = regs[class];
 
-   if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == 1) {
+   if (class == OTHER_CLASS) {
+   /*
+* There's only a single GSC instance, but it uses register bit
+* 1 instead of either 0 or OTHER_GSC_INSTANCE.
+*/
+   GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
+   val = 1;
+   } else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance 
== 1) {
reg.reg = GEN8_M2TCR;
val = 0;
} else {
@@ -1228,7 +1249,8 @@ static int intel_engine_init_tlb_invalidation(struct 
intel_engine_cs *engine)
if (GRAPHICS_VER(i915) >= 12 &&
(engine->class == VIDEO_DECODE_CLASS ||
 engine->class == VIDEO_ENHANCEMENT_CLASS ||
-engine->class == COMPUTE_CLASS))
+ 

Re: [PATCH v5] drm/i915: Consolidate TLB invalidation flow

2023-02-16 Thread Matt Roper
On Thu, Feb 16, 2023 at 09:21:23AM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> As the logic for selecting the register and corresponding values grew, the
> code became a bit unsightly. Consolidate by storing the required values at
> engine init time in the engine itself, and by doing so minimise the amount
> of invariant platform and engine checks during each and every TLB
> invalidation.
> 
> v2:
>  * Fail engine probe if TLB invalidation registers are unknown.
> 
> v3:
>  * Rebase.
> 
> v4:
>  * Fix handling of GEN8_M2TCR. (Andrzej)
> 
> v5:
>  * Tidy checkpatch warnings.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Andrzej Hajda 
> Cc: Matt Roper 
> Reviewed-by: Andrzej Hajda  # v1
> Reviewed-by: Matt Roper  # v3

Reviewed-by: Matt Roper 

for this version as well.


Matt

> Signed-off-by: Tvrtko Ursulin 
> Link: 
> https://patchwork.freedesktop.org/patch/msgid/20230202083218.4100760-1-tvrtko.ursu...@linux.intel.com
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c|  97 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  14 ++
>  drivers/gpu/drm/i915/gt/intel_gt.c   | 138 +++
>  3 files changed, 133 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index d4e29da74612..f3a91e7f85f7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -9,6 +9,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gem/i915_gem_internal.h"
> +#include "gt/intel_gt_print.h"
>  #include "gt/intel_gt_regs.h"
>  
>  #include "i915_cmd_parser.h"
> @@ -1143,12 +1144,108 @@ static int init_status_page(struct intel_engine_cs 
> *engine)
>   return ret;
>  }
>  
> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> +{
> + static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> + [RENDER_CLASS].reg  = GEN8_RTCR,
> + [VIDEO_DECODE_CLASS].reg= GEN8_M1TCR, /* , GEN8_M2TCR */
> + [VIDEO_ENHANCEMENT_CLASS].reg   = GEN8_VTCR,
> + [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
> + };
> + static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> + [RENDER_CLASS].reg  = GEN12_GFX_TLB_INV_CR,
> + [VIDEO_DECODE_CLASS].reg= GEN12_VD_TLB_INV_CR,
> + [VIDEO_ENHANCEMENT_CLASS].reg   = GEN12_VE_TLB_INV_CR,
> + [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
> + [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
> + };
> + static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> + [RENDER_CLASS].mcr_reg= XEHP_GFX_TLB_INV_CR,
> + [VIDEO_DECODE_CLASS].mcr_reg  = XEHP_VD_TLB_INV_CR,
> + [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> + [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> + [COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
> + };
> + struct drm_i915_private *i915 = engine->i915;
> + const unsigned int instance = engine->instance;
> + const unsigned int class = engine->class;
> + const union intel_engine_tlb_inv_reg *regs;
> + union intel_engine_tlb_inv_reg reg;
> + unsigned int num = 0;
> + u32 val;
> +
> + /*
> +  * New platforms should not be added with catch-all-newer (>=)
> +  * condition so that any later platform added triggers the below warning
> +  * and in turn mandates a human cross-check of whether the invalidation
> +  * flows have compatible semantics.
> +  *
> +  * For instance with the 11.00 -> 12.00 transition three out of five
> +  * respective engine registers were moved to masked type. Then after the
> +  * 12.00 -> 12.50 transition multi cast handling is required too.
> +  */
> +
> + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> + GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> + regs = xehp_regs;
> + num = ARRAY_SIZE(xehp_regs);
> + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> + regs = gen12_regs;
> + num = ARRAY_SIZE(gen12_regs);
> + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> + regs = gen8_regs;
> + num = ARRAY_SIZE(gen8_regs);
> + } else if (GRAPHICS_VER(i915) < 8) {
> + return 0;
> + }
> +

Re: [PATCH] drm/i915/xelpmp: Consider GSI offset when doing MCR lookups

2023-02-15 Thread Matt Roper
On Wed, Feb 15, 2023 at 11:48:13AM -0800, Sripada, Radhakrishna wrote:
> 
> 
> > -Original Message-
> > From: dri-devel  On Behalf Of Matt
> > Roper
> > Sent: Monday, February 13, 2023 4:19 PM
> > To: intel-...@lists.freedesktop.org
> > Cc: dri-devel@lists.freedesktop.org
> > Subject: [PATCH] drm/i915/xelpmp: Consider GSI offset when doing MCR
> > lookups
> > 
> > MCR range tables use the final MMIO offset of a register (including the
> > > 0x380000 GSI offset when applicable).  Since the i915_mcr_reg_t passed
> > as a parameter during steering lookup does not include the GSI offset,
> > we need to add it back in for GSI registers before searching the tables.
> > 
> > Fixes: a7ec65fc7e83 ("drm/i915/xelpmp: Add multicast steering for media GT")
> 
> LGTM,
> Reviewed-by: Radhakrishna Sripada 

Thanks, applied to drm-intel-gt-next.


Matt

> 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > index a4a8b8bc5737..03632df27de3 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > @@ -561,12 +561,15 @@ static bool reg_needs_read_steering(struct intel_gt
> > *gt,
> > i915_mcr_reg_t reg,
> > enum intel_steering_type type)
> >  {
> > -   const u32 offset = i915_mmio_reg_offset(reg);
> > +   u32 offset = i915_mmio_reg_offset(reg);
> > const struct intel_mmio_range *entry;
> > 
> > if (likely(!gt->steering_table[type]))
> > return false;
> > 
> > +   if (IS_GSI_REG(offset))
> > +   offset += gt->uncore->gsi_offset;
> > +
> > for (entry = gt->steering_table[type]; entry->end; entry++) {
> > if (offset >= entry->start && offset <= entry->end)
> > return true;
> > --
> > 2.39.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


[PATCH] drm/i915/xelpmp: Consider GSI offset when doing MCR lookups

2023-02-13 Thread Matt Roper
MCR range tables use the final MMIO offset of a register (including the
0x380000 GSI offset when applicable).  Since the i915_mcr_reg_t passed
as a parameter during steering lookup does not include the GSI offset,
we need to add it back in for GSI registers before searching the tables.

Fixes: a7ec65fc7e83 ("drm/i915/xelpmp: Add multicast steering for media GT")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index a4a8b8bc5737..03632df27de3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -561,12 +561,15 @@ static bool reg_needs_read_steering(struct intel_gt *gt,
i915_mcr_reg_t reg,
enum intel_steering_type type)
 {
-   const u32 offset = i915_mmio_reg_offset(reg);
+   u32 offset = i915_mmio_reg_offset(reg);
const struct intel_mmio_range *entry;
 
if (likely(!gt->steering_table[type]))
return false;
 
+   if (IS_GSI_REG(offset))
+   offset += gt->uncore->gsi_offset;
+
for (entry = gt->steering_table[type]; entry->end; entry++) {
if (offset >= entry->start && offset <= entry->end)
return true;
-- 
2.39.1



Re: [PATCH v3] drm/i915: Consolidate TLB invalidation flow

2023-02-13 Thread Matt Roper
On Wed, Feb 01, 2023 at 04:51:46PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> As the logic for selecting the register and corresponding values grew, the
> code became a bit unsightly. Consolidate by storing the required values at
> engine init time in the engine itself, and by doing so minimise the amount
> of invariant platform and engine checks during each and every TLB
> invalidation.
> 
> v2:
>  * Fail engine probe if TLB invalidation registers are unknown.
> 
> v3:
>  * Rebase.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Andrzej Hajda 
> Cc: Matt Roper 
> Reviewed-by: Andrzej Hajda  # v1

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c|  96 +
>  drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 ++
>  drivers/gpu/drm/i915/gt/intel_gt.c   | 138 +++
>  3 files changed, 133 insertions(+), 116 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index d4e29da74612..e430945743ec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -9,6 +9,7 @@
>  
>  #include "gem/i915_gem_context.h"
>  #include "gem/i915_gem_internal.h"
> +#include "gt/intel_gt_print.h"
>  #include "gt/intel_gt_regs.h"
>  
>  #include "i915_cmd_parser.h"
> @@ -1143,12 +1144,107 @@ static int init_status_page(struct intel_engine_cs 
> *engine)
>   return ret;
>  }
>  
> +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
> +{
> + static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> + [RENDER_CLASS].reg  = GEN8_RTCR,
> + [VIDEO_DECODE_CLASS].reg= GEN8_M1TCR, /* , GEN8_M2TCR */
> + [VIDEO_ENHANCEMENT_CLASS].reg   = GEN8_VTCR,
> + [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
> + };
> + static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> + [RENDER_CLASS].reg  = GEN12_GFX_TLB_INV_CR,
> + [VIDEO_DECODE_CLASS].reg= GEN12_VD_TLB_INV_CR,
> + [VIDEO_ENHANCEMENT_CLASS].reg   = GEN12_VE_TLB_INV_CR,
> + [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
> + [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
> + };
> + static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> + [RENDER_CLASS].mcr_reg= XEHP_GFX_TLB_INV_CR,
> + [VIDEO_DECODE_CLASS].mcr_reg  = XEHP_VD_TLB_INV_CR,
> + [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> + [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> + [COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
> + };
> + struct drm_i915_private *i915 = engine->i915;
> + const union intel_engine_tlb_inv_reg *regs;
> + union intel_engine_tlb_inv_reg reg;
> + unsigned int class = engine->class;
> + unsigned int num = 0;
> + u32 val;
> +
> + /*
> +  * New platforms should not be added with catch-all-newer (>=)
> +  * condition so that any later platform added triggers the below warning
> +  * and in turn mandates a human cross-check of whether the invalidation
> +  * flows have compatible semantics.
> +  *
> +  * For instance with the 11.00 -> 12.00 transition three out of five
> +  * respective engine registers were moved to masked type. Then after the
> +  * 12.00 -> 12.50 transition multi cast handling is required too.
> +  */
> +
> + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> + GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
> + regs = xehp_regs;
> + num = ARRAY_SIZE(xehp_regs);
> + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
> + regs = gen12_regs;
> + num = ARRAY_SIZE(gen12_regs);
> + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> + regs = gen8_regs;
> + num = ARRAY_SIZE(gen8_regs);
> + } else if (GRAPHICS_VER(i915) < 8) {
> + return 0;
> + }
> +
> + if (gt_WARN_ONCE(engine->gt, !num,
> +  "Platform does not implement TLB invalidation!"))
> + return -ENODEV;
> +
> + if (gt_WARN_ON_ONCE(engine->gt,
> +  class >= num ||
> +  (!regs[class].reg.reg &&
> +   !regs[class].mcr_reg.reg)))
> 

Re: [Intel-gfx] [PATCH v2 2/2] drm/i915: Remove unused/wrong INF_UNIT_LEVEL_CLKGATE

2023-02-06 Thread Matt Roper
On Mon, Feb 06, 2023 at 08:54:10AM -0800, Lucas De Marchi wrote:
> INF_UNIT_LEVEL_CLKGATE is not replicated, but since it's not actually
> used it can just be removed.
> 
> Signed-off-by: Lucas De Marchi 

Reviewed-by: Matt Roper 

Looks like the only reference to the register was removed in

commit eee42141e498fa3df3ce524846d52f67a92b6845
Author:     Matt Roper 
AuthorDate: Tue Jul 13 12:36:35 2021 -0700
Commit:     Matt Roper 
CommitDate: Wed Jul 14 17:49:02 2021 -0700

drm/i915/icl: Drop workarounds that only apply to pre-production 
steppings

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 ---
>  1 file changed, 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index cc1539c7a6b6..7256f7e3fd11 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -769,9 +769,6 @@
>  #define GEN10_DFR_RATIO_EN_AND_CHICKEN   MCR_REG(0x9550)
>  #define   DFR_DISABLE (1 << 9)
>  
> -#define INF_UNIT_LEVEL_CLKGATE   MCR_REG(0x9560)
> -#define   CGPSF_CLKGATE_DIS  (1 << 3)
> -
>  #define MICRO_BP0_0  _MMIO(0x9800)
>  #define MICRO_BP0_2  _MMIO(0x9804)
>  #define MICRO_BP0_1  _MMIO(0x9808)
> -- 
> 2.39.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH 1/2] drm/i915: Fix GEN8_MISCCPCTL

2023-02-03 Thread Matt Roper
On Fri, Feb 03, 2023 at 10:03:49AM -0800, Lucas De Marchi wrote:
> On Thu, Feb 02, 2023 at 05:12:10PM -0800, Matt Roper wrote:
> > On Thu, Feb 02, 2023 at 04:57:08PM -0800, Lucas De Marchi wrote:
> > > Register 0x9424 is not replicated on any platform, so it shouldn't be
> > > declared with REG_MCR(). Declaring it with _MMIO() is basically
> > > duplicate of the GEN7 version, so just remove the GEN8 and change all
> > > the callers to use the right functions.
> > 
> > According to an old copy of bspec page 13991, 0x9400-0x97FF was an MCR
> > range on gen8 platforms.  Newer copies of that bspec page forgot to even
> > include the register range table, so it's not obvious unless you dig
> > through the history and look at a version from before Aug 2020.
> > 
> > However bspec page 66673 indicates that this range went back to being a
> > singleton range in gen9 (and the other forcewake pages for newer
> > platforms indicate it stayed that way), so that means BDW and CHV are
> > the _only_ platforms that should treat it as MCR.  Usage for other
> > platforms should either add a new "GEN9" definition, or just go back to
> > using the GEN7 definition.
> 
> sounds like more a spec mistake. This range was listed as
> "slice common". I'm not sure we'd really have to set any steering for
> specific slice. Another thing is that we didn't set any steering for a
> long time in this register and it was working. Even now there is no
> table for gen8/gen9 in drivers/gpu/drm/i915/gt/intel_gt_mcr.c, so any
> call to intel_gt_mcr_* will simply fallback to "no steering required".
> 
> For me, any MCR_REG() should correspond to registers in these
> tables.  I don't think there's much point in annotating the register as
> MCR in its definition and then do nothing with it.  Btw, this is how I
> started getting warning wrt this register: as you know, in xe driver
> you added a warning for registers missing from the mcr tables,
> which I think is indeed the right thing to do for the recent platforms.

I guess that's fair.  Even though gen8 had multicast registers, I
believe the two types of steering (subslice and l3bank) could always be
reconciled with a single steering value; since the IFWI took care of
initializing this in a sane way, i915 never actually needed to touch it
(except when doing unicast reads for an errorstate dump or something).

I'm not sure the same is always true for gen9 though.  We should
probably add tables for those just to be safe, but that's future work
rather than something that we need to worry about for this patch.
Likewise, we should also finally kill off mcr_ranges_*[] in the
workaround file at some point; now that we have is_mcr in the workaround
itself, those range tables are redundant.  But that's also work for a
future series.

> 
> For gen8, this change here should not change any behavior.  It
> changes for gen11+ to the correct behavior. So I don't think we need to
> care much about double checking if gen8 had a unique behavior no other
> platforms have.  I think just amending the commit message with more
> information like this would be ok.

Yeah, sounds good.  With a slightly updated commit message

Reviewed-by: Matt Roper 

> 
> Lucas De Marchi
> 
> > 
> > 
> > Matt
> > 
> > > 
> > > Also use intel_uncore_rmw() rather than a read + write where possible.
> > > 
> > > Fixes: a9e69428b1b4 ("drm/i915: Define MCR registers explicitly")
> > > Cc: Matt Roper 
> > > Cc: Balasubramani Vivekanandan 
> > > Cc: Rodrigo Vivi 
> > > Cc: Gustavo Sousa 
> > > Cc: Matt Atwood 
> > > Cc: Ashutosh Dixit 
> > > Signed-off-by: Lucas De Marchi 
> > > ---
> > >  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +
> > >  drivers/gpu/drm/i915/gt/intel_workarounds.c |  4 ++--
> > >  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c   |  5 ++---
> > >  drivers/gpu/drm/i915/intel_pm.c | 10 +-
> > >  4 files changed, 10 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > index 7fa18a3b3957..cc1539c7a6b6 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > > @@ -686,10 +686,7 @@
> > >  #define GEN6_RSTCTL  _MMIO(0x9420)
> > > 
> > >  #define GEN7_MISCCPCTL   _MMIO(0x9424)
> > > -#define   GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
> > > -
> > > -#define GEN8_MISCCPCTL 

Re: [PATCH 1/2] drm/i915: Fix GEN8_MISCCPCTL

2023-02-02 Thread Matt Roper
On Thu, Feb 02, 2023 at 04:57:08PM -0800, Lucas De Marchi wrote:
> Register 0x9424 is not replicated on any platform, so it shouldn't be
> declared with REG_MCR(). Declaring it with _MMIO() is basically
> duplicate of the GEN7 version, so just remove the GEN8 and change all
> the callers to use the right functions.

According to an old copy of bspec page 13991, 0x9400-0x97FF was an MCR
range on gen8 platforms.  Newer copies of that bspec page forgot to even
include the register range table, so it's not obvious unless you dig
through the history and look at a version from before Aug 2020.

However bspec page 66673 indicates that this range went back to being a
singleton range in gen9 (and the other forcewake pages for newer
platforms indicate it stayed that way), so that means BDW and CHV are
the _only_ platforms that should treat it as MCR.  Usage for other
platforms should either add a new "GEN9" definition, or just go back to
using the GEN7 definition.


Matt

> 
> Also use intel_uncore_rmw() rather than a read + write where possible.
> 
> Fixes: a9e69428b1b4 ("drm/i915: Define MCR registers explicitly")
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc: Rodrigo Vivi 
> Cc: Gustavo Sousa 
> Cc: Matt Atwood 
> Cc: Ashutosh Dixit 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  5 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c |  4 ++--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c   |  5 ++---
>  drivers/gpu/drm/i915/intel_pm.c | 10 +-
>  4 files changed, 10 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 7fa18a3b3957..cc1539c7a6b6 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -686,10 +686,7 @@
>  #define GEN6_RSTCTL  _MMIO(0x9420)
>  
>  #define GEN7_MISCCPCTL   _MMIO(0x9424)
> -#define   GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
> -
> -#define GEN8_MISCCPCTL   MCR_REG(0x9424)
> -#define   GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
> +#define   GEN7_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
>  #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
>  #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE   (1 << 2)
>  #define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 29718d0595f4..cfc122c17e28 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1645,7 +1645,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
>  
>   /* Wa_14015795083 */
> - wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
> GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
> + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  
>   /* Wa_18018781329 */
>   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
> @@ -1664,7 +1664,7 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   pvc_init_mcr(gt, wal);
>  
>   /* Wa_14015795083 */
> - wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
> GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
> + wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
>  
>   /* Wa_18018781329 */
>   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 3d2249bda368..69133420c78b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -39,9 +39,8 @@ static void guc_prepare_xfer(struct intel_gt *gt)
>  
>   if (GRAPHICS_VER(uncore->i915) == 9) {
>   /* DOP Clock Gating Enable for GuC clocks */
> - intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
> -  GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
> -  intel_gt_mcr_read_any(gt, 
> GEN8_MISCCPCTL));
> + intel_uncore_rmw(uncore, GEN7_MISCCPCTL, 0,
> +  GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
>  
>   /* allows for 5us (in 10ns units) before GT can go to RC6 */
>   intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e0364c4141b8..798607959458 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4300

[PATCH 4/4] drm/i915/selftest: Use forcewake to sanity check engine wa lists

2023-02-01 Thread Matt Roper
Although register information in the bspec includes a field that is
supposed to reflect a register's reset characteristics (i.e., whether a
register maintains its value through engine resets), it's been
discovered that this information is incorrect for some register ranges
(i.e., registers that are not affected by engine resets are tagged in a
way that indicates they would be).

We can sanity check workaround registers placed on the RCS/CCS engine
workaround lists (including those placed there via the
general_render_compute_wa_init() function) by comparing against the
forcewake table.  As far as we know, there's never a case where a
register that lives outside the RENDER powerwell will be reset by an
RCS/CCS engine reset.

Signed-off-by: Matt Roper 
---
 .../gpu/drm/i915/gt/selftest_workarounds.c| 52 +++
 1 file changed, 52 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c 
b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 14a8b25b6204..1bc8febc5c1d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -1362,12 +1362,64 @@ live_engine_reset_workarounds(void *arg)
return ret;
 }
 
+/*
+ * The bspec's documentation for register reset behavior can be unreliable for
+ * some MMIO ranges.  But in general we do not expect registers outside the
+ * RENDER forcewake domain to be reset by RCS/CCS engine resets.  If we find
+ * workaround registers on an RCS or CCS engine's list, it likely indicates
+ * the register is misdocumented in the bspec and the workaround implementation
+ * should be moved to the GT workaround list instead.
+ */
+static int
+live_check_engine_workarounds_fw(void *arg)
+{
+   struct intel_gt *gt = arg;
+   struct intel_engine_cs *engine;
+   struct wa_lists *lists;
+   enum intel_engine_id id;
+   int ret = 0;
+
+   lists = kzalloc(sizeof(*lists), GFP_KERNEL);
+   if (!lists)
+   return -ENOMEM;
+
+   reference_lists_init(gt, lists);
+
+   for_each_engine(engine, gt, id) {
+   struct i915_wa_list *wal = &lists->engine[id].wa_list;
+   struct i915_wa *wa;
+   int i;
+
+   if (engine->class != RENDER_CLASS &&
+   engine->class != COMPUTE_CLASS)
+   continue;
+
+   for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+   enum forcewake_domains fw;
+
+   fw = intel_uncore_forcewake_for_reg(gt->uncore, wa->reg,
+   FW_REG_READ | 
FW_REG_WRITE);
+   if ((fw & FORCEWAKE_RENDER) == 0) {
+   pr_err("%s: Register %#x not in RENDER 
forcewake domain!\n",
+  engine->name, 
i915_mmio_reg_offset(wa->reg));
+   ret = -EINVAL;
+   }
+   }
+   }
+
+   reference_lists_fini(gt, lists);
+   kfree(lists);
+
+   return ret;
+}
+
 int intel_workarounds_live_selftests(struct drm_i915_private *i915)
 {
static const struct i915_subtest tests[] = {
SUBTEST(live_dirty_whitelist),
SUBTEST(live_reset_whitelist),
SUBTEST(live_isolated_whitelist),
+   SUBTEST(live_check_engine_workarounds_fw),
SUBTEST(live_gpu_reset_workarounds),
SUBTEST(live_engine_reset_workarounds),
};
-- 
2.39.1



[PATCH 2/4] drm/i915/gen11: Wa_1408615072/Wa_1407596294 should be on GT list

2023-02-01 Thread Matt Roper
The UNSLICE_UNIT_LEVEL_CLKGATE register programmed by this workaround
has 'BUS' style reset, indicating that it does not lose its value on
engine resets.  Furthermore, this register is part of the GT forcewake
domain rather than the RENDER domain, so it should not be impacted by
RCS engine resets.  As such, we should implement this on the GT
workaround list rather than an engine list.

Bspec: 19219
Fixes: 3551ff928744 ("drm/i915/gen11: Moving WAs to rcs_engine_wa_init()")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index f45ca3d4a07c..7e93ba6b3208 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1405,6 +1405,13 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
 
+   /*
+* Wa_1408615072:icl,ehl  (vsunit)
+* Wa_1407596294:icl,ehl  (hsunit)
+*/
+   wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
+   VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
+
/* Wa_1407352427:icl,ehl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
PSDUNIT_CLKGATE_DIS);
@@ -2536,13 +2543,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
 GEN11_ENABLE_32_PLANE_MODE);
 
-   /*
-* Wa_1408615072:icl,ehl  (vsunit)
-* Wa_1407596294:icl,ehl  (hsunit)
-*/
-   wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
-   VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
-
/*
 * Wa_1408767742:icl[a2..forever],ehl[all]
 * Wa_1605460711:icl[a0..c0]
-- 
2.39.1



[PATCH 3/4] drm/i915/xehp: LNCF/LBCF workarounds should be on the GT list

2023-02-01 Thread Matt Roper
Although registers in the L3 bank/node configuration ranges are marked
as having "DEV" reset characteristics in the bspec, this appears to be a
hold-over from pre-Xe_HP platforms.  In reality, these registers
maintain their values across engine resets, meaning that workarounds
and tuning settings targetting them should be placed on the GT
workaround list rather than an engine workaround list.

Note that an extra clue here is that these registers moved from the
RENDER forcewake domain to the GT forcewake domain in Xe_HP; generally
RCS/CCS engine resets should not lead to the reset of a register that
lives outside the RENDER domain.

Re-applying these registers on engine resets wouldn't actually hurt
anything, but is unnecessary and just makes it more confusing to anyone
trying to decipher how these registers really work.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 +
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 7e93ba6b3208..09c9837458b5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1499,6 +1499,12 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
/* Wa_1409757795:xehpsdv */
wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
 
+   /* Wa_18011725039:xehpsdv */
+   if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
+   wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+   wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+   }
+
/* Wa_16011155590:xehpsdv */
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1548,6 +1554,9 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
/* Wa_14014368820:xehpsdv */
wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
INVALIDATION_BROADCAST_MODE_DIS | 
GLOBAL_INVALIDATION_MODE);
+
+   /* Wa_14010670810:xehpsdv */
+   wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 }
 
 static void
@@ -1684,6 +1693,9 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_16016694945 */
+   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
 }
 
 static void
@@ -1724,11 +1736,36 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
debug_dump_steering(gt);
 }
 
+/*
+ * The bspec performance guide has recommended MMIO tuning settings.  These
+ * aren't truly "workarounds" but we want to program them through the
+ * workaround infrastructure to make sure they're (re)applied at the proper
+ * times.
+ *
+ * The settings in this function are for settings that persist through
+ * engine resets and also are not part of any engine's register state context.
+ * I.e., settings that only need to be re-applied in the event of a full GT
+ * reset.
+ */
+static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+   if (IS_PONTEVECCHIO(gt->i915)) {
+   wa_mcr_write(wal, XEHPC_L3SCRUB,
+SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
+   }
+
+   if (IS_DG2(gt->i915))
+   wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
BLEND_FILL_CACHING_OPT_DIS);
+}
+
 static void
 gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
struct drm_i915_private *i915 = gt->i915;
 
+   gt_tuning_settings(gt, wal);
+
if (gt->type == GT_MEDIA) {
if (MEDIA_VER(i915) >= 13)
xelpmp_gt_workarounds_init(gt, wal);
@@ -2897,16 +2934,8 @@ static void
 add_render_compute_tuning_settings(struct drm_i915_private *i915,
   struct i915_wa_list *wal)
 {
-   if (IS_PONTEVECCHIO(i915)) {
-   wa_mcr_write(wal, XEHPC_L3SCRUB,
-SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
-   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
-   }
-
-   if (IS_DG2(i915)) {
-   wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
BLEND_FILL_CACHING_OPT_DIS);
+   if (IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, 
STACKID_CTRL_512);
-   }
 
/*
 * This tuning setting proves beneficial only on ATS-M designs; the
@@ -2988,11 +3017,6 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
   0, false);
}

[PATCH 1/4] drm/i915/pvc: Annotate two more workaround/tuning registers as MCR

2023-02-01 Thread Matt Roper
XEHPC_LNCFMISCCFGREG0 and XEHPC_L3SCRUB are both in MCR register ranges
on PVC (with HALFBSLICE and L3BANK replication respectively), so they
should be explicitly declared as MCR registers and use MCR-aware
workaround handlers.

The workarounds/tuning settings should still be applied properly on PVC
even without the MCR annotation, but readback verification on
CONFIG_DRM_I915_DEBUG_GEM builds could potentially give false positive
"workaround lost on load" warnings on parts fused such that a unicast
read targets a terminated register instance.

Fixes: a9e69428b1b4 ("drm/i915: Define MCR registers explicitly")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 ++--
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 +---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 7fa18a3b3957..928698c621e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -979,7 +979,7 @@
 #define   GEN7_WA_FOR_GEN7_L3_CONTROL  0x3C47FF8C
 #define   GEN7_L3AGDIS (1 << 19)
 
-#define XEHPC_LNCFMISCCFGREG0  _MMIO(0xb01c)
+#define XEHPC_LNCFMISCCFGREG0  MCR_REG(0xb01c)
 #define   XEHPC_HOSTCACHEEN REG_BIT(1)
 #define   XEHPC_OVRLSCCC   REG_BIT(0)
 
@@ -1042,7 +1042,7 @@
 #define XEHP_L3SCQREG7 MCR_REG(0xb188)
 #define   BLEND_FILL_CACHING_OPT_DIS   REG_BIT(3)
 
-#define XEHPC_L3SCRUB  _MMIO(0xb18c)
+#define XEHPC_L3SCRUB  MCR_REG(0xb18c)
 #define   SCRUB_CL_DWNGRADE_SHARED REG_BIT(12)
 #define   SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
 #define   SCRUB_RATE_4B_PER_CLK
REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 29718d0595f4..f45ca3d4a07c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -240,6 +240,12 @@ wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
wa_write_clr_set(wal, reg, ~0, set);
 }
 
+static void
+wa_mcr_write(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
+{
+   wa_mcr_write_clr_set(wal, reg, ~0, set);
+}
+
 static void
 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -2892,9 +2898,9 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
   struct i915_wa_list *wal)
 {
if (IS_PONTEVECCHIO(i915)) {
-   wa_write(wal, XEHPC_L3SCRUB,
+   wa_mcr_write(wal, XEHPC_L3SCRUB,
 SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
-   wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
+   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
}
 
if (IS_DG2(i915)) {
@@ -2984,7 +2990,7 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 
if (IS_PONTEVECCHIO(i915)) {
/* Wa_16016694945 */
-   wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
+   wa_mcr_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
}
 
if (IS_XEHPSDV(i915)) {
-- 
2.39.1



Re: [Intel-gfx] [PATCH v4] drm/i915/uncore: Use GT message helpers in uncore

2023-01-26 Thread Matt Roper
On Thu, Jan 26, 2023 at 10:46:01AM +0000, Tvrtko Ursulin wrote:
> 
> On 25/01/2023 19:04, Matt Roper wrote:
> > On Wed, Jan 25, 2023 at 10:51:53AM +0000, Tvrtko Ursulin wrote:
> > > 
> > > On 24/01/2023 20:54, john.c.harri...@intel.com wrote:
> > > > From: John Harrison 
> > > > 
> > > > Uncore is really part of the GT. So use the GT specific debug/error
> > 
> > That's not really true; uncore should be outside the GT since it's used
> > for all kinds of non-GT stuff as well (sgunit, display, etc.).  I
> > believe "uncore" is just an old-fashioned name for what modern docs
> > refer to as "system agent" these days.
> > 
> > Granted, our i915 design does stretch the truth quite a bit today by
> > rolling some of the GT-specific concepts into the uncore code (GT
> > forcewake/shadowing, GSI offset, etc.).  Having two intel_uncore
> > structures on a platform like MTL doesn't really match the hardware
> > reality at the lowest levels, but allows us to update the software for
> > these new platforms without major, intrusive changes for all platforms.
> > 
> > I feel like including 'gt' information in log messages unrelated to GT
> > might be confusing.  For display stuff it's probably obvious that the GT
> > information is bogus, but when stuff is related to the sgunit it won't
> > always be so obvious.
> 
> Level of confusing vs absence of debugability and a suggested way forward?
> Just do nothing and accept any forcewake related errors will not include the
> originating GT?

I guess it's probably fine to keep it on all messages; people will just
learn to ignore the bogus GT stuff on things that aren't actually
related to the GT.

It would still be good to change the commit message though so that
people doing git archaeology in the future don't get an incorrect
understanding of the relationship.


Matt

> 
> Regards,
> 
> Tvrtko
> 
> > 
> > 
> > Matt
> > 
> > > > message helpers so as to get the GT index in the prints.
> > > 
> > > Conversion looks good to me and on balance it's better to include the 
> > > origin
> > > in logs even for messages which strictly are not GT related, than not to
> > > have the origin at all (intel_de_... helpers, I *think*).
> > > 
> > > Reviewed-by: Tvrtko Ursulin 
> > > 
> > > I'll just add Jani and Matt in case they have a different opinion.
> > > 
> > > Regards,
> > > 
> > > Tvrtko
> > > 
> > > > Signed-off-by: John Harrison 
> > > > ---
> > > >drivers/gpu/drm/i915/intel_uncore.c | 133 
> > > > +---
> > > >1 file changed, 63 insertions(+), 70 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> > > > b/drivers/gpu/drm/i915/intel_uncore.c
> > > > index 8dee9e62a73ee..4e357477c6592 100644
> > > > --- a/drivers/gpu/drm/i915/intel_uncore.c
> > > > +++ b/drivers/gpu/drm/i915/intel_uncore.c
> > > > @@ -25,6 +25,7 @@
> > > >#include 
> > > >#include "gt/intel_engine_regs.h"
> > > > +#include "gt/intel_gt_print.h"
> > > >#include "gt/intel_gt_regs.h"
> > > >#include "i915_drv.h"
> > > > @@ -83,8 +84,7 @@ static void mmio_debug_resume(struct intel_uncore 
> > > > *uncore)
> > > > uncore->debug->unclaimed_mmio_check = 
> > > > uncore->debug->saved_mmio_check;
> > > > if (check_for_unclaimed_mmio(uncore))
> > > > -   drm_info(&uncore->i915->drm,
> > > > -"Invalid mmio detected during user access\n");
> > > > +   gt_info(uncore->gt, "Invalid mmio detected during user 
> > > > access\n");
> > > > spin_unlock(&uncore->debug->lock);
> > > >}
> > > > @@ -179,9 +179,9 @@ static inline void
> > > >fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain 
> > > > *d)
> > > >{
> > > > if (wait_ack_clear(d, FORCEWAKE_KERNEL)) {
> > > > -   drm_err(&d->uncore->i915->drm,
> > > > -   "%s: timed out waiting for forcewake ack to 
> > > > clear.\n",
> > > > -   intel_uncore_forcewake_domain_to_str(d->id));
> > > > +   gt_err(d->uncore-&

Re: [PATCH v2 2/3] drm/i915/mtl: Correct implementation of Wa_18018781329

2023-01-25 Thread Matt Roper
On Wed, Jan 25, 2023 at 03:41:58PM -0800, Matt Roper wrote:
> Workaround Wa_18018781329 has applied to several recent Xe_HP-based
> platforms.  However there are some extra gotchas to implementing this
> properly for MTL that we need to take into account:
> 
>  * Due to the separation of media and render/compute into separate GTs,
>this workaround needs to be implemented on each GT, not just the
>primary GT.  Since each class of register only exists on one of the
>two GTs, we should program the appropriate registers on each GT.
> 
>  * As with past Xe_HP platforms, the registers on the primary GT (Xe_LPG
>IP) are multicast/replicated registers and should be handled with the
>MCR-aware functions.  However the registers on the media GT (Xe_LPM+
>IP) are regular singleton registers and should _not_ use MCR
>handling.  We need to create separate register definitions for the
>Xe_HP multicast form and the Xe_LPM+ singleton form and use each in
>the appropriate place.
> 
>  * Starting with MTL, workarounds documented by the hardware teams are
>technically associated with IP versions/steppings rather than
>top-level platforms.  That means we should take care to check the
>media IP version rather than the graphics IP version when deciding
>whether the workaround is needed on the Xe_LPM+ media GT (in this
>case the workaround applies to both IPs and the stepping bounds are
>identical, but we should still write the code appropriately to set a
>proper precedent for future workaround implementations).
> 
>  * It's worth noting that the GSC register and the CCS register are
>defined with the same MMIO offset (0xCF30).  Since the CCS is only
>relevant to the primary GT and the GSC is only relevant to the media
>GT there isn't actually a clash here (the media GT automatically adds
>the additional 0x380000 GSI offset).  However there's currently a
>glitch in the bspec where the CCS register doesn't show up at all and
>the GSC register is listed as existing on both GTs.  That's a known
>documentation problem for several registers with shared GSC/CCS
>offsets; rest assured that the CCS register really does still exist.
> 
> Cc: Gustavo Sousa 
> Signed-off-by: Matt Roper 

Forgot to add:

Fixes: 41bb543f5598 ("drm/i915/mtl: Add initial gt workarounds")


Matt

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  7 +--
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 22 ++---
>  drivers/gpu/drm/i915/i915_drv.h |  4 
>  3 files changed, 24 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 2727645864db..310bdde049ab 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1100,8 +1100,11 @@
>  #define XEHP_MERT_MOD_CTRL   MCR_REG(0xcf28)
>  #define RENDER_MOD_CTRL  MCR_REG(0xcf2c)
>  #define COMP_MOD_CTRL MCR_REG(0xcf30)
> -#define VDBX_MOD_CTRL MCR_REG(0xcf34)
> -#define VEBX_MOD_CTRL MCR_REG(0xcf38)
> +#define XELPMP_GSC_MOD_CTRL  _MMIO(0xcf30)   /* media GT 
> only */
> +#define XEHP_VDBX_MOD_CTRL   MCR_REG(0xcf34)
> +#define XELPMP_VDBX_MOD_CTRL _MMIO(0xcf34)
> +#define XEHP_VEBX_MOD_CTRL   MCR_REG(0xcf38)
> +#define XELPMP_VEBX_MOD_CTRL _MMIO(0xcf38)
>  #define   FORCE_MISS_FTLB REG_BIT(3)
>  
>  #define GEN12_GAMSTLB_CTRL   _MMIO(0xcf4c)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 9db60078460a..4c978abf3e2a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -1681,8 +1681,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   /* Wa_18018781329 */
>   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
>   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
> - wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> - wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
> + wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> + wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
>  
>   /* Wa_1509235366:dg2 */
>   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
> @@ -1700,8 +1700,8 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>   /* Wa_18018781329 */
>   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FOR

[PATCH v2 3/3] drm/i915/xehp: Annotate a couple more workaround registers as MCR

2023-01-25 Thread Matt Roper
GAMSTLB_CTRL and GAMCNTRL_CTRL became multicast/replicated registers on
Xe_HP.  They should be defined accordingly and use MCR-aware operations.

These registers have only been used for some dg2/xehpsdv workarounds, so
this fix is mostly just for consistency/future-proofing; even lacking
the MCR annotation, workarounds will always be properly applied in a
multicast manner on these platforms.

Cc: Gustavo Sousa 
Fixes: 58bc2453ab8a ("drm/i915: Define multicast registers as a new type")
Signed-off-by: Matt Roper 
Reviewed-by: Gustavo Sousa 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 ++--
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 16 
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 310bdde049ab..7fa18a3b3957 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1107,12 +1107,12 @@
 #define XELPMP_VEBX_MOD_CTRL   _MMIO(0xcf38)
 #define   FORCE_MISS_FTLB  REG_BIT(3)
 
-#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
+#define XEHP_GAMSTLB_CTRL  MCR_REG(0xcf4c)
 #define   CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12)
 #define   EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11)
 #define   TAG_BLOCK_CLKGATE_DIS REG_BIT(7)
 
-#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54)
+#define XEHP_GAMCNTRL_CTRL MCR_REG(0xcf54)
 #define   INVALIDATION_BROADCAST_MODE_DIS  REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE REG_BIT(2)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4c978abf3e2a..3111df350f57 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1564,8 +1564,8 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_14014368820:xehpsdv */
-   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
-   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+   wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | 
GLOBAL_INVALIDATION_MODE);
 }
 
 static void
@@ -1659,10 +1659,10 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
 
/* Wa_14010680813:dg2_g10 */
-   wa_write_or(wal, GEN12_GAMSTLB_CTRL,
-   CONTROL_BLOCK_CLKGATE_DIS |
-   EGRESS_BLOCK_CLKGATE_DIS |
-   TAG_BLOCK_CLKGATE_DIS);
+   wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
+   CONTROL_BLOCK_CLKGATE_DIS |
+   EGRESS_BLOCK_CLKGATE_DIS |
+   TAG_BLOCK_CLKGATE_DIS);
}
 
/* Wa_14014830051:dg2 */
@@ -1685,8 +1685,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_1509235366:dg2 */
-   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
-   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+   wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | 
GLOBAL_INVALIDATION_MODE);
 }
 
 static void
-- 
2.39.1



[PATCH v2 2/3] drm/i915/mtl: Correct implementation of Wa_18018781329

2023-01-25 Thread Matt Roper
Workaround Wa_18018781329 has applied to several recent Xe_HP-based
platforms.  However there are some extra gotchas to implementing this
properly for MTL that we need to take into account:

 * Due to the separation of media and render/compute into separate GTs,
   this workaround needs to be implemented on each GT, not just the
   primary GT.  Since each class of register only exists on one of the
   two GTs, we should program the appropriate registers on each GT.

 * As with past Xe_HP platforms, the registers on the primary GT (Xe_LPG
   IP) are multicast/replicated registers and should be handled with the
   MCR-aware functions.  However the registers on the media GT (Xe_LPM+
   IP) are regular singleton registers and should _not_ use MCR
   handling.  We need to create separate register definitions for the
   Xe_HP multicast form and the Xe_LPM+ singleton form and use each in
   the appropriate place.

 * Starting with MTL, workarounds documented by the hardware teams are
   technically associated with IP versions/steppings rather than
   top-level platforms.  That means we should take care to check the
   media IP version rather than the graphics IP version when deciding
   whether the workaround is needed on the Xe_LPM+ media GT (in this
   case the workaround applies to both IPs and the stepping bounds are
   identical, but we should still write the code appropriately to set a
   proper precedent for future workaround implementations).

 * It's worth noting that the GSC register and the CCS register are
   defined with the same MMIO offset (0xCF30).  Since the CCS is only
   relevant to the primary GT and the GSC is only relevant to the media
   GT there isn't actually a clash here (the media GT automatically adds
   the additional 0x380000 GSI offset).  However there's currently a
   glitch in the bspec where the CCS register doesn't show up at all and
   the GSC register is listed as existing on both GTs.  That's a known
   documentation problem for several registers with shared GSC/CCS
   offsets; rest assured that the CCS register really does still exist.

Cc: Gustavo Sousa 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  7 +--
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 22 ++---
 drivers/gpu/drm/i915/i915_drv.h |  4 
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2727645864db..310bdde049ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1100,8 +1100,11 @@
 #define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
 #define RENDER_MOD_CTRL MCR_REG(0xcf2c)
 #define COMP_MOD_CTRL  MCR_REG(0xcf30)
-#define VDBX_MOD_CTRL  MCR_REG(0xcf34)
-#define VEBX_MOD_CTRL  MCR_REG(0xcf38)
+#define XELPMP_GSC_MOD_CTRL _MMIO(0xcf30)   /* media GT only */
+#define XEHP_VDBX_MOD_CTRL MCR_REG(0xcf34)
+#define XELPMP_VDBX_MOD_CTRL   _MMIO(0xcf34)
+#define XEHP_VEBX_MOD_CTRL MCR_REG(0xcf38)
+#define XELPMP_VEBX_MOD_CTRL   _MMIO(0xcf38)
 #define   FORCE_MISS_FTLB  REG_BIT(3)
 
 #define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 9db60078460a..4c978abf3e2a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1681,8 +1681,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_1509235366:dg2 */
wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
@@ -1700,8 +1700,8 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
-   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 }
 
 static void
@@ -1715,8 +1715,6 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB

[PATCH v2 1/3] drm/i915/xehp: GAM registers don't need to be re-applied on engine resets

2023-01-25 Thread Matt Roper
Register reset characteristics (i.e., whether the register maintains or
loses its value on engine reset) are an important factor that determines
which wa_list we want to add workarounds to.  We recently found out that
the bspec documentation for the Xe_HP's "GAM" registers in the 0xC800 -
0xCFFF range was misleading; these registers do not actually lose their
value on engine resets as the documentation implied.  This means there's
no need to re-apply workarounds touching these registers after a reset,
and the corresponding workarounds should be moved from the 'engine'
lists back to the 'gt' list.

v2:
 - Don't add Wa_18018781329 to xehpsdv; the original condition didn't
   include that platform.  (Gustavo)
 - Move the MTL code to the GT function as-is for now; we'll take care
   of the additional fixes needed in a follow-up patch.

Cc: Gustavo Sousa 
Fixes: edf176f48d87 ("drm/i915/dg2: Move misplaced 'ctx' & 'gt' wa's to engine 
wa list")
Fixes: b2006061ae28 ("drm/i915/xehpsdv: Move render/compute engine reset 
domains related workarounds")
Fixes: 41bb543f5598 ("drm/i915/mtl: Add initial gt workarounds")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 77 -
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4efc1a532982..9db60078460a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1559,6 +1559,13 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14011060649:xehpsdv */
wa_14011060649(gt, wal);
+
+   /* Wa_14012362059:xehpsdv */
+   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_14014368820:xehpsdv */
+   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
 }
 
 static void
@@ -1599,6 +1606,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}
 
+   if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
+   IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
+   /* Wa_14012362059:dg2 */
+   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+   }
+
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
/* Wa_14010948348:dg2_g10 */
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
@@ -1644,6 +1657,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14011028019:dg2_g10 */
wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+
+   /* Wa_14010680813:dg2_g10 */
+   wa_write_or(wal, GEN12_GAMSTLB_CTRL,
+   CONTROL_BLOCK_CLKGATE_DIS |
+   EGRESS_BLOCK_CLKGATE_DIS |
+   TAG_BLOCK_CLKGATE_DIS);
}
 
/* Wa_14014830051:dg2 */
@@ -1658,6 +1677,16 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14015795083 */
wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+
+   /* Wa_18018781329 */
+   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_1509235366:dg2 */
+   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
 }
 
 static void
@@ -1667,16 +1696,29 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14015795083 */
wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+
+   /* Wa_18018781329 */
+   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 }
 
 static void
 xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 {
-   /* Wa_14014830051 */
if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
-   IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0))
+   IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
+   /* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
 
+   /* Wa_18018781329 */
+   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VDBX_M

Re: [PATCH 1/2] drm/i915/xehp: GAM registers don't need to be re-applied on engine resets

2023-01-25 Thread Matt Roper
On Wed, Jan 25, 2023 at 04:43:29PM -0300, Gustavo Sousa wrote:
> On Tue, Jan 24, 2023 at 05:14:06PM -0800, Matt Roper wrote:
> > Register reset characteristics (i.e., whether the register maintains or
> > loses its value on engine reset) is an important factor that determines
> > which wa_list we want to add workarounds to.  We recently found out that
> > the bspec documentation for the Xe_HP's "GAM" registers in the 0xC800 -
> > 0xCFFF range was misleading; these registers do not actually lose their
> > value on engine resets as the documentation implied.  This means there's
> > no need to re-apply workarounds touching these registers after a reset,
> > and the corresponding workarounds should be moved from the 'engine'
> > lists back to the 'gt' list.
> > 
> > While moving these GAM-related workarounds to the various platforms' GT
> > workaround functions, we should also take care to handle Wa_18018781329
> > properly for MTL's two GTs --- the render/compute setting should be set
> > on the primary GT where those engines reside, and the vd/ve/gsc setting
> > should be set on the media GT.  Previously the VD/VE/GSC setting was not
> > being properly applied.
> > 
> > Cc: Gustavo Sousa 
> > Fixes: edf176f48d87 ("drm/i915/dg2: Move misplaced 'ctx' & 'gt' wa's to 
> > engine wa list")
> > Fixes: b2006061ae28 ("drm/i915/xehpsdv: Move render/compute engine reset 
> > domains related workarounds")
> > Fixes: 41bb543f5598 ("drm/i915/mtl: Add initial gt workarounds")
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 88 +
> >  drivers/gpu/drm/i915/i915_drv.h |  4 +
> >  3 files changed, 59 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > index 2727645864db..4a37d048b512 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > @@ -1100,6 +1100,7 @@
> >  #define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
> >  #define RENDER_MOD_CTRL MCR_REG(0xcf2c)
> >  #define COMP_MOD_CTRL  MCR_REG(0xcf30)
> > +#define GSC_MOD_CTRL   MCR_REG(0xcf30) /* 
> > media GT only */
> >  #define VDBX_MOD_CTRL  MCR_REG(0xcf34)
> >  #define VEBX_MOD_CTRL  MCR_REG(0xcf38)
> >  #define   FORCE_MISS_FTLB  REG_BIT(3)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index 4efc1a532982..0e7f64bb2860 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -1559,6 +1559,19 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, 
> > struct i915_wa_list *wal)
> >  
> > /* Wa_14011060649:xehpsdv */
> > wa_14011060649(gt, wal);
> > +
> > +   /* Wa_18018781329 */
> > +   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
> > +   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
> > +   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
> > +   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
> 
> Maybe worth mentioning in the commit message that Wa_18018781329 is being
> extended to XEHPSDV in this patch? This could also be on its own patch.

Yeah, it's probably better to just drop it from this patch.  We could
potentially add it to xehpsdv as a separate patch down the line if
necessary.

> 
> > +
> > +   /* Wa_14012362059:xehpsdv */
> > +   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
> > +
> > +   /* Wa_14014368820:xehpsdv */
> > +   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
> > +   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
> >  }
> >  
> >  static void
> > @@ -1599,6 +1612,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
> > i915_wa_list *wal)
> > DSS_ROUTER_CLKGATE_DIS);
> > }
> >  
> > +   if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
> > +   IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
> > +   /* Wa_14012362059:dg2 */
> > +   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
> > +   }
> > +
> > if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0

Re: [Intel-gfx] [PATCH v4] drm/i915/uncore: Use GT message helpers in uncore

2023-01-25 Thread Matt Roper
, !i915_mmio_reg_valid(reg_set));
> > +   gt_WARN_ON(uncore->gt, !i915_mmio_reg_valid(reg_ack));
> > d->uncore = uncore;
> > d->wake_count = 0;
> > @@ -2254,8 +2250,8 @@ static void fw_domain_fini(struct intel_uncore 
> > *uncore,
> > return;
> > uncore->fw_domains &= ~BIT(domain_id);
> > > -   drm_WARN_ON(&uncore->i915->drm, d->wake_count);
> > > -   drm_WARN_ON(&uncore->i915->drm, hrtimer_cancel(&d->timer));
> > > +   gt_WARN_ON(uncore->gt, d->wake_count);
> > > +   gt_WARN_ON(uncore->gt, hrtimer_cancel(&d->timer));
> > kfree(d);
> >   }
> > @@ -2388,8 +2384,8 @@ static int intel_uncore_fw_domains_init(struct 
> > intel_uncore *uncore)
> > > spin_unlock_irq(&uncore->lock);
> > if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
> > -   drm_info(>drm, "No MT forcewake available on 
> > Ivybridge, this can result in issues\n");
> > -   drm_info(>drm, "when using vblank-synced partial 
> > screen updates.\n");
> > +   gt_info(uncore->gt, "No MT forcewake available on 
> > Ivybridge, this can result in issues\n");
> > +   gt_info(uncore->gt, "when using vblank-synced partial 
> > screen updates.\n");
> > fw_domain_fini(uncore, FW_DOMAIN_ID_RENDER);
> > fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
> >FORCEWAKE, FORCEWAKE_ACK);
> > @@ -2403,7 +2399,7 @@ static int intel_uncore_fw_domains_init(struct 
> > intel_uncore *uncore)
> >   #undef fw_domain_init
> > /* All future platforms are expected to require complex power gating */
> > > -   drm_WARN_ON(&i915->drm, !ret && uncore->fw_domains == 0);
> > +   gt_WARN_ON(uncore->gt, !ret && uncore->fw_domains == 0);
> >   out:
> > if (ret)
> > @@ -2487,7 +2483,7 @@ int intel_uncore_setup_mmio(struct intel_uncore 
> > *uncore, phys_addr_t phys_addr)
> > uncore->regs = ioremap(phys_addr, mmio_size);
> > if (uncore->regs == NULL) {
> > > -   drm_err(&i915->drm, "failed to map registers\n");
> > +   gt_err(uncore->gt, "failed to map registers\n");
> > return -EIO;
> > }
> > @@ -2615,7 +2611,7 @@ int intel_uncore_init_mmio(struct intel_uncore 
> > *uncore)
> >  */
> > if (IS_DGFX(i915) &&
> > !(__raw_uncore_read32(uncore, GU_CNTL) & LMEM_INIT)) {
> > > -   drm_err(&i915->drm, "LMEM not initialized by firmware\n");
> > +   gt_err(uncore->gt, "LMEM not initialized by firmware\n");
> > return -ENODEV;
> > }
> > @@ -2646,7 +2642,7 @@ int intel_uncore_init_mmio(struct intel_uncore 
> > *uncore)
> > /* clear out unclaimed reg detection bit */
> > if (intel_uncore_unclaimed_mmio(uncore))
> > -   drm_dbg(>drm, "unclaimed mmio detected on uncore init, 
> > clearing\n");
> > +   gt_dbg(uncore->gt, "unclaimed mmio detected on uncore init, 
> > clearing\n");
> > return 0;
> >   }
> > @@ -2721,11 +2717,10 @@ void intel_uncore_prune_engine_fw_domains(struct 
> > intel_uncore *uncore,
> >*/
> >   static void driver_initiated_flr(struct intel_uncore *uncore)
> >   {
> > -   struct drm_i915_private *i915 = uncore->i915;
> > const unsigned int flr_timeout_ms = 3000; /* specs recommend a 3s wait 
> > */
> > int ret;
> > > -   drm_dbg(&i915->drm, "Triggering Driver-FLR\n");
> > +   gt_dbg(uncore->gt, "Triggering Driver-FLR\n");
> > /*
> >  * Make sure any pending FLR requests have cleared by waiting for the
> > @@ -2738,9 +2733,7 @@ static void driver_initiated_flr(struct intel_uncore 
> > *uncore)
> >  */
> > ret = intel_wait_for_register_fw(uncore, GU_CNTL, DRIVERFLR, 0, 
> > flr_timeout_ms);
> > if (ret) {
> > > -   drm_err(&i915->drm,
> > -   "Failed to wait for Driver-FLR bit to clear! %d\n",
> > -   ret);
> > +   gt_err(uncore->gt, "Failed to wait for Driver-FLR bit to clear! 
> > %d\n", ret);
> > return;
> > }
> > intel_uncore_write_fw(uncore, GU_DEBUG, DRIVERFLR_STATUS);
> > @@ -2752,7 +2745,7 @@ static void driver_initiated_flr(struct intel_uncore 
> > *uncore)
> >  DRIVERFLR_STATUS, DRIVERFLR_STATUS,
> >  flr_timeout_ms);
> > if (ret) {
> > -   drm_err(>drm, "wait for Driver-FLR completion failed! 
> > %d\n", ret);
> > +   gt_err(uncore->gt, "wait for Driver-FLR completion failed! 
> > %d\n", ret);
> > return;
> > }
> > @@ -2911,7 +2904,7 @@ intel_uncore_arm_unclaimed_mmio_detection(struct 
> > intel_uncore *uncore)
> >   {
> > bool ret = false;
> > > -   if (drm_WARN_ON(&uncore->i915->drm, !uncore->debug))
> > +   if (gt_WARN_ON(uncore->gt, !uncore->debug))
> > return false;
> > > spin_lock_irq(&uncore->debug->lock);
> > @@ -2921,10 +2914,10 @@ intel_uncore_arm_unclaimed_mmio_detection(struct 
> > intel_uncore *uncore)
> > if (unlikely(check_for_unclaimed_mmio(uncore))) {
> > if (!uncore->i915->params.mmio_debug) {
> > > -   drm_dbg(&uncore->i915->drm,
> > -   "Unclaimed register detected, "
> > -   "enabling oneshot unclaimed register reporting. 
> > "
> > -   "Please use i915.mmio_debug=N for more 
> > information.\n");
> > +   gt_dbg(uncore->gt,
> > +  "Unclaimed register detected, "
> > +  "enabling oneshot unclaimed register reporting. "
> > +  "Please use i915.mmio_debug=N for more 
> > information.\n");
> > uncore->i915->params.mmio_debug++;
> > }
> > uncore->debug->unclaimed_mmio_check--;
> > @@ -2957,7 +2950,7 @@ intel_uncore_forcewake_for_reg(struct intel_uncore 
> > *uncore,
> >   {
> > enum forcewake_domains fw_domains = 0;
> > > -   drm_WARN_ON(&uncore->i915->drm, !op);
> > +   gt_WARN_ON(uncore->gt, !op);
> > if (!intel_uncore_has_forcewake(uncore))
> > return 0;
> > @@ -2968,7 +2961,7 @@ intel_uncore_forcewake_for_reg(struct intel_uncore 
> > *uncore,
> > if (op & FW_REG_WRITE)
> > fw_domains |= uncore->funcs.write_fw_domains(uncore, reg);
> > > -   drm_WARN_ON(&uncore->i915->drm, fw_domains & ~uncore->fw_domains);
> > +   gt_WARN_ON(uncore->gt, fw_domains & ~uncore->fw_domains);
> > return fw_domains;
> >   }

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


[PATCH 2/2] drm/i915/xehp: Annotate a couple more workaround registers as MCR

2023-01-24 Thread Matt Roper
GAMSTLB_CTRL and GAMCNTRL_CTRL became multicast/replicated registers on
Xe_HP.  They should be defined accordingly and use MCR-aware operations.

These registers have only been used for some dg2/xehpsdv workarounds, so
this fix is mostly just for consistency/future-proofing; even lacking
the MCR annotation, workarounds will always be properly applied in a
multicast manner on these platforms.

Cc: Gustavo Sousa 
Fixes: 58bc2453ab8a ("drm/i915: Define multicast registers as a new type")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  4 ++--
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 16 
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 4a37d048b512..a0ebf3fa63ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1105,12 +1105,12 @@
 #define VEBX_MOD_CTRL  MCR_REG(0xcf38)
 #define   FORCE_MISS_FTLB  REG_BIT(3)
 
-#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
+#define XEHP_GAMSTLB_CTRL  MCR_REG(0xcf4c)
 #define   CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12)
 #define   EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11)
 #define   TAG_BLOCK_CLKGATE_DIS REG_BIT(7)
 
-#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54)
+#define XEHP_GAMCNTRL_CTRL MCR_REG(0xcf54)
 #define   INVALIDATION_BROADCAST_MODE_DIS  REG_BIT(12)
 #define   GLOBAL_INVALIDATION_MODE REG_BIT(2)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 0e7f64bb2860..94eb498f3c2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1570,8 +1570,8 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_14014368820:xehpsdv */
-   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
-   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+   wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | 
GLOBAL_INVALIDATION_MODE);
 }
 
 static void
@@ -1665,10 +1665,10 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
 
/* Wa_14010680813:dg2_g10 */
-   wa_write_or(wal, GEN12_GAMSTLB_CTRL,
-   CONTROL_BLOCK_CLKGATE_DIS |
-   EGRESS_BLOCK_CLKGATE_DIS |
-   TAG_BLOCK_CLKGATE_DIS);
+   wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
+   CONTROL_BLOCK_CLKGATE_DIS |
+   EGRESS_BLOCK_CLKGATE_DIS |
+   TAG_BLOCK_CLKGATE_DIS);
}
 
/* Wa_14014830051:dg2 */
@@ -1691,8 +1691,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 
/* Wa_1509235366:dg2 */
-   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
-   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
+   wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | 
GLOBAL_INVALIDATION_MODE);
 }
 
 static void
-- 
2.39.0



[PATCH 1/2] drm/i915/xehp: GAM registers don't need to be re-applied on engine resets

2023-01-24 Thread Matt Roper
Register reset characteristics (i.e., whether the register maintains or
loses its value on engine reset) are an important factor that determines
which wa_list we want to add workarounds to.  We recently found out that
the bspec documentation for the Xe_HP's "GAM" registers in the 0xC800 -
0xCFFF range was misleading; these registers do not actually lose their
value on engine resets as the documentation implied.  This means there's
no need to re-apply workarounds touching these registers after a reset,
and the corresponding workarounds should be moved from the 'engine'
lists back to the 'gt' list.

While moving these GAM-related workarounds to the various platforms' GT
workaround functions, we should also take care to handle Wa_18018781329
properly for MTL's two GTs --- the render/compute setting should be set
on the primary GT where those engines reside, and the vd/ve/gsc setting
should be set on the media GT.  Previously the VD/VE/GSC setting was not
being properly applied.

Cc: Gustavo Sousa 
Fixes: edf176f48d87 ("drm/i915/dg2: Move misplaced 'ctx' & 'gt' wa's to engine 
wa list")
Fixes: b2006061ae28 ("drm/i915/xehpsdv: Move render/compute engine reset 
domains related workarounds")
Fixes: 41bb543f5598 ("drm/i915/mtl: Add initial gt workarounds")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 88 +
 drivers/gpu/drm/i915/i915_drv.h |  4 +
 3 files changed, 59 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2727645864db..4a37d048b512 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1100,6 +1100,7 @@
 #define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
 #define RENDER_MOD_CTRL MCR_REG(0xcf2c)
 #define COMP_MOD_CTRL  MCR_REG(0xcf30)
+#define GSC_MOD_CTRL   MCR_REG(0xcf30) /* media GT 
only */
 #define VDBX_MOD_CTRL  MCR_REG(0xcf34)
 #define VEBX_MOD_CTRL  MCR_REG(0xcf38)
 #define   FORCE_MISS_FTLB  REG_BIT(3)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 4efc1a532982..0e7f64bb2860 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1559,6 +1559,19 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14011060649:xehpsdv */
wa_14011060649(gt, wal);
+
+   /* Wa_18018781329 */
+   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_14012362059:xehpsdv */
+   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_14014368820:xehpsdv */
+   wa_write_or(wal, GEN12_GAMCNTRL_CTRL,
+   INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);
 }
 
 static void
@@ -1599,6 +1612,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}
 
+   if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
+   IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
+   /* Wa_14012362059:dg2 */
+   wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+   }
+
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
/* Wa_14010948348:dg2_g10 */
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
@@ -1644,6 +1663,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14011028019:dg2_g10 */
wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+
+   /* Wa_14010680813:dg2_g10 */
+   wa_write_or(wal, GEN12_GAMSTLB_CTRL,
+   CONTROL_BLOCK_CLKGATE_DIS |
+   EGRESS_BLOCK_CLKGATE_DIS |
+   TAG_BLOCK_CLKGATE_DIS);
}
 
/* Wa_14014830051:dg2 */
@@ -1658,6 +1683,16 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
 
/* Wa_14015795083 */
wa_mcr_write_clr(wal, GEN8_MISCCPCTL, 
GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+
+   /* Wa_18018781329 */
+   wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+   wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+
+   /* Wa_1509235366:dg2 */
+   wa_write_or(wal, GEN12_GAMCNTRL_CTRL

[PATCH] drm/i915: Move/adjust register definitions related to Wa_22011450934

2023-01-17 Thread Matt Roper
The implementation of Wa_22011450934 introduced three new register
definitions in i915_reg.h that didn't get moved to the GT/engine
register headers when all the other registers moved; let's move them to
the appropriate headers and tidy up their definitions now for
consistency:

 - STATE_ACK_DEBUG is moved to the engine register header and converted
   to a parameterized definition; the workaround only needs the RCS
   instance to be programmed, but there are instances on other engines
   that could be used by other workarounds in the future.

 - The two CULLBIT registers move to the GT register header.  Since
   they belong to MMIO ranges that became MCR starting with Xe_HP,
   their definitions should be defined as MCR_REG() and use an Xe_HP
   prefix to keep the register semantics clear.

Note that the MCR definition is just for consistency and to prevent
accidental misuse if other workarounds related to these registers show
up in the future.  There's no functional change to today's driver since
the workaround that references these registers only accesses them via
MI_LRR engine instructions.  Engine-initiated register accesses do not
utilize the same steering controls as CPU-initiated accesses; they
use a different steering control register (0x20CC) which is initialized
to a non-terminated DSS target by pre-OS firmware and never changed
thereafter (i915 does not touch it and userspace does not have
permission to change that register).

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_engine_regs.h | 1 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 4 
 drivers/gpu/drm/i915/gt/intel_lrc.c | 6 +++---
 drivers/gpu/drm/i915/i915_reg.h | 4 
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h 
b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index ee3efd06ee54..6b9d9f837669 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -81,6 +81,7 @@
 #define RING_EIR(base) _MMIO((base) + 0xb0)
 #define RING_EMR(base) _MMIO((base) + 0xb4)
 #define RING_ESR(base) _MMIO((base) + 0xb8)
+#define GEN12_STATE_ACK_DEBUG(base)_MMIO((base) + 0xbc)
 #define RING_INSTPM(base)  _MMIO((base) + 0xc0)
 #define RING_CMD_CCTL(base)_MMIO((base) + 0xc4)
 #define ACTHD(base)_MMIO((base) + 0xc8)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 4f5c06d60bcd..4a4bab261e66 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -407,6 +407,8 @@
 #define GEN9_WM_CHICKEN3   _MMIO(0x5588)
 #define   GEN9_FACTOR_IN_CLR_VAL_HIZ   (1 << 9)
 
+#define XEHP_CULLBIT1  MCR_REG(0x6100)
+
 #define CHICKEN_RASTER_1   MCR_REG(0x6204)
 #define   DIS_SF_ROUND_NEAREST_EVENREG_BIT(8)
 
@@ -457,6 +459,8 @@
 #define   HZ_DEPTH_TEST_LE_GE_OPT_DISABLE  REG_BIT(13)
 #define   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE  REG_BIT(3)
 
+#define XEHP_CULLBIT2  MCR_REG(0x7030)
+
 #define GEN8_L3CNTLREG _MMIO(0x7034)
 #define   GEN8_ERRDETBCTRL (1 << 9)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 7771a19008c6..1dffe392b95c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1351,16 +1351,16 @@ static u32 *
 dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
 {
*cs++ = MI_LOAD_REGISTER_IMM(1);
-   *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
+   *cs++ = 
i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
*cs++ = 0x21;
 
*cs++ = MI_LOAD_REGISTER_REG;
*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
-   *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);
+   *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
 
*cs++ = MI_LOAD_REGISTER_REG;
*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
-   *cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);
+   *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
 
return cs;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8b2cf980f323..d30443f06bdd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8113,10 +8113,6 @@ enum skl_power_gate {
 #define CLKGATE_DIS_MISC   _MMIO(0x46534)
 #define  CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21)
 
-#define GEN12_CULLBIT1 _MMIO(0x6100)
-#define GEN12_CULLBIT2 _MMIO(0x7030)
-#define GEN12_STATE_ACK_DEBUG  _MMIO(0x20BC)
-
 #define _MTL_CLKGATE_DIS_TRANS_A 

Re: [PATCH v4] drm/i915: Do not cover all future platforms in TLB invalidation

2023-01-10 Thread Matt Roper
On Tue, Jan 10, 2023 at 11:35:33AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Revert to the original explicit approach and document the reasoning
> behind it.
> 
> v2:
>  * DG2 needs to be covered too. (Matt)
> 
> v3:
>  * Full version check for Gen12 to avoid catching all future platforms.
>(Matt)
> 
> v4:
>  * Be totally explicit on the Gen12 branch. (Andrzej)
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc: Andrzej Hajda 
> Reviewed-by: Andrzej Hajda  # v1
> Reviewed-by: Matt Roper  # v3

Reviewed-by: Matt Roper 

for v4 as well.



Matt

> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c | 17 +++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 75a7cb33..5721bf85d119 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1070,10 +1070,23 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   unsigned int num = 0;
>   unsigned long flags;
>  
> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + /*
> +  * New platforms should not be added with catch-all-newer (>=)
> +  * condition so that any later platform added triggers the below warning
> +  * and in turn mandates a human cross-check of whether the invalidation
> +  * flows have compatible semantics.
> +  *
> +  * For instance with the 11.00 -> 12.00 transition three out of five
> +  * respective engine registers were moved to masked type. Then after the
> +  * 12.00 -> 12.50 transition multi cast handling is required too.
> +  */
> +
> + if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50) ||
> + GRAPHICS_VER_FULL(i915) == IP_VER(12, 55)) {
>   regs = NULL;
>   num = ARRAY_SIZE(xehp_regs);
> - } else if (GRAPHICS_VER(i915) == 12) {
> + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
>       regs = gen12_regs;
>   num = ARRAY_SIZE(gen12_regs);
>   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH v3] drm/i915: Do not cover all future platforms in TLB invalidation

2023-01-09 Thread Matt Roper
On Mon, Jan 09, 2023 at 12:24:42PM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Revert to the original explicit approach and document the reasoning
> behind it.
> 
> v2:
>  * DG2 needs to be covered too. (Matt)
> 
> v3:
>  * Full version check for Gen12 to avoid catching all future platforms.
>(Matt)
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc: Andrzej Hajda 
> Reviewed-by: Andrzej Hajda  # v1

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c | 17 +++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 75a7cb33..5521fa057aab 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1070,10 +1070,23 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   unsigned int num = 0;
>   unsigned long flags;
>  
> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + /*
> +  * New platforms should not be added with catch-all-newer (>=)
> +  * condition so that any later platform added triggers the below warning
> +  * and in turn mandates a human cross-check of whether the invalidation
> +  * flows have compatible semantics.
> +  *
> +  * For instance with the 11.00 -> 12.00 transition three out of five
> +  * respective engine registers were moved to masked type. Then after the
> +  * 12.00 -> 12.50 transition multi cast handling is required too.
> +  */
> +
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) &&
> + GRAPHICS_VER_FULL(i915) <= IP_VER(12, 55)) {
>   regs = NULL;
>   num = ARRAY_SIZE(xehp_regs);
> - } else if (GRAPHICS_VER(i915) == 12) {
> + } else if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
> +GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)) {
>   regs = gen12_regs;
>   num = ARRAY_SIZE(gen12_regs);
>   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [PATCH 1/4] drm/i915/gt: Remove platform comments from workarounds

2023-01-06 Thread Matt Roper
On Thu, Jan 05, 2023 at 01:35:52PM +, Tvrtko Ursulin wrote:
> 
> Okay to sum it up below with some final notes..
> 
> On 04/01/2023 19:34, Matt Roper wrote:
> > On Wed, Jan 04, 2023 at 09:58:13AM +, Tvrtko Ursulin wrote:
> > > 
> > > On 23/12/2022 18:28, Lucas De Marchi wrote:
> > > > On Fri, Dec 23, 2022 at 09:02:35AM +, Tvrtko Ursulin wrote:
> > > > > 
> > > > > On 22/12/2022 15:55, Lucas De Marchi wrote:
> > > > > > On Thu, Dec 22, 2022 at 10:27:00AM +, Tvrtko Ursulin wrote:
> > > > > > > 
> > > > > > > On 22/12/2022 08:25, Lucas De Marchi wrote:
> > > > > > > > The comments are redundant to the checks being done to apply the
> > > > > > > > workarounds and very often get outdated as workarounds need to 
> > > > > > > > be
> > > > > > > > extended to new platforms or steppings.  Remove them altogether 
> > > > > > > > with
> > > > > > > > the following matches (platforms extracted from 
> > > > > > > > intel_workarounds.c):
> > > > > > > > 
> > > > > > > >  find drivers/gpu/drm/i915/gt/ -name '*.c' | xargs sed -i 
> > > > > > > > -E \
> > > > > > > > 's/(Wa.*):(bdw|chv|bxt|glk|skl|kbl|cfl|cfl|whl|cml|aml|chv|cl|bw|ctg|elk|ilk|snb|dg|pvc|g4x|ilk|gen|glk|kbl|cml|glk|kbl|cml|hsw|icl|ehl|ivb|hsw|ivb|vlv|kbl|pvc|rkl|dg|adl|skl|skl|bxt|blk|cfl|cnl|glk|snb|tgl|vlv|xehpsdv).*/\1/'
> > > > > > > >  find drivers/gpu/drm/i915/gt/ -name '*.c' | xargs sed -i 
> > > > > > > > -E \
> > > > > > > > 's/(Wa.*):(bdw|chv|bxt|glk|skl|kbl|cfl|cfl|whl|cml|aml|chv|cl|bw|ctg|elk|ilk|snb|dg|pvc|g4x|ilk|gen|glk|kbl|cml|glk|kbl|cml|hsw|icl|ehl|ivb|hsw|ivb|vlv|kbl|pvc|rkl|dg|adl|skl|skl|bxt|blk|cfl|cnl|glk|snb|tgl|vlv|xehpsdv).*\*\//\1
> > > > > > > > 
> > > > > > > > The same thing was executed in the gem directory, omitted
> > > > > > > > here for brevity.
> > > > > > > 
> > > > > > > > There were a few false positives that included the workaround
> > > > > > > > description. Those were manually patched.
> > > > > > > 
> > > > > > > sed -E 's/(Wa[a-zA-Z0-9_]+)[:,]([a-zA-Z0-9,-_\+\[]{2,})/\1/'
> > > > > > 
> > > > > > then there are false negatives. We have Was in the form
> > > > > > "Wa_xxx:tgl,dg2, mtl". False positives we can fixup, false negatives
> > > > > > we simply don't see. After running that in gt/:
> > > > > > 
> > > > > > $ git grep ": mtl" -- drivers/gpu/drm/i915/
> > > > > > drivers/gpu/drm/i915/gt/intel_gt_pm.c:  /* Wa_14017073508: mtl */
> > > > > > drivers/gpu/drm/i915/gt/intel_gt_pm.c:  /* Wa_14017073508: mtl */
> > > > > > drivers/gpu/drm/i915/gt/intel_gt_pm.c:  /* Wa_14017073508: mtl */
> > > > > > drivers/gpu/drm/i915/gt/intel_gt_pm.c:  /* Wa_14017073508: mtl */
> > > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c:   * Wa_14017073508: 
> > > > > > mtl
> > > > > > drivers/gpu/drm/i915/i915_reg.h:/* Wa_14017210380: mtl */
> > > > > > 
> > > > > > I was going with the platform names to avoid the false
> > > > > > negatives and because I was entertaining the idea of only doing 
> > > > > > this for
> > > > > > latest platforms where we do have the "Wa_[[:number:]]" form
> > > > > > 
> > > > > > > 
> > > > > > > Maybe..
> > > > > > > 
> > > > > > > Matt recently said he has this work planned, but more
> > > > > > > importantly - I gather then that the WA lookup tool
> > > > > > > definitely does not output these strings?
> > > > > > 
> > > > > > Whatever it does it's true only at the time it's called. It
> > > > > > simply tells what
> > > > > > are the platforms and steppings the Wa applies to. We can change the
> > > > > > output to whatever we want, but that is not the point.
> > > > > > Those comments get stale and bring no real value as they match 1:1
> > > > > > what the code is supposed to be doing. Several 

Re: [PATCH v2] drm/i915: Do not cover all future platforms in TLB invalidation

2023-01-06 Thread Matt Roper
On Fri, Jan 06, 2023 at 10:38:35AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Revert to the original explicit approach and document the reasoning
> behind it.
> 
> v2:
>  * DG2 needs to be covered too. (Matt)
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Matt Roper 
> Cc: Balasubramani Vivekanandan 
> Cc: Andrzej Hajda 
> Reviewed-by: Andrzej Hajda  # v1
> ---
> Matt, does DG1 need to be in the MCR branch or plain Gen12?

DG1 should use the same "gen12" branch as TGL/RKL/ADL.  Bspec page 66696
is the relevant MMIO table for DG1 and the range containing the TLB
invalidation registers is not a multicast/replicated range.  The types
of engines supported, and the register details for each engine are also
the same as TGL/RKL/ADL.

> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c | 14 +-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 75a7cb33..b2556a3d8a3f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -1070,7 +1070,19 @@ static void mmio_invalidate_full(struct intel_gt *gt)
>   unsigned int num = 0;
>   unsigned long flags;
>  
> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> + /*
> +  * New platforms should not be added with catch-all-newer (>=)
> +  * condition so that any later platform added triggers the below warning
> +  * and in turn mandates a human cross-check of whether the invalidation
> +  * flows have compatible semantics.
> +  *
> +  * For instance with the 11.00 -> 12.00 transition three out of five
> +  * respective engine registers were moved to masked type. Then after the
> +  * 12.00 -> 12.50 transition multi cast handling is required too.
> +  */
> +
> + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) &&
> + GRAPHICS_VER_FULL(i915) <= IP_VER(12, 55)) {
>   regs = NULL;
>   num = ARRAY_SIZE(xehp_regs);
>   } else if (GRAPHICS_VER(i915) == 12) {

Did you want to switch this one to

GRAPHICS_VER_FULL(i915) == IP_VER(12, 0) ||
GRAPHICS_VER_FULL(i915) == IP_VER(12, 10)

so that it won't automatically pick up future 12.xx platforms like PVC,
MTL, and whatever else might show up in that category in the future?


Matt

> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Consolidate TLB invalidation flow

2023-01-05 Thread Matt Roper
On Thu, Jan 05, 2023 at 01:06:37PM +, Tvrtko Ursulin wrote:
> 
> On 04/01/2023 17:41, Matt Roper wrote:
> > On Wed, Jan 04, 2023 at 10:08:29AM +, Tvrtko Ursulin wrote:
> > > 
> > > On 03/01/2023 19:57, Matt Roper wrote:
> > > > On Mon, Dec 19, 2022 at 05:10:02PM +0100, Andrzej Hajda wrote:
> > > > > On 19.12.2022 11:13, Tvrtko Ursulin wrote:
> > > > > > From: Tvrtko Ursulin 
> > > > > > 
> > > > > > As the logic for selecting the register and corresponsing values 
> > > > > > grew, the
> > > > > 
> > > > > corresponding
> > > > > 
> > > > > > code become a bit unsightly. Consolidate by storing the required 
> > > > > > values at
> > > > > > engine init time in the engine itself, and by doing so minimise the 
> > > > > > amount
> > > > > > of invariant platform and engine checks during each and every TLB
> > > > > > invalidation.
> > > > > > 
> > > > > > v2:
> > > > > > > * Fail engine probe if TLB invalidation registers are unknown.
> > > > > > 
> > > > > > Signed-off-by: Tvrtko Ursulin 
> > > > > > Cc: Andrzej Hajda 
> > > > > > Cc: Matt Roper 
> > > > > > Reviewed-by: Andrzej Hajda  # v1
> > > > > > ---
> > > > > > drivers/gpu/drm/i915/gt/intel_engine_cs.c|  93 +
> > > > > > drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 +++
> > > > > > drivers/gpu/drm/i915/gt/intel_gt.c   | 135 
> > > > > > +++
> > > > > > 3 files changed, 128 insertions(+), 115 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > > > > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > index 99c4b866addd..d47dadfc25c8 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > @@ -1143,12 +1143,105 @@ static int init_status_page(struct 
> > > > > > intel_engine_cs *engine)
> > > > > > return ret;
> > > > > > }
> > > > > > +static int intel_engine_init_tlb_invalidation(struct 
> > > > > > intel_engine_cs *engine)
> > > > > > +{
> > > > > > +   static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> > > > > > +   [RENDER_CLASS].reg  = GEN8_RTCR,
> > > > > > +   [VIDEO_DECODE_CLASS].reg= GEN8_M1TCR, /* , 
> > > > > > GEN8_M2TCR */
> > > > > > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN8_VTCR,
> > > > > > +   [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
> > > > > > +   };
> > > > > > +   static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> > > > > > +   [RENDER_CLASS].reg  = GEN12_GFX_TLB_INV_CR,
> > > > > > +   [VIDEO_DECODE_CLASS].reg= GEN12_VD_TLB_INV_CR,
> > > > > > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN12_VE_TLB_INV_CR,
> > > > > > +   [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
> > > > > > +   [COMPUTE_CLASS].reg = 
> > > > > > GEN12_COMPCTX_TLB_INV_CR,
> > > > > > +   };
> > > > > > +   static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> > > > > > +   [RENDER_CLASS].mcr_reg= XEHP_GFX_TLB_INV_CR,
> > > > > > +   [VIDEO_DECODE_CLASS].mcr_reg  = XEHP_VD_TLB_INV_CR,
> > > > > > +   [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> > > > > > +   [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> > > > > > +   [COMPUTE_CLASS].mcr_reg   = 
> > > > > > XEHP_COMPCTX_TLB_INV_CR,
> > > > > > +   };
> > > > > > +   struct drm_i915_private *i915 = engine->i915;
> > > > > > +   const union intel_engine_tlb_inv_reg *regs;
> > > > > > +   union intel_engine_tlb_inv_reg reg;
> > > > > > +   unsigned int class = engine->class;
> > > > > > 

Re: [PATCH 1/4] drm/i915/gt: Remove platform comments from workarounds

2023-01-04 Thread Matt Roper
 from misunderstanding the
semantics of platforms (especially cases like DG2 where different
G10/G11/G12 variants have different stepping schemes) or technical
misunderstanding of the implementation details (register reset
characteristics, masked vs non-masked registers, context membership,
etc.).

> 
> Yeah it is much easier to rip them out than to find and fix the ones which
> went out of sync but that shouldn't be high on the list of criteria.
> 
> Argument that it is easy to overlook during review that comments and code do
> not match I don't think holds. That describes a very sloppy review. And if
> review is assumed to be that sloppy, do you really trust review to check
> against the WA database?

It's the same reason people who write prose can't find their own
spelling/grammar mistakes.  The mistakes are "obvious," but since their
brain already knows what it's "supposed" to say, they just can't see the
error themselves.  Once you've reviewed the code, it just gets really
hard to see where the comment doesn't align, especially for the
workarounds that apply to a bunch of platforms.

For example, if I'm reviewing a patch that adds:

/* Wa_12345:tgl,dg1[a0],rkl,adls,dg2_g10,dg2_g12[a0..c0) */
if (IS_TIGERLAKE(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) ||
IS_ALDERLAKE_P(i915) || IS_DG2_G10(i915) ||
IS_DG2_GRAPHICS_STEP(i915, G12, STEP_A0, STEP_C0))

I'm always looking at the code first and comparing that to the
workaround database.  If I then review the comment second, I'm much less
likely to catch the subtle errors (there are two of them in this example
where the code and comment don't match!) because I mentally already know
what the bounds are "supposed" to be and the comment all just kind of
blends together.

> 
> So my argument is that it is trivial for reviewers to spot comments and code
> do not match. Trivial and fast. And it's trivial (I hope) for the WA tool to
> output the right format for pasting in comments.

Given how much terminology mismatch there is between the internal
database and how we refer to things in i915 code, it's not trivial.
It's also not super easy to even figure out which platforms to include
in the list.  The workaround database is going to include platforms for
which there is no i915 support (e.g., LKF), stuff like CNL (support
already removed from i915), and future/potential platforms we can't talk
about yet, etc.  Finally, when there are duplicated/conflicting records
(because the people inputting the information are just human too), it
takes a bit of human intelligence to read between the lines and figure
out what the reality is supposed to be.

Sure, these problems could probably all be solved with enough effort,
but given how often the internal formatting and behavior of the database
itself changes, it would be painful to keep it always working properly.

> 
> Those are the points I would like to have explicitly discounted before
> proceeding. Maybe to be even clearer the workflow would be like this:
> 
> Patch author:
> 
> 1. Runs the WA tool for a WA number. Tool outputs text.
> 2. Pastes text verbatim in the comment.

This one isn't correct today (and as noted above, not terribly easy to
accomplish).  It's

   2.  Developer manually writes code comment based on interpreting wa's
   output.


Matt

> 3. Adjusts code to match.
> 
> Reviewer:
> 
> 1. Verifies both code and comment were changed.
> 2. Verifies code matches the comment.
> 
> If the counter proposal is, patch author:
> 
> 1. Runs the WA tool for a WA number. Tool outputs text.
> 2. Adjusts code to match.
> 
> Reviewer:
> 
> 1. Runs the WA tool. Tool outputs text.
> 2. Checks patch matches the WA tool output.
> 
> I will accept it but I strongly believe skipping of step 2 will happen and
> it will be impossible to know. Rubber stamping with the options of
> comments+code at least leaves a trace of comment and code being out of sync.
> 
> > > And point here to stress out is that accidental logic errors (missed
> > > workarounds) can be super expensive to debug in the field. Sometimes
> > > it can literally take _months_ for sporadic and hard to reproduce
> > > issues to get debugged, handed over between the teams, etc. So any
> > > way in which we can influence the likelihood of that happening is
> > > something to weigh carefully.
> > 
> > yes, that's why I want to remove the comments: from my experience they
> > are more a source of bugs rather than helping.
> > 
> > > Secondary but also important - if i915 is end of line then an extra
> > > why we want to rip out this for ancient platforms. Is the
> > > cost/benefit positive there?
> > 
> > yep, here I agree and was my argument about using the platform names
> > rather than a more "catch all" regex. I think doing this only for tgl+
> > platforms or even dg2+ would be ok.
> 
> Okay this is something to have as a 2nd option indeed. DG2 is out of force
> probe so maybe try with MTL. Although different rules for different
> platforms I don't know if will work in practice. Could be just too
> complicated to be practical.
> 
> > > As a side note, and going back to the question of what the tool can
> > > output. Long time ago I had an idea where we could improve all this
> > > by making it completely data-driven. Have the WA database inspecting
> > > tool output a table which could be directly pasted into code and
> > > interpreted by i915.
> > > 
> > > For reference look at intel_workarounds_table.h in
> > > https://patchwork.freedesktop.org/patch/399377/?series=83580=3
> > > and see what you think. That was just a sketch of the idea, not
> > > complete, and yes, i915 end of life point makes it moot.
> > 
> > now that xe is announced I can talk about this part... this was more
> > or less what I implemented in xe: it's a table with
> > "register + condition + action". There are the most common condition
> > checks builtin + a function hook for the more advanced ones. During
> > binding the driver walks the table and coalesces the entries creating
> > a per-register value that can be used at the proper times, depending if
> > they are gt, engine, context workarounds.
> 
> Cool, I support that high level approach.
> 
> Regards,
> 
> Tvrtko

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Consolidate TLB invalidation flow

2023-01-04 Thread Matt Roper
On Wed, Jan 04, 2023 at 10:08:29AM +, Tvrtko Ursulin wrote:
> 
> On 03/01/2023 19:57, Matt Roper wrote:
> > On Mon, Dec 19, 2022 at 05:10:02PM +0100, Andrzej Hajda wrote:
> > > On 19.12.2022 11:13, Tvrtko Ursulin wrote:
> > > > From: Tvrtko Ursulin 
> > > > 
> > > > As the logic for selecting the register and corresponsing values grew, 
> > > > the
> > > 
> > > corresponding
> > > 
> > > > code become a bit unsightly. Consolidate by storing the required values 
> > > > at
> > > > engine init time in the engine itself, and by doing so minimise the 
> > > > amount
> > > > of invariant platform and engine checks during each and every TLB
> > > > invalidation.
> > > > 
> > > > v2:
> > > > >* Fail engine probe if TLB invalidation registers are unknown.
> > > > 
> > > > Signed-off-by: Tvrtko Ursulin 
> > > > Cc: Andrzej Hajda 
> > > > Cc: Matt Roper 
> > > > Reviewed-by: Andrzej Hajda  # v1
> > > > ---
> > > >drivers/gpu/drm/i915/gt/intel_engine_cs.c|  93 +
> > > >drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 +++
> > > >drivers/gpu/drm/i915/gt/intel_gt.c   | 135 
> > > > +++
> > > >3 files changed, 128 insertions(+), 115 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > index 99c4b866addd..d47dadfc25c8 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > @@ -1143,12 +1143,105 @@ static int init_status_page(struct 
> > > > intel_engine_cs *engine)
> > > > return ret;
> > > >}
> > > > +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs 
> > > > *engine)
> > > > +{
> > > > +   static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> > > > +   [RENDER_CLASS].reg  = GEN8_RTCR,
> > > > +   [VIDEO_DECODE_CLASS].reg= GEN8_M1TCR, /* , 
> > > > GEN8_M2TCR */
> > > > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN8_VTCR,
> > > > +   [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
> > > > +   };
> > > > +   static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> > > > +   [RENDER_CLASS].reg  = GEN12_GFX_TLB_INV_CR,
> > > > +   [VIDEO_DECODE_CLASS].reg= GEN12_VD_TLB_INV_CR,
> > > > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN12_VE_TLB_INV_CR,
> > > > +   [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
> > > > +   [COMPUTE_CLASS].reg = 
> > > > GEN12_COMPCTX_TLB_INV_CR,
> > > > +   };
> > > > +   static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> > > > +   [RENDER_CLASS].mcr_reg= XEHP_GFX_TLB_INV_CR,
> > > > +   [VIDEO_DECODE_CLASS].mcr_reg  = XEHP_VD_TLB_INV_CR,
> > > > +   [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> > > > +   [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> > > > +   [COMPUTE_CLASS].mcr_reg   = 
> > > > XEHP_COMPCTX_TLB_INV_CR,
> > > > +   };
> > > > +   struct drm_i915_private *i915 = engine->i915;
> > > > +   const union intel_engine_tlb_inv_reg *regs;
> > > > +   union intel_engine_tlb_inv_reg reg;
> > > > +   unsigned int class = engine->class;
> > > > +   unsigned int num = 0;
> > > > +   u32 val;
> > > > +
> > > > +   /*
> > > > +* New platforms should not be added with catch-all-newer (>=)
> > > > +* condition so that any later platform added triggers the 
> > > > below warning
> > > > +* and in turn mandates a human cross-check of whether the 
> > > > invalidation
> > > > +* flows have compatible semantics.
> > > > +*
> > > > +* For instance with the 11.00 -> 12.00 transition three out of 
> > > > five
> > > > +* respective engine registers were m

Re: [PATCH 1/4] drm/i915/gt: Remove platform comments from workarounds

2023-01-03 Thread Matt Roper
se has information of
> what are the platforms/steppings for each the WA is known to be applied
> *today*. And that can change and do change often, particularly for early
> steppings and recent platforms.
> 
> > Thought is, when a patch comes for review adding a new platform,
> > stepping, whatever, to an existing if condition, if it contains the
> > comments reviewer can more easily spot a hypothetical logic inversion
> > error or similar. It is also trivial to check that both condition and
> > comment have been updated. (So lets not be rash and remove something
> > maybe useful just because it can go stale *only if* reviewers are not
> > giving sufficient attention that changes are made in tandem.)
> 
> I can argue to the other side too. We don't have comments in the kernel
> like
> 
>   /* Add 1 to i */
>   i += 1;
> 
> This is exactly what these comments are doing. And they are misleading

Yeah, this is exactly my feeling for why the platform/steppings should
just be removed from the comments.  Keeping the workaround numbers is
useful for quickly jumping to the implementation(s) of the workaround in
the code with grep, but everything else is (at best) just re-stating
exactly what the next lines of code should say.  During review, the code
itself is what really matters, and comments (which might become stale or
incorrect) just make it easier for mistakes to get overlooked.

I haven't reviewed the patch in detail, but from my point of view,

Acked-by: Matt Roper 

on the general direction here.


Matt

> and may introduce bugs rather than helping reviewing:
> 
>   Wa_12345:tgl[a0,c0)
>   if (IS_TGL_GRAPHICS_STEP(STEP_A0, STEP_B0)
> 
> One might read the comment, skipping over the condition and thinking
> "ok, we already extended this WA to B* steppings", which doesn't match
> the code.
> 
> 
> > From a slightly different angle - do we expect anyone reviewing
> > workaround patches will cross-check against the tool? Would it be
> > simpler and more efficient that they could just cross-check against the
> > comment output from the tool and put into the patch by the author?
> 
> see above. Someone cross-checking the comment is cross-checking the
> wrong thing. As I said, it happens more on early enabling of a platform.
> 
> > And point here to stress out is that accidental logic errors (missed
> > workarounds) can be super expensive to debug in the field. Sometimes it
> > can literally take _months_ for sporadic and hard to reproduce issues to
> > get debugged, handed over between the teams, etc. So any way in which we
> > can influence the likelihood of that happening is something to weigh
> > carefully.
> 
> yes, that's why I want to remove the comments: from my experience they
> are more a source of bugs rather than helping.
> 
> > Secondary but also important - if i915 is end of line then an extra why
> > we want to rip out this for ancient platforms. Is the cost/benefit
> > positive there?
> 
> yep, here I agree and was my argument about using the platform names
> rather than a more "catch all" regex. I think doing this only for tgl+
> platforms or even dg2+ would be ok.
> 
> > As a side note, and going back to the question of what the tool can
> > output. Long time ago I had an idea where we could improve all this by
> > making it completely data-driven. Have the WA database inspecting tool
> > output a table which could be directly pasted into code and interpreted
> > by i915.
> > 
> > For reference look at intel_workarounds_table.h in
> > https://patchwork.freedesktop.org/patch/399377/?series=83580=3 and
> > see what you think. That was just a sketch of the idea, not complete,
> > and yes, i915 end of life point makes it moot.
> 
> now that xe is announced I can talk about this part... this was more
> or less what I implemented in xe: it's a table with
> "register + condition + action". There are the most common condition
> checks builtin + a function hook for the more advanced ones. During
> binding the driver walks the table and coalesces the entries creating
> a per-register value that can be used at the proper times, depending if
> they are gt, engine, context workarounds.
> 
> Lucas De Marchi
> 
> > 
> > Regards,
> > 
> > Tvrtko

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Consolidate TLB invalidation flow

2023-01-03 Thread Matt Roper
On Mon, Dec 19, 2022 at 05:10:02PM +0100, Andrzej Hajda wrote:
> On 19.12.2022 11:13, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin 
> > 
> > As the logic for selecting the register and corresponsing values grew, the
> 
> corresponding
> 
> > code became a bit unsightly. Consolidate by storing the required values at
> > engine init time in the engine itself, and by doing so minimise the amount
> > of invariant platform and engine checks during each and every TLB
> > invalidation.
> > 
> > v2:
> >   * Fail engine probe if TLB invalidation registers are unknown.
> > 
> > Signed-off-by: Tvrtko Ursulin 
> > Cc: Andrzej Hajda 
> > Cc: Matt Roper 
> > Reviewed-by: Andrzej Hajda  # v1
> > ---
> >   drivers/gpu/drm/i915/gt/intel_engine_cs.c|  93 +
> >   drivers/gpu/drm/i915/gt/intel_engine_types.h |  15 +++
> >   drivers/gpu/drm/i915/gt/intel_gt.c   | 135 +++
> >   3 files changed, 128 insertions(+), 115 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 99c4b866addd..d47dadfc25c8 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -1143,12 +1143,105 @@ static int init_status_page(struct intel_engine_cs 
> > *engine)
> > return ret;
> >   }
> > +static int intel_engine_init_tlb_invalidation(struct intel_engine_cs 
> > *engine)
> > +{
> > +   static const union intel_engine_tlb_inv_reg gen8_regs[] = {
> > +   [RENDER_CLASS].reg  = GEN8_RTCR,
> > +   [VIDEO_DECODE_CLASS].reg= GEN8_M1TCR, /* , GEN8_M2TCR */
> > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN8_VTCR,
> > +   [COPY_ENGINE_CLASS].reg = GEN8_BTCR,
> > +   };
> > +   static const union intel_engine_tlb_inv_reg gen12_regs[] = {
> > +   [RENDER_CLASS].reg  = GEN12_GFX_TLB_INV_CR,
> > +   [VIDEO_DECODE_CLASS].reg= GEN12_VD_TLB_INV_CR,
> > +   [VIDEO_ENHANCEMENT_CLASS].reg   = GEN12_VE_TLB_INV_CR,
> > +   [COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
> > +   [COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
> > +   };
> > +   static const union intel_engine_tlb_inv_reg xehp_regs[] = {
> > +   [RENDER_CLASS].mcr_reg= XEHP_GFX_TLB_INV_CR,
> > +   [VIDEO_DECODE_CLASS].mcr_reg  = XEHP_VD_TLB_INV_CR,
> > +   [VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
> > +   [COPY_ENGINE_CLASS].mcr_reg   = XEHP_BLT_TLB_INV_CR,
> > +   [COMPUTE_CLASS].mcr_reg   = XEHP_COMPCTX_TLB_INV_CR,
> > +   };
> > +   struct drm_i915_private *i915 = engine->i915;
> > +   const union intel_engine_tlb_inv_reg *regs;
> > +   union intel_engine_tlb_inv_reg reg;
> > +   unsigned int class = engine->class;
> > +   unsigned int num = 0;
> > +   u32 val;
> > +
> > +   /*
> > +* New platforms should not be added with catch-all-newer (>=)
> > +* condition so that any later platform added triggers the below warning
> > +* and in turn mandates a human cross-check of whether the invalidation
> > +* flows have compatible semantics.
> > +*
> > +* For instance with the 11.00 -> 12.00 transition three out of five
> > +* respective engine registers were moved to masked type. Then after the
> > +* 12.00 -> 12.50 transition multi cast handling is required too.
> > +*/
> > +
> > +   if (GRAPHICS_VER_FULL(i915) == IP_VER(12, 50)) {

This is bad...it only captures XEHPSDV and breaks the handling of DG2
(12.55), PVC (12.60), and MTL (12.70, 12.71, and 12.72).  You're not
hitting the warning as expected since those are all now being captured
by the next case of the if/else ladder.  With the way GMD_ID works, we
may also get new version numbers that silently show up in hardware too
at some point (e.g., 12.73, 12.74, etc.)

> > +   regs = xehp_regs;
> > +   num = ARRAY_SIZE(xehp_regs);
> > +   } else if (GRAPHICS_VER(i915) == 12) {

You'd want to change this to 

GRAPHICS_VER_FULL(i915) == IP_VER(12, 0)

to get the behavior you expected.


Matt

> > +   regs = gen12_regs;
> > +   num = ARRAY_SIZE(gen12_regs);
> > +   } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
> > +   regs = gen8_regs;
> > +   num = ARRAY_SIZE(gen8_regs);
> > +   } else if (GRAPHICS_VER(i915) < 8) {
>

[PATCH] drm/i915/dg2: Return Wa_22012654132 to just specific steppings

2022-12-13 Thread Matt Roper
Programming of the ENABLE_PREFETCH_INTO_IC bit originally showed up in
both the general DG2 tuning guide (applicable to all DG2
variants/steppings) and under Wa_22012654132 (applicable only to
specific steppings).  It has now been removed from the tuning guide, and
the guidance is to only program it in the specific steppings associated
with the workaround.

Bspec: 68331
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++---
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 7d71f5bbddc8..bf84efb3f15f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2913,20 +2913,6 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
if (IS_DG2(i915)) {
wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, 
STACKID_CTRL_512);
-
-   /*
-* This is also listed as Wa_22012654132 for certain DG2
-* steppings, but the tuning setting programming is a superset
-* since it applies to all DG2 variants and steppings.
-*
-* Note that register 0xE420 is write-only and cannot be read
-* back for verification on DG2 (due to Wa_14012342262), so
-* we need to explicitly skip the readback.
-*/
-   wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
-  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
-  0 /* write-only, so skip validation */,
-  true);
}
 
/*
@@ -3022,6 +3008,19 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
/* Wa_18017747507:dg2 */
wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, 
POLYGON_TRIFAN_LINELOOP_DISABLE);
}
+
+   if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || 
IS_DG2_G11(i915))
+   /*
+* Wa_22012654132
+*
+* Note that register 0xE420 is write-only and cannot be read
+* back for verification on DG2 (due to Wa_14012342262), so
+* we need to explicitly skip the readback.
+*/
+   wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+  _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+  0 /* write-only, so skip validation */,
+  true);
 }
 
 static void
-- 
2.38.1



Re: [PATCH] drm/i915/gen12: Apply recommended L3 hashing mask

2022-12-05 Thread Matt Roper
On Mon, Dec 05, 2022 at 04:33:29PM -0300, Gustavo Sousa wrote:
> On Thu, Dec 01, 2022 at 02:22:10PM -0800, Matt Roper wrote:
> > The TGL/RKL/DG1/ADL performance tuning guide suggests programming a
> > literal value of 0x2FC0100F for this register.  The register's hardware
> > default value is 0x2FC0108F, so this translates to just clearing one
> > bit.
> > 
> > Take this opportunity to also clean up the register definition and
> > re-write its existing bits/fields in the preferred notation.
> > 
> > Bspec: 31870
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 9 +
> >  drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 
> >  2 files changed, 9 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > index 61a5c9a83b1b..f8eb807b56f9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > @@ -958,10 +958,11 @@
> >  #define   GEN7_DISABLE_SAMPLER_PREFETCH(1 << 30)
> >  
> >  #define GEN8_GARBCNTL  _MMIO(0xb004)
> > -#define   GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7)
> > -#define   GEN11_ARBITRATION_PRIO_ORDER_MASK(0x3f << 22)
> > -#define   GEN11_HASH_CTRL_EXCL_MASK(0x7f << 0)
> > -#define   GEN11_HASH_CTRL_EXCL_BIT0(1 << 0)
> > +#define   GEN11_ARBITRATION_PRIO_ORDER_MASKREG_GENMASK(27, 22)
> > +#define   GEN12_BUS_HASH_CTL_BIT_EXC   REG_BIT(7)
> > +#define   GEN9_GAPS_TSV_CREDIT_DISABLE REG_BIT(7)
> > +#define   GEN11_HASH_CTRL_EXCL_MASKREG_GENMASK(6, 0)
> > +#define   GEN11_HASH_CTRL_EXCL_BIT0
> > REG_FIELD_PREP(GEN11_HASH_CTRL_EXCL_MASK, 0x1)
> >  
> >  #define GEN9_SCRATCH_LNCF1 _MMIO(0xb008)
> >  #define   GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE  REG_BIT(0)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> > b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > index 85822ebb0d64..2f13a92f77d3 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> > @@ -2937,6 +2937,10 @@ add_render_compute_tuning_settings(struct 
> > drm_i915_private *i915,
> > if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
> > wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, 
> > THREAD_EX_ARB_MODE,
> > THREAD_EX_ARB_MODE_RR_AFTER_DEP);
> > +
> > +   if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 
> > 50)) {
> > +   wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
> > +   }
> 
> Removing the unnecessary braces as pointed out by dim checkpatch,
> 
> Reviewed-by: Gustavo Sousa 

Dropped the braces and applied to drm-intel-gt-next.  Thanks for the
review.


Matt

> 
> >  }
> >  
> >  /*
> > -- 
> > 2.38.1
> > 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Check full IP version when applying hw steering semaphore

2022-12-05 Thread Matt Roper
On Mon, Dec 05, 2022 at 12:50:40PM +, Tvrtko Ursulin wrote:
> 
> On 02/12/2022 22:49, Rodrigo Vivi wrote:
> > On Fri, Dec 02, 2022 at 02:35:28PM -0800, Matt Roper wrote:
> > > When determining whether the platform has a hardware-level steering
> > > semaphore (i.e., MTL and beyond), we need to use GRAPHICS_VER_FULL() to
> > > compare the full version rather than just the major version number
> > > returned by GRAPHICS_VER().
> > > 
> > > Reported-by: kernel test robot 
> > > Fixes: 3100240bf846 ("drm/i915/mtl: Add hardware-level lock for steering")
> > > Cc: Balasubramani Vivekanandan 
> > > Signed-off-by: Matt Roper 
> > 
> > Reviewed-by: Rodrigo Vivi 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 4 ++--
> > >   1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> > > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > > index 087e4ac5b68d..41a237509dcf 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > > @@ -367,7 +367,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned 
> > > long *flags)
> > >* driver threads, but also with hardware/firmware agents.  A 
> > > dedicated
> > >* locking register is used.
> > >*/
> > > - if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
> > > + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> 
> Ouch, tricky class of bugs... Anyone has an idea how to maybe coerce the 
> compiler into spotting them for us, cheaply?

I believe clang can already notice these problems with
Wtautological-constant-out-of-range-compare (which is how the kernel
test robot finds them):

>> drivers/gpu/drm/i915/gt/intel_gt_mcr.c:370:29: warning: result of 
comparison of constant 3142 with expression of type 'u8' (aka 'unsigned char')
+is always false [-Wtautological-constant-out-of-range-compare]
   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
   ~~ ^  ~~
   drivers/gpu/drm/i915/gt/intel_gt_mcr.c:410:29: warning: result of 
comparison of constant 3142 with expression of type 'u8' (aka 'unsigned char')
+is always false [-Wtautological-constant-out-of-range-compare]
   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
   ~~ ^  ~~
   2 warnings generated.

Unfortunately gcc doesn't seem to have anything equivalent as far as I
can see.

> 
> This one is undefined behaviour I think so not good:
> 
> -#define IP_VER(ver, rel)   ((ver) << 8 | (rel))
> +typedef void * i915_full_ver_t;
> +
> +#define IP_VER(ver, rel) (i915_full_ver_t)(unsigned long)((ver) << 8 | (rel))

Hmm, so by casting it into a pointer, you're hoping to trigger a
"comparison of pointer and integer without cast" warning on misuse?
That's a good idea, but as you noted, the C99 spec says comparison of
pointers is only guaranteed to work if both are pointers into the same
structure/array, otherwise the results are technically undefined.


Matt

> 
> Regards,
> 
> Tvrtko
> 
> > >   err = wait_for(intel_uncore_read_fw(gt->uncore,
> > >   
> > > MTL_STEER_SEMAPHORE) == 0x1, 100);
> > > @@ -407,7 +407,7 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, 
> > > unsigned long flags)
> > >   {
> > >   spin_unlock_irqrestore(>mcr_lock, flags);
> > > - if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
> > > + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
> > >   intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 
> > > 0x1);
> > >   }
> > > -- 
> > > 2.38.1
> > > 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH v2 4/5] drm/i915/mtl: Add hardware-level lock for steering

2022-12-05 Thread Matt Roper
On Mon, Dec 05, 2022 at 08:58:16AM +, Tvrtko Ursulin wrote:
> 
> On 28/11/2022 23:30, Matt Roper wrote:
> > Starting with MTL, the driver needs to not only protect the steering
> > control register from simultaneous software accesses, but also protect
> > against races with hardware/firmware agents.  The hardware provides a
> > dedicated locking mechanism to support this via the MTL_STEER_SEMAPHORE
> > register.  Reading the register acts as a 'trylock' operation; the read
> > will return 0x1 if the lock is acquired or 0x0 if something else is
> > already holding the lock; once acquired, writing 0x1 to the register
> > will release the lock.
> > 
> > We'll continue to grab the software lock as well, just so lockdep can
> > track our locking; assuming the hardware lock is behaving properly,
> > there should never be any contention on the software lock in this case.
> > 
> > v2:
> >   - Extend hardware semaphore timeout and add a taint for CI if it ever
> > happens (this would imply misbehaving hardware/firmware).  (Mika)
> >   - Add "MTL_" prefix to new steering semaphore register.  (Mika)
> > 
> > Cc: Mika Kuoppala 
> > Signed-off-by: Matt Roper 
> > ---
> >   drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 38 ++---
> >   drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
> >   2 files changed, 35 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > index aa070ae57f11..087e4ac5b68d 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > @@ -347,10 +347,9 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
> >* @flags: storage to save IRQ flags to
> >*
> >* Performs locking to protect the steering for the duration of an MCR
> > - * operation.  Depending on the platform, this may be a software lock
> > - * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes
> > - * access not only for the driver, but also for external hardware and
> > - * firmware agents).
> > + * operation.  On MTL and beyond, a hardware lock will also be taken to
> > + * serialize access not only for the driver, but also for external 
> > hardware and
> > + * firmware agents.
> >*
> >* Context: Takes gt->mcr_lock.  uncore->lock should *not* be held when 
> > this
> >*  function is called, although it may be acquired after this
> > @@ -359,12 +358,40 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
> >   void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
> >   {
> > unsigned long __flags;
> > +   int err = 0;
> > lockdep_assert_not_held(>uncore->lock);
> > +   /*
> > +* Starting with MTL, we need to coordinate not only with other
> > +* driver threads, but also with hardware/firmware agents.  A dedicated
> > +* locking register is used.
> > +*/
> > +   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
> > +   err = wait_for(intel_uncore_read_fw(gt->uncore,
> > +   MTL_STEER_SEMAPHORE) == 
> > 0x1, 100);
> > +
> 
> If two i915 threads enter here what happens? (Given hw locking is done
> before the spinlock.)

The second thread will see a '0' when it reads the register, indicating
that something else (sw, fw, or hw) already has it locked.  As soon as
the first thread drops the lock, the next read will return '1' and allow
the second thread to proceed.


Matt

> 
> Regards,
> 
> Tvrtko
> 
> > +   /*
> > +* Even on platforms with a hardware lock, we'll continue to grab
> > +* a software spinlock too for lockdep purposes.  If the hardware lock
> > +* was already acquired, there should never be contention on the
> > +* software lock.
> > +*/
> > spin_lock_irqsave(>mcr_lock, __flags);
> > *flags = __flags;
> > +
> > +   /*
> > +* In theory we should never fail to acquire the HW semaphore; this
> > +* would indicate some hardware/firmware is misbehaving and not
> > +* releasing it properly.
> > +*/
> > +   if (err == -ETIMEDOUT) {
> > +   drm_err_ratelimited(>i915->drm,
> > +   "GT%u hardware MCR steering semaphore timed 
> > out",
> > +   gt->info.id);
> > +   add_taint_for_CI(gt->i915, TAINT_WARN);  /* CI is now 
> > unreliable */
> > +   }
>

[PATCH] drm/i915/mtl: Check full IP version when applying hw steering semaphore

2022-12-02 Thread Matt Roper
When determining whether the platform has a hardware-level steering
semaphore (i.e., MTL and beyond), we need to use GRAPHICS_VER_FULL() to
compare the full version rather than just the major version number
returned by GRAPHICS_VER().

Reported-by: kernel test robot 
Fixes: 3100240bf846 ("drm/i915/mtl: Add hardware-level lock for steering")
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 087e4ac5b68d..41a237509dcf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -367,7 +367,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long 
*flags)
 * driver threads, but also with hardware/firmware agents.  A dedicated
 * locking register is used.
 */
-   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
err = wait_for(intel_uncore_read_fw(gt->uncore,
MTL_STEER_SEMAPHORE) == 
0x1, 100);
 
@@ -407,7 +407,7 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long 
flags)
 {
spin_unlock_irqrestore(>mcr_lock, flags);
 
-   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
 }
 
-- 
2.38.1



[PATCH] drm/i915/gen12: Apply recommended L3 hashing mask

2022-12-01 Thread Matt Roper
The TGL/RKL/DG1/ADL performance tuning guide suggests programming a
literal value of 0x2FC0100F for this register.  The register's hardware
default value is 0x2FC0108F, so this translates to just clearing one
bit.

Take this opportunity to also clean up the register definition and
re-write its existing bits/fields in the preferred notation.

Bspec: 31870
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 9 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 4 
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 61a5c9a83b1b..f8eb807b56f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -958,10 +958,11 @@
 #define   GEN7_DISABLE_SAMPLER_PREFETCH(1 << 30)
 
 #define GEN8_GARBCNTL  _MMIO(0xb004)
-#define   GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7)
-#define   GEN11_ARBITRATION_PRIO_ORDER_MASK(0x3f << 22)
-#define   GEN11_HASH_CTRL_EXCL_MASK(0x7f << 0)
-#define   GEN11_HASH_CTRL_EXCL_BIT0(1 << 0)
+#define   GEN11_ARBITRATION_PRIO_ORDER_MASKREG_GENMASK(27, 22)
+#define   GEN12_BUS_HASH_CTL_BIT_EXC   REG_BIT(7)
+#define   GEN9_GAPS_TSV_CREDIT_DISABLE REG_BIT(7)
+#define   GEN11_HASH_CTRL_EXCL_MASKREG_GENMASK(6, 0)
+#define   GEN11_HASH_CTRL_EXCL_BIT0
REG_FIELD_PREP(GEN11_HASH_CTRL_EXCL_MASK, 0x1)
 
 #define GEN9_SCRATCH_LNCF1 _MMIO(0xb008)
 #define   GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE  REG_BIT(0)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 85822ebb0d64..2f13a92f77d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2937,6 +2937,10 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, 
THREAD_EX_ARB_MODE,
THREAD_EX_ARB_MODE_RR_AFTER_DEP);
+
+   if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 
50)) {
+   wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
+   }
 }
 
 /*
-- 
2.38.1



Re: [PATCH v3 5/5] drm/i915/mtl: Hold forcewake and MCR lock over PPAT setup

2022-12-01 Thread Matt Roper
On Thu, Dec 01, 2022 at 02:56:30PM +0530, Balasubramani Vivekanandan wrote:
> On 30.11.2022 07:58, Matt Roper wrote:
> > PPAT setup involves a series of multicast writes.  This can be optimized
> > slightly by acquiring forcewake and the steering lock just once for the
> > entire sequence.
> > 
> > v2:
> >  - We should use FW_REG_WRITE instead of FW_REG_READ.  (Bala)
> > 
> > Suggested-by: Balasubramani Vivekanandan 
> > 
> > Signed-off-by: Matt Roper 
> 
> Reviewed-by: Balasubramani Vivekanandan 

Thanks.  Since this patch is independent of patch #4 (the only one that
hasn't been reviewed yet), I went ahead and pushed this one to
drm-intel-gt-next.  BTW, I noticed I wrote "mtl" in the patch title
where I actually meant to have "mcr" (this isn't a MTL-specific change),
so I corrected that typo while pushing as well.


Matt

> 
> Regards,
> Bala
> 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gtt.c | 27 +++
> >  1 file changed, 19 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > index 2ba3983984b9..e37164a60d37 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > @@ -482,14 +482,25 @@ static void tgl_setup_private_ppat(struct 
> > intel_uncore *uncore)
> >  
> >  static void xehp_setup_private_ppat(struct intel_gt *gt)
> >  {
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
> > +   enum forcewake_domains fw;
> > +   unsigned long flags;
> > +
> > +   fw = intel_uncore_forcewake_for_reg(gt->uncore, 
> > _MMIO(XEHP_PAT_INDEX(0).reg),
> > +   FW_REG_WRITE);
> > +   intel_uncore_forcewake_get(gt->uncore, fw);
> > +
> > +   intel_gt_mcr_lock(gt, );
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
> > +   intel_gt_mcr_unlock(gt, flags);
> > +
> > +   intel_uncore_forcewake_put(gt->uncore, fw);
> >  }
> >  
> >  static void icl_setup_private_ppat(struct intel_uncore *uncore)
> > -- 
> > 2.38.1
> > 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH v6 1/1] drm/i915/pxp: Promote pxp subsystem to top-level of i915

2022-11-30 Thread Matt Roper
 0;
>  }
> -DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(pxp_info);
> +
> +static int pxp_info_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, pxp_info_show, inode->i_private);
> +}
> +
> +static const struct file_operations pxp_info_fops = {
> + .owner = THIS_MODULE,
> + .open = pxp_info_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
>  
>  static int pxp_terminate_get(void *data, u64 *val)
>  {
> @@ -59,23 +74,23 @@ static int pxp_terminate_set(void *data, u64 val)
>  }
>  
>  DEFINE_SIMPLE_ATTRIBUTE(pxp_terminate_fops, pxp_terminate_get, 
> pxp_terminate_set, "%llx\n");
> -void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry 
> *gt_root)
> +
> +void intel_pxp_debugfs_register(struct intel_pxp *pxp)
>  {
> - static const struct intel_gt_debugfs_file files[] = {
> - { "info", _info_fops, NULL },
> - { "terminate_state", _terminate_fops, NULL },
> - };
> - struct dentry *root;
> + struct drm_minor *minor = pxp->i915->drm.primary;
> + struct dentry *pxproot;
>  
> - if (!gt_root)
> + if (!HAS_PXP(pxp->i915))
>   return;
>  
> - if (!HAS_PXP((pxp_to_gt(pxp)->i915)))
> + pxproot = debugfs_create_dir("pxp", minor->debugfs_root);
> + if (IS_ERR(pxproot))
>   return;
>  
> - root = debugfs_create_dir("pxp", gt_root);
> - if (IS_ERR(root))
> - return;
> + debugfs_create_file("info", 0444, pxproot,
> + pxp, _info_fops);
> +
> + debugfs_create_file("terminate_state", 0644, pxproot,
> + pxp, _terminate_fops);
>  
> - intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), pxp);
>  }
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h 
> b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h
> index 7e0c3d2f5d7e..299382b59e66 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_debugfs.h
> @@ -10,10 +10,10 @@ struct intel_pxp;
>  struct dentry;
>  
>  #ifdef CONFIG_DRM_I915_PXP
> -void intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root);
> +void intel_pxp_debugfs_register(struct intel_pxp *pxp);
>  #else
>  static inline void
> -intel_pxp_debugfs_register(struct intel_pxp *pxp, struct dentry *root)
> +intel_pxp_debugfs_register(struct intel_pxp *pxp)
>  {
>  }
>  #endif
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c 
> b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> index c28be430718a..fd30befbf784 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.c
> @@ -3,14 +3,18 @@
>   * Copyright(c) 2020 Intel Corporation.
>   */
>  #include 
> -#include "intel_pxp.h"
> -#include "intel_pxp_irq.h"
> -#include "intel_pxp_session.h"
> +
>  #include "gt/intel_gt_irq.h"
>  #include "gt/intel_gt_regs.h"
>  #include "gt/intel_gt_types.h"
> +
>  #include "i915_irq.h"
>  #include "i915_reg.h"
> +
> +#include "intel_pxp.h"
> +#include "intel_pxp_irq.h"
> +#include "intel_pxp_session.h"
> +#include "intel_pxp_types.h"
>  #include "intel_runtime_pm.h"
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c 
> b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
> index 6a7d4e2ee138..37371f44a550 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
> @@ -3,11 +3,13 @@
>   * Copyright(c) 2020 Intel Corporation.
>   */
>  
> +#include "i915_drv.h"
> +
>  #include "intel_pxp.h"
>  #include "intel_pxp_irq.h"
>  #include "intel_pxp_pm.h"
>  #include "intel_pxp_session.h"
> -#include "i915_drv.h"
> +#include "intel_pxp_types.h"
>  
>  void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
>  {
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c 
> b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
> index b0c9170b1395..16782d119bfd 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
> @@ -11,17 +11,19 @@
>  #include "gem/i915_gem_lmem.h"
>  
>  #include "i915_drv.h"
> +
>  #include "intel_pxp.h"
> -#include "intel_pxp_session.h"
> -#include "intel_pxp_tee.h"
>  #include "intel_pxp_cmd_interface_42.h"
>  #include "intel_pxp_huc.h"
> +#include "intel_pxp_session.h"
> +#include "intel_pxp_tee.h"
> +#include "intel_pxp_types.h"
>  
>  static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev)
>  {
>   struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
>  
> - return _gt(i915)->pxp;
> + return i915->pxp;
>  }
>  
>  static int intel_pxp_tee_io_message(struct intel_pxp *pxp,
> diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h 
> b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> index f74b1e11a505..d550cdba3399 100644
> --- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> +++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
> @@ -12,12 +12,23 @@
>  #include 
>  
>  struct intel_context;
> +struct intel_gt;
>  struct i915_pxp_component;
> +struct drm_i915_private;
>  
>  /**
>   * struct intel_pxp - pxp state
>   */
>  struct intel_pxp {
> + /** @i915: back poiner to i915*/
> + struct drm_i915_private *i915;
> +
> + /**
> +  * @ctrl_gt: pointer to the tile that owns the controls for PXP 
> subsystem assets that
> +  * the VDBOX, the KCR engine (and GSC CS depending on the platform)
> +  */
> + struct intel_gt *ctrl_gt;
> +
>   /**
>* @pxp_component: i915_pxp_component struct of the bound mei_pxp
>* module. Only set and cleared inside component bind/unbind functions,
> 
> base-commit: d21d6474a37e5d43075a24668807ea40a7ee9fc1
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [linux-gfx] [PATCH] drm/i915/pvc: Implement recommended caching policy

2022-11-30 Thread Matt Roper
On Wed, Nov 30, 2022 at 09:07:23AM -0800, Wayne Boyer wrote:
> As per the performance tuning guide, set the HOSTCACHEEN bit to
> implement the recommended caching policy on PVC.
> 
> Signed-off-by: Wayne Boyer 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 784152548472..f96570995cfc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -973,6 +973,7 @@
>  #define   GEN7_L3AGDIS   (1 << 19)
>  
>  #define XEHPC_LNCFMISCCFGREG0_MMIO(0xb01c)
> +#define   XEHPC_HOSTCACHEEN  REG_BIT(1)
>  #define   XEHPC_OVRLSCCC REG_BIT(0)
>  
>  #define GEN7_L3CNTLREG2  _MMIO(0xb020)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 1b0e40e68a9d..35e3f43e8b06 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2903,6 +2903,7 @@ add_render_compute_tuning_settings(struct 
> drm_i915_private *i915,
>   if (IS_PONTEVECCHIO(i915)) {
>   wa_write(wal, XEHPC_L3SCRUB,
>SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
> + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
>   }
>  
>   if (IS_DG2(i915)) {
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v2 5/5] drm/i915/mtl: Hold forcewake and MCR lock over PPAT setup

2022-11-30 Thread Matt Roper
On Wed, Nov 30, 2022 at 09:21:07PM +0530, Balasubramani Vivekanandan wrote:
> On 28.11.2022 15:30, Matt Roper wrote:
> > PPAT setup involves a series of multicast writes.  This can be optimized
> > slightly by acquiring forcewake and the steering lock just once for the
> > entire sequence.
> > 
> > Suggested-by: Balasubramani Vivekanandan 
> > 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gtt.c | 27 +++
> >  1 file changed, 19 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > index 2ba3983984b9..288d9f118ee9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> > @@ -482,14 +482,25 @@ static void tgl_setup_private_ppat(struct 
> > intel_uncore *uncore)
> >  
> >  static void xehp_setup_private_ppat(struct intel_gt *gt)
> >  {
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
> > -   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
> > +   enum forcewake_domains fw;
> > +   unsigned long flags;
> > +
> > +   fw = intel_uncore_forcewake_for_reg(gt->uncore, 
> > _MMIO(XEHP_PAT_INDEX(0).reg),
> > +   FW_REG_READ);
> 
> I am not completely aware of forcewake implementation. I am wondering if
> the last parameter should be FW_REG_WRITE since it is register write
> which is happening later.

Yep, good catch.  Using FW_REG_WRITE allows the driver to potentially
skip obtaining forcewake and waking the hardware if the registers being
written are "shadowed" so it's always good to use FW_REG_WRITE in places
where we're only writing and not reading.

In this case the specific registers in question don't appear to be part
of the shadow table for any affected platforms (e.g.,
dg2_shadowed_regs[] and such in intel_uncore.c), so FW_REG_WRITE will
still behave the same as FW_REG_READ here.  But it's always possible
that future platforms could decide to shadow these registers, so it's
good to fix anyway; I just sent an updated copy of this patch making
that change.


Matt

> 
> Regards,
> Bala
> 
> > +   intel_uncore_forcewake_get(gt->uncore, fw);
> > +
> > +   intel_gt_mcr_lock(gt, &flags);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
> > +   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
> > +   intel_gt_mcr_unlock(gt, flags);
> > +
> > +   intel_uncore_forcewake_put(gt->uncore, fw);
> >  }
> >  
> >  static void icl_setup_private_ppat(struct intel_uncore *uncore)
> > -- 
> > 2.38.1
> > 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH v3 5/5] drm/i915/mtl: Hold forcewake and MCR lock over PPAT setup

2022-11-30 Thread Matt Roper
PPAT setup involves a series of multicast writes.  This can be optimized
slightly by acquiring forcewake and the steering lock just once for the
entire sequence.

v2:
 - We should use FW_REG_WRITE instead of FW_REG_READ.  (Bala)

Suggested-by: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2ba3983984b9..e37164a60d37 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -482,14 +482,25 @@ static void tgl_setup_private_ppat(struct intel_uncore 
*uncore)
 
 static void xehp_setup_private_ppat(struct intel_gt *gt)
 {
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+   enum forcewake_domains fw;
+   unsigned long flags;
+
+   fw = intel_uncore_forcewake_for_reg(gt->uncore, 
_MMIO(XEHP_PAT_INDEX(0).reg),
+   FW_REG_WRITE);
+   intel_uncore_forcewake_get(gt->uncore, fw);
+
+   intel_gt_mcr_lock(gt, &flags);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+   intel_gt_mcr_unlock(gt, flags);
+
+   intel_uncore_forcewake_put(gt->uncore, fw);
 }
 
 static void icl_setup_private_ppat(struct intel_uncore *uncore)
-- 
2.38.1



[PATCH v2 4/5] drm/i915/mtl: Add hardware-level lock for steering

2022-11-28 Thread Matt Roper
Starting with MTL, the driver needs to not only protect the steering
control register from simultaneous software accesses, but also protect
against races with hardware/firmware agents.  The hardware provides a
dedicated locking mechanism to support this via the MTL_STEER_SEMAPHORE
register.  Reading the register acts as a 'trylock' operation; the read
will return 0x1 if the lock is acquired or 0x0 if something else is
already holding the lock; once acquired, writing 0x1 to the register
will release the lock.

We'll continue to grab the software lock as well, just so lockdep can
track our locking; assuming the hardware lock is behaving properly,
there should never be any contention on the software lock in this case.

v2:
 - Extend hardware semaphore timeout and add a taint for CI if it ever
   happens (this would imply misbehaving hardware/firmware).  (Mika)
 - Add "MTL_" prefix to new steering semaphore register.  (Mika)

Cc: Mika Kuoppala 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 38 ++---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index aa070ae57f11..087e4ac5b68d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -347,10 +347,9 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
  * @flags: storage to save IRQ flags to
  *
  * Performs locking to protect the steering for the duration of an MCR
- * operation.  Depending on the platform, this may be a software lock
- * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes
- * access not only for the driver, but also for external hardware and
- * firmware agents).
+ * operation.  On MTL and beyond, a hardware lock will also be taken to
+ * serialize access not only for the driver, but also for external hardware and
+ * firmware agents.
  *
  * Context: Takes gt->mcr_lock.  uncore->lock should *not* be held when this
  *  function is called, although it may be acquired after this
@@ -359,12 +358,40 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
 void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
 {
unsigned long __flags;
+   int err = 0;
 
lockdep_assert_not_held(&gt->uncore->lock);
 
+   /*
+* Starting with MTL, we need to coordinate not only with other
+* driver threads, but also with hardware/firmware agents.  A dedicated
+* locking register is used.
+*/
+   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   err = wait_for(intel_uncore_read_fw(gt->uncore,
+   MTL_STEER_SEMAPHORE) == 
0x1, 100);
+
+   /*
+* Even on platforms with a hardware lock, we'll continue to grab
+* a software spinlock too for lockdep purposes.  If the hardware lock
+* was already acquired, there should never be contention on the
+* software lock.
+*/
spin_lock_irqsave(&gt->mcr_lock, __flags);
 
*flags = __flags;
+
+   /*
+* In theory we should never fail to acquire the HW semaphore; this
+* would indicate some hardware/firmware is misbehaving and not
+* releasing it properly.
+*/
+   if (err == -ETIMEDOUT) {
+   drm_err_ratelimited(&gt->i915->drm,
+   "GT%u hardware MCR steering semaphore timed 
out",
+   gt->info.id);
+   add_taint_for_CI(gt->i915, TAINT_WARN);  /* CI is now 
unreliable */
+   }
 }
 
 /**
@@ -379,6 +406,9 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long 
*flags)
 void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags)
 {
spin_unlock_irqrestore(&gt->mcr_lock, flags);
+
+   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 784152548472..1618d46cb8c7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -67,6 +67,7 @@
 #define GMD_ID_MEDIA   _MMIO(MTL_MEDIA_GSI_BASE + 
0xd8c)
 
 #define MCFG_MCR_SELECTOR  _MMIO(0xfd0)
+#define MTL_STEER_SEMAPHORE_MMIO(0xfd0)
 #define MTL_MCR_SELECTOR   _MMIO(0xfd4)
 #define SF_MCR_SELECTOR_MMIO(0xfd8)
 #define GEN8_MCR_SELECTOR  _MMIO(0xfdc)
-- 
2.38.1



[PATCH v2 3/5] drm/i915/gt: Add dedicated MCR lock

2022-11-28 Thread Matt Roper
We've been overloading uncore->lock to protect access to the MCR
steering register.  That's not really what uncore->lock is intended for,
and it would be better if we didn't need to hold such a high-traffic
spinlock for the whole sequence of (apply steering, access MCR register,
restore steering).  Let's create a dedicated MCR lock to protect the
steering control register over this critical section and stop relying on
the high-traffic uncore->lock.

For now the new lock is a software lock.  However some platforms (MTL
and beyond) have a hardware-provided locking mechanism that can be used
to serialize not only software accesses, but also hardware/firmware
accesses as well; support for that hardware level lock will be added in
a future patch.

v2:
 - Use irqsave/irqrestore spinlock calls; platforms using execlist
   submission rather than GuC submission can perform MCR accesses in
   interrupt context because reset -> errordump happens in a tasklet.

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  |  7 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 79 +++--
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  8 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c|  3 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 12 ++--
 6 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7ef0edb2e37c..6847f3bd2b03 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1079,6 +1079,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
enum intel_engine_id id;
const i915_reg_t *regs;
unsigned int num = 0;
+   unsigned long flags;
 
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
regs = NULL;
@@ -1099,7 +1100,8 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
-   spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+   intel_gt_mcr_lock(gt, &flags);
+   spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
 
awake = 0;
for_each_engine(engine, gt, id) {
@@ -1133,7 +1135,8 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 IS_ALDERLAKE_P(i915)))
intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
 
-   spin_unlock_irq(&uncore->lock);
+   spin_unlock(&uncore->lock);
+   intel_gt_mcr_unlock(gt, flags);
 
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index f4484bb18ec9..aa070ae57f11 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -143,6 +143,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
unsigned long fuse;
int i;
 
+   spin_lock_init(&gt->mcr_lock);
+
/*
 * An mslice is unavailable only if both the meml3 for the slice is
 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
@@ -228,6 +230,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
  * @instance: instance number (documented as "subsliceid" on older platforms)
  * @value: register value to be written (ignored for read)
  *
+ * Context: The caller must hold the MCR lock
  * Return: 0 for write access. register value for read access.
  *
  * Caller needs to make sure the relevant forcewake wells are up.
@@ -239,7 +242,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
struct intel_uncore *uncore = gt->uncore;
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
-   lockdep_assert_held(&uncore->lock);
+   lockdep_assert_held(&gt->mcr_lock);
 
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
/*
@@ -316,6 +319,7 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
 {
struct intel_uncore *uncore = gt->uncore;
enum forcewake_domains fw_domains;
+   unsigned long flags;
u32 val;
 
fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
@@ -324,17 +328,59 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
 GEN8_MCR_SELECTOR,
 FW_REG_READ | 
FW_REG_WRITE);
 
-   spin_lock_irq(&uncore->lock);
+   intel_gt_mcr_lock(gt, &flags);
+   spin_lock(&uncore->lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
 
val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value);
 
intel_uncore_forcewake_put__locked(uncore, fw_domains);
-   spin_unlock_irq(&uncore->lock);
+   spin_unlock(&uncore->lock);
+   intel_gt_mcr_unlock(gt, flags);
 

[PATCH v2 5/5] drm/i915/mtl: Hold forcewake and MCR lock over PPAT setup

2022-11-28 Thread Matt Roper
PPAT setup involves a series of multicast writes.  This can be optimized
slightly by acquiring forcewake and the steering lock just once for the
entire sequence.

Suggested-by: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2ba3983984b9..288d9f118ee9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -482,14 +482,25 @@ static void tgl_setup_private_ppat(struct intel_uncore 
*uncore)
 
 static void xehp_setup_private_ppat(struct intel_gt *gt)
 {
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
-   intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+   enum forcewake_domains fw;
+   unsigned long flags;
+
+   fw = intel_uncore_forcewake_for_reg(gt->uncore, 
_MMIO(XEHP_PAT_INDEX(0).reg),
+   FW_REG_READ);
+   intel_uncore_forcewake_get(gt->uncore, fw);
+
+   intel_gt_mcr_lock(gt, &flags);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(0), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(1), GEN8_PPAT_WC);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(2), GEN8_PPAT_WT);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(3), GEN8_PPAT_UC);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(4), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(5), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(6), GEN8_PPAT_WB);
+   intel_gt_mcr_multicast_write_fw(gt, XEHP_PAT_INDEX(7), GEN8_PPAT_WB);
+   intel_gt_mcr_unlock(gt, flags);
+
+   intel_uncore_forcewake_put(gt->uncore, fw);
 }
 
 static void icl_setup_private_ppat(struct intel_uncore *uncore)
-- 
2.38.1



[PATCH v2 1/5] drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg()

2022-11-28 Thread Matt Roper
The kerneldoc function name was not updated when this function was
converted to a non-fw form.

Fixes: 192bb40f030a ("drm/i915/gt: Manage uncore->lock while waiting on MCR 
register")
Reported-by: kernel test robot 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index d9a8ff9e5e57..ea86c1ab5dc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
unsigned int dss,
 }
 
 /**
- * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected 
state
+ * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state
  * @gt: GT structure
  * @reg: the register to read
  * @mask: mask to apply to register value
-- 
2.38.1



[PATCH v2 0/5] i915: dedicated MCR locking and hardware semaphore

2022-11-28 Thread Matt Roper
We've been overloading uncore->lock to protect access to the MCR
steering register.  That's not really what uncore->lock is intended for,
and it would be better if we didn't need to hold such a high-traffic
spinlock for the whole sequence of (apply steering, access MCR register,
restore steering).  Switch to a dedicated MCR lock to protect the
steering control register over this critical section and stop relying on
the high-traffic uncore->lock.  On pre-MTL platforms the dedicated MCR
lock is just another software lock, but on MTL and beyond we also
utilize the hardware-provided STEER_SEMAPHORE that allows us to
synchronize with external hardware and firmware agents.

v2:
 - Use irqsave/irqrestore locking; on platforms that use execlist
   submission instead of GuC, MCR accesses can happen in interrupt
   context (tasklet) during reset -> error dump.
 - Extend timeout for hardware semaphore and CI taint if we ever
   encounter it (this implies a hardware/firmware problem).  (Mika)
 - Add an extra patch optimizing xehp_setup_private_ppat by holding
   forcewake & mcr lock over the sequence of register writes.  (Bala)

Cc: Mika Kuoppala 
Cc: Balasubramani Vivekanandan 

Matt Roper (5):
  drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg()
  drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes
  drm/i915/gt: Add dedicated MCR lock
  drm/i915/mtl: Add hardware-level lock for steering
  drm/i915/mtl: Hold forcewake and MCR lock over PPAT setup

 drivers/gpu/drm/i915/gt/intel_gt.c  |   7 +-
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 129 ++--
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |   1 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|   8 ++
 drivers/gpu/drm/i915/gt/intel_gtt.c |  27 ++--
 drivers/gpu/drm/i915/gt/intel_mocs.c|   3 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  12 +-
 8 files changed, 162 insertions(+), 27 deletions(-)

-- 
2.38.1



[PATCH v2 2/5] drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes

2022-11-28 Thread Matt Roper
Passing the GT rather than uncore to the lowest level MCR read and write
functions will make it easier to introduce dedicated MCR locking in a
following patch.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index ea86c1ab5dc5..f4484bb18ec9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -221,7 +221,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
 
 /*
  * rw_with_mcr_steering_fw - Access a register with specific MCR steering
- * @uncore: pointer to struct intel_uncore
+ * @gt: GT to read register from
  * @reg: register being accessed
  * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
  * @group: group number (documented as "sliceid" on older platforms)
@@ -232,10 +232,11 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
  *
  * Caller needs to make sure the relevant forcewake wells are up.
  */
-static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
+static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
   i915_mcr_reg_t reg, u8 rw_flag,
   int group, int instance, u32 value)
 {
+   struct intel_uncore *uncore = gt->uncore;
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
lockdep_assert_held(&uncore->lock);
@@ -308,11 +309,12 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore 
*uncore,
return val;
 }
 
-static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
+static u32 rw_with_mcr_steering(struct intel_gt *gt,
i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance,
u32 value)
 {
+   struct intel_uncore *uncore = gt->uncore;
enum forcewake_domains fw_domains;
u32 val;
 
@@ -325,7 +327,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
spin_lock_irq(&uncore->lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
 
-   val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, 
value);
+   val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value);
 
intel_uncore_forcewake_put__locked(uncore, fw_domains);
spin_unlock_irq(&uncore->lock);
@@ -347,7 +349,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
  i915_mcr_reg_t reg,
  int group, int instance)
 {
-   return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, 
instance, 0);
+   return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0);
 }
 
 /**
@@ -364,7 +366,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
 void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 
value,
int group, int instance)
 {
-   rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, 
value);
+   rw_with_mcr_steering(gt, reg, FW_REG_WRITE, group, instance, value);
 }
 
 /**
@@ -588,7 +590,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, 
i915_mcr_reg_t reg)
for (type = 0; type < NUM_STEERING_TYPES; type++) {
if (reg_needs_read_steering(gt, reg, type)) {
get_nonterminated_steering(gt, type, &group, &instance);
-   return rw_with_mcr_steering_fw(gt->uncore, reg,
+   return rw_with_mcr_steering_fw(gt, reg,
   FW_REG_READ,
   group, instance, 0);
}
@@ -615,7 +617,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, 
i915_mcr_reg_t reg)
for (type = 0; type < NUM_STEERING_TYPES; type++) {
if (reg_needs_read_steering(gt, reg, type)) {
get_nonterminated_steering(gt, type, &group, &instance);
-   return rw_with_mcr_steering(gt->uncore, reg,
+   return rw_with_mcr_steering(gt, reg,
FW_REG_READ,
group, instance, 0);
}
-- 
2.38.1



Re: [Intel-gfx] [PATCH] drm/i915/gt: Manage uncore->lock while waiting on MCR register

2022-11-23 Thread Matt Roper
On Wed, Nov 23, 2022 at 02:46:18PM -0800, John Harrison wrote:
> On 11/17/2022 09:33, Matt Roper wrote:
> > ...
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > index 830edffe88cc..d9a8ff9e5e57 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > @@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt 
> > *gt, unsigned int dss,
> >*
> >* Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
> >*/
> > -int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
> > -i915_mcr_reg_t reg,
> > -u32 mask,
> > -u32 value,
> > -unsigned int fast_timeout_us,
> > -unsigned int slow_timeout_ms)
> > +int intel_gt_mcr_wait_for_reg(struct intel_gt *gt,
> This change missed the comment above and so is causing errors from the
> documentation build:

Yeah, I already sent a fix for that here:

https://patchwork.freedesktop.org/patch/512602/?series=111220&rev=1


Matt

> 
> Error: make htmldocs had i915 warnings
> ./drivers/gpu/drm/i915/gt/intel_gt_mcr.c:739: warning: expecting prototype 
> for intel_gt_mcr_wait_for_reg_fw(). Prototype was for 
> intel_gt_mcr_wait_for_reg() instead
> ./drivers/gpu/drm/i915/gt/intel_gt_mcr.c:739: warning: expecting prototype 
> for intel_gt_mcr_wait_for_reg_fw(). Prototype was for 
> intel_gt_mcr_wait_for_reg() instead
> 
> John.
> 
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v4] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-22 Thread Matt Roper
On Tue, Nov 22, 2022 at 12:31:26PM +0530, Aravind Iddamsetty wrote:
> On XE_LPM+ platforms the media engines are carved out into a separate
> GT but have a common GGTMMADR address range which essentially makes
> the GGTT address space to be shared between media and render GT. As a
> result any updates in GGTT shall invalidate TLB of GTs sharing it and
> similarly any operation on GGTT requiring an action on a GT will have to
> involve all GTs sharing it. setup_private_pat was being done on a per
> GGTT based as that doesn't touch any GGTT structures moved it to per GT
> based.
> 
> BSPEC: 63834
> 
> v2:
> 1. Add details to commit msg
> 2. includes fix for failure to add item to ggtt->gt_list, as suggested
> by Lucas
> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within
> it.
> 4. setup_private_pat moved out of intel_gt_tiles_init
> 
> v3:
> 1. Move out for_each_gt from i915_driver.c (Jani Nikula)
> 
> v4: drop using RCU primitives on ggtt->gt_list as it is not an RCU list
> (Matt Roper)
> 
> Cc: Matt Roper 
> Signed-off-by: Aravind Iddamsetty 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 54 +--
>  drivers/gpu/drm/i915/gt/intel_gt.c| 13 +-
>  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
>  drivers/gpu/drm/i915/gt/intel_gtt.h   |  4 ++
>  drivers/gpu/drm/i915/i915_driver.c| 12 ++---
>  drivers/gpu/drm/i915/i915_gem.c   |  2 +
>  drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++--
>  drivers/gpu/drm/i915/i915_vma.c   |  5 ++-
>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
>  9 files changed, 111 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 8145851ad23d..7644738b9cdb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  
> +#include 
>  #include 
>  #include 
>  
> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
>  
>  void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  {
> + struct intel_gt *gt;
> +
>   i915_ggtt_suspend_vm(&ggtt->vm);
>   ggtt->invalidate(ggtt);
>  
> - intel_gt_check_and_clear_faults(ggtt->vm.gt);
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
> + intel_gt_check_and_clear_faults(gt);
>  }
>  
>  void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
>  
>  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  {
> - struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>   struct drm_i915_private *i915 = ggtt->vm.i915;
>  
>   gen8_ggtt_invalidate(ggtt);
>  
> - if (GRAPHICS_VER(i915) >= 12)
> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
> -   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> - else
> - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + if (GRAPHICS_VER(i915) >= 12) {
> + struct intel_gt *gt;
> +
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
> + intel_uncore_write_fw(gt->uncore,
> +   GEN12_GUC_TLB_INV_CR,
> +   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> + } else {
> + intel_uncore_write_fw(ggtt->vm.gt->uncore,
> +   GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + }
>  }
>  
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  
>   ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>  
> - setup_private_pat(ggtt->vm.gt);
> -
>   return ggtt_probe_common(ggtt, size);
>  }
>  
> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, 
> struct intel_gt *gt)
>   */
>  int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>  {
> - int ret;
> + struct intel_gt *gt;
> + int ret, i;
> +
> + for_each_gt(gt, i915, i) {
> + ret = intel_gt_assign_ggtt(gt);
> + if (ret)
> + return ret;
> + }
>  
>   ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
>   if (ret)
> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>   return 0;
>  }
>  
> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
> +{
> + struct i915_ggtt *ggtt;
> +
> + ggtt = 

[PATCH 2/4] drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes

2022-11-22 Thread Matt Roper
Passing the GT rather than uncore to the lowest level MCR read and write
functions will make it easier to introduce dedicated MCR locking in a
following patch.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index ea86c1ab5dc5..f4484bb18ec9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -221,7 +221,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
 
 /*
  * rw_with_mcr_steering_fw - Access a register with specific MCR steering
- * @uncore: pointer to struct intel_uncore
+ * @gt: GT to read register from
  * @reg: register being accessed
  * @rw_flag: FW_REG_READ for read access or FW_REG_WRITE for write access
  * @group: group number (documented as "sliceid" on older platforms)
@@ -232,10 +232,11 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
  *
  * Caller needs to make sure the relevant forcewake wells are up.
  */
-static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
+static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
   i915_mcr_reg_t reg, u8 rw_flag,
   int group, int instance, u32 value)
 {
+   struct intel_uncore *uncore = gt->uncore;
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
lockdep_assert_held(>lock);
@@ -308,11 +309,12 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore 
*uncore,
return val;
 }
 
-static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
+static u32 rw_with_mcr_steering(struct intel_gt *gt,
i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance,
u32 value)
 {
+   struct intel_uncore *uncore = gt->uncore;
enum forcewake_domains fw_domains;
u32 val;
 
@@ -325,7 +327,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
spin_lock_irq(>lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
 
-   val = rw_with_mcr_steering_fw(uncore, reg, rw_flag, group, instance, 
value);
+   val = rw_with_mcr_steering_fw(gt, reg, rw_flag, group, instance, value);
 
intel_uncore_forcewake_put__locked(uncore, fw_domains);
spin_unlock_irq(>lock);
@@ -347,7 +349,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
  i915_mcr_reg_t reg,
  int group, int instance)
 {
-   return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, 
instance, 0);
+   return rw_with_mcr_steering(gt, reg, FW_REG_READ, group, instance, 0);
 }
 
 /**
@@ -364,7 +366,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
 void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 
value,
int group, int instance)
 {
-   rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, 
value);
+   rw_with_mcr_steering(gt, reg, FW_REG_WRITE, group, instance, value);
 }
 
 /**
@@ -588,7 +590,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, 
i915_mcr_reg_t reg)
for (type = 0; type < NUM_STEERING_TYPES; type++) {
if (reg_needs_read_steering(gt, reg, type)) {
get_nonterminated_steering(gt, type, , );
-   return rw_with_mcr_steering_fw(gt->uncore, reg,
+   return rw_with_mcr_steering_fw(gt, reg,
   FW_REG_READ,
   group, instance, 0);
}
@@ -615,7 +617,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, 
i915_mcr_reg_t reg)
for (type = 0; type < NUM_STEERING_TYPES; type++) {
if (reg_needs_read_steering(gt, reg, type)) {
get_nonterminated_steering(gt, type, , );
-   return rw_with_mcr_steering(gt->uncore, reg,
+   return rw_with_mcr_steering(gt, reg,
FW_REG_READ,
group, instance, 0);
}
-- 
2.38.1



[PATCH 3/4] drm/i915/gt: Add dedicated MCR lock

2022-11-22 Thread Matt Roper
We've been overloading uncore->lock to protect access to the MCR
steering register.  That's not really what uncore->lock is intended for,
and it would be better if we didn't need to hold such a high-traffic
spinlock for the whole sequence of (apply steering, access MCR register,
restore steering).  Let's create a dedicated MCR lock to protect the
steering control register over this critical section and stop relying on
the high-traffic uncore->lock.

For now the new lock is a software lock.  However some platforms (MTL
and beyond) have a hardware-provided locking mechanism that can be used
to serialize not only software accesses, but also hardware/firmware
accesses as well; support for that hardware level lock will be added in
a future patch.

Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 66 -
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  8 +++
 drivers/gpu/drm/i915/gt/intel_mocs.c|  2 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  4 ++
 6 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index b5ad9caa5537..f823fc0b3827 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1094,6 +1094,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
+   intel_gt_mcr_lock(gt);
spin_lock_irq(>lock); /* serialise invalidate with GT reset */
 
awake = 0;
@@ -1129,6 +1130,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
 
spin_unlock_irq(>lock);
+   intel_gt_mcr_unlock(gt);
 
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index f4484bb18ec9..f9e722d91904 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -143,6 +143,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
unsigned long fuse;
int i;
 
+   spin_lock_init(&gt->mcr_lock);
+
/*
 * An mslice is unavailable only if both the meml3 for the slice is
 * disabled *and* all of the DSS in the slice (quadrant) are disabled.
@@ -228,6 +230,7 @@ static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
  * @instance: instance number (documented as "subsliceid" on older platforms)
  * @value: register value to be written (ignored for read)
  *
+ * Context: The caller must hold the MCR lock
  * Return: 0 for write access. register value for read access.
  *
  * Caller needs to make sure the relevant forcewake wells are up.
@@ -239,7 +242,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_gt *gt,
struct intel_uncore *uncore = gt->uncore;
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
 
-   lockdep_assert_held(&uncore->lock);
+   lockdep_assert_held(&gt->mcr_lock);
 
if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
/*
@@ -324,6 +327,7 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
 GEN8_MCR_SELECTOR,
 FW_REG_READ | 
FW_REG_WRITE);
 
+   intel_gt_mcr_lock(gt);
spin_lock_irq(>lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
 
@@ -331,10 +335,45 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
 
intel_uncore_forcewake_put__locked(uncore, fw_domains);
spin_unlock_irq(>lock);
+   intel_gt_mcr_unlock(gt);
 
return val;
 }
 
+/**
+ * intel_gt_mcr_lock - Acquire MCR steering lock
+ * @gt: GT structure
+ *
+ * Performs locking to protect the steering for the duration of an MCR
+ * operation.  Depending on the platform, this may be a software lock
+ * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes
+ * access not only for the driver, but also for external hardware and
+ * firmware agents).
+ *
+ * Context: Takes gt->mcr_lock.  uncore->lock should *not* be held when this
+ *  function is called, although it may be acquired after this
+ *  function call.
+ */
+void intel_gt_mcr_lock(struct intel_gt *gt)
+{
+   lockdep_assert_not_held(&gt->uncore->lock);
+
+   spin_lock(&gt->mcr_lock);
+}
+
+/**
+ * intel_gt_mcr_unlock - Release MCR steering lock
+ * @gt: GT structure
+ *
+ * Releases the lock acquired by intel_gt_mcr_lock().
+ *
+ * Context: Releases gt->mcr_lock
+ */
+void intel_gt_mcr_unlock(struct intel_gt *gt)
+{
+   spin_unlock(&gt->mcr_lock);
+}
+
 /**
  * intel_gt_mcr_read - read a specific instanc

[PATCH 1/4] drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg()

2022-11-22 Thread Matt Roper
The kerneldoc function name was not updated when this function was
converted to a non-fw form.

Fixes: 192bb40f030a ("drm/i915/gt: Manage uncore->lock while waiting on MCR 
register")
Reported-by: kernel test robot 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index d9a8ff9e5e57..ea86c1ab5dc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -702,7 +702,7 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
unsigned int dss,
 }
 
 /**
- * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected 
state
+ * intel_gt_mcr_wait_for_reg - wait until MCR register matches expected state
  * @gt: GT structure
  * @reg: the register to read
  * @mask: mask to apply to register value
-- 
2.38.1



[PATCH 4/4] drm/i915/mtl: Add hardware-level lock for steering

2022-11-22 Thread Matt Roper
Starting with MTL, the driver needs to not only protect the steering
control register from simultaneous software accesses, but also protect
against races with hardware/firmware agents.  The hardware provides a
dedicated locking mechanism to support this via the STEER_SEMAPHORE
register.  Reading the register acts as a 'trylock' operation; the read
will return 0x1 if the lock is acquired or 0x0 if something else is
already holding the lock; once acquired, writing 0x1 to the register
will release the lock.

We'll continue to grab the software lock as well, just so lockdep can
track our locking; assuming the hardware lock is behaving properly,
there should never be any contention on the software lock in this case.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 29 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |  1 +
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index f9e722d91904..fe5f5e0affdf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -345,10 +345,9 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
  * @gt: GT structure
  *
  * Performs locking to protect the steering for the duration of an MCR
- * operation.  Depending on the platform, this may be a software lock
- * (gt->mcr_lock) or a hardware lock (i.e., a register that synchronizes
- * access not only for the driver, but also for external hardware and
- * firmware agents).
+ * operation.  On MTL and beyond, a hardware lock will also be taken to
+ * serialize access not only for the driver, but also for external hardware and
+ * firmware agents.
  *
  * Context: Takes gt->mcr_lock.  uncore->lock should *not* be held when this
  *  function is called, although it may be acquired after this
@@ -356,9 +355,28 @@ static u32 rw_with_mcr_steering(struct intel_gt *gt,
  */
 void intel_gt_mcr_lock(struct intel_gt *gt)
 {
+   int err = 0;
+
lockdep_assert_not_held(>uncore->lock);
 
+   /*
+* Starting with MTL, we need to coordinate not only with other
+* driver threads, but also with hardware/firmware agents.  A dedicated
+* locking register is used.
+*/
+   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   err = wait_for(intel_uncore_read_fw(gt->uncore,
+   STEER_SEMAPHORE) == 0x1, 1);
+
+   /*
+* Even on platforms with a hardware lock, we'll continue to grab
+* a software spinlock too for lockdep purposes.  If the hardware lock
+* was already acquired, there should never be contention on the
+* software lock.
+*/
spin_lock(>mcr_lock);
+
+   drm_WARN_ON_ONCE(&gt->i915->drm, err == -ETIMEDOUT);
 }
 
 /**
@@ -372,6 +390,9 @@ void intel_gt_mcr_lock(struct intel_gt *gt)
 void intel_gt_mcr_unlock(struct intel_gt *gt)
 {
spin_unlock(>mcr_lock);
+
+   if (GRAPHICS_VER(gt->i915) >= IP_VER(12, 70))
+   intel_uncore_write_fw(gt->uncore, STEER_SEMAPHORE, 0x1);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 80a979e6f6be..412c0b399ebd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -67,6 +67,7 @@
 #define GMD_ID_MEDIA   _MMIO(MTL_MEDIA_GSI_BASE + 
0xd8c)
 
 #define MCFG_MCR_SELECTOR  _MMIO(0xfd0)
+#define STEER_SEMAPHORE    _MMIO(0xfd0)
 #define MTL_MCR_SELECTOR   _MMIO(0xfd4)
 #define SF_MCR_SELECTOR_MMIO(0xfd8)
 #define GEN8_MCR_SELECTOR  _MMIO(0xfdc)
-- 
2.38.1



[PATCH 0/4] i915: dedicated MCR locking and hardware semaphore

2022-11-22 Thread Matt Roper
We've been overloading uncore->lock to protect access to the MCR
steering register.  That's not really what uncore->lock is intended for,
and it would be better if we didn't need to hold such a high-traffic
spinlock for the whole sequence of (apply steering, access MCR register,
restore steering).  Switch to a dedicated MCR lock to protect the
steering control register over this critical section and stop relying on
the high-traffic uncore->lock.  On pre-MTL platforms the dedicated MCR
lock is just another software lock, but on MTL and beyond we also
utilize the hardware-provided STEER_SEMAPHORE that allows us to
synchronize with external hardware and firmware agents.

Matt Roper (4):
  drm/i915/gt: Correct kerneldoc for intel_gt_mcr_wait_for_reg()
  drm/i915/gt: Pass gt rather than uncore to lowest-level reads/writes
  drm/i915/gt: Add dedicated MCR lock
  drm/i915/mtl: Add hardware-level lock for steering

 drivers/gpu/drm/i915/gt/intel_gt.c  |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 107 ++--
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h  |   2 +
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |   1 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|   8 ++
 drivers/gpu/drm/i915/gt/intel_mocs.c|   2 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |   4 +
 7 files changed, 115 insertions(+), 11 deletions(-)

-- 
2.38.1



Re: [PATCH v3] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-18 Thread Matt Roper
On Tue, Nov 15, 2022 at 08:34:54PM +0530, Aravind Iddamsetty wrote:
> On XE_LPM+ platforms the media engines are carved out into a separate
> GT but have a common GGTMMADR address range which essentially makes
> the GGTT address space to be shared between media and render GT. As a
> result any updates in GGTT shall invalidate TLB of GTs sharing it and
> similarly any operation on GGTT requiring an action on a GT will have to
> involve all GTs sharing it. setup_private_pat was being done on a per
> GGTT based as that doesn't touch any GGTT structures moved it to per GT
> based.
> 
> BSPEC: 63834
> 
> v2:
> 1. Add details to commit msg
> 2. includes fix for failure to add item to ggtt->gt_list, as suggested
> by Lucas
> 3. as ggtt_flush() is used only for ggtt drop i915_is_ggtt check within
> it.
> 4. setup_private_pat moved out of intel_gt_tiles_init
> 
> v3:
> 1. Move out for_each_gt from i915_driver.c (Jani Nikula)
> 
> Cc: Matt Roper 
> Signed-off-by: Aravind Iddamsetty 
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 54 +--
>  drivers/gpu/drm/i915/gt/intel_gt.c| 13 +-
>  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
>  drivers/gpu/drm/i915/gt/intel_gtt.h   |  4 ++
>  drivers/gpu/drm/i915/i915_driver.c| 12 ++---
>  drivers/gpu/drm/i915/i915_gem.c   |  2 +
>  drivers/gpu/drm/i915/i915_gem_evict.c | 51 +++--
>  drivers/gpu/drm/i915/i915_vma.c   |  5 ++-
>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
>  9 files changed, 111 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 8145851ad23d..7644738b9cdb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -8,6 +8,7 @@
>  #include 
>  #include 
>  
> +#include 
>  #include 
>  #include 
>  
> @@ -196,10 +197,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
>  
>  void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  {
> + struct intel_gt *gt;
> +
>   i915_ggtt_suspend_vm(>vm);
>   ggtt->invalidate(ggtt);
>  
> - intel_gt_check_and_clear_faults(ggtt->vm.gt);
> + list_for_each_entry(gt, >gt_list, ggtt_link)
> + intel_gt_check_and_clear_faults(gt);
>  }
>  
>  void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
> @@ -225,16 +229,21 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
>  
>  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  {
> - struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>   struct drm_i915_private *i915 = ggtt->vm.i915;
>  
>   gen8_ggtt_invalidate(ggtt);
>  
> - if (GRAPHICS_VER(i915) >= 12)
> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
> -   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> - else
> - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + if (GRAPHICS_VER(i915) >= 12) {
> + struct intel_gt *gt;
> +
> + list_for_each_entry(gt, >gt_list, ggtt_link)
> + intel_uncore_write_fw(gt->uncore,
> +   GEN12_GUC_TLB_INV_CR,
> +   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> + } else {
> + intel_uncore_write_fw(ggtt->vm.gt->uncore,
> +   GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + }
>  }
>  
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> @@ -986,8 +995,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  
>   ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>  
> - setup_private_pat(ggtt->vm.gt);
> -
>   return ggtt_probe_common(ggtt, size);
>  }
>  
> @@ -1196,7 +1203,14 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, 
> struct intel_gt *gt)
>   */
>  int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>  {
> - int ret;
> + struct intel_gt *gt;
> + int ret, i;
> +
> + for_each_gt(gt, i915, i) {
> + ret = intel_gt_assign_ggtt(gt);
> + if (ret)
> + return ret;
> + }
>  
>   ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
>   if (ret)
> @@ -1208,6 +1222,19 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
>   return 0;
>  }
>  
> +struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915)
> +{
> + struct i915_ggtt *ggtt;
> +
> + ggtt = drmm_kzalloc(>drm, sizeof(*ggtt), GFP_KERNEL);
> + if (!ggtt)
> + return ERR_PTR(-ENOMEM);
> +
> +   

Re: [Intel-gfx] [PATCH] drm/i915/gt: Manage uncore->lock while waiting on MCR register

2022-11-18 Thread Matt Roper
On Fri, Nov 18, 2022 at 01:20:45PM -0800, Lucas De Marchi wrote:
> On Thu, Nov 17, 2022 at 09:33:58AM -0800, Matt Roper wrote:
> > The GT MCR code currently relies on uncore->lock to avoid race
> > conditions on the steering control register during MCR operations.  The
> > *_fw() versions of MCR operations expect the caller to already hold
> > uncore->lock, while the non-fw variants manage the lock internally.
> > However the sole callsite of intel_gt_mcr_wait_for_reg_fw() does not
> > currently obtain the forcewake lock, allowing a potential race condition
> > (and triggering an assertion on lockdep builds).  Furthermore, since
> > 'wait for register value' requests may not return immediately, it is
> > undesirable to hold a fundamental lock like uncore->lock for the entire
> > wait and block all other MMIO for the duration; rather the lock is only
> > needed around the MCR read operations and can be released during the
> > delays.
> > 
> > Convert intel_gt_mcr_wait_for_reg_fw() to a non-fw variant that will
> > manage uncore->lock internally.  This does have the side effect of
> > causing an unnecessary lookup in the forcewake table on each read
> > operation, but since the caller is still holding the relevant forcewake
> > domain, this will ultimately just increment the reference count and
> > won't actually cause any additional MMIO traffic.
> > 
> > In the future we plan to switch to a dedicated MCR lock to protect the
> > steering critical section rather than using the overloaded and
> > high-traffic uncore->lock; on MTL and beyond the new lock can be
> > implemented on top of the hardware-provided synchronization mechanism for
> > steering.
> > 
> > Fixes: 3068bec83eea ("drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()")
> > Cc: Lucas De Marchi 
> > Signed-off-by: Matt Roper 
> 
> 
> Reviewed-by: Lucas De Marchi 

Applied to drm-intel-gt-next.  Thanks for the review.


Matt

> 
> thanks
> Lucas De Marchi

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915: Fix workarounds on Gen2-3

2022-11-18 Thread Matt Roper
On Fri, Nov 18, 2022 at 11:52:49AM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> In 3653727560d0 ("drm/i915: Simplify internal helper function signature")
> I broke the old platforms by not noticing engine workaround init does not
> initialize the list on old platforms. Fix it by always initializing which
> already does the right thing by mostly not doing anything if there aren't
> any workarounds on the list.
> 
> Signed-off-by: Tvrtko Ursulin 
> Fixes: 3653727560d0 ("drm/i915: Simplify internal helper function signature")
> Reported-by: Ville Syrjälä 
> Cc: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 213160f29ec3..4d7a01b45e09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2991,7 +2991,7 @@ general_render_compute_wa_init(struct intel_engine_cs 
> *engine, struct i915_wa_li
>  static void
>  engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list 
> *wal)
>  {
> - if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
> + if (GRAPHICS_VER(engine->i915) < 4)
>   return;

Do we even need this early return at all?  As far as I can see, letting
this function run its course doesn't wind up having any effect or cause
any problems (you still wind up with an empty list).

Regardless,

Reviewed-by: Matt Roper 

>  
>   engine_fake_wa_init(engine, wal);
> @@ -3016,9 +3016,6 @@ void intel_engine_init_workarounds(struct 
> intel_engine_cs *engine)
>  {
>   struct i915_wa_list *wal = >wa_list;
>  
> - if (GRAPHICS_VER(engine->i915) < 4)
> - return;
> -
>   wa_init_start(wal, engine->gt, "engine", engine->name);
>   engine_init_workarounds(engine, wal);
>   wa_init_finish(wal);
> -- 
> 2.34.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH] drm/i915/gt: Manage uncore->lock while waiting on MCR register

2022-11-17 Thread Matt Roper
The GT MCR code currently relies on uncore->lock to avoid race
conditions on the steering control register during MCR operations.  The
*_fw() versions of MCR operations expect the caller to already hold
uncore->lock, while the non-fw variants manage the lock internally.
However the sole callsite of intel_gt_mcr_wait_for_reg_fw() does not
currently obtain the forcewake lock, allowing a potential race condition
(and triggering an assertion on lockdep builds).  Furthermore, since
'wait for register value' requests may not return immediately, it is
undesirable to hold a fundamental lock like uncore->lock for the entire
wait and block all other MMIO for the duration; rather the lock is only
needed around the MCR read operations and can be released during the
delays.

Convert intel_gt_mcr_wait_for_reg_fw() to a non-fw variant that will
manage uncore->lock internally.  This does have the side effect of
causing an unnecessary lookup in the forcewake table on each read
operation, but since the caller is still holding the relevant forcewake
domain, this will ultimately just increment the reference count and
won't actually cause any additional MMIO traffic.

In the future we plan to switch to a dedicated MCR lock to protect the
steering critical section rather than using the overloaded and
high-traffic uncore->lock; on MTL and beyond the new lock can be
implemented on top of the hardware-provided synchronization mechanism for
steering.

Fixes: 3068bec83eea ("drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()")
Cc: Lucas De Marchi 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c |  6 +++---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 18 ++
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h | 12 ++--
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 0325f071046c..b5ad9caa5537 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -1035,9 +1035,9 @@ get_reg_and_bit(const struct intel_engine_cs *engine, 
const bool gen8,
 static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
 {
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
-   TLB_INVAL_TIMEOUT_US,
-   TLB_INVAL_TIMEOUT_MS);
+   return intel_gt_mcr_wait_for_reg(gt, rb.mcr_reg, rb.bit, 0,
+TLB_INVAL_TIMEOUT_US,
+TLB_INVAL_TIMEOUT_MS);
else
return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 
0,
TLB_INVAL_TIMEOUT_US,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 830edffe88cc..d9a8ff9e5e57 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -730,17 +730,19 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
unsigned int dss,
  *
  * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
  */
-int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
-i915_mcr_reg_t reg,
-u32 mask,
-u32 value,
-unsigned int fast_timeout_us,
-unsigned int slow_timeout_ms)
+int intel_gt_mcr_wait_for_reg(struct intel_gt *gt,
+ i915_mcr_reg_t reg,
+ u32 mask,
+ u32 value,
+ unsigned int fast_timeout_us,
+ unsigned int slow_timeout_ms)
 {
-   u32 reg_value = 0;
-#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == 
value)
int ret;
 
+   lockdep_assert_not_held(&gt->uncore->lock);
+
+#define done ((intel_gt_mcr_read_any(gt, reg) & mask) == value)
+
/* Catch any overuse of this function */
might_sleep_if(slow_timeout_ms);
GEM_BUG_ON(fast_timeout_us > 2);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 3fb0502bff22..ae93b20e1c17 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -37,12 +37,12 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, 
struct intel_gt *gt,
 void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
  unsigned int *group, unsigned int *instance);
 
-int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
-i915_mcr_reg_t reg,
-u32 mask,
-u32 v

Re: [PATCH] drm/i915: Update workaround documentation

2022-11-15 Thread Matt Roper
On Tue, Nov 15, 2022 at 11:26:11AM -0800, Lucas De Marchi wrote:
> There were several updates in the driver on how the workarounds are
> handled since its documentation was written. Update the documentation to
> reflect the current reality.
> 
> v2:
>   - Remove footnote that was wrongly referenced, adding back the
> reference in the correct paragraph.
>   - Remove "Display workarounds" and just mention "display IP" under
> "Other" category since all of them are peppered around the driver.
> 
> Cc: Matt Roper 
> Signed-off-by: Lucas De Marchi 
> Acked-by: Balasubramani Vivekanandan  # 
> v1

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 80 +
>  1 file changed, 50 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 213160f29ec3..290f9f91fdf4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -18,42 +18,62 @@
>  /**
>   * DOC: Hardware workarounds
>   *
> - * This file is intended as a central place to implement most [1]_ of the
> - * required workarounds for hardware to work as originally intended. They 
> fall
> - * in five basic categories depending on how/when they are applied:
> + * Hardware workarounds are register programming documented to be executed in
> + * the driver that fall outside of the normal programming sequences for a
> + * platform. There are some basic categories of workarounds, depending on
> + * how/when they are applied:
>   *
> - * - Workarounds that touch registers that are saved/restored to/from the HW
> - *   context image. The list is emitted (via Load Register Immediate 
> commands)
> - *   everytime a new context is created.
> - * - GT workarounds. The list of these WAs is applied whenever these 
> registers
> - *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
> - * - Display workarounds. The list is applied during display clock-gating
> - *   initialization.
> - * - Workarounds that whitelist a privileged register, so that UMDs can 
> manage
> - *   them directly. This is just a special case of a MMMIO workaround (as we
> - *   write the list of these to/be-whitelisted registers to some special HW
> - *   registers).
> - * - Workaround batchbuffers, that get executed automatically by the hardware
> - *   on every HW context restore.
> + * - Context workarounds: workarounds that touch registers that are
> + *   saved/restored to/from the HW context image. The list is emitted (via 
> Load
> + *   Register Immediate commands) once when initializing the device and 
> saved in
> + *   the default context. That default context is then used on every context
> + *   creation to have a "primed golden context", i.e. a context image that
> + *   already contains the changes needed to all the registers.
>   *
> - * .. [1] Please notice that there are other WAs that, due to their nature,
> - *cannot be applied from a central place. Those are peppered around the 
> rest
> - *of the code, as needed.
> + * - Engine workarounds: the list of these WAs is applied whenever the 
> specific
> + *   engine is reset. It's also possible that a set of engine classes share a
> + *   common power domain and they are reset together. This happens on some
> + *   platforms with render and compute engines. In this case (at least) one 
> of
> + *   them needs to keep the workaround programming: the approach taken in the
> + *   driver is to tie those workarounds to the first compute/render engine 
> that
> + *   is registered.  When executing with GuC submission, engine resets are
> + *   outside of kernel driver control, hence the list of registers involved 
> in
> + *   written once, on engine initialization, and then passed to GuC, that
> + *   saves/restores their values before/after the reset takes place. See
> + *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
>   *
> - * .. [2] Technically, some registers are powercontext saved & restored, so 
> they
> - *survive a suspend/resume. In practice, writing them again is not too
> - *costly and simplifies things. We can revisit this in the future.
> + * - GT workarounds: the list of these WAs is applied whenever these 
> registers
> + *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
> + *
> + * - Register whitelist: some workarounds need to be implemented in 
> userspace,
> + *   but need to touch privileged registers. The whitelist in the kernel
> + *   instructs the hardware to allow the access to happen. From the kernel 

Re: [PATCH] drm/i915: Update workaround documentation

2022-11-14 Thread Matt Roper
 to touch privileged registers. The whitelist in the kernel
> + *   instructs the hardware to allow the access to happen. From the kernel 
> side,
> + *   this is just a special case of a MMIO workaround (as we write the list 
> of
> + *   these to/be-whitelisted registers to some special HW registers).
>   *
> - * Keep things in this file ordered by WA type, as per the above (context, 
> GT,
> - * display, register whitelist, batchbuffer). Then, inside each type, keep 
> the
> - * following order:
> + * - Workaround batchbuffers: buffers that get executed automatically by the
> + *   hardware on every HW context restore. These buffers are created and
> + *   programmed in the default context so the hardware always go through 
> those
> + *   programming sequences when switching contexts. The support for 
> workaround
> + *   batchbuffers is enabled by these hardware mechanisms:
>   *
> - * - Infrastructure functions and macros
> - * - WAs per platform in standard gen/chrono order
> - * - Public functions to init or apply the given workaround type.
> - */
> + *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
> + *  context, pointing the hardware to jump to that location when that 
> offset
> + *  is reached in the context restore. Workaround batchbuffer in the 
> driver
> + *  currently uses this mechanism for all platforms.
> + *
> + *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
> + *  pointing the hardware to a buffer to continue executing after the
> + *  engine registers are restored in a context restore sequence. This is
> + *  currently not used in the driver.
> + *
> + * - Display workarounds. The list is applied during display clock-gating
> + *   initialization. However most of the display workarounds may be 
> considered
> + *   to fall under the "Others" category below.

We don't have any such list today.  And if we do add one, I'm not sure
it would happen here in gt/.  Maybe we should just add this as an extra
"or" in the "Other" description below for now?


Matt

> + *
> + * - Other:  There are WAs that, due to their nature, cannot be applied from 
> a central
> + *   place. Those are peppered around the rest of the code, as needed.
> + *
> + * .. [1] Technically, some registers are powercontext saved & restored, so 
> they
> + *survive a suspend/resume. In practice, writing them again is not too
> + *costly and simplifies things, so it's the approach taken in the driver.
> +  */
>  
>  static void wa_init_start(struct i915_wa_list *wal, const char *name, const 
> char *engine_name)
>  {
> -- 
> 2.38.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH] drm/i915/dg2: Drop force_probe requirement

2022-11-08 Thread Matt Roper
DG2 has been very usable for a while now, and all of the uapi changes
related to fundamental platform usage have been finalized.  Recent CI
results have also been healthy, so we're ready to drop the force_probe
requirement and enable the platform by default.

Cc: Rodrigo Vivi 
Cc: Tvrtko Ursulin 
Cc: Joonas Lahtinen 
Cc: Jani Nikula 
Signed-off-by: Matt Roper 
---

There was some recent offline discussion questioning whether we'd fully
identified the root cause of some historic CI failures, or whether it
was possible we might still have a bug lurking somewhere causing
sporadic failures.  Let's use this patch to centralize discussion about
any remaining concerns and make sure they're addressed before we apply
this.

 drivers/gpu/drm/i915/i915_pci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 211913be40ce..0866300243aa 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1078,7 +1078,6 @@ static const struct intel_device_info dg2_info = {
XE_LPD_FEATURES,
.__runtime.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) |
   BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
-   .require_force_probe = 1,
 };
 
 static const struct intel_device_info ats_m_info = {
-- 
2.38.1



Re: [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-11-07 Thread Matt Roper
On Mon, Nov 07, 2022 at 07:43:59PM +0530, Iddamsetty, Aravind wrote:
> 
> 
> On 31-10-2022 23:16, Matt Roper wrote:
> > On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote:
> >> On XE_LPM+ platforms the media engines are carved out into a separate
> >> GT but have a common GGTMMADR address range which essentially makes
> >> the GGTT address space to be shared between media and render GT.
> > 
> 
> 
> >>  
> >>  int intel_gt_init_mmio(struct intel_gt *gt)
> >> @@ -965,6 +973,9 @@ int intel_gt_tiles_init(struct drm_i915_private *i915)
> >>int ret;
> >>  
> >>for_each_gt(gt, i915, id) {
> >> +  if (GRAPHICS_VER(i915) >= 8)
> >> +  setup_private_pat(gt);
> >> +
> > 
> > Since the term "tile" is used for PVC-style remote tiles (which we have
> > some framework for, but haven't enabled yet), it seems confusing to have
> > the PAT setup for all GTs (including the standalone media GT) in a
> > function called intel_gt_tiles_init().  Maybe we should also have a prep
> > patch that renames this function if we're going to start doing non-tile
> > things in here too?
> 
> But aren't GT and Tile used interchangeably? Also, could you please

The terminology has been used a bit inconsistently so far, but I think
we're trying to standardize on "tile" as referring to the PVC-style
combination of "GT + LMEM."  So I'd consider MTL's standalone media to
be a "GT," but not a "tile" because it isn't paired with its own lmem
unit.


Matt

> elaborate what do you mean by non tile related things here and as i
> understand PAT are per GT registers and in case of SA Media the
> gsi_offset get added.
> > 
> >>ret = intel_gt_probe_lmem(gt);
> >>if (ret)
> >>return ret;
> 
> 
> Thanks,
> Aravind.

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v2 3/5] drm/i915/mtl: add GSC CS interrupt support

2022-11-07 Thread Matt Roper
On Wed, Nov 02, 2022 at 10:10:45AM -0700, Daniele Ceraolo Spurio wrote:
> The GSC CS re-uses the same interrupt bits that the GSC used in older
> platforms. This means that we can now have an engine interrupt coming
> out of OTHER_CLASS, so we need to handle that appropriately.
> 
> v2: clean up the if statement for the engine irq (Tvrtko)
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 
> Cc: Tvrtko Ursulin 
> Reviewed-by: Matt Roper  #v1

Reviewed-by: Matt Roper 

for v2 as well.

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 75 ++
>  1 file changed, 40 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index f26882fdc24c..b197f0e9794f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -81,35 +81,27 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 
> instance,
> instance, iir);
>  }
>  
> -static void
> -gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
> -  const u8 instance, const u16 iir)
> +static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance)
>  {
> - struct intel_engine_cs *engine;
> -
> - /*
> -  * Platforms with standalone media have their media engines in another
> -  * GT.
> -  */
> - if (MEDIA_VER(gt->i915) >= 13 &&
> - (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) {
> - if (!gt->i915->media_gt)
> - goto err;
> + struct intel_gt *media_gt = gt->i915->media_gt;
>  
> - gt = gt->i915->media_gt;
> + /* we expect the non-media gt to be passed in */
> + GEM_BUG_ON(gt == media_gt);
> +
> + if (!media_gt)
> + return gt;
> +
> + switch (class) {
> + case VIDEO_DECODE_CLASS:
> + case VIDEO_ENHANCEMENT_CLASS:
> + return media_gt;
> + case OTHER_CLASS:
> + if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, 
> GSC0))
> + return media_gt;
> + fallthrough;
> + default:
> + return gt;
>   }
> -
> - if (instance <= MAX_ENGINE_INSTANCE)
> - engine = gt->engine_class[class][instance];
> - else
> - engine = NULL;
> -
> - if (likely(engine))
> - return intel_engine_cs_irq(engine, iir);
> -
> -err:
> - WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
> -   class, instance);
>  }
>  
>  static void
> @@ -122,8 +114,17 @@ gen11_gt_identity_handler(struct intel_gt *gt, const u32 
> identity)
>   if (unlikely(!intr))
>   return;
>  
> - if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)
> - return gen11_engine_irq_handler(gt, class, instance, intr);
> + /*
> +  * Platforms with standalone media have the media and GSC engines in
> +  * another GT.
> +  */
> + gt = pick_gt(gt, class, instance);
> +
> + if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE) {
> + struct intel_engine_cs *engine = 
> gt->engine_class[class][instance];
> + if (engine)
> + return intel_engine_cs_irq(engine, intr);
> + }
>  
>   if (class == OTHER_CLASS)
>   return gen11_other_irq_handler(gt, instance, intr);
> @@ -206,7 +207,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
>   intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE,0);
>   if (CCS_MASK(gt))
>   intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
> - if (HAS_HECI_GSC(gt->i915))
> + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
>   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0);
>  
>   /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
> @@ -233,7 +234,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
>   intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
>   if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
>   intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
> - if (HAS_HECI_GSC(gt->i915))
> + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
>   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0);
>  
>   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
> @@ -249,7 +250,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>  {
>   struct intel_uncore *un

Re: [PATCH v2 5/5] drm/i915/mtl: don't expose GSC command streamer to the user

2022-11-04 Thread Matt Roper
On Wed, Nov 02, 2022 at 10:10:47AM -0700, Daniele Ceraolo Spurio wrote:
> There is no userspace user for this CS yet, we only need it for internal
> kernel ops (e.g. HuC, PXP), so don't expose it.
> 
> v2: even if it's not exposed, rename the engine so it is easier to
> identify in the debug logs (Matt)
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_user.c | 27 -
>  1 file changed, 21 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 79312b734690..cd4f1b126f75 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -191,6 +191,15 @@ static void add_legacy_ring(struct legacy_ring *ring,
>   ring->instance++;
>  }
>  
> +static void engine_rename(struct intel_engine_cs *engine, const char *name, 
> u16 instance)
> +{
> + char old[sizeof(engine->name)];
> +
> + memcpy(old, engine->name, sizeof(engine->name));
> + scnprintf(engine->name, sizeof(engine->name), "%s%u", name, instance);
> + drm_dbg(&engine->i915->drm, "renamed %s to %s\n", old, engine->name);
> +}
> +
>  void intel_engines_driver_register(struct drm_i915_private *i915)
>  {
>   struct legacy_ring ring = {};
> @@ -206,11 +215,19 @@ void intel_engines_driver_register(struct 
> drm_i915_private *i915)
>   struct intel_engine_cs *engine =
>   container_of((struct rb_node *)it, typeof(*engine),
>uabi_node);
> - char old[sizeof(engine->name)];
>  
>   if (intel_gt_has_unrecoverable_error(engine->gt))
>   continue; /* ignore incomplete engines */
>  
> + /*
> +  * We don't want to expose the GSC engine to the users, but we
> +  * still rename it so it is easier to identify in the debug logs
> +  */
> + if (engine->id == GSC0) {
> + engine_rename(engine, "gsc", 0);
> + continue;
> + }
> +
>   GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
>   engine->uabi_class = uabi_classes[engine->class];
>  
> @@ -220,11 +237,9 @@ void intel_engines_driver_register(struct 
> drm_i915_private *i915)
>   i915->engine_uabi_class_count[engine->uabi_class]++;
>  
>   /* Replace the internal name with the final user facing name */
> - memcpy(old, engine->name, sizeof(engine->name));
> - scnprintf(engine->name, sizeof(engine->name), "%s%u",
> -   intel_engine_class_repr(engine->class),
> -   engine->uabi_instance);
> - DRM_DEBUG_DRIVER("renamed %s to %s\n", old, engine->name);
> + engine_rename(engine,
> +   intel_engine_class_repr(engine->class),
> +   engine->uabi_instance);
>  
>   rb_link_node(&engine->uabi_node, prev, p);
>   rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915/dg2: Introduce Wa_18017747507

2022-11-01 Thread Matt Roper
On Mon, Oct 31, 2022 at 06:15:09AM -0700, Wayne Boyer wrote:
> WA 18017747507 applies to all DG2 skus.
> 
> BSpec: 56035, 46121, 68173
> 
> Signed-off-by: Wayne Boyer 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index f4624262dc81..27b2641e1a53 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -501,6 +501,9 @@
>  #define VF_PREEMPTION   _MMIO(0x83a4)
>  #define   PREEMPTION_VERTEX_COUNT   REG_GENMASK(15, 0)
>  
> +#define VFG_PREEMPTION_CHICKEN   _MMIO(0x83b4)
> +#define  POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)

We need one more space here between 'define' and the register name for
consistency with the rest of the file.  But I can fix that up while
applying.

Reviewed-by: Matt Roper 

Applied to drm-intel-gt-next.  Thanks for the patch.


Matt

> +
>  #define GEN8_RC6_CTX_INFO   _MMIO(0x8504)
>  
>  #define XEHP_SQCM   MCR_REG(0x8724)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 2a35e7e66625..3cdf5c24dbc5 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2975,6 +2975,9 @@ general_render_compute_wa_init(struct intel_engine_cs 
> *engine, struct i915_wa_li
>* Wa_22015475538:dg2
>*/
>   wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
> +
> + /* Wa_18017747507:dg2 */
> + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, 
> POLYGON_TRIFAN_LINELOOP_DISABLE);
>   }
>  }
>  
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH] drm/i915/mtl: Add MC6 Wa_14017210380 for SAMedia

2022-11-01 Thread Matt Roper
382ff2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -740,6 +740,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
>   (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
>  
> +#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \
> + (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) 
> && \
> +  IS_GRAPHICS_STEP(__i915, since, until))
> +
>  /*
>   * DG2 hardware steppings are a bit unusual.  The hardware design was forked 
> to
>   * create three variants (G10, G11, and G12) which each have distinct
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1c0da50c0dc7..abe62cea083d 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6678,6 +6678,15 @@
>  /*   XEHP_PCODE_FREQUENCY_CONFIG param2 */
>  #define PCODE_MBOX_DOMAIN_NONE   0x0
>  #define PCODE_MBOX_DOMAIN_MEDIAFF   0x3
> +
> +/* Wa_14017210380: mtl */
> +#define   PCODE_MBOX_GT_STATE   0x50
> +/* sub-commands (param1) */
> +#define PCODE_MBOX_GT_STATE_MEDIA_BUSY   0x1
> +#define PCODE_MBOX_GT_STATE_MEDIA_NOT_BUSY   0x2
> +/* param2 */
> +#define PCODE_MBOX_GT_STATE_DOMAIN_MEDIA 0x1
> +
>  #define GEN6_PCODE_DATA  _MMIO(0x138128)
>  #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8
>  #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT   16
> -- 
> 2.25.1
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 1/5] drm/i915/mtl: add initial definitions for GSC CS

2022-10-31 Thread Matt Roper
On Mon, Oct 31, 2022 at 09:43:33AM -0700, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 10/31/2022 9:26 AM, Matt Roper wrote:
> > On Thu, Oct 27, 2022 at 03:15:50PM -0700, Daniele Ceraolo Spurio wrote:
> > > Starting on MTL, the GSC is no longer managed with direct MMIO access,
> > > but we instead have a dedicated command streamer for it. As a first step
> > > for adding support for this CS, add the required definitions.
> > > Note that, although it is now a CS, the GSC retains its old
> > > class:instance value (OTHER_CLASS instance 6)
> > > 
> > > Signed-off-by: Daniele Ceraolo Spurio 
> > > Cc: Matt Roper 
> > Now that we have an OTHER_CLASS engine, I think we also need to deal
> > with the class -> reg mapping table in mmio_invalidate_full().  I think
> > the register we want is 0xCF04?
> 
> I missed that. Looks like the situation is a bit more complex than just
> adding the new register, because on pre-MTL platforms CF04 is the compute
> class invalidation register. On MTL as you said CF04 is marked as the GSC CS
> invalidation register, but I can't find the compute one. Do you know if it
> re-uses the render one or something like that?
> Given that there seem to be non-GSC related changes as well, IMO this should
> probably be a separate patch to specifically handle the TLB inval changes on
> MTL.

Yeah, makes sense; we can follow up with separate patches for this.

+Cc Fei since he's done a lot of work on TLB invalidation and may know
what happens to compute class invalidation on MTL when the GSC takes
over that register.


Matt

> 
> Daniele
> 
> > 
> > Matt
> > 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_engine_cs.c| 8 
> > >   drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
> > >   drivers/gpu/drm/i915/gt/intel_engine_user.c  | 1 +
> > >   drivers/gpu/drm/i915/i915_reg.h  | 1 +
> > >   4 files changed, 11 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > index 3b7d750ad054..e0fbfac03979 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > @@ -244,6 +244,13 @@ static const struct engine_info intel_engines[] = {
> > >   { .graphics_ver = 12, .base = 
> > > GEN12_COMPUTE3_RING_BASE }
> > >   }
> > >   },
> > > + [GSC0] = {
> > > + .class = OTHER_CLASS,
> > > + .instance = OTHER_GSC_INSTANCE,
> > > + .mmio_bases = {
> > > + { .graphics_ver = 12, .base = MTL_GSC_RING_BASE }
> > > + }
> > > + },
> > >   };
> > >   /**
> > > @@ -324,6 +331,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 
> > > class)
> > >   case VIDEO_DECODE_CLASS:
> > >   case VIDEO_ENHANCEMENT_CLASS:
> > >   case COPY_ENGINE_CLASS:
> > > + case OTHER_CLASS:
> > >   if (GRAPHICS_VER(gt->i915) < 8)
> > >   return 0;
> > >   return GEN8_LR_CONTEXT_OTHER_SIZE;
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> > > b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > index 6b5d4ea22b67..4fd54fb8810f 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> > > @@ -136,6 +136,7 @@ enum intel_engine_id {
> > >   CCS2,
> > >   CCS3,
> > >   #define _CCS(n) (CCS0 + (n))
> > > + GSC0,
> > >   I915_NUM_ENGINES
> > >   #define INVALID_ENGINE ((enum intel_engine_id)-1)
> > >   };
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> > > b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > index 46a174f8aa00..79312b734690 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > @@ -140,6 +140,7 @@ const char *intel_engine_class_repr(u8 class)
> > >   [COPY_ENGINE_CLASS] = "bcs",
> > >   [VIDEO_DECODE_CLASS] = "vcs",
> > >   [VIDEO_ENHANCEMENT_CLASS] = "vecs",
> > > + [OTHER_CLASS] = "other",
> > >   [COMPUTE_CLASS] = "ccs",
> > >   };
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > > b/drivers/gpu/drm/i915/i915_reg.h
> > > index 1c0da50c0dc7..d056c3117ef2 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -970,6 +970,7 @@
> > >   #define GEN11_VEBOX2_RING_BASE  0x1d8000
> > >   #define XEHP_VEBOX3_RING_BASE   0x1e8000
> > >   #define XEHP_VEBOX4_RING_BASE   0x1f8000
> > > > +#define MTL_GSC_RING_BASE  0x11a000
> > >   #define GEN12_COMPUTE0_RING_BASE0x1a000
> > >   #define GEN12_COMPUTE1_RING_BASE0x1c000
> > >   #define GEN12_COMPUTE2_RING_BASE0x1e000
> > > -- 
> > > 2.37.3
> > > 
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH] drm/i915/mtl: Media GT and Render GT share common GGTT

2022-10-31 Thread Matt Roper
On Mon, Oct 31, 2022 at 06:01:11PM +0530, Aravind Iddamsetty wrote:
> On XE_LPM+ platforms the media engines are carved out into a separate
> GT but have a common GGTMMADR address range which essentially makes
> the GGTT address space to be shared between media and render GT.

While this is all true, I feel like this description is lacking a bit of
explanation for why/how that translates into the code changes below.
For example you should elaborate on the areas this impacts, such as the
need to invalidate both GTs' TLBs, retire requests for both GTs, etc.

Also, the movement of the PAT setup should be noted and explained as
well since it differs from how you approached the other changes here.

> 
> BSPEC: 63834
> 
> Cc: Matt Roper 
> Signed-off-by: Aravind Iddamsetty 
> ---
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  | 49 +++---
>  drivers/gpu/drm/i915/gt/intel_gt.c| 15 +-
>  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
>  drivers/gpu/drm/i915/gt/intel_gtt.h   |  3 ++
>  drivers/gpu/drm/i915/i915_driver.c| 19 +--
>  drivers/gpu/drm/i915/i915_gem_evict.c | 63 +--
>  drivers/gpu/drm/i915/i915_vma.c   |  5 +-
>  drivers/gpu/drm/i915/selftests/i915_gem.c |  2 +
>  drivers/gpu/drm/i915/selftests/mock_gtt.c |  1 +
>  9 files changed, 115 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 2518cebbf931..f5c2f3c58627 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -196,10 +196,13 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
>  
>  void i915_ggtt_suspend(struct i915_ggtt *ggtt)
>  {
> + struct intel_gt *gt;
> +
>   i915_ggtt_suspend_vm(&ggtt->vm);
>   ggtt->invalidate(ggtt);
>  
> - intel_gt_check_and_clear_faults(ggtt->vm.gt);
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
> + intel_gt_check_and_clear_faults(gt);
>  }
>  
>  void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
> @@ -214,27 +217,36 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
>  
>  static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
>  {
> - struct intel_uncore *uncore = ggtt->vm.gt->uncore;
> + struct intel_uncore *uncore;
> + struct intel_gt *gt;
>  
> - /*
> -  * Note that as an uncached mmio write, this will flush the
> -  * WCB of the writes into the GGTT before it triggers the invalidate.
> -  */
> - intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
> + uncore = gt->uncore;
> + /*
> +  * Note that as an uncached mmio write, this will flush the
> +  * WCB of the writes into the GGTT before it triggers the 
> invalidate.
> +  */
> + intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, 
> GFX_FLSH_CNTL_EN);

This isn't a GT register, so writing it for each GT doesn't do anything
different than just writing it once.  But actually it doesn't look like
this is even a register we should be writing to anymore since Xe_HP.
The GFX_FLSH_CNTL register no longer lives here.

> + }
>  }
>  
>  static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
>  {
> - struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>   struct drm_i915_private *i915 = ggtt->vm.i915;
>  
>   gen8_ggtt_invalidate(ggtt);
>  
> - if (GRAPHICS_VER(i915) >= 12)
> - intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
> -   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> - else
> - intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + if (GRAPHICS_VER(i915) >= 12) {
> + struct intel_gt *gt;
> +
> + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
> + intel_uncore_write_fw(gt->uncore,
> +   GEN12_GUC_TLB_INV_CR,
> +   GEN12_GUC_TLB_INV_CR_INVALIDATE);
> + } else {
> + intel_uncore_write_fw(ggtt->vm.gt->uncore,
> +   GEN8_GTCR, GEN8_GTCR_INVALIDATE);
> + }
>  }
>  
>  u64 gen8_ggtt_pte_encode(dma_addr_t addr,
> @@ -986,8 +998,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  
>   ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
>  
> - setup_private_pat(ggtt->vm.gt);
> -
>   return ggtt_probe_common(ggtt, size);
>  }
>  
> @@ -1186,7 +1196,7 @@ static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct 
> intel_gt *gt)
>

Re: [PATCH 1/5] drm/i915/mtl: add initial definitions for GSC CS

2022-10-31 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:50PM -0700, Daniele Ceraolo Spurio wrote:
> Starting on MTL, the GSC is no longer managed with direct MMIO access,
> but we instead have a dedicated command streamer for it. As a first step
> for adding support for this CS, add the required definitions.
> Note that, although it is now a CS, the GSC retains its old
> class:instance value (OTHER_CLASS instance 6)
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Now that we have an OTHER_CLASS engine, I think we also need to deal
with the class -> reg mapping table in mmio_invalidate_full().  I think
the register we want is 0xCF04?

Matt

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c| 8 
>  drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_engine_user.c  | 1 +
>  drivers/gpu/drm/i915/i915_reg.h  | 1 +
>  4 files changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 3b7d750ad054..e0fbfac03979 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -244,6 +244,13 @@ static const struct engine_info intel_engines[] = {
>   { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
>   }
>   },
> + [GSC0] = {
> + .class = OTHER_CLASS,
> + .instance = OTHER_GSC_INSTANCE,
> + .mmio_bases = {
> + { .graphics_ver = 12, .base = MTL_GSC_RING_BASE }
> + }
> + },
>  };
>  
>  /**
> @@ -324,6 +331,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 
> class)
>   case VIDEO_DECODE_CLASS:
>   case VIDEO_ENHANCEMENT_CLASS:
>   case COPY_ENGINE_CLASS:
> + case OTHER_CLASS:
>   if (GRAPHICS_VER(gt->i915) < 8)
>   return 0;
>   return GEN8_LR_CONTEXT_OTHER_SIZE;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 6b5d4ea22b67..4fd54fb8810f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -136,6 +136,7 @@ enum intel_engine_id {
>   CCS2,
>   CCS3,
>  #define _CCS(n) (CCS0 + (n))
> + GSC0,
>   I915_NUM_ENGINES
>  #define INVALID_ENGINE ((enum intel_engine_id)-1)
>  };
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 46a174f8aa00..79312b734690 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -140,6 +140,7 @@ const char *intel_engine_class_repr(u8 class)
>   [COPY_ENGINE_CLASS] = "bcs",
>   [VIDEO_DECODE_CLASS] = "vcs",
>   [VIDEO_ENHANCEMENT_CLASS] = "vecs",
> + [OTHER_CLASS] = "other",
>   [COMPUTE_CLASS] = "ccs",
>   };
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1c0da50c0dc7..d056c3117ef2 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -970,6 +970,7 @@
>  #define GEN11_VEBOX2_RING_BASE   0x1d8000
>  #define XEHP_VEBOX3_RING_BASE    0x1e8000
>  #define XEHP_VEBOX4_RING_BASE   0x1f8000
> +#define MTL_GSC_RING_BASE   0x11a000
>  #define GEN12_COMPUTE0_RING_BASE 0x1a000
>  #define GEN12_COMPUTE1_RING_BASE 0x1c000
>  #define GEN12_COMPUTE2_RING_BASE 0x1e000
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Add missing steering table terminators

2022-10-28 Thread Matt Roper
On Fri, Oct 28, 2022 at 04:04:18PM -0700, Lucas De Marchi wrote:
> On Fri, Oct 28, 2022 at 03:40:22PM -0700, Matt Roper wrote:
> > The termination entries were missing for a couple of the recently-added
> > MTL steering tables.
> > 
> > Fixes: f32898c94a10 ("drm/i915/xelpg: Add multicast steering")
> > Fixes: a7ec65fc7e83 ("drm/i915/xelpmp: Add multicast steering for media GT")
> 
> I was thinking if we would need separate commits so they can be
> backported independently, but no... those commits were very close.
> 
> > Signed-off-by: Matt Roper 
> 
> 
> Reviewed-by: Lucas De Marchi 

Thanks for the review.  Applied to drm-intel-gt-next.


Matt

> 
> Lucas De Marchi
> 
> > ---
> > drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 ++
> > 1 file changed, 2 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > index 46cf2f3d1e8e..830edffe88cc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
> > @@ -128,11 +128,13 @@ static const struct intel_mmio_range 
> > xelpg_dss_steering_table[] = {
> > { 0x00D800, 0x00D87F }, /* SLICE */
> > { 0x00DC00, 0x00DCFF }, /* SLICE */
> > { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
> > +   {},
> > };
> > 
> > static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
> > { 0x393200, 0x39323F },
> > { 0x393400, 0x3934FF },
> > +   {},
> > };
> > 
> > void intel_gt_mcr_init(struct intel_gt *gt)
> > -- 
> > 2.37.3
> > 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH] drm/i915/mtl: Add missing steering table terminators

2022-10-28 Thread Matt Roper
The termination entries were missing for a couple of the recently-added
MTL steering tables.

Fixes: f32898c94a10 ("drm/i915/xelpg: Add multicast steering")
Fixes: a7ec65fc7e83 ("drm/i915/xelpmp: Add multicast steering for media GT")
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 46cf2f3d1e8e..830edffe88cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -128,11 +128,13 @@ static const struct intel_mmio_range 
xelpg_dss_steering_table[] = {
{ 0x00D800, 0x00D87F }, /* SLICE */
{ 0x00DC00, 0x00DCFF }, /* SLICE */
{ 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
+   {},
 };
 
 static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
{ 0x393200, 0x39323F },
{ 0x393400, 0x3934FF },
+   {},
 };
 
 void intel_gt_mcr_init(struct intel_gt *gt)
-- 
2.37.3



Re: [PATCH 5/5] drm/i915/mtl: don't expose GSC command streamer to the user

2022-10-28 Thread Matt Roper
On Fri, Oct 28, 2022 at 10:14:05AM -0700, Ceraolo Spurio, Daniele wrote:
> 
> 
> On 10/27/2022 8:40 PM, Matt Roper wrote:
> > On Thu, Oct 27, 2022 at 03:15:54PM -0700, Daniele Ceraolo Spurio wrote:
> > > There is no userspace user for this CS yet, we only need it for internal
> > > kernel ops (e.g. HuC, PXP), so don't expose it.
> > > 
> > > Signed-off-by: Daniele Ceraolo Spurio 
> > > Cc: Matt Roper 
> > Since we never expose it to userspace, we also never get to the point of
> > doing an engine rename and removing the apostrophe.  I assume we're okay
> > with this engine continuing to show up as "other'6" in debug logs?
> 
> I don't think it matters a lot in debug logs, but anyway it wouldn't be hard
> to rename it to something different. What do you suggest to rename it to?
> Since OTHER_CLASS doesn't have a uabi_class defined we can't use a count of
> engines of that type like we do for the other classes. Just rename it
> straight to hardcoded gsc0 ?

Yeah, a hardcoded "gsc0" seems fine to me.  I agree it doesn't matter
too much either way, so I'll leave it up to you whether you add that
rename or not.


Matt

> 
> Daniele
> 
> > 
> > Reviewed-by: Matt Roper 
> > 
> > > ---
> > >   drivers/gpu/drm/i915/gt/intel_engine_user.c | 4 
> > >   1 file changed, 4 insertions(+)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> > > b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > index 79312b734690..ca795daca116 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> > > @@ -211,6 +211,10 @@ void intel_engines_driver_register(struct 
> > > drm_i915_private *i915)
> > >   if (intel_gt_has_unrecoverable_error(engine->gt))
> > >   continue; /* ignore incomplete engines */
> > > + /* don't expose GSC engine to user */
> > > + if (engine->class == OTHER_CLASS)
> > > + continue;
> > > +
> > >   GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
> > >   engine->uabi_class = uabi_classes[engine->class];
> > > -- 
> > > 2.37.3
> > > 
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 5/5] drm/i915/mtl: don't expose GSC command streamer to the user

2022-10-27 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:54PM -0700, Daniele Ceraolo Spurio wrote:
> There is no userspace user for this CS yet, we only need it for internal
> kernel ops (e.g. HuC, PXP), so don't expose it.
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Since we never expose it to userspace, we also never get to the point of
doing an engine rename and removing the apostrophe.  I assume we're okay
with this engine continuing to show up as "other'6" in debug logs?

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_user.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 79312b734690..ca795daca116 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -211,6 +211,10 @@ void intel_engines_driver_register(struct 
> drm_i915_private *i915)
>   if (intel_gt_has_unrecoverable_error(engine->gt))
>   continue; /* ignore incomplete engines */
>  
> + /* don't expose GSC engine to user */
> + if (engine->class == OTHER_CLASS)
> + continue;
> +
>   GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
>   engine->uabi_class = uabi_classes[engine->class];
>  
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 4/5] drm/i915/mtl: add GSC CS reset support

2022-10-27 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:53PM -0700, Daniele Ceraolo Spurio wrote:
> The GSC CS has its own dedicated bit in the GDRST register.
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Bspec: 52549
Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 +
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index e0fbfac03979..f63829abf66c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -423,6 +423,7 @@ static u32 get_reset_domain(u8 ver, enum intel_engine_id 
> id)
>   [CCS1]  = GEN11_GRDOM_RENDER,
>   [CCS2]  = GEN11_GRDOM_RENDER,
>   [CCS3]  = GEN11_GRDOM_RENDER,
> + [GSC0]  = GEN12_GRDOM_GSC,
>   };
>   GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
>  !engine_reset_domains[id]);
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 23844ba7e824..16cf90306085 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -640,6 +640,7 @@
>  #define   XEHPC_GRDOM_BLT3   REG_BIT(26)
>  #define   XEHPC_GRDOM_BLT2   REG_BIT(25)
>  #define   XEHPC_GRDOM_BLT1   REG_BIT(24)
> +#define   GEN12_GRDOM_GSCREG_BIT(21)
>  #define   GEN11_GRDOM_SFC3   REG_BIT(20)
>  #define   GEN11_GRDOM_SFC2       REG_BIT(19)
>  #define   GEN11_GRDOM_SFC1   REG_BIT(18)
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 3/5] drm/i915/mtl: add GSC CS interrupt support

2022-10-27 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:52PM -0700, Daniele Ceraolo Spurio wrote:
> The GSC CS re-uses the same interrupt bits that the GSC used in older
> platforms. This means that we can now have an engine interrupt coming
> out of OTHER_CLASS, so we need to handle that appropriately.
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 78 ++
>  1 file changed, 43 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index f26882fdc24c..34ff1ee7e931 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -81,35 +81,27 @@ gen11_other_irq_handler(struct intel_gt *gt, const u8 
> instance,
> instance, iir);
>  }
>  
> -static void
> -gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
> -  const u8 instance, const u16 iir)
> +static struct intel_gt *pick_gt(struct intel_gt *gt, u8 class, u8 instance)
>  {
> - struct intel_engine_cs *engine;
> -
> - /*
> -  * Platforms with standalone media have their media engines in another
> -  * GT.
> -  */
> - if (MEDIA_VER(gt->i915) >= 13 &&
> - (class == VIDEO_DECODE_CLASS || class == VIDEO_ENHANCEMENT_CLASS)) {
> - if (!gt->i915->media_gt)
> - goto err;
> + struct intel_gt *media_gt = gt->i915->media_gt;
>  
> - gt = gt->i915->media_gt;
> + /* we expect the non-media gt to be passed in */
> + GEM_BUG_ON(gt == media_gt);
> +
> + if (!media_gt)
> + return gt;
> +
> + switch (class) {
> + case VIDEO_DECODE_CLASS:
> + case VIDEO_ENHANCEMENT_CLASS:
> + return media_gt;
> + case OTHER_CLASS:
> + if (instance == OTHER_GSC_INSTANCE && HAS_ENGINE(media_gt, 
> GSC0))
> + return media_gt;
> + fallthrough;
> + default:
> + return gt;
>   }
> -
> - if (instance <= MAX_ENGINE_INSTANCE)
> - engine = gt->engine_class[class][instance];
> - else
> - engine = NULL;
> -
> - if (likely(engine))
> - return intel_engine_cs_irq(engine, iir);
> -
> -err:
> - WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
> -   class, instance);
>  }
>  
>  static void
> @@ -118,12 +110,24 @@ gen11_gt_identity_handler(struct intel_gt *gt, const 
> u32 identity)
>   const u8 class = GEN11_INTR_ENGINE_CLASS(identity);
>   const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity);
>   const u16 intr = GEN11_INTR_ENGINE_INTR(identity);
> + struct intel_engine_cs *engine;
>  
>   if (unlikely(!intr))
>   return;
>  
> - if (class <= COPY_ENGINE_CLASS || class == COMPUTE_CLASS)
> - return gen11_engine_irq_handler(gt, class, instance, intr);
> + /*
> +  * Platforms with standalone media have the media and GSC engines in
> +  * another GT.
> +  */
> + gt = pick_gt(gt, class, instance);
> +
> + if (class <= MAX_ENGINE_CLASS && instance <= MAX_ENGINE_INSTANCE)
> + engine = gt->engine_class[class][instance];
> + else
> + engine = NULL;
> +
> + if (engine)
> + return intel_engine_cs_irq(engine, intr);
>  
>   if (class == OTHER_CLASS)
>   return gen11_other_irq_handler(gt, instance, intr);
> @@ -206,7 +210,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
>   intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE,0);
>   if (CCS_MASK(gt))
>   intel_uncore_write(uncore, GEN12_CCS_RSVD_INTR_ENABLE, 0);
> - if (HAS_HECI_GSC(gt->i915))
> + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
>   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_ENABLE, 0);
>  
>   /* Restore masks irqs on RCS, BCS, VCS and VECS engines. */
> @@ -233,7 +237,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
>   intel_uncore_write(uncore, GEN12_CCS0_CCS1_INTR_MASK, ~0);
>   if (HAS_ENGINE(gt, CCS2) || HAS_ENGINE(gt, CCS3))
>   intel_uncore_write(uncore, GEN12_CCS2_CCS3_INTR_MASK, ~0);
> - if (HAS_HECI_GSC(gt->i915))
> + if (HAS_HECI_GSC(gt->i915) || HAS_ENGINE(gt, GSC0))
>   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, ~0);
>  
>   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
> @@ -249,7 +253,7 @@ void ge

Re: [PATCH 2/5] drm/i915/mtl: pass the GSC CS info to the GuC

2022-10-27 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:51PM -0700, Daniele Ceraolo Spurio wrote:
> We need to tell the GuC that the GSC CS is there. The GuC interface
> swaps COMPUTE and OTHER class, so we also need to handle that.

When I first read the second sentence here, I thought you were saying
that the GuC interface had changed and redefined its own enums, but
that's not the case.  I think you just meant that the FOO_CLASS and
GUC_FOO_CLASS enums are nearly the same except that they order COMPUTE
and OTHER classes differently.  Honestly I think that's already pretty
clear from the existing values and mapping tables, even before this
patch, so I'd just leave this sentence off to avoid confusion.

Otherwise,

Reviewed-by: Matt Roper 

> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  | 11 +--
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h |  7 +--
>  2 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 34ef4f36e660..63e3d98788bd 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -478,6 +478,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
>   info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 
> BCS_MASK(gt));
>   info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 
> VDBOX_MASK(gt));
>   info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 
> VEBOX_MASK(gt));
> +
> + /* The GSC engine is an instance (6) of OTHER_CLASS */
> + if (gt->engine[GSC0])
> + info_map_write(info_map, 
> engine_enabled_masks[GUC_GSC_OTHER_CLASS],
> +BIT(gt->engine[GSC0]->instance));
>  }
>  
>  #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
> @@ -519,9 +524,6 @@ static int guc_prep_golden_context(struct intel_guc *guc)
>   }
>  
>   for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; 
> ++engine_class) {
> - if (engine_class == OTHER_CLASS)
> - continue;
> -
>   guc_class = engine_class_to_guc_class(engine_class);
>  
>   if (!info_map_read(_map, engine_enabled_masks[guc_class]))
> @@ -599,9 +601,6 @@ static void guc_init_golden_context(struct intel_guc *guc)
>   addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
>  
>   for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; 
> ++engine_class) {
> - if (engine_class == OTHER_CLASS)
> - continue;
> -
>   guc_class = engine_class_to_guc_class(engine_class);
>   if (!ads_blob_read(guc, 
> system_info.engine_enabled_masks[guc_class]))
>   continue;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> index 968ebd79dce7..4ae5fc2f6002 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
> @@ -47,7 +47,8 @@
>  #define GUC_VIDEOENHANCE_CLASS   2
>  #define GUC_BLITTER_CLASS3
>  #define GUC_COMPUTE_CLASS4
> -#define GUC_LAST_ENGINE_CLASSGUC_COMPUTE_CLASS
> +#define GUC_GSC_OTHER_CLASS  5
> +#define GUC_LAST_ENGINE_CLASSGUC_GSC_OTHER_CLASS
>  #define GUC_MAX_ENGINE_CLASSES   16
>  #define GUC_MAX_INSTANCES_PER_CLASS  32
>  
> @@ -169,6 +170,7 @@ static u8 engine_class_guc_class_map[] = {
>   [COPY_ENGINE_CLASS]   = GUC_BLITTER_CLASS,
>   [VIDEO_DECODE_CLASS]  = GUC_VIDEO_CLASS,
>   [VIDEO_ENHANCEMENT_CLASS] = GUC_VIDEOENHANCE_CLASS,
> + [OTHER_CLASS] = GUC_GSC_OTHER_CLASS,
>   [COMPUTE_CLASS]   = GUC_COMPUTE_CLASS,
>  };
>  
> @@ -178,12 +180,13 @@ static u8 guc_class_engine_class_map[] = {
>   [GUC_VIDEO_CLASS]= VIDEO_DECODE_CLASS,
>   [GUC_VIDEOENHANCE_CLASS] = VIDEO_ENHANCEMENT_CLASS,
>   [GUC_COMPUTE_CLASS]  = COMPUTE_CLASS,
> + [GUC_GSC_OTHER_CLASS]= OTHER_CLASS,
>  };
>  
>  static inline u8 engine_class_to_guc_class(u8 class)
>  {
>   BUILD_BUG_ON(ARRAY_SIZE(engine_class_guc_class_map) != MAX_ENGINE_CLASS 
> + 1);
> - GEM_BUG_ON(class > MAX_ENGINE_CLASS || class == OTHER_CLASS);
> + GEM_BUG_ON(class > MAX_ENGINE_CLASS);
>  
>   return engine_class_guc_class_map[class];
>  }
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH 1/5] drm/i915/mtl: add initial definitions for GSC CS

2022-10-27 Thread Matt Roper
On Thu, Oct 27, 2022 at 03:15:50PM -0700, Daniele Ceraolo Spurio wrote:
> Starting on MTL, the GSC is no longer managed with direct MMIO access,
> but we instead have a dedicated command streamer for it. As a first step
> for adding support for this CS, add the required definitions.
> Note that, although it is now a CS, the GSC retains its old
> class:instance value (OTHER_CLASS instance 6)
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 

Maybe add "Bspec: 65308, 45605" as a reference?

Otherwise,

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c| 8 
>  drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
>  drivers/gpu/drm/i915/gt/intel_engine_user.c  | 1 +
>  drivers/gpu/drm/i915/i915_reg.h  | 1 +
>  4 files changed, 11 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 3b7d750ad054..e0fbfac03979 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -244,6 +244,13 @@ static const struct engine_info intel_engines[] = {
>   { .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
>   }
>   },
> + [GSC0] = {
> + .class = OTHER_CLASS,
> + .instance = OTHER_GSC_INSTANCE,
> + .mmio_bases = {
> + { .graphics_ver = 12, .base = MTL_GSC_RING_BASE }
> + }
> + },
>  };
>  
>  /**
> @@ -324,6 +331,7 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 
> class)
>   case VIDEO_DECODE_CLASS:
>   case VIDEO_ENHANCEMENT_CLASS:
>   case COPY_ENGINE_CLASS:
> + case OTHER_CLASS:
>   if (GRAPHICS_VER(gt->i915) < 8)
>   return 0;
>   return GEN8_LR_CONTEXT_OTHER_SIZE;
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index 6b5d4ea22b67..4fd54fb8810f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -136,6 +136,7 @@ enum intel_engine_id {
>   CCS2,
>   CCS3,
>  #define _CCS(n) (CCS0 + (n))
> + GSC0,
>   I915_NUM_ENGINES
>  #define INVALID_ENGINE ((enum intel_engine_id)-1)
>  };
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> index 46a174f8aa00..79312b734690 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
> @@ -140,6 +140,7 @@ const char *intel_engine_class_repr(u8 class)
>   [COPY_ENGINE_CLASS] = "bcs",
>   [VIDEO_DECODE_CLASS] = "vcs",
>   [VIDEO_ENHANCEMENT_CLASS] = "vecs",
> + [OTHER_CLASS] = "other",
>   [COMPUTE_CLASS] = "ccs",
>   };
>  
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 1c0da50c0dc7..d056c3117ef2 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -970,6 +970,7 @@
>  #define GEN11_VEBOX2_RING_BASE   0x1d8000
>  #define XEHP_VEBOX3_RING_BASE    0x1e8000
>  #define XEHP_VEBOX4_RING_BASE0x1f8000
> +#define MTL_GSC_RING_BASE0x11a000
>  #define GEN12_COMPUTE0_RING_BASE 0x1a000
>  #define GEN12_COMPUTE1_RING_BASE 0x1c000
>  #define GEN12_COMPUTE2_RING_BASE 0x1e000
> -- 
> 2.37.3
> 

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


[PATCH] drm/i915/xelpg: Fix write to MTL_MCR_SELECTOR

2022-10-19 Thread Matt Roper
A misplaced closing parenthesis caused the groupid/instanceid values to
be considered part of the ternary operator's condition instead of being
OR'd into the resulting value.

Fixes: f32898c94a10 ("drm/i915/xelpg: Add multicast steering")
Reported-by: kernel test robot 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 0d2811724b00..46cf2f3d1e8e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -249,7 +249,7 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore 
*uncore,
intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
  REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
  REG_FIELD_PREP(MTL_MCR_INSTANCEID, 
instance) |
- (rw_flag == FW_REG_READ) ? 
GEN11_MCR_MULTICAST : 0);
+ (rw_flag == FW_REG_READ ? 
GEN11_MCR_MULTICAST : 0));
} else if (GRAPHICS_VER(uncore->i915) >= 11) {
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
-- 
2.37.3



Re: [PATCH v2 5/7] drm/i915/mtl: Handle wopcm per-GT and limit calculations.

2022-10-18 Thread Matt Roper
On Tue, Oct 18, 2022 at 05:44:38PM -0700, John Harrison wrote:
> On 10/12/2022 17:03, Daniele Ceraolo Spurio wrote:
> > From: Aravind Iddamsetty 
> > 
...
> > diff --git a/drivers/gpu/drm/i915/intel_wopcm.c 
> > b/drivers/gpu/drm/i915/gt/intel_wopcm.c
> > similarity index 86%
> > rename from drivers/gpu/drm/i915/intel_wopcm.c
> > rename to drivers/gpu/drm/i915/gt/intel_wopcm.c
> > index 322fb9eeb880..487fbbbdf3d6 100644
> > --- a/drivers/gpu/drm/i915/intel_wopcm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_wopcm.c
> > @@ -43,6 +43,7 @@
> >   /* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
> >   #define GEN11_WOPCM_SIZE  SZ_2M
> >   #define GEN9_WOPCM_SIZE   SZ_1M
> > +#define XELPM_SAMEDIA_WOPCM_SIZE   SZ_2M
> XELPM? Isn't it just XELP?

Xe_LP is the older TGL-ADL gfx IP name.  MTL's media IP is called
Xe_LPM+ (which we should label as XELPMP in code, so it looks like the
final "P" is missing here).


Matt

-- 
Matt Roper
Graphics Software Engineer
VTT-OSGC Platform Enablement
Intel Corporation


Re: [PATCH v2 7/7] drm/i915/guc: handle interrupts from media GuC

2022-10-14 Thread Matt Roper
On Wed, Oct 12, 2022 at 05:03:32PM -0700, Daniele Ceraolo Spurio wrote:
> The render and media GuCs share the same interrupt enable register, so
> we can no longer disable interrupts when we disable communication for
> one of the GuCs as this would impact the other GuC. Instead, we keep the
> interrupts always enabled in HW and use a variable in the GuC structure
> to determine if we want to service the received interrupts or not.
> 
> v2: use MTL_ prefix for reg definition (Matt)
> 
> Signed-off-by: Daniele Ceraolo Spurio 
> Cc: Matt Roper 
> Cc: John Harrison 
> Cc: Alan Previn 

Reviewed-by: Matt Roper 

> ---
>  drivers/gpu/drm/i915/gt/intel_gt_irq.c  | 21 ++
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  2 ++
>  drivers/gpu/drm/i915/gt/uc/intel_guc.c  | 29 ++---
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h  |  5 -
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c   |  8 +--
>  5 files changed, 45 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> index f26882fdc24c..f6805088c0eb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
> @@ -17,6 +17,9 @@
>  
>  static void guc_irq_handler(struct intel_guc *guc, u16 iir)
>  {
> + if (unlikely(!guc->interrupts.enabled))
> + return;
> +
>   if (iir & GUC_INTR_GUC2HOST)
>   intel_guc_to_host_event_handler(guc);
>  }
> @@ -249,6 +252,7 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>  {
>   struct intel_uncore *uncore = gt->uncore;
>   u32 irqs = GT_RENDER_USER_INTERRUPT;
> + u32 guc_mask = intel_uc_wants_guc(>uc) ? GUC_INTR_GUC2HOST : 0;
>   const u32 gsc_mask = GSC_IRQ_INTF(0) | GSC_IRQ_INTF(1);
>   u32 dmask;
>   u32 smask;
> @@ -299,6 +303,19 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   if (HAS_HECI_GSC(gt->i915))
>   intel_uncore_write(uncore, GEN11_GUNIT_CSME_INTR_MASK, 
> ~gsc_mask);
>  
> + if (guc_mask) {
> + /* the enable bit is common for both GTs but the masks are 
> separate */
> + u32 mask = gt->type == GT_MEDIA ?
> + REG_FIELD_PREP(ENGINE0_MASK, guc_mask) :
> + REG_FIELD_PREP(ENGINE1_MASK, guc_mask);
> +
> + intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE,
> +REG_FIELD_PREP(ENGINE1_MASK, guc_mask));
> +
> + /* we might not be the first GT to write this reg */
> + intel_uncore_rmw(uncore, MTL_GUC_MGUC_INTR_MASK, mask, 0);
> + }
> +
>   /*
>* RPS interrupts will get enabled/disabled on demand when RPS itself
>* is enabled/disabled.
> @@ -307,10 +324,6 @@ void gen11_gt_irq_postinstall(struct intel_gt *gt)
>   gt->pm_imr = ~gt->pm_ier;
>   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
>   intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK,  ~0);
> -
> - /* Same thing for GuC interrupts */
> - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
> - intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK,  ~0);
>  }
>  
>  void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 7f79bbf97828..8b597a918f24 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1523,6 +1523,7 @@
>  #define   GEN11_CSME (31)
>  #define   GEN11_GUNIT(28)
>  #define   GEN11_GUC  (25)
> +#define   MTL_MGUC   (24)
>  #define   GEN11_WDPERF   (20)
>  #define   GEN11_KCR  (19)
>  #define   GEN11_GTPM (16)
> @@ -1577,6 +1578,7 @@
>  #define GEN11_VECS0_VECS1_INTR_MASK  _MMIO(0x1900d0)
>  #define GEN12_VECS2_VECS3_INTR_MASK  _MMIO(0x1900d4)
>  #define GEN11_GUC_SG_INTR_MASK   _MMIO(0x1900e8)
> +#define MTL_GUC_MGUC_INTR_MASK   _MMIO(0x1900e8) /* MTL+ 
> */
>  #define GEN11_GPM_WGBOXPERF_INTR_MASK_MMIO(0x1900ec)
>  #define GEN11_CRYPTO_RSVD_INTR_MASK  _MMIO(0x1900f0)
>  #define GEN11_GUNIT_CSME_INTR_MASK   _MMIO(0x1900f4)
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> index b3600be61a9a..09f2a673aa19 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
> @@ -98,6 +

[PATCH] drm/i915/pvc: Update forcewake domain for CCS register ranges

2022-10-14 Thread Matt Roper
The bspec was just updated with a correction to the forcewake domain
required when accessing registers in the CCS engine ranges (0x1a000 -
0x1ffff and 0x26000 - 0x27fff) on PVC; these ranges require a wake on
the RENDER domain, not the GT domain.

Bspec: 67609
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/intel_uncore.c | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index c058cdc6d8a0..2a3e2869fe71 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1682,25 +1682,27 @@ static const struct intel_forcewake_range 
__pvc_fw_ranges[] = {
GEN_FW_RANGE(0x12000, 0x12fff, 0), /*
0x12000 - 0x127ff: always on
0x12800 - 0x12fff: reserved */
-   GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /*
+   GEN_FW_RANGE(0x13000, 0x19fff, FORCEWAKE_GT), /*
0x13000 - 0x135ff: gt
0x13600 - 0x147ff: reserved
0x14800 - 0x153ff: gt
-   0x15400 - 0x19fff: reserved
-   0x1a000 - 0x1: gt
-   0x2 - 0x21fff: reserved
-   0x22000 - 0x23fff: gt */
+   0x15400 - 0x19fff: reserved */
+   GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /*
+   0x1a000 - 0x1: render
+   0x2 - 0x21fff: reserved */
+   GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT),
GEN_FW_RANGE(0x24000, 0x2417f, 0), /*
24000 - 0x2407f: always on
24080 - 0x2417f: reserved */
-   GEN_FW_RANGE(0x24180, 0x3, FORCEWAKE_GT), /*
+   GEN_FW_RANGE(0x24180, 0x25fff, FORCEWAKE_GT), /*
0x24180 - 0x241ff: gt
0x24200 - 0x251ff: reserved
0x25200 - 0x252ff: gt
-   0x25300 - 0x25fff: reserved
-   0x26000 - 0x27fff: gt
-   0x28000 - 0x2: reserved
-   0x3 - 0x3: gt */
+   0x25300 - 0x25fff: reserved */
+   GEN_FW_RANGE(0x26000, 0x2, FORCEWAKE_RENDER), /*
+   0x26000 - 0x27fff: render
+   0x28000 - 0x2: reserved */
+   GEN_FW_RANGE(0x3, 0x3, FORCEWAKE_GT),
GEN_FW_RANGE(0x4, 0x1b, 0),
GEN_FW_RANGE(0x1c, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /*
0x1c - 0x1c2bff: VD0
-- 
2.37.3



[PATCH v3 13/14] drm/i915/xelpg: Add multicast steering

2022-10-14 Thread Matt Roper
MTL's graphics IP (Xe_LPG) once again changes the multicast register
types and steering details.  Key changes from past platforms:
 * The number of instances of some MCR types (NODE, OAAL2, and GAM) vary
   according to the MTL subplatform and cannot be read from fuse
   registers.  However steering to instance #0 will always provided a
   non-terminated value, so we can lump these all into a single
   "instance0" table.
 * The MCR steering register (and its bitfields) has changed.

Unlike past platforms, we will be explicitly steering all types of MCR
accesses, including those for "SLICE" and "DSS" ranges; we no longer
rely on implicit steering.  On previous platforms, various
hardware/firmware agents that needed to access registers typically had
their own steering control registers, allowing them to perform multicast
steering without clobbering the CPU/kernel steering.  Starting with MTL,
more of these agents now share a single steering register (0xFD4) and it
is no longer safe for us to assume that the value will remain unchanged
from how we initialized it during startup.  There is also a slight
chance of race conditions between the driver and a hardware/firmware
agent, so the hardware provides a semaphore register that can be used to
coordinate access to the steering register.  Support for the semaphore
register will be introduced in a future patch.

v2:
 - Use Xe_LPG terminology instead of "MTL 3D" since it's the IP version
   we're matching on now rather than the platform.
 - Don't combine l3bank and mslice masks into a union.  It's not related
   to the other changes here and we might still need both of them on
   some future platform.
 - Separate debug dumping of steering settings to a separate helper
   function.  (Tvrtko)
 - Update debug dumping to include DSS ranges (and future-proof it so
   that any new ranges added on future platforms will also be dumped).
 - Restore MULTICAST bit at the end of rw_with_mcr_steering_fw() if we
   cleared it.  Also force the MULTICAST bit to true at the beginning of
   multicast writes just to be safe.  (Bala)

Bspec: 67788, 67112
Cc: Radhakrishna Sripada 
Cc: Balasubramani Vivekanandan 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 135 +---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h |   5 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|   1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  33 -
 drivers/gpu/drm/i915/i915_pci.c |   1 +
 5 files changed, 154 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 349074bf365f..23a1ef9659bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -41,6 +41,7 @@ static const char * const intel_steering_types[] = {
"MSLICE",
"LNCF",
"GAM",
+   "DSS",
"INSTANCE 0",
 };
 
@@ -99,9 +100,40 @@ static const struct intel_mmio_range 
pvc_instance0_steering_table[] = {
{},
 };
 
+static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
+   { 0x000B00, 0x000BFF }, /* SQIDI */
+   { 0x001000, 0x001FFF }, /* SQIDI */
+   { 0x004000, 0x0048FF }, /* GAM */
+   { 0x008700, 0x0087FF }, /* SQIDI */
+   { 0x00B000, 0x00B0FF }, /* NODE */
+   { 0x00C800, 0x00CFFF }, /* GAM */
+   { 0x00D880, 0x00D8FF }, /* NODE */
+   { 0x00DD00, 0x00DDFF }, /* OAAL2 */
+   {},
+};
+
+static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
+   { 0x00B100, 0x00B3FF },
+   {},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct intel_mmio_range xelpg_dss_steering_table[] = {
+   { 0x005200, 0x0052FF }, /* SLICE */
+   { 0x005500, 0x007FFF }, /* SLICE */
+   { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS 
(0x8150-0x815F) */
+   { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS 
(0x9520-0x955F) */
+   { 0x009680, 0x0096FF }, /* DSS */
+   { 0x00D800, 0x00D87F }, /* SLICE */
+   { 0x00DC00, 0x00DCFF }, /* SLICE */
+   { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
+};
+
 void intel_gt_mcr_init(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
+   unsigned long fuse;
+   int i;
 
/*
 * An mslice is unavailable only if both the meml3 for the slice is
@@ -119,7 +151,22 @@ void intel_gt_mcr_init(struct intel_gt *gt)
drm_warn(>drm, "mslice mask all zero!\n");
}
 
-   if (IS_PONTEVECCHIO(i915)) {
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) &&
+   gt->type == GT_PRIMARY) {
+   fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
+   

[PATCH v3 14/14] drm/i915/xelpmp: Add multicast steering for media GT

2022-10-14 Thread Matt Roper
MTL's media IP (Xe_LPM+) only has a single type of steering ("OAADDRM")
which selects between media slice 0 and media slice 1.  We'll always
steer to media slice 0 unless it is fused off (which is the case when
VD0, VE0, and SFC0 are all reported as unavailable).

Bspec: 67789
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c  | 18 --
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 +++--
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 23a1ef9659bf..0d2811724b00 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -42,6 +42,7 @@ static const char * const intel_steering_types[] = {
"LNCF",
"GAM",
"DSS",
+   "OADDRM",
"INSTANCE 0",
 };
 
@@ -129,6 +130,11 @@ static const struct intel_mmio_range 
xelpg_dss_steering_table[] = {
{ 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
 };
 
+static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
+   { 0x393200, 0x39323F },
+   { 0x393400, 0x3934FF },
+};
+
 void intel_gt_mcr_init(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
@@ -151,8 +157,9 @@ void intel_gt_mcr_init(struct intel_gt *gt)
drm_warn(&i915->drm, "mslice mask all zero!\n");
}
 
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70) &&
-   gt->type == GT_PRIMARY) {
+   if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
+   gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
+   } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
 intel_uncore_read(gt->uncore, XEHP_FUSE4));
 
@@ -514,6 +521,13 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = 0;
*instance = 0;
break;
+   case OADDRM:
+   if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & 
BIT(0))
+   *group = 0;
+   else
+   *group = 1;
+   *instance = 0;
+   break;
default:
MISSING_CASE(type);
*group = 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index 0bb73d110a84..64aa2ba624fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -61,6 +61,7 @@ enum intel_steering_type {
LNCF,
GAM,
DSS,
+   OADDRM,
 
/*
 * On some platforms there are multiple types of MCR registers that
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 711a31935857..bae960486872 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1598,14 +1598,27 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct 
i915_wa_list *wal)
debug_dump_steering(gt);
 }
 
+static void
+xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+   /* FIXME: Actual workarounds will be added in future patch(es) */
+
+   debug_dump_steering(gt);
+}
+
 static void
 gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
 {
struct drm_i915_private *i915 = gt->i915;
 
-   /* FIXME: Media GT handling will be added in an upcoming patch */
-   if (gt->type == GT_MEDIA)
+   if (gt->type == GT_MEDIA) {
+   if (MEDIA_VER(i915) >= 13)
+   xelpmp_gt_workarounds_init(gt, wal);
+   else
+   MISSING_CASE(MEDIA_VER(i915));
+
return;
+   }
 
if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
xelpg_gt_workarounds_init(gt, wal);
-- 
2.37.3



[PATCH v3 12/14] drm/i915: Define multicast registers as a new type

2022-10-14 Thread Matt Roper
Rather than treating multicast registers as 'i915_reg_t' let's define
them as a completely new type.  This will allow the compiler to help us
make sure we're using multicast-aware functions to operate on multicast
registers.

This plan does break down a bit in places where we're just maintaining
heterogeneous lists of registers (e.g., various MMIO whitelists used by
perf, GVT, etc.) rather than performing reads/writes.  We only really
care about the offset in those cases, so for now we can "cast" the
registers as non-MCR, leaving us with a list of i915_reg_t's, but we may
want to look for better ways to store mixed collections of i915_reg_t
and i915_mcr_reg_t in the future.

v2:
 - Add TLB invalidation registers
v3:
 - Make type checking of i915_mmio_reg_offset() stricter.  It will
   accept either i915_reg_t or i915_mcr_reg_t, but will now raise a
   compile error if any other type is passed, even if that type contains
   a 'reg' field.  (Jani)
 - Drop a ton of GVT changes; allowing i915_mmio_reg_offset() to take
   either an i915_reg_t or an i915_mcr_reg_t means that the huge lists
   of MMIO_D*() macros used in GVT will continue to work without
   modification.  We need only make changes to structures that have an
   explicit i915_reg_t in them now.

Cc: Jani Nikula 
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c| 16 --
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c| 51 ---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h| 18 +++
 drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 27 +++---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 32 ++--
 .../gpu/drm/i915/gt/intel_workarounds_types.h |  5 +-
 .../gpu/drm/i915/gt/selftest_workarounds.c|  2 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|  2 +-
 .../gpu/drm/i915/gt/uc/intel_guc_capture.c|  4 +-
 drivers/gpu/drm/i915/gvt/handlers.c   |  2 +-
 drivers/gpu/drm/i915/gvt/mmio_context.c   | 14 ++---
 drivers/gpu/drm/i915/i915_reg_defs.h  | 27 +-
 12 files changed, 117 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 3df0d0336dbc..27dbb9e4bd6c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -991,7 +991,10 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 }
 
 struct reg_and_bit {
-   i915_reg_t reg;
+   union {
+   i915_reg_t reg;
+   i915_mcr_reg_t mcr_reg;
+   };
u32 bit;
 };
 
@@ -1033,7 +1036,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, 
const bool gen8,
 static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
 {
if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
-   return intel_gt_mcr_wait_for_reg_fw(gt, rb.reg, rb.bit, 0,
+   return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
TLB_INVAL_TIMEOUT_US,
TLB_INVAL_TIMEOUT_MS);
else
@@ -1058,7 +1061,7 @@ static void mmio_invalidate_full(struct intel_gt *gt)
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
[COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
-   static const i915_reg_t xehp_regs[] = {
+   static const i915_mcr_reg_t xehp_regs[] = {
[RENDER_CLASS]  = XEHP_GFX_TLB_INV_CR,
[VIDEO_DECODE_CLASS]= XEHP_VD_TLB_INV_CR,
[VIDEO_ENHANCEMENT_CLASS]   = XEHP_VE_TLB_INV_CR,
@@ -1131,7 +1134,12 @@ static void mmio_invalidate_full(struct intel_gt *gt)
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;
 
-   rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+   rb.mcr_reg = xehp_regs[engine->class];
+   rb.bit = BIT(engine->instance);
+   } else {
+   rb = get_reg_and_bit(engine, regs == gen8_regs, regs, 
num);
+   }
 
if (wait_for_invalidate(gt, rb))
drm_err_ratelimited(&gt->i915->drm,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 1ed9bc4dccfd..349074bf365f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -150,6 +150,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
}
 }
 
+/*
+ * Although the rest of the driver should use MCR-specific functions to
+ * read/write MCR registers, we still use the regular intel_uncore_* functions
+ * internally to implement those, so we need a way for the functions in this
+ * file to "cast" an i915_mcr_reg_t into an i915_reg_t.
+ */
+static i915_reg_t mcr_reg_cast(const 

[PATCH v3 11/14] drm/i915/gt: Add MCR-specific workaround initializers

2022-10-14 Thread Matt Roper
Let's be more explicit about which of our workarounds are updating MCR
registers.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   | 433 +++---
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   4 +-
 2 files changed, 263 insertions(+), 174 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 96b9f02a2284..7671994d5b7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -166,12 +166,33 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t 
reg,
_wa_add(wal, );
 }
 
+static void wa_mcr_add(struct i915_wa_list *wal, i915_reg_t reg,
+  u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{
+   struct i915_wa wa = {
+   .reg  = reg,
+   .clr  = clear,
+   .set  = set,
+   .read = read_mask,
+   .masked_reg = masked_reg,
+   .is_mcr = 1,
+   };
+
+   _wa_add(wal, &wa);
+}
+
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
wa_add(wal, reg, clear, set, clear, false);
 }
 
+static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 
set)
+{
+   wa_mcr_add(wal, reg, clear, set, clear, false);
+}
+
 static void
 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
 {
@@ -184,12 +205,24 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 
set)
wa_write_clr_set(wal, reg, set, set);
 }
 
+static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
+{
+   wa_mcr_write_clr_set(wal, reg, set, set);
+}
+
 static void
 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
 {
wa_write_clr_set(wal, reg, clr, 0);
 }
 
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+   wa_mcr_write_clr_set(wal, reg, clr, 0);
+}
+
 /*
  * WA operations on "masked register". A masked register has the upper 16 bits
  * documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -207,12 +240,24 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, 
u32 val)
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
+}
+
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
+static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
+}
+
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
u32 mask, u32 val)
@@ -220,6 +265,13 @@ wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t 
reg,
wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
+   u32 mask, u32 val)
+{
+   wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
+}
+
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
  struct i915_wa_list *wal)
 {
@@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct 
intel_engine_cs *engine,
wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), 
ASYNC_FLIP_PERF_DISABLE);
 
/* WaDisablePartialInstShootdown:bdw,chv */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN,
-PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
/* Use Force Non-Coherent whenever executing a 3D context. This is a
 * workaround for a possible hang in the unlikely event a TLB
@@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct 
intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal);
 
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
/* WaDisableDopClockGating:bdw
 *
 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
 * to disable EUTC clock gating.
 */
-   wa_masked_en(wal, GEN8_ROW_CHICKEN2,
-DOP_CLOCK_GATING_DISABLE);
+   wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+DOP_CLOCK_GATING_DISABLE);
 
-   wa_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
-GEN8_SAMPLER_POWER_BYPASS_DIS);
+   wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+GEN8_SAMPLER_POWER_BYPASS_DIS);
 
wa_masked_

[PATCH v3 06/14] drm/i915/xehp: Check for faults on primary GAM

2022-10-14 Thread Matt Roper
On Xe_HP the fault registers are now in a multicast register range.
However, as part of the GAM, these registers follow special rules and we
need only read from the "primary" GAM's instance to get the information
we need.  So a single intel_gt_mcr_read_any() (which will automatically
steer to the primary GAM) is sufficient; we don't need to loop over each
instance of the MCR register.

v2:
 - Update more instances of fault registers.  (Bala)

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt.c| 52 +++
 drivers/gpu/drm/i915/i915_gpu_error.c | 12 +--
 2 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 445e171940fa..e14f159ad9fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -270,7 +270,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
   I915_MASTER_ERROR_INTERRUPT);
}
 
-   if (GRAPHICS_VER(i915) >= 12) {
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+   intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
+  RING_FAULT_VALID, 0);
+   intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+   } else if (GRAPHICS_VER(i915) >= 12) {
rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
} else if (GRAPHICS_VER(i915) >= 8) {
@@ -308,17 +312,49 @@ static void gen6_check_faults(struct intel_gt *gt)
}
 }
 
+static void xehp_check_faults(struct intel_gt *gt)
+{
+   u32 fault;
+
+   /*
+* Although the fault register now lives in an MCR register range,
+* the GAM registers are special and we only truly need to read
+* the "primary" GAM instance rather than handling each instance
+* individually.  intel_gt_mcr_read_any() will automatically steer
+* toward the primary instance.
+*/
+   fault = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+   if (fault & RING_FAULT_VALID) {
+   u32 fault_data0, fault_data1;
+   u64 fault_addr;
+
+   fault_data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
+   fault_data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
+
+   fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+((u64)fault_data0 << 12);
+
+   drm_dbg(&gt->i915->drm, "Unexpected fault\n"
+   "\tAddr: 0x%08x_%08x\n"
+   "\tAddress space: %s\n"
+   "\tEngine ID: %d\n"
+   "\tSource ID: %d\n"
+   "\tType: %d\n",
+   upper_32_bits(fault_addr), lower_32_bits(fault_addr),
+   fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+   GEN8_RING_FAULT_ENGINE_ID(fault),
+   RING_FAULT_SRCID(fault),
+   RING_FAULT_FAULT_TYPE(fault));
+   }
+}
+
 static void gen8_check_faults(struct intel_gt *gt)
 {
struct intel_uncore *uncore = gt->uncore;
i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
u32 fault;
 
-   if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) {
-   fault_reg = XEHP_RING_FAULT_REG;
-   fault_data0_reg = XEHP_FAULT_TLB_DATA0;
-   fault_data1_reg = XEHP_FAULT_TLB_DATA1;
-   } else if (GRAPHICS_VER(gt->i915) >= 12) {
+   if (GRAPHICS_VER(gt->i915) >= 12) {
fault_reg = GEN12_RING_FAULT_REG;
fault_data0_reg = GEN12_FAULT_TLB_DATA0;
fault_data1_reg = GEN12_FAULT_TLB_DATA1;
@@ -358,7 +394,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
 
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
-   if (GRAPHICS_VER(i915) >= 8)
+   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+   xehp_check_faults(gt);
+   else if (GRAPHICS_VER(i915) >= 8)
gen8_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 6)
gen6_check_faults(gt);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c 
b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9ea2fe34e7d3..f2d53edcd2ee 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1221,7 +1221,10 @@ static void engine_record_registers(struct 
intel_engine_coredump *ee)
if (GRAPHICS_VER(i915) >= 6) {
ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);

[PATCH v3 07/14] drm/i915/gt: Add intel_gt_mcr_wait_for_reg_fw()

2022-10-14 Thread Matt Roper
Xe_HP has some MCR registers that need to be polled for completion of
operations like TLB invalidation.  Those registers are in the GAM range,
which rolls up the status from each unit into the 'primary' instance's
value.  This makes it useful to have a dedicated 'wait for register'
function that handles this on MCR registers, similar to the
__intel_wait_for_register_fw() function we already have for regular
registers.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 55 ++
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h |  7 
 2 files changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 4dc360f4e344..1ed9bc4dccfd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -568,3 +568,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, 
unsigned int dss,
return;
}
 }
+
+/**
+ * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected 
state
+ * @gt: GT structure
+ * @reg: the register to read
+ * @mask: mask to apply to register value
+ * @value: value to wait for
+ * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
+ * @slow_timeout_ms: slow timeout in millisecond
+ *
+ * This routine waits until the target register @reg contains the expected
+ * @value after applying the @mask, i.e. it waits until ::
+ *
+ * (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
+ *
+ * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
+ * must be not larger than 20, microseconds.
+ *
+ * This function is basically an MCR-friendly version of
+ * __intel_wait_for_register_fw().  Generally this function will only be used
+ * on GAM registers which are a bit special --- although they're MCR registers,
+ * reads (e.g., waiting for status updates) are always directed to the primary
+ * instance.
+ *
+ * Note that this routine assumes the caller holds forcewake asserted, it is
+ * not suitable for very long waits.
+ *
+ * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
+ */
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+i915_reg_t reg,
+u32 mask,
+u32 value,
+unsigned int fast_timeout_us,
+unsigned int slow_timeout_ms)
+{
+   u32 reg_value = 0;
+#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == 
value)
+   int ret;
+
+   /* Catch any overuse of this function */
+   might_sleep_if(slow_timeout_ms);
+   GEM_BUG_ON(fast_timeout_us > 2);
+   GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
+
+   ret = -ETIMEDOUT;
+   if (fast_timeout_us && fast_timeout_us <= 2)
+   ret = _wait_for_atomic(done, fast_timeout_us, 0);
+   if (ret && slow_timeout_ms)
+   ret = wait_for(done, slow_timeout_ms);
+
+   return ret;
+#undef done
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 781b267478db..548f922cd9fa 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -37,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, 
struct intel_gt *gt,
 void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
  unsigned int *group, unsigned int *instance);
 
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+i915_reg_t reg,
+u32 mask,
+u32 value,
+unsigned int fast_timeout_us,
+unsigned int slow_timeout_ms);
+
 /*
  * Helper for for_each_ss_steering loop.  On pre-Xe_HP platforms, subslice
  * presence is determined by using the group/instance as direct lookups in the
-- 
2.37.3



[PATCH v3 03/14] drm/i915/gt: Drop a few unused register definitions

2022-10-14 Thread Matt Roper
Let's drop a few register definitions that are unused anywhere in the
driver today.  Since the referenced offsets are part of what is now
considered a multicast register region, the current definitions would
not be correct for use on any future platform.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 17 -
 1 file changed, 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 0aa16caa33e4..71d8787230c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -454,13 +454,6 @@
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE   REG_BIT(9)
 
-/* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
-#define   PIXEL_MASK_CAMMING_DISABLE   (1 << 14)
-
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
-#define   DISABLE_PIXEL_MASK_CAMMING   (1 << 14)
-
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
 #define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
@@ -967,11 +960,6 @@
 #define GEN7_L3LOG(slice, i)   _MMIO(0xb070 + (slice) * 0x200 
+ (i) * 4)
 #define   GEN7_L3LOG_SIZE  0x80
 
-#define GEN10_SCRATCH_LNCF2_MMIO(0xb0a0)
-#define   PMFLUSHDONE_LNICRSDROP   (1 << 20)
-#define   PMFLUSH_GAPL3UNBLOCK (1 << 21)
-#define   PMFLUSHDONE_LNEBLK   (1 << 22)
-
 #define XEHP_L3NODEARBCFG  _MMIO(0xb0b4)
 #define   XEHP_LNESPAREREG_BIT(19)
 
@@ -986,9 +974,6 @@
 #define   L3_HIGH_PRIO_CREDITS(x)  (((x) >> 1) << 14)
 #define   L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14))
 
-#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114)
-#define   GEN11_I2M_WRITE_DISABLE  (1 << 28)
-
 #define GEN8_L3SQCREG4 _MMIO(0xb118)
 #define   GEN11_LQSC_CLEAN_EVICT_DISABLE   (1 << 6)
 #define   GEN8_LQSC_RO_PERF_DIS(1 << 27)
@@ -1191,8 +1176,6 @@
 #define SARB_CHICKEN1  _MMIO(0xe90c)
 #define   COMP_CKN_IN  REG_GENMASK(30, 29)
 
-#define GEN7_HALF_SLICE_CHICKEN1_GT2   _MMIO(0xf100)
-
 #define GEN7_ROW_CHICKEN2_GT2  _MMIO(0xf4f4)
 #define   DOP_CLOCK_GATING_DISABLE (1 << 0)
 #define   PUSH_CONSTANT_DEREF_DISABLE  (1 << 8)
-- 
2.37.3



[PATCH v3 05/14] drm/i915/gt: Add intel_gt_mcr_multicast_rmw() operation

2022-10-14 Thread Matt Roper
There are cases where we wish to read from any non-terminated MCR
register instance (or the primary instance in the case of GAM ranges),
clear/set some bits, and then write the value back out to the register
in a multicast manner.  Adding a "multicast RMW" helper will avoid the
need to open-code this.

v2:
 - Return a u32 to align with the recent change to intel_uncore_rmw.

Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 28 ++
 drivers/gpu/drm/i915/gt/intel_gt_mcr.h |  3 +++
 2 files changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index a2047a68ea7a..4dc360f4e344 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -302,6 +302,34 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, 
i915_reg_t reg, u32 va
intel_uncore_write_fw(gt->uncore, reg, value);
 }
 
+/**
+ * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operation
+ * @gt: GT structure
+ * @reg: the MCR register to read and write
+ * @clear: bits to clear during RMW
+ * @set: bits to set during RMW
+ *
+ * Performs a read-modify-write on an MCR register in a multicast manner.
+ * This operation only makes sense on MCR registers where all instances are
+ * expected to have the same value.  The read will target any non-terminated
+ * instance and the write will be applied to all instances.
+ *
+ * This function assumes the caller is already holding any necessary forcewake
+ * domains; use intel_gt_mcr_multicast_rmw() in cases where forcewake should
+ * be obtained automatically.
+ *
+ * Returns the old (unmodified) value read.
+ */
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
+  u32 clear, u32 set)
+{
+   u32 val = intel_gt_mcr_read_any(gt, reg);
+
+   intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);
+
+   return val;
+}
+
 /*
  * reg_needs_read_steering - determine whether a register read requires
  * explicit steering
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h 
b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 77a8b11c287d..781b267478db 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -24,6 +24,9 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
 void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
 i915_reg_t reg, u32 value);
 
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_reg_t reg,
+  u32 clear, u32 set);
+
 void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
 i915_reg_t reg,
 u8 *group, u8 *instance);
-- 
2.37.3



[PATCH v3 08/14] drm/i915: Define MCR registers explicitly

2022-10-14 Thread Matt Roper
Rather than defining MCR registers with the same _MMIO() macro used for
singleton registers, let's use a new MCR_REG() macro to make it clear
that these registers are special and should be handled accordingly.  For
now MCR_REG() will still generate an i915_reg_t with the given offset,
but we'll change that in future patches.

Bspec: 66673, 66696, 66534, 67609
Signed-off-by: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 134 
 1 file changed, 68 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 890960b56b9e..ad9985015b0e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -8,6 +8,8 @@
 
 #include "i915_reg_defs.h"
 
+#define MCR_REG(offset)_MMIO(offset)
+
 /* RPM unit config (Gen8+) */
 #define RPM_CONFIG0_MMIO(0xd00)
 #define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT3
@@ -333,12 +335,12 @@
 #define GEN7_TLB_RD_ADDR   _MMIO(0x4700)
 
 #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
-#define XEHP_PAT_INDEX(index)  _MMIO(0x4800 + (index) * 4)
+#define XEHP_PAT_INDEX(index)  MCR_REG(0x4800 + (index) * 4)
 
-#define XEHP_TILE0_ADDR_RANGE  _MMIO(0x4900)
+#define XEHP_TILE0_ADDR_RANGE  MCR_REG(0x4900)
 #define   XEHP_TILE_LMEM_RANGE_SHIFT   8
 
-#define XEHP_FLAT_CCS_BASE_ADDR_MMIO(0x4910)
+#define XEHP_FLAT_CCS_BASE_ADDRMCR_REG(0x4910)
 #define   XEHP_CCS_BASE_SHIFT  8
 
 #define GAMTARBMODE_MMIO(0x4a08)
@@ -388,18 +390,18 @@
 #define CHICKEN_RASTER_2   _MMIO(0x6208)
 #define   TBIMR_FAST_CLIP  REG_BIT(5)
 
-#define VFLSKPD_MMIO(0x62a8)
+#define VFLSKPDMCR_REG(0x62a8)
 #define   DIS_OVER_FETCH_CACHE REG_BIT(1)
 #define   DIS_MULT_MISS_RD_SQUASH  REG_BIT(0)
 
 #define GEN12_FF_MODE2 _MMIO(0x6604)
-#define XEHP_FF_MODE2  _MMIO(0x6604)
+#define XEHP_FF_MODE2  MCR_REG(0x6604)
 #define   FF_MODE2_GS_TIMER_MASK   REG_GENMASK(31, 24)
 #define   FF_MODE2_GS_TIMER_224
REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
 #define   FF_MODE2_TDS_TIMER_MASK  REG_GENMASK(23, 16)
 #define   FF_MODE2_TDS_TIMER_128   
REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define XEHPG_INSTDONE_GEOM_SVG_MMIO(0x666c)
+#define XEHPG_INSTDONE_GEOM_SVGMCR_REG(0x666c)
 
 #define CACHE_MODE_0_GEN7  _MMIO(0x7000) /* IVB+ */
 #define   RC_OP_FLUSH_ENABLE   (1 << 0)
@@ -448,14 +450,14 @@
 #define GEN8_HDC_CHICKEN1  _MMIO(0x7304)
 
 #define GEN11_COMMON_SLICE_CHICKEN3_MMIO(0x7304)
-#define XEHP_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304)
 #define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
 #define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLEREG_BIT(12)
 #define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC   REG_BIT(11)
 #define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE   REG_BIT(9)
 
 #define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
-#define XEHP_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c)
 #define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE   REG_BIT(14)
 #define   GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
 
@@ -486,7 +488,7 @@
 
 #define GEN8_RC6_CTX_INFO  _MMIO(0x8504)
 
-#define XEHP_SQCM  _MMIO(0x8724)
+#define XEHP_SQCM  MCR_REG(0x8724)
 #define   EN_32B_ACCESSREG_BIT(30)
 
 #define HSW_IDICR  _MMIO(0x9008)
@@ -647,7 +649,7 @@
 #define GEN7_MISCCPCTL _MMIO(0x9424)
 #define   GEN7_DOP_CLOCK_GATE_ENABLE   (1 << 0)
 
-#define GEN8_MISCCPCTL _MMIO(0x9424)
+#define GEN8_MISCCPCTL MCR_REG(0x9424)
 #define   GEN8_DOP_CLOCK_GATE_ENABLE   REG_BIT(0)
 #define   GEN12_DOP_CLOCK_GATE_RENDER_ENABLE   REG_BIT(1)
 #define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
@@ -703,7 +705,7 @@
 #define   LTCDD_CLKGATE_DISREG_BIT(10)
 
 #define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
-#define XEHP_SLICE_UNIT_LEVEL_CLKGATE  _MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE  MCR_REG(0x94d4)
 #define   SARBUNIT_CLKGATE_DIS (1 << 5)
 #define   RCCUNIT_CLKGATE_DIS  (1 << 7)
 #define  

<    1   2   3   4   5   6   7   8   9   10   >