Re: [Intel-gfx] [PATCH 1/2] drm/fourcc: define Intel Meteorlake related ccs modifiers

2023-05-11 Thread Matt Atwood
On Thu, May 11, 2023 at 01:37:13PM +0300, Juha-Pekka Heikkila wrote:
> Add Tile4 type ccs modifiers with aux buffer needed for MTL
> 
Bspec: 49251, 49252, 49253
> Cc: dri-devel@lists.freedesktop.org
> Cc: Jani Nikula 
Reviewed-by: Matt Atwood 
> Signed-off-by: Juha-Pekka Heikkila 
> ---
>  include/uapi/drm/drm_fourcc.h | 43 +++
>  1 file changed, 43 insertions(+)
> 
> diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
> index de703c6be969..cbe214adf1e4 100644
> --- a/include/uapi/drm/drm_fourcc.h
> +++ b/include/uapi/drm/drm_fourcc.h
> @@ -657,6 +657,49 @@ extern "C" {
>   */
>  #define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12)
>  
> +/*
> + * Intel color control surfaces (CCS) for display ver 14 render compression.
nit: Color Control Surfaces, ver.
> + *
> + * The main surface is tile4 and at plane index 0, the CCS is linear and
> + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
> + * main surface. In other words, 4 bits in CCS map to a main surface cache
> + * line pair. The main surface pitch is required to be a multiple of four
> + * tile4 widths.
> + */
> +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13)
> +
> +/*
> + * Intel color control surfaces (CCS) for display ver 14 media compression
nit: Color Control Surfaces, ver.
> + *
> + * The main surface is tile4 and at plane index 0, the CCS is linear and
> + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in
> + * main surface. In other words, 4 bits in CCS map to a main surface cache
> + * line pair. The main surface pitch is required to be a multiple of four
> + * tile4 widths. For semi-planar formats like NV12, CCS planes follow the
> + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces,
> + * planes 2 and 3 for the respective CCS.
> + */
> +#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14)
> +
> +/*
> + * Intel Color Control Surface with Clear Color (CCS) for display ver 14 
> render
nit: ver.
> + * compression.
> + *
> + * The main surface is tile4 and is at plane index 0 whereas CCS is linear
> + * and at index 1. The clear color is stored at index 2, and the pitch should
> + * be ignored. The clear color structure is 256 bits. The first 128 bits
> + * represents Raw Clear Color Red, Green, Blue and Alpha color each 
> represented
> + * by 32 bits. The raw clear color is consumed by the 3d engine and generates
> + * the converted clear color of size 64 bits. The first 32 bits store the 
> Lower
> + * Converted Clear Color value and the next 32 bits store the Higher 
> Converted
> + * Clear Color value when applicable. The Converted Clear Color values are
> + * consumed by the DE. The last 64 bits are used to store Color Discard 
> Enable
> + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line
> + * corresponds to an area of 4x1 tiles in the main surface. The main surface
> + * pitch is required to be a multiple of 4 tile widths.
> + */
> +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15)
> +
>  /*
>   * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
>   *
> -- 
> 2.25.1
> 


Re: [Intel-gfx] [PATCH] drm/i915/dg2: Return Wa_22012654132 to just specific steppings

2022-12-14 Thread Matt Atwood
On Tue, Dec 13, 2022 at 03:41:19PM -0800, Matt Roper wrote:
> Programming of the ENABLE_PREFETCH_INTO_IC bit originally showed up in
> both the general DG2 tuning guide (applicable to all DG2
> variants/steppings) and under Wa_22012654132 (applicable only to
> specific steppings).  It has now been removed from the tuning guide, and
> the guidance is to only program it in the specific steppings associated
> with the workaround.
> 
> Bspec: 68331
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 ++---
>  1 file changed, 13 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 7d71f5bbddc8..bf84efb3f15f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2913,20 +2913,6 @@ add_render_compute_tuning_settings(struct 
> drm_i915_private *i915,
>   if (IS_DG2(i915)) {
>   wa_mcr_write_or(wal, XEHP_L3SCQREG7, 
> BLEND_FILL_CACHING_OPT_DIS);
>   wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, 
> STACKID_CTRL_512);
> -
> - /*
> -  * This is also listed as Wa_22012654132 for certain DG2
> -  * steppings, but the tuning setting programming is a superset
> -  * since it applies to all DG2 variants and steppings.
> -  *
> -  * Note that register 0xE420 is write-only and cannot be read
> -  * back for verification on DG2 (due to Wa_14012342262), so
> -  * we need to explicitly skip the readback.
> -  */
> - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> -_MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
> -0 /* write-only, so skip validation */,
> -true);
>   }
>  
>   /*
> @@ -3022,6 +3008,19 @@ general_render_compute_wa_init(struct intel_engine_cs 
> *engine, struct i915_wa_li
>   /* Wa_18017747507:dg2 */
>   wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, 
> POLYGON_TRIFAN_LINELOOP_DISABLE);
>   }
> +
> + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || 
> IS_DG2_G11(i915))
> + /*
> +  * Wa_22012654132
> +  *
> +  * Note that register 0xE420 is write-only and cannot be read
> +  * back for verification on DG2 (due to Wa_14012342262), so
> +  * we need to explicitly skip the readback.
> +  */
> + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> +_MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
> +0 /* write-only, so skip validation */,
> +true);
>  }
>  
>  static void
> -- 
> 2.38.1
> 


Re: [Intel-gfx] [PATCH] Revert "drm/i915/dg2: Add preemption changes for Wa_14015141709"

2022-08-29 Thread Matt Atwood
On Fri, Aug 26, 2022 at 02:02:33PM -0700, Matt Roper wrote:
> This reverts commit ca6920811aa5428270dd78af0a7a36b10119065a.
> 
> The intent of Wa_14015141709 was to inform us that userspace can no
> longer control object-level preemption as it has on past platforms
> (i.e., by twiddling register bit CS_CHICKEN1[0]).  The description of
> the workaround in the spec wasn't terribly well-written, and when we
> requested clarification from the hardware teams we were told that on the
> kernel side we should also probably stop setting
> FF_SLICE_CS_CHICKEN1[14], which is the register bit that directs the
> hardware to honor the settings in per-context register CS_CHICKEN1.  It
> turns out that this guidance about FF_SLICE_CS_CHICKEN1[14] was a
> mistake; even though CS_CHICKEN1[0] is non-operational and useless to
> userspace, there are other bits in the register that do still work and
> might need to be adjusted by userspace in the future (e.g., to implement
> other workarounds that show up).  If we don't set
> FF_SLICE_CS_CHICKEN1[14] in i915, then those future workarounds would
> not take effect.
> 
> This miscommunication came to light because another workaround
> (Wa_16013994831) has now shown up that requires userspace to adjust the
> value of CS_CHICKEN1[10] in certain circumstances.  To ensure userspace's
> updates to this chicken bit are handled properly by the hardware, we
> need to make sure that FF_SLICE_CS_CHICKEN1[14] is once again set by the
> kernel.
> 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +-
>  drivers/gpu/drm/i915/i915_drv.h | 3 ---
>  2 files changed, 1 insertion(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 3cdb8294e13f..69a0c6a74474 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2389,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
> struct i915_wa_list *wal)
>FF_DOP_CLOCK_GATE_DISABLE);
>   }
>  
> - if (HAS_PERCTX_PREEMPT_CTRL(i915)) {
> + if (IS_GRAPHICS_VER(i915, 9, 12)) {
>   /* 
> FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
>   wa_masked_en(wal,
>GEN7_FF_SLICE_CS_CHICKEN1,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2b00ef3626db..d6a1ab6f65de 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1352,9 +1352,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  #define HAS_GUC_DEPRIVILEGE(dev_priv) \
>   (INTEL_INFO(dev_priv)->has_guc_deprivilege)
>  
> -#define HAS_PERCTX_PREEMPT_CTRL(i915) \
> - ((GRAPHICS_VER(i915) >= 9) &&  GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
> -
>  #define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
> IS_ALDERLAKE_S(dev_priv))
>  
> -- 
> 2.37.2
> 


Re: [Intel-gfx] [PATCH] drm/i915/ats-m: Add thread execution tuning setting

2022-08-29 Thread Matt Atwood
On Fri, Aug 26, 2022 at 02:27:18PM -0700, Matt Roper wrote:
> On client DG2 platforms, optimal performance is achieved with the
> hardware's default "age based" thread execution setting.  However on
> ATS-M, switching this to "round robin after dependencies" provides
> better performance.  We'll add a new "tuning" feature flag to the ATS-M
> device info to enable/disable this setting.
> 
> Bspec: 68331
> Cc: Lucas De Marchi 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h | 2 ++
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 9 +
>  drivers/gpu/drm/i915/i915_pci.c | 1 +
>  drivers/gpu/drm/i915/intel_device_info.h| 1 +
>  4 files changed, 13 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 94f9ddcfb3a5..d414785003cc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1110,6 +1110,8 @@
>  #define   GEN12_DISABLE_TDL_PUSH REG_BIT(9)
>  #define   GEN11_DIS_PICK_2ND_EU  REG_BIT(7)
>  #define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4)
> +#define   THREAD_EX_ARB_MODE REG_GENMASK(3, 2)
> +#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP
> REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
>  
>  #define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
>  #define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 3cdb8294e13f..ff8c3735abc9 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct 
> drm_i915_private *i915,
>  0 /* write-only, so skip validation */,
>  true);
>   }
> +
> + /*
> +  * This tuning setting proves beneficial only on ATS-M designs; the
> +  * default "age based" setting is optimal on regular DG2 and other
> +  * platforms.
> +  */
> + if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
> + wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
> + THREAD_EX_ARB_MODE_RR_AFTER_DEP);
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 857e8bb6865c..26b25d9434d6 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1080,6 +1080,7 @@ static const struct intel_device_info ats_m_info = {
>   DG2_FEATURES,
>   .display = { 0 },
>   .require_force_probe = 1,
> + .tuning_thread_rr_after_dep = 1,
>  };
>  
>  #define XE_HPC_FEATURES \
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
> b/drivers/gpu/drm/i915/intel_device_info.h
> index 0ccde94b225f..6904ad03ca19 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -171,6 +171,7 @@ enum intel_ppgtt_type {
>   func(has_runtime_pm); \
>   func(has_snoop); \
>   func(has_coherent_ggtt); \
> + func(tuning_thread_rr_after_dep); \
>   func(unfenced_needs_alignment); \
>   func(hws_needs_physical);
>  
> -- 
> 2.37.2
> 


Re: [Intel-gfx] [PATCH] drm/i915/gt: Add general DSS steering iterator to intel_gt_mcr

2022-07-08 Thread Matt Atwood
On Fri, Jul 01, 2022 at 04:20:06PM -0700, Matt Roper wrote:
> Although all DSS belong to a single pool on Xe_HP platforms (i.e.,
> they're not organized into slices from a topology point of view), we do
> still need to pass 'group' and 'instance' targets when steering register
> accesses to a specific instance of a per-DSS multicast register.  The
> rules for how to determine group and instance IDs (which previously used
> legacy terms "slice" and "subslice") vary by platform.  Some platforms
> determine steering by gslice membership, some platforms by cslice
> membership, and future platforms may have other rules.
> 
> Since looping over each DSS and performing steered unicast register
> accesses is a relatively common pattern, let's add a dedicated iteration
> macro to handle this (and replace the platform-specific "instdone" loop
> we were using previously).  This will avoid the calling code needing to
> figure out the details about how to obtain steering IDs for a specific
> DSS.
> 
> Most of the places where we use this new loop are in the GPU errorstate
> code at the moment, but we do have some additional features coming in
> the future that will also need to loop over each DSS and steer some
> register accesses accordingly.
> 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 34 ++-
>  drivers/gpu/drm/i915/gt/intel_engine_types.h  | 22 
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.c| 25 ++
>  drivers/gpu/drm/i915/gt/intel_gt_mcr.h| 24 +
>  .../gpu/drm/i915/gt/uc/intel_guc_capture.c| 13 ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 32 ++---
>  6 files changed, 75 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 283870c65991..37fa813af766 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct 
> intel_engine_cs *engine,
>  struct intel_instdone *instdone)
>  {
>   struct drm_i915_private *i915 = engine->i915;
> - const struct sseu_dev_info *sseu = &engine->gt->info.sseu;
>   struct intel_uncore *uncore = engine->uncore;
>   u32 mmio_base = engine->mmio_base;
>   int slice;
> @@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct 
> intel_engine_cs *engine,
>   intel_uncore_read(uncore, 
> GEN12_SC_INSTDONE_EXTRA2);
>   }
>  
> - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
> - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, 
> slice, subslice) {
> - instdone->sampler[slice][subslice] =
> - intel_gt_mcr_read(engine->gt,
> -   GEN7_SAMPLER_INSTDONE,
> -   slice, subslice);
> - instdone->row[slice][subslice] =
> - intel_gt_mcr_read(engine->gt,
> -   GEN7_ROW_INSTDONE,
> -   slice, subslice);
> - }
> - } else {
> - for_each_instdone_slice_subslice(i915, sseu, slice, 
> subslice) {
> - instdone->sampler[slice][subslice] =
> - intel_gt_mcr_read(engine->gt,
> -   GEN7_SAMPLER_INSTDONE,
> -   slice, subslice);
> - instdone->row[slice][subslice] =
> - intel_gt_mcr_read(engine->gt,
> -   GEN7_ROW_INSTDONE,
> -   slice, subslice);
> - }
> + for_each_ss_steering(iter, engine->gt, slice, subslice) {
> + instdone->sampler[slice][subslice] =
> + intel_gt_mcr_read(engine->gt,
> +   GEN7_SAMPLER_INSTDONE,
> +   slice, subslice);
> + instdone->row[slice][subslice] =
> + intel_gt_mcr_read(engine->gt,
> +   GEN7_ROW_INS

Re: [Intel-gfx] [PATCH 2/2] drm/i915/pvc: Add initial PVC workarounds

2022-05-31 Thread Matt Atwood
On Fri, May 27, 2022 at 09:33:48AM -0700, Matt Roper wrote:
> From: Stuart Summers 
> 
> Bspec: 64027
Reviewed-by: Matt Atwood 
> Signed-off-by: Stuart Summers 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_regs.h |  5 +-
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h |  3 +-
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 61 ++---
>  drivers/gpu/drm/i915/intel_pm.c | 16 +-
>  4 files changed, 73 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> index 75a0c55c5aa5..44de10cf7837 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> @@ -196,6 +196,7 @@
>  #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ 
> */
>  #define RING_PREDICATE_RESULT(base)  _MMIO((base) + 0x3b8)
>  #define RING_FORCE_TO_NONPRIV(base, i)   _MMIO(((base) + 0x4D0) 
> + (i) * 4)
> +#define   RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
>  #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
>  #define   RING_FORCE_TO_NONPRIV_ACCESS_RW (0 << 28) /* CFL+ & Gen11+ */
>  #define   RING_FORCE_TO_NONPRIV_ACCESS_RD (1 << 28)
> @@ -208,7 +209,9 @@
>  #define   RING_FORCE_TO_NONPRIV_RANGE_64 (3 << 0)
>  #define   RING_FORCE_TO_NONPRIV_RANGE_MASK   (3 << 0)
>  #define   RING_FORCE_TO_NONPRIV_MASK_VALID   \
> - (RING_FORCE_TO_NONPRIV_RANGE_MASK | RING_FORCE_TO_NONPRIV_ACCESS_MASK)
> + (RING_FORCE_TO_NONPRIV_RANGE_MASK | \
> +  RING_FORCE_TO_NONPRIV_ACCESS_MASK | \
> +  RING_FORCE_TO_NONPRIV_DENY)
>  #define   RING_MAX_NONPRIV_SLOTS  12
>  
>  #define RING_EXECLIST_SQ_CONTENTS(base)  _MMIO((base) + 0x510)
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index b4642dcc192f..58e9b464d564 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1070,8 +1070,9 @@
>  
>  #define GEN10_CACHE_MODE_SS  _MMIO(0xe420)
>  #define   ENABLE_EU_COUNT_FOR_TDL_FLUSH  REG_BIT(10)
> -#define   ENABLE_PREFETCH_INTO_IC REG_BIT(3)
> +#define   DISABLE_ECC REG_BIT(5)
>  #define   FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
> +#define   ENABLE_PREFETCH_INTO_IC REG_BIT(3)
>  
>  #define EU_PERF_CNTL0 _MMIO(0xe458)
>  #define EU_PERF_CNTL4 _MMIO(0xe45c)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 73b59ea6fd3b..a604bc7c0701 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -776,7 +776,9 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
>   if (engine->class != RENDER_CLASS)
>   goto done;
>  
> - if (IS_DG2(i915))
> + if (IS_PONTEVECCHIO(i915))
> + ; /* noop; none at this time */
> + else if (IS_DG2(i915))
>   dg2_ctx_workarounds_init(engine, wal);
>   else if (IS_XEHPSDV(i915))
>   ; /* noop; none at this time */
> @@ -1494,7 +1496,9 @@ gt_init_workarounds(struct intel_gt *gt, struct 
> i915_wa_list *wal)
>  {
>   struct drm_i915_private *i915 = gt->i915;
>  
> - if (IS_DG2(i915))
> + if (IS_PONTEVECCHIO(i915))
> + ; /* none yet */
> + else if (IS_DG2(i915))
>   dg2_gt_workarounds_init(gt, wal);
>   else if (IS_XEHPSDV(i915))
>   xehpsdv_gt_workarounds_init(gt, wal);
> @@ -1924,6 +1928,32 @@ static void dg2_whitelist_build(struct intel_engine_cs 
> *engine)
>   }
>  }
>  
> +static void blacklist_trtt(struct intel_engine_cs *engine)
> +{
> + struct i915_wa_list *w = &engine->whitelist;
> +
> + /*
> +  * Prevent read/write access to [0x4400, 0x4600) which covers
> +  * the TRTT range across all engines. Note that normally userspace
> +  * cannot access the other engines' trtt control, but for simplicity
> +  * we cover the entire range on each engine.
> +  */
> + whitelist_reg_ext(w, _MMIO(0x4400),
> +   RING_FORCE_TO_NONPRIV_DENY |
> +   RING_FORCE_TO_NONPRIV_RANGE_64);
> + whitelist_reg_ext(w, _MMIO(0x4500),
> +   RING_FORCE_TO_NONPRIV_DENY |
> +   RING_FORCE_TO_NONPRIV_RANGE_64);
> +}
> +
> +static void pvc_whitelist_build(struct intel_engine_cs *engine)
> +{
> + allow_rea

Re: [Intel-gfx] [PATCH 1/2] drm/i915/pvc: Extract stepping information from PCI revid

2022-05-31 Thread Matt Atwood
On Fri, May 27, 2022 at 09:33:47AM -0700, Matt Roper wrote:
> For PVC, the base die and compute tile have separate stepping values
> that we need to track; we'll use the existing graphics_step field to
> represent the compute tile stepping and add a new 'basedie_step' field.
> 
> Unlike past platforms, steppings for these components are represented by
> specific bitfields within the PCI revision ID, and we shouldn't make
> assumptions about the non-CT, non-BD bits staying 0.  Let's update our
> stepping code accordingly.
> 
> Bspec: 44484
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/i915_drv.h   | 13 ++
>  drivers/gpu/drm/i915/intel_step.c | 70 ++-
>  drivers/gpu/drm/i915/intel_step.h |  4 +-
>  3 files changed, 85 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 74b3caccd839..ec1b3484fdaf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -943,6 +943,7 @@ static inline struct intel_gt *to_gt(struct 
> drm_i915_private *i915)
>  #define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
>  #define INTEL_GRAPHICS_STEP(__i915) 
> (RUNTIME_INFO(__i915)->step.graphics_step)
>  #define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
> +#define INTEL_BASEDIE_STEP(__i915) (RUNTIME_INFO(__i915)->step.basedie_step)
>  
>  #define IS_DISPLAY_STEP(__i915, since, until) \
>   (drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
> @@ -956,6 +957,10 @@ static inline struct intel_gt *to_gt(struct 
> drm_i915_private *i915)
>   (drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
>INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < 
> (until))
>  
> +#define IS_BASEDIE_STEP(__i915, since, until) \
> + (drm_WARN_ON(&(__i915)->drm, INTEL_BASEDIE_STEP(__i915) == STEP_NONE), \
> +  INTEL_BASEDIE_STEP(__i915) >= (since) && INTEL_BASEDIE_STEP(__i915) < 
> (until))
> +
>  static __always_inline unsigned int
>  __platform_mask_index(const struct intel_runtime_info *info,
> enum intel_platform p)
> @@ -1208,6 +1213,14 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>   (IS_DG2(__i915) && \
>IS_DISPLAY_STEP(__i915, since, until))
>  
> +#define IS_PVC_BD_STEP(__i915, since, until) \
> + (IS_PONTEVECCHIO(__i915) && \
> +  IS_BASEDIE_STEP(__i915, since, until))
> +
> +#define IS_PVC_CT_STEP(__i915, since, until) \
> + (IS_PONTEVECCHIO(__i915) && \
> +  IS_GRAPHICS_STEP(__i915, since, until))
> +
>  #define IS_LP(dev_priv)  (INTEL_INFO(dev_priv)->is_lp)
>  #define IS_GEN9_LP(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && IS_LP(dev_priv))
>  #define IS_GEN9_BC(dev_priv) (GRAPHICS_VER(dev_priv) == 9 && 
> !IS_LP(dev_priv))
> diff --git a/drivers/gpu/drm/i915/intel_step.c 
> b/drivers/gpu/drm/i915/intel_step.c
> index 74e8e4680028..42b3133d8387 100644
> --- a/drivers/gpu/drm/i915/intel_step.c
> +++ b/drivers/gpu/drm/i915/intel_step.c
> @@ -135,6 +135,8 @@ static const struct intel_step_info adlp_n_revids[] = {
>   [0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 },
>  };
>  
> +static void pvc_step_init(struct drm_i915_private *i915, int pci_revid);
> +
>  void intel_step_init(struct drm_i915_private *i915)
>  {
>   const struct intel_step_info *revids = NULL;
> @@ -142,7 +144,10 @@ void intel_step_init(struct drm_i915_private *i915)
>   int revid = INTEL_REVID(i915);
>   struct intel_step_info step = {};
>  
> - if (IS_DG2_G10(i915)) {
> + if (IS_PONTEVECCHIO(i915)) {
> + pvc_step_init(i915, revid);
> + return;
> + } else if (IS_DG2_G10(i915)) {
>   revids = dg2_g10_revid_step_tbl;
>   size = ARRAY_SIZE(dg2_g10_revid_step_tbl);
>   } else if (IS_DG2_G11(i915)) {
> @@ -235,6 +240,69 @@ void intel_step_init(struct drm_i915_private *i915)
>   RUNTIME_INFO(i915)->step = step;
>  }
>  
> +#define PVC_BD_REVID GENMASK(5, 3)
> +#define PVC_CT_REVID GENMASK(2, 0)
> +
> +static const int pvc_bd_subids[] = {
> + [0x0] = STEP_A0,
> + [0x3] = STEP_B0,
> + [0x4] = STEP_B1,
> + [0x5] = STEP_B3,
> +};
> +
> +static const int pvc_ct_subids[] = {
> + [0x3] = STEP_A0,
> + [0x5] = STEP_B0,
> + [0x6] = STEP_B1,
> + [0x7] = STEP_C0,
> +};
> +
> +static int
> +pvc_step_lookup(struct drm_i915_private *i915, const char *type,
> + con

Re: [Intel-gfx] [PATCH v3 5/5] drm/i915/guc: XEHPSDV and PVC do not use HuC

2022-05-24 Thread Matt Atwood
On Tue, May 10, 2022 at 11:02:28PM -0700, Matt Roper wrote:
> From: Daniele Ceraolo Spurio 
> 
> Disable HuC loading since it is not used on these platforms.
> 
> Cc: Stuart Summers 
Reviewed-by: Matt Atwood 
> Signed-off-by: Daniele Ceraolo Spurio 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index ecf149c5fdb0..55e1eb8f3612 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -45,6 +45,10 @@ static void uc_expand_default_options(struct intel_uc *uc)
>  
>   /* Default: enable HuC authentication and GuC submission */
>   i915->params.enable_guc = ENABLE_GUC_LOAD_HUC | ENABLE_GUC_SUBMISSION;
> +
> + /* XEHPSDV and PVC do not use HuC */
> + if (IS_XEHPSDV(i915) || IS_PONTEVECCHIO(i915))
> + i915->params.enable_guc &= ~ENABLE_GUC_LOAD_HUC;
>  }
>  
>  /* Reset GuC providing us with fresh state for both GuC and HuC.
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v3 4/5] drm/i915/pvc: Add new BCS engines to GuC engine list

2022-05-24 Thread Matt Atwood
On Tue, May 10, 2022 at 11:02:27PM -0700, Matt Roper wrote:
> Initialize ADS system info to reflect the availability of new BCS engines
> 
> Original-author: CQ Tang
> Cc: Stuart Summers 
> Cc: John Harrison 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +-
>  drivers/gpu/drm/i915/i915_drv.h| 2 ++
>  2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 3eabf4cf8eec..bb197610fd5b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -457,7 +457,7 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
>  {
>   info_map_write(info_map, engine_enabled_masks[GUC_RENDER_CLASS], 
> RCS_MASK(gt));
>   info_map_write(info_map, engine_enabled_masks[GUC_COMPUTE_CLASS], 
> CCS_MASK(gt));
> - info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 1);
> + info_map_write(info_map, engine_enabled_masks[GUC_BLITTER_CLASS], 
> BCS_MASK(gt));
>   info_map_write(info_map, engine_enabled_masks[GUC_VIDEO_CLASS], 
> VDBOX_MASK(gt));
>   info_map_write(info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS], 
> VEBOX_MASK(gt));
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 54e9c2a5493d..4b147fd90ec4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1223,6 +1223,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>  })
>  #define RCS_MASK(gt) \
>   ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
> +#define BCS_MASK(gt) \
> + ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS)
>  #define VDBOX_MASK(gt) \
>   ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
>  #define VEBOX_MASK(gt) \
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v3 3/5] drm/i915/pvc: Remove additional 3D flags from PIPE_CONTROL

2022-05-24 Thread Matt Atwood
On Tue, May 10, 2022 at 11:02:26PM -0700, Matt Roper wrote:
> From: Stuart Summers 
> 
> Although we already strip 3D-specific flags from PIPE_CONTROL
> instructions when submitting to a compute engine, there are some
> additional flags that need to be removed when the platform as a whole
> lacks a 3D pipeline.  Add those restrictions here.
> 
> v2:
>  - Replace LACKS_3D_PIPELINE checks with !HAS_3D_PIPELINE and add
>has_3d_pipeline to all platforms except PVC.  (Lucas)
> 
> Bspec: 47112
> Cc: Lucas De Marchi 
Reviewed-by: Matt Atwood 
> Signed-off-by: Stuart Summers 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 18 --
>  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 15 +--
>  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c  | 10 ++
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  5 files changed, 38 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index daa1a61972f4..98645797962f 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -197,8 +197,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 
> mode)
>  
>   flags |= PIPE_CONTROL_CS_STALL;
>  
> - if (engine->class == COMPUTE_CLASS)
> - flags &= ~PIPE_CONTROL_3D_FLAGS;
> + if (!HAS_3D_PIPELINE(engine->i915))
> + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> + else if (engine->class == COMPUTE_CLASS)
> + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
>  
>   cs = intel_ring_begin(rq, 6);
>   if (IS_ERR(cs))
> @@ -227,8 +229,10 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 
> mode)
>  
>   flags |= PIPE_CONTROL_CS_STALL;
>  
> - if (engine->class == COMPUTE_CLASS)
> - flags &= ~PIPE_CONTROL_3D_FLAGS;
> + if (!HAS_3D_PIPELINE(engine->i915))
> + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> + else if (engine->class == COMPUTE_CLASS)
> + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
>  
>   if (!HAS_FLAT_CCS(rq->engine->i915))
>   count = 8 + 4;
> @@ -717,8 +721,10 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request 
> *rq, u32 *cs)
>   /* Wa_1409600907 */
>   flags |= PIPE_CONTROL_DEPTH_STALL;
>  
> - if (rq->engine->class == COMPUTE_CLASS)
> - flags &= ~PIPE_CONTROL_3D_FLAGS;
> + if (!HAS_3D_PIPELINE(rq->engine->i915))
> + flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
> + else if (rq->engine->class == COMPUTE_CLASS)
> + flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
>  
>   cs = gen12_emit_ggtt_write_rcs(cs,
>  rq->fence.seqno,
> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
> b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> index 556bca3be804..964fe376c7fa 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
> @@ -288,8 +288,11 @@
>  #define   PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
>  #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
>  
> -/* 3D-related flags can't be set on compute engine */
> -#define PIPE_CONTROL_3D_FLAGS (\
> +/*
> + * 3D-related flags that can't be set on _engines_ that lack access to the 3D
> + * pipeline (i.e., CCS engines).
> + */
> +#define PIPE_CONTROL_3D_ENGINE_FLAGS (\
>   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | \
>   PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
>   PIPE_CONTROL_TILE_CACHE_FLUSH | \
> @@ -300,6 +303,14 @@
>   PIPE_CONTROL_VF_CACHE_INVALIDATE | \
>   PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET)
>  
> +/* 3D-related flags that can't be set on _platforms_ that lack a 3D pipeline 
> */
> +#define PIPE_CONTROL_3D_ARCH_FLAGS ( \
> + PIPE_CONTROL_3D_ENGINE_FLAGS | \
> + PIPE_CONTROL_INDIRECT_STATE_DISABLE | \
> + PIPE_CONTROL_FLUSH_ENABLE | \
> + PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | \
> + PIPE_CONTROL_DC_FLUSH_ENABLE)
> +
>  #define MI_MATH(x)   MI_INSTR(0x1a, (x) - 1)
>  #define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | 
> (op2))
>  /* Opcodes for MI_MATH_INSTR */
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/dri

Re: [Intel-gfx] [PATCH v3 2/5] drm/i915/pvc: Add forcewake support

2022-05-24 Thread Matt Atwood
On Tue, May 10, 2022 at 11:02:25PM -0700, Matt Roper wrote:
> Add PVC's forcewake ranges.
> 
> v2:
>  - Drop replicated comment completely; move general cleanup of the
>documentation to a separate patch.
> 
> Bspec: 67609
> Cc: Daniele Ceraolo Spurio 
> Cc: Stuart Summers 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/intel_uncore.c   | 142 +-
>  drivers/gpu/drm/i915/selftests/intel_uncore.c |   2 +
>  2 files changed, 143 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index 095e071e4053..fac0ff60bfbf 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1076,6 +1076,45 @@ static const struct i915_range dg2_shadowed_regs[] = {
>   { .start = 0x1F8510, .end = 0x1F8550 },
>  };
>  
> +static const struct i915_range pvc_shadowed_regs[] = {
> + { .start =   0x2030, .end =   0x2030 },
> + { .start =   0x2510, .end =   0x2550 },
> + { .start =   0xA008, .end =   0xA00C },
> + { .start =   0xA188, .end =   0xA188 },
> + { .start =   0xA278, .end =   0xA278 },
> + { .start =   0xA540, .end =   0xA56C },
> + { .start =   0xC4C8, .end =   0xC4C8 },
> + { .start =   0xC4E0, .end =   0xC4E0 },
> + { .start =   0xC600, .end =   0xC600 },
> + { .start =   0xC658, .end =   0xC658 },
> + { .start =  0x22030, .end =  0x22030 },
> + { .start =  0x22510, .end =  0x22550 },
> + { .start = 0x1C0030, .end = 0x1C0030 },
> + { .start = 0x1C0510, .end = 0x1C0550 },
> + { .start = 0x1C4030, .end = 0x1C4030 },
> + { .start = 0x1C4510, .end = 0x1C4550 },
> + { .start = 0x1C8030, .end = 0x1C8030 },
> + { .start = 0x1C8510, .end = 0x1C8550 },
> + { .start = 0x1D0030, .end = 0x1D0030 },
> + { .start = 0x1D0510, .end = 0x1D0550 },
> + { .start = 0x1D4030, .end = 0x1D4030 },
> + { .start = 0x1D4510, .end = 0x1D4550 },
> + { .start = 0x1D8030, .end = 0x1D8030 },
> + { .start = 0x1D8510, .end = 0x1D8550 },
> + { .start = 0x1E0030, .end = 0x1E0030 },
> + { .start = 0x1E0510, .end = 0x1E0550 },
> + { .start = 0x1E4030, .end = 0x1E4030 },
> + { .start = 0x1E4510, .end = 0x1E4550 },
> + { .start = 0x1E8030, .end = 0x1E8030 },
> + { .start = 0x1E8510, .end = 0x1E8550 },
> + { .start = 0x1F0030, .end = 0x1F0030 },
> + { .start = 0x1F0510, .end = 0x1F0550 },
> + { .start = 0x1F4030, .end = 0x1F4030 },
> + { .start = 0x1F4510, .end = 0x1F4550 },
> + { .start = 0x1F8030, .end = 0x1F8030 },
> + { .start = 0x1F8510, .end = 0x1F8550 },
> +};
> +
>  static int mmio_range_cmp(u32 key, const struct i915_range *range)
>  {
>   if (key < range->start)
> @@ -1525,6 +1564,103 @@ static const struct intel_forcewake_range 
> __dg2_fw_ranges[] = {
>   XEHP_FWRANGES(FORCEWAKE_RENDER)
>  };
>  
> +static const struct intel_forcewake_range __pvc_fw_ranges[] = {
> + GEN_FW_RANGE(0x0, 0xaff, 0),
> + GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT),
> + GEN_FW_RANGE(0xc00, 0xfff, 0),
> + GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT),
> + GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
> + GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT),
> + GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
> + GEN_FW_RANGE(0x4000, 0x813f, FORCEWAKE_GT), /*
> + 0x4000 - 0x4aff: gt
> + 0x4b00 - 0x4fff: reserved
> + 0x5000 - 0x51ff: gt
> + 0x5200 - 0x52ff: reserved
> + 0x5300 - 0x53ff: gt
> + 0x5400 - 0x7fff: reserved
> + 0x8000 - 0x813f: gt */
> + GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER),
> + GEN_FW_RANGE(0x8180, 0x81ff, 0),
> + GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /*
> + 0x8200 - 0x82ff: gt
> + 0x8300 - 0x84ff: reserved
> + 0x8500 - 0x887f: gt
> + 0x8880 - 0x8a7f: reserved
> + 0x8a80 - 0x8aff: gt
> + 0x8b00 - 0x8fff: reserved
> + 0x9000 - 0x947f: gt
> + 0x9480 - 0x94cf: reserved */
> + GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER),
> + GEN_FW_RANGE(0x9560, 0x967f, 0), /*
> + 0x9560 - 0x95ff: always on
> + 0x9600 - 0x967f: reserved */
> + GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /*
> + 0x9680 - 0x96ff: render
> + 0x9700 - 0x97ff: reserved */
> + GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /*
> + 0x9800 - 0xb4ff: gt
> + 0xb500 - 0xbfff: reserved
> + 0xc000 - 0xcfff: gt */
> 

Re: [Intel-gfx] [PATCH v3 1/5] drm/i915/uncore: Reorganize and document shadow and forcewake tables

2022-05-24 Thread Matt Atwood
On Tue, May 10, 2022 at 11:02:24PM -0700, Matt Roper wrote:
> Let's reorganize some of the forcewake/shadow handling in intel_uncore.c
> and consolidate the cargo-cult comments on each table into more general
> comments that apply to all tables.
> 
> We'll probably move forcewake handling to its own dedicated file in the
> near future and further enhance this with true kerneldoc.  But this is a
> good intermediate step to help clarify the behavior a bit.
> 
> Cc: Stuart Summers 
Reviewed-by: Matt Atwood 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 125 ++--
>  1 file changed, 80 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
> b/drivers/gpu/drm/i915/intel_uncore.c
> index 83517a703eb6..095e071e4053 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -938,36 +938,32 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset)
>   return entry->domains;
>  }
>  
> -#define GEN_FW_RANGE(s, e, d) \
> - { .start = (s), .end = (e), .domains = (d) }
> -
> -/* *Must* be sorted by offset ranges! See intel_fw_table_check(). */
> -static const struct intel_forcewake_range __vlv_fw_ranges[] = {
> - GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER),
> - GEN_FW_RANGE(0x5000, 0x7fff, FORCEWAKE_RENDER),
> - GEN_FW_RANGE(0xb000, 0x11fff, FORCEWAKE_RENDER),
> - GEN_FW_RANGE(0x12000, 0x13fff, FORCEWAKE_MEDIA),
> - GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_MEDIA),
> - GEN_FW_RANGE(0x2e000, 0x2ffff, FORCEWAKE_RENDER),
> - GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_MEDIA),
> -};
> -
> -#define __fwtable_reg_read_fw_domains(uncore, offset) \
> -({ \
> - enum forcewake_domains __fwd = 0; \
> - if (NEEDS_FORCE_WAKE((offset))) \
> - __fwd = find_fw_domain(uncore, offset); \
> - __fwd; \
> -})
> +/*
> + * Shadowed register tables describe special register ranges that i915 is
> + * allowed to write to without acquiring forcewake.  If these registers' 
> power
> + * wells are down, the hardware will save values written by i915 to a shadow
> + * copy and automatically transfer them into the real register the next time
> + * the power well is woken up.  Shadowing only applies to writes; forcewake
> + * must still be acquired when reading from registers in these ranges.
> + *
> + * The documentation for shadowed registers is somewhat spotty on older
> + * platforms.  However missing registers from these lists is non-fatal; it 
> just
> + * means we'll wake up the hardware for some register accesses where we 
> didn't
> + * really need to.
> + *
> + * The ranges listed in these tables must be sorted by offset.
> + *
> + * When adding new tables here, please also add them to
> + * intel_shadow_table_check() in selftests/intel_uncore.c so that they will 
> be
> + * scanned for obvious mistakes or typos by the selftests.
> + */
>  
> -/* *Must* be sorted by offset! See intel_shadow_table_check(). */
>  static const struct i915_range gen8_shadowed_regs[] = {
>   { .start =  0x2030, .end =  0x2030 },
>   { .start =  0xA008, .end =  0xA00C },
>   { .start = 0x12030, .end = 0x12030 },
>   { .start = 0x1a030, .end = 0x1a030 },
>   { .start = 0x22030, .end = 0x22030 },
> - /* TODO: Other registers are not yet used */
>  };
>  
>  static const struct i915_range gen11_shadowed_regs[] = {
> @@ -1107,11 +1103,71 @@ gen6_reg_write_fw_domains(struct intel_uncore 
> *uncore, i915_reg_t reg)
>   return FORCEWAKE_RENDER;
>  }
>  
> +#define __fwtable_reg_read_fw_domains(uncore, offset) \
> +({ \
> + enum forcewake_domains __fwd = 0; \
> + if (NEEDS_FORCE_WAKE((offset))) \
> + __fwd = find_fw_domain(uncore, offset); \
> + __fwd; \
> +})
> +
> +#define __fwtable_reg_write_fw_domains(uncore, offset) \
> +({ \
> + enum forcewake_domains __fwd = 0; \
> + const u32 __offset = (offset); \
> + if (NEEDS_FORCE_WAKE((__offset)) && !is_shadowed(uncore, __offset)) \
> + __fwd = find_fw_domain(uncore, __offset); \
> + __fwd; \
> +})
> +
> +#define GEN_FW_RANGE(s, e, d) \
> + { .start = (s), .end = (e), .domains = (d) }
> +
> +/*
> + * All platforms' forcewake tables below must be sorted by offset ranges.
> + * Furthermore, new forcewake tables added should be "watertight" and have
> + * no gaps between ranges.
> + *
> + * When there are multiple consecutive ranges listed in the bspec with
> + * the same forcewake domain, it is customary to combine them into a single
> + * row in the tables below to keep the tables small and lookups f

[PATCH v4 RFC] drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

2022-03-30 Thread Matt Atwood
Newer platforms have DSS that aren't necessarily available for both
geometry and compute, two queries will need to exist. This introduces
the first, when passing a valid engine class and engine instance in the
flags returns a topology describing geometry.

v2: fix white space errors
v3: change flags from hosting 2 8 bit numbers to holding a
i915_engine_class_instance struct
v4: add error if non rcs engine passed.

Cc: Ashutosh Dixit 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143

Signed-off-by: Matt Atwood 
---
 drivers/gpu/drm/i915/i915_query.c | 71 ++-
 include/uapi/drm/i915_drm.h   | 26 +++
 2 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index b5ca00cb6cf6..32be84c95956 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_query.h"
+#include "gt/intel_engine_user.h"
 #include <uapi/drm/i915_drm.h>
 
 static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
return 0;
 }
 
-static int query_topology_info(struct drm_i915_private *dev_priv,
-  struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+ struct drm_i915_query_item *query_item,
+ const u8 *subslice_mask)
 {
-   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
 
-   if (query_item->flags != 0)
-   return -EINVAL;
+   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
if (sseu->max_slices == 0)
return -ENODEV;
 
-   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices * sseu->ss_stride;
eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
   eu_length;
 
-   ret = copy_query_item(&topo, sizeof(topo), total_length,
- query_item);
+   ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
if (ret != 0)
return ret;
 
-   if (topo.flags != 0)
-   return -EINVAL;
-
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,64 @@ static int query_topology_info(struct drm_i915_private 
*dev_priv,
topo.eu_stride = sseu->eu_stride;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-  &topo, sizeof(topo)))
+&topo, sizeof(topo)))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-  &sseu->slice_mask, slice_length))
+&sseu->slice_mask, slice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) + slice_length),
-  sseu->subslice_mask, subslice_length))
+sizeof(topo) + slice_length),
+subslice_mask, subslice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) +
-  slice_length + subslice_length),
-  sseu->eu_mask, eu_length))
+sizeof(topo) +
+slice_length + subslice_length),
+sseu->eu_mask, eu_length))
return -EFAULT;
 
return total_length;
 }
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+  struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+   if (query_item->flags != 0)
+   return -EINVAL;
+
+   return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+   struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu;
+   struct intel_engine_cs *engine;
+   struct i915_engine_class_instance classinstance;
+
+   if (GRAPHICS_VER_FULL(i915) <

[PATCH] drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

2022-03-16 Thread Matt Atwood
Newer platforms have DSS that aren't necessarily available for both
geometry and compute, two queries will need to exist. This introduces
the first, when passing a valid engine class and engine instance in the
flags returns a topology describing geometry.

v2: fix white space errors
v3: change flags from hosting 2 8 bit numbers to holding a
i915_engine_class_instance struct

Cc: Ashutosh Dixit 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
Signed-off-by: Matt Atwood 
---
 drivers/gpu/drm/i915/i915_query.c | 68 ++-
 include/uapi/drm/i915_drm.h   | 24 +++
 2 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..fcb374201edb 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_query.h"
+#include "gt/intel_engine_user.h"
 #include <uapi/drm/i915_drm.h>
 
 static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
return 0;
 }
 
-static int query_topology_info(struct drm_i915_private *dev_priv,
-  struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+ struct drm_i915_query_item *query_item,
+ const u8 *subslice_mask)
 {
-   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
 
-   if (query_item->flags != 0)
-   return -EINVAL;
+   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
if (sseu->max_slices == 0)
return -ENODEV;
 
-   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices * sseu->ss_stride;
eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
   eu_length;
 
-   ret = copy_query_item(&topo, sizeof(topo), total_length,
- query_item);
+   ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
if (ret != 0)
return ret;
 
-   if (topo.flags != 0)
-   return -EINVAL;
-
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private 
*dev_priv,
topo.eu_stride = sseu->eu_stride;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-  &topo, sizeof(topo)))
+&topo, sizeof(topo)))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-  &sseu->slice_mask, slice_length))
+&sseu->slice_mask, slice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) + slice_length),
-  sseu->subslice_mask, subslice_length))
+sizeof(topo) + slice_length),
+subslice_mask, subslice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) +
-  slice_length + subslice_length),
-  sseu->eu_mask, eu_length))
+sizeof(topo) +
+slice_length + subslice_length),
+sseu->eu_mask, eu_length))
return -EFAULT;
 
return total_length;
 }
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+  struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+   if (query_item->flags != 0)
+   return -EINVAL;
+
+   return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+   struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu;
+   struct intel_engine_cs *engine;
+   struct i915_engine_class_instance classinstance;
+
+   if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+   

Re: [Intel-gfx] [PATCH] drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

2022-03-11 Thread Matt Atwood
On Thu, Mar 10, 2022 at 12:26:12PM +0000, Tvrtko Ursulin wrote:
> 
> On 10/03/2022 05:18, Matt Atwood wrote:
> > Newer platforms have DSS that aren't necessarily available for both
> > geometry and compute, two queries will need to exist. This introduces
> > the first, when passing a valid engine class and engine instance in the
> > flags returns a topology describing geometry.
> > 
> > v2: fix white space errors
> > 
> > Cc: Ashutosh Dixit 
> > Cc: Matt Roper 
> > Cc: Joonas Lahtinen 
> > UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
> > Signed-off-by: Matt Atwood 
> > ---
> >   drivers/gpu/drm/i915/i915_query.c | 68 ++-
> >   include/uapi/drm/i915_drm.h   | 24 +++
> >   2 files changed, 65 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_query.c 
> > b/drivers/gpu/drm/i915/i915_query.c
> > index 2dfbc22857a3..e4f35da28642 100644
> > --- a/drivers/gpu/drm/i915/i915_query.c
> > +++ b/drivers/gpu/drm/i915/i915_query.c
> > @@ -9,6 +9,7 @@
> >   #include "i915_drv.h"
> >   #include "i915_perf.h"
> >   #include "i915_query.h"
> > +#include "gt/intel_engine_user.h"
> >   #include <uapi/drm/i915_drm.h>
> >   static int copy_query_item(void *query_hdr, size_t query_sz,
> > @@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t 
> > query_sz,
> > return 0;
> >   }
> > -static int query_topology_info(struct drm_i915_private *dev_priv,
> > -  struct drm_i915_query_item *query_item)
> > +static int fill_topology_info(const struct sseu_dev_info *sseu,
> > + struct drm_i915_query_item *query_item,
> > + const u8 *subslice_mask)
> >   {
> > -   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
> > struct drm_i915_query_topology_info topo;
> > u32 slice_length, subslice_length, eu_length, total_length;
> > int ret;
> > -   if (query_item->flags != 0)
> > -   return -EINVAL;
> > +   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> > if (sseu->max_slices == 0)
> > return -ENODEV;
> > -   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
> > -
> > slice_length = sizeof(sseu->slice_mask);
> > subslice_length = sseu->max_slices * sseu->ss_stride;
> > eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
> > total_length = sizeof(topo) + slice_length + subslice_length +
> >eu_length;
> > -   ret = copy_query_item(&topo, sizeof(topo), total_length,
> > - query_item);
> > +   ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
> > +
> > if (ret != 0)
> > return ret;
> > -   if (topo.flags != 0)
> > -   return -EINVAL;
> > -
> > memset(&topo, 0, sizeof(topo));
> > topo.max_slices = sseu->max_slices;
> > topo.max_subslices = sseu->max_subslices;
> > @@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private 
> > *dev_priv,
> > topo.eu_stride = sseu->eu_stride;
> > if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
> > -  &topo, sizeof(topo)))
> > +&topo, sizeof(topo)))
> > return -EFAULT;
> > if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
> > -  &sseu->slice_mask, slice_length))
> > +&sseu->slice_mask, slice_length))
> > return -EFAULT;
> > if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> > -  sizeof(topo) + slice_length),
> > -  sseu->subslice_mask, subslice_length))
> > +sizeof(topo) + slice_length),
> > +subslice_mask, subslice_length))
> > return -EFAULT;
> > if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
> > -  sizeof(topo) +
> > -  slice_length + subslice_length),
> > -  sseu->eu_mask, eu_length))
> > +sizeof(topo) +
> > +slice_length + subslice_length),
> > +sseu->eu_mask, eu_length))
> > return -EFAULT;
> > return total_lengt

[PATCH] drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

2022-03-09 Thread Matt Atwood
Newer platforms have DSS that aren't necessarily available for both
geometry and compute, two queries will need to exist. This introduces
the first, when passing a valid engine class and engine instance in the
flags returns a topology describing geometry.

v2: fix white space errors

Cc: Ashutosh Dixit 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
Signed-off-by: Matt Atwood 
---
 drivers/gpu/drm/i915/i915_query.c | 68 ++-
 include/uapi/drm/i915_drm.h   | 24 +++
 2 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..e4f35da28642 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_query.h"
+#include "gt/intel_engine_user.h"
 #include <uapi/drm/i915_drm.h>
 
 static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
return 0;
 }
 
-static int query_topology_info(struct drm_i915_private *dev_priv,
-  struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+ struct drm_i915_query_item *query_item,
+ const u8 *subslice_mask)
 {
-   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
 
-   if (query_item->flags != 0)
-   return -EINVAL;
+   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
if (sseu->max_slices == 0)
return -ENODEV;
 
-   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices * sseu->ss_stride;
eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
   eu_length;
 
-   ret = copy_query_item(&topo, sizeof(topo), total_length,
- query_item);
+   ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
if (ret != 0)
return ret;
 
-   if (topo.flags != 0)
-   return -EINVAL;
-
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private 
*dev_priv,
topo.eu_stride = sseu->eu_stride;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-  &topo, sizeof(topo)))
+&topo, sizeof(topo)))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-  &sseu->slice_mask, slice_length))
+&sseu->slice_mask, slice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) + slice_length),
-  sseu->subslice_mask, subslice_length))
+sizeof(topo) + slice_length),
+subslice_mask, subslice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) +
-  slice_length + subslice_length),
-  sseu->eu_mask, eu_length))
+sizeof(topo) +
+slice_length + subslice_length),
+sseu->eu_mask, eu_length))
return -EFAULT;
 
return total_length;
 }
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+  struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+   if (query_item->flags != 0)
+   return -EINVAL;
+
+   return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+   struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu;
+   struct intel_engine_cs *engine;
+   u8 engine_class, engine_instance;
+
+   if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+   return -ENODEV;
+
+   engine_class = query_item->flags & 0xFF;
+   engine_instance = (query_i

[PATCH] drm/i915/uapi: Add DRM_I915_QUERY_GEOMETRY_SUBSLICES

2022-03-09 Thread Matt Atwood
Newer platforms have DSS that aren't necessarily available for both
geometry and compute, two queries will need to exist. This introduces
the first, when passing a valid engine class and engine instance in the
flags returns a topology describing geometry.

Cc: Ashutosh Dixit 
Cc: Matt Roper 
UMD (mesa): https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14143
Signed-off-by: Matt Atwood 
---
 drivers/gpu/drm/i915/i915_query.c | 68 ++-
 include/uapi/drm/i915_drm.h   | 24 +++
 2 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_query.c 
b/drivers/gpu/drm/i915/i915_query.c
index 2dfbc22857a3..0cc2670ae09c 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -9,6 +9,7 @@
 #include "i915_drv.h"
 #include "i915_perf.h"
 #include "i915_query.h"
+#include "gt/intel_engine_user.h"
 #include <uapi/drm/i915_drm.h>
 
 static int copy_query_item(void *query_hdr, size_t query_sz,
@@ -28,36 +29,30 @@ static int copy_query_item(void *query_hdr, size_t query_sz,
return 0;
 }
 
-static int query_topology_info(struct drm_i915_private *dev_priv,
-  struct drm_i915_query_item *query_item)
+static int fill_topology_info(const struct sseu_dev_info *sseu,
+ struct drm_i915_query_item *query_item,
+ const u8 *subslice_mask)
 {
-   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
int ret;
 
-   if (query_item->flags != 0)
-   return -EINVAL;
+   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
 
if (sseu->max_slices == 0)
return -ENODEV;
 
-   BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
slice_length = sizeof(sseu->slice_mask);
subslice_length = sseu->max_slices * sseu->ss_stride;
eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
   eu_length;
 
-   ret = copy_query_item(&topo, sizeof(topo), total_length,
- query_item);
+   ret = copy_query_item(&topo, sizeof(topo), total_length, query_item);
+
if (ret != 0)
return ret;
 
-   if (topo.flags != 0)
-   return -EINVAL;
-
memset(&topo, 0, sizeof(topo));
topo.max_slices = sseu->max_slices;
topo.max_subslices = sseu->max_subslices;
@@ -69,27 +64,61 @@ static int query_topology_info(struct drm_i915_private 
*dev_priv,
topo.eu_stride = sseu->eu_stride;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-  &topo, sizeof(topo)))
+&topo, sizeof(topo)))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-  &sseu->slice_mask, slice_length))
+&sseu->slice_mask, slice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) + slice_length),
-  sseu->subslice_mask, subslice_length))
+sizeof(topo) + slice_length),
+subslice_mask, subslice_length))
return -EFAULT;
 
if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-  sizeof(topo) +
-  slice_length + subslice_length),
-  sseu->eu_mask, eu_length))
+sizeof(topo) +
+slice_length + subslice_length),
+sseu->eu_mask, eu_length))
return -EFAULT;
 
return total_length;
 }
 
+static int query_topology_info(struct drm_i915_private *dev_priv,
+  struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+
+   if (query_item->flags != 0)
+   return -EINVAL;
+
+   return fill_topology_info(sseu, query_item, sseu->subslice_mask);
+}
+
+static int query_geometry_subslices(struct drm_i915_private *i915,
+   struct drm_i915_query_item *query_item)
+{
+   const struct sseu_dev_info *sseu;
+   struct intel_engine_cs *engine;
+   u8 engine_class, engine_instance;
+
+   if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+   return -ENODEV;
+
+   engine_class = query_item->flags & 0xFF;
+   engine_instance = (query_item->flags >>8) & 0xFF;
+
+   engin

Re: [Intel-gfx] [PATCH v2 08/18] drm/i915/guc: Convert engine record to iosys_map

2022-02-15 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:14AM -0800, Lucas De Marchi wrote:
> Use iosys_map to read fields from the dma_blob so access to IO and
> system memory is abstracted away.
> 
> Cc: Matt Roper 
> Cc: Thomas Hellström 
> Cc: Daniel Vetter 
> Cc: John Harrison 
> Cc: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 
Reviewed-by: Matt Atwood
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c  | 14 ++
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h  |  3 ++-
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c   | 17 ++---
>  3 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 6a34ab38b45f..383c5994d4ef 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -695,18 +695,16 @@ void intel_guc_ads_reset(struct intel_guc *guc)
>  
>  u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
>  {
> - struct __guc_ads_blob *blob = guc->ads_blob;
> - u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
> - u32 offset = base + ptr_offset(blob, engine_usage);
> -
> - return offset;
> + return intel_guc_ggtt_offset(guc, guc->ads_vma) +
> + offsetof(struct __guc_ads_blob, engine_usage);
>  }
>  
> -struct guc_engine_usage_record *intel_guc_engine_usage(struct 
> intel_engine_cs *engine)
> +struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs 
> *engine)
>  {
>   struct intel_guc *guc = &engine->gt->uc.guc;
> - struct __guc_ads_blob *blob = guc->ads_blob;
>   u8 guc_class = engine_class_to_guc_class(engine->class);
> + size_t offset = offsetof(struct __guc_ads_blob,
> +  
> engine_usage.engines[guc_class][ilog2(engine->logical_mask)]);
>  
> - return 
> &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
> + return IOSYS_MAP_INIT_OFFSET(&guc->ads_map, offset);
>  }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
> index e74c110facff..1c64f4d6ea21 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
> @@ -7,6 +7,7 @@
>  #define _INTEL_GUC_ADS_H_
>  
>  #include <linux/types.h>
> +#include <linux/iosys-map.h>
>  
>  struct intel_guc;
>  struct drm_printer;
> @@ -18,7 +19,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
>  void intel_guc_ads_reset(struct intel_guc *guc);
>  void intel_guc_ads_print_policy_info(struct intel_guc *guc,
>struct drm_printer *p);
> -struct guc_engine_usage_record *intel_guc_engine_usage(struct 
> intel_engine_cs *engine);
> +struct iosys_map intel_guc_engine_usage_record_map(struct intel_engine_cs 
> *engine);
>  u32 intel_guc_engine_usage_offset(struct intel_guc *guc);
>  
>  #endif
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index b3a429a92c0d..ab3cea352fb3 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1139,6 +1139,9 @@ __extend_last_switch(struct intel_guc *guc, u64 
> *prev_start, u32 new_start)
>   *prev_start = ((u64)gt_stamp_hi << 32) | new_start;
>  }
>  
> +#define record_read(map_, field_) \
> + iosys_map_rd_field(map_, 0, struct guc_engine_usage_record, field_)
> +
>  /*
>   * GuC updates shared memory and KMD reads it. Since this is not 
> synchronized,
>   * we run into a race where the value read is inconsistent. Sometimes the
> @@ -1153,17 +1156,17 @@ __extend_last_switch(struct intel_guc *guc, u64 
> *prev_start, u32 new_start)
>  static void __get_engine_usage_record(struct intel_engine_cs *engine,
> u32 *last_in, u32 *id, u32 *total)
>  {
> - struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
> + struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine);
>   int i = 0;
>  
>   do {
> - *last_in = READ_ONCE(rec->last_switch_in_stamp);
> - *id = READ_ONCE(rec->current_context_index);
> - *total = READ_ONCE(rec->total_runtime);
> + *last_in = record_read(&rec_map, last_switch_in_stamp);
> + *id = record_read(&rec_map, current_context_index);
> + *total = record_read(&rec_map, total_runtime);
>  
> - if (READ_ONCE(rec->last_switch_in_stamp) == *last_in &&
> - READ_ONCE(rec->current_context_index) == *id &&
> - READ_ONCE(rec->total_runtime) == *total)
> + if (record_read(&rec_map, last_switch_in_stamp) == *last_in &&
> + record_read(&rec_map, current_context_index) == *id &&
> + record_read(&rec_map, total_runtime) == *total)
>   break;
>   } while (++i < 6);
>  }
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v2 07/18] drm/i915/guc: Convert policies update to iosys_map

2022-02-10 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:13AM -0800, Lucas De Marchi wrote:
> Use iosys_map to write the policies update so access to IO and system
> memory is abstracted away.
> 
> Cc: Matt Roper 
> Cc: Thomas Hellström 
> Cc: Daniel Vetter 
> Cc: John Harrison 
> Cc: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 
Reviewed-by: Matt Atwood 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 41 --
>  1 file changed, 23 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index b5b3a39f0c28..6a34ab38b45f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -130,33 +130,37 @@ static u32 guc_ads_blob_size(struct intel_guc *guc)
>  guc_ads_private_data_size(guc);
>  }
>  
> -static void guc_policies_init(struct intel_guc *guc, struct guc_policies 
> *policies)
> +static void guc_policies_init(struct intel_guc *guc)
>  {
>   struct intel_gt *gt = guc_to_gt(guc);
>   struct drm_i915_private *i915 = gt->i915;
> + u32 global_flags = 0;
>  
> - policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
> - policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;
> + ads_blob_write(guc, policies.dpc_promote_time,
> +GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
> + ads_blob_write(guc, policies.max_num_work_items,
> +GLOBAL_POLICY_MAX_NUM_WI);
>  
> - policies->global_flags = 0;
>   if (i915->params.reset < 2)
> - policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
> + global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
>  
> - policies->is_valid = 1;
> + ads_blob_write(guc, policies.global_flags, global_flags);
> + ads_blob_write(guc, policies.is_valid, 1);
>  }
>  
>  void intel_guc_ads_print_policy_info(struct intel_guc *guc,
>struct drm_printer *dp)
>  {
> - struct __guc_ads_blob *blob = guc->ads_blob;
> -
> - if (unlikely(!blob))
> + if (unlikely(iosys_map_is_null(&guc->ads_map)))
>   return;
>  
>   drm_printf(dp, "Global scheduling policies:\n");
> - drm_printf(dp, "  DPC promote time   = %u\n", 
> blob->policies.dpc_promote_time);
> - drm_printf(dp, "  Max num work items = %u\n", 
> blob->policies.max_num_work_items);
> - drm_printf(dp, "  Flags  = %u\n", 
> blob->policies.global_flags);
> + drm_printf(dp, "  DPC promote time   = %u\n",
> +ads_blob_read(guc, policies.dpc_promote_time));
> + drm_printf(dp, "  Max num work items = %u\n",
> +ads_blob_read(guc, policies.max_num_work_items));
> + drm_printf(dp, "  Flags  = %u\n",
> +ads_blob_read(guc, policies.global_flags));
>  }
>  
>  static int guc_action_policies_update(struct intel_guc *guc, u32 
> policy_offset)
> @@ -171,23 +175,24 @@ static int guc_action_policies_update(struct intel_guc 
> *guc, u32 policy_offset)
>  
>  int intel_guc_global_policies_update(struct intel_guc *guc)
>  {
> - struct __guc_ads_blob *blob = guc->ads_blob;
>   struct intel_gt *gt = guc_to_gt(guc);
> + u32 scheduler_policies;
>   intel_wakeref_t wakeref;
>   int ret;
>  
> - if (!blob)
> + if (iosys_map_is_null(&guc->ads_map))
>   return -EOPNOTSUPP;
>  
> - GEM_BUG_ON(!blob->ads.scheduler_policies);
> + scheduler_policies = ads_blob_read(guc, ads.scheduler_policies);
> + GEM_BUG_ON(!scheduler_policies);
>  
> - guc_policies_init(guc, &blob->policies);
> + guc_policies_init(guc);
>  
>   if (!intel_guc_is_ready(guc))
>   return 0;
>  
>   with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
> - ret = guc_action_policies_update(guc, 
> blob->ads.scheduler_policies);
> + ret = guc_action_policies_update(guc, scheduler_policies);
>  
>   return ret;
>  }
> @@ -554,7 +559,7 @@ static void __guc_ads_init(struct intel_guc *guc)
>   u32 base;
>  
>   /* GuC scheduling policies */
> - guc_policies_init(guc, &blob->policies);
> + guc_policies_init(guc);
>  
>   /* System info */
>   fill_engine_enable_masks(gt, &blob->system_info);
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v2 05/18] drm/i915/guc: Add read/write helpers for ADS blob

2022-02-10 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:11AM -0800, Lucas De Marchi wrote:
> Add helpers on top of iosys_map_read_field() /
> iosys_map_write_field() functions so they always use the right
> arguments and make code easier to read.
> 
> Cc: Matt Roper 
> Cc: Thomas Hellström 
> Cc: Daniel Vetter 
> Cc: John Harrison 
> Cc: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 
Reviewed-by: Matt Atwood 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 13671b186908..9bf9096b8337 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -60,6 +60,13 @@ struct __guc_ads_blob {
>   struct guc_mmio_reg regset[0];
>  } __packed;
>  
> +#define ads_blob_read(guc_, field_)  \
> + iosys_map_rd_field(&(guc_)->ads_map, 0, struct __guc_ads_blob, field_)
> +
> +#define ads_blob_write(guc_, field_, val_)   \
> + iosys_map_wr_field(&(guc_)->ads_map, 0, struct __guc_ads_blob,  \
> +field_, val_)
> +
>  static u32 guc_ads_regset_size(struct intel_guc *guc)
>  {
>   GEM_BUG_ON(!guc->ads_regset_size);
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v2 04/18] drm/i915/guc: Keep iosys_map of ads_blob around

2022-02-10 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:10AM -0800, Lucas De Marchi wrote:
> Convert intel_guc_ads_create() and initialization to use iosys_map
> rather than plain pointer and save it in the guc struct. This will help
> with additional updates to the ads_blob after the
> creation/initialization by abstracting the IO vs system memory.
> 
> Cc: Matt Roper 
> Cc: Thomas Hellström 
> Cc: Daniel Vetter 
> Cc: John Harrison 
> Cc: Matthew Brost 
> Cc: Daniele Ceraolo Spurio 
Reviewed-by: Matt Atwood 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc.h | 4 +++-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 6 ++
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> index 697d9d66acef..9b9ba79f7594 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
> @@ -6,8 +6,9 @@
>  #ifndef _INTEL_GUC_H_
>  #define _INTEL_GUC_H_
>  
> -#include <linux/xarray.h>
>  #include <linux/delay.h>
> +#include <linux/iosys-map.h>
> +#include <linux/xarray.h>
>  
>  #include "intel_uncore.h"
>  #include "intel_guc_fw.h"
> @@ -148,6 +149,7 @@ struct intel_guc {
>   struct i915_vma *ads_vma;
>   /** @ads_blob: contents of the GuC ADS */
>   struct __guc_ads_blob *ads_blob;
> + struct iosys_map ads_map;
>   /** @ads_regset_size: size of the save/restore regsets in the ADS */
>   u32 ads_regset_size;
>   /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index e61150adcbe9..13671b186908 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -624,6 +624,11 @@ int intel_guc_ads_create(struct intel_guc *guc)
>   if (ret)
>   return ret;
>  
> + if (i915_gem_object_is_lmem(guc->ads_vma->obj))
> + iosys_map_set_vaddr_iomem(&guc->ads_map, (void __iomem 
> *)guc->ads_blob);
> + else
> + iosys_map_set_vaddr(&guc->ads_map, guc->ads_blob);
> +
>   __guc_ads_init(guc);
>  
>   return 0;
> @@ -645,6 +650,7 @@ void intel_guc_ads_destroy(struct intel_guc *guc)
>  {
>   i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
>   guc->ads_blob = NULL;
> + iosys_map_clear(&guc->ads_map);
>  }
>  
>  static void guc_ads_private_data_reset(struct intel_guc *guc)
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v2 03/18] drm/i915/gt: Add helper for shmem copy to iosys_map

2022-02-08 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:09AM -0800, Lucas De Marchi wrote:
> Add a variant of shmem_read() that takes a iosys_map pointer rather
> than a plain pointer as argument. It's mostly a copy __shmem_rw() but
> adapting the api and removing the write support since there's currently
> only need to use iosys_map as destination.
> 
> Reworking __shmem_rw() to share the implementation was tempting, but
> finding a good balance between reuse and clarity pushed towards a little
> code duplication. Since the function is small, just add the similar
> function with a copy/paste/adapt approach.
> 
> v2: Add an offset as argument and instead of using a map iterator, use the
> offset to keep track of where we are writing data to.
> 
> Cc: Matt Roper 
> Cc: Joonas Lahtinen 
> Cc: Tvrtko Ursulin 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Matthew Auld 
> Cc: Thomas Hellström 
> Cc: Maarten Lankhorst 
Reviewed-by: Matt Atwood 
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/gt/shmem_utils.c | 32 +++
>  drivers/gpu/drm/i915/gt/shmem_utils.h |  3 +++
>  2 files changed, 35 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.c 
> b/drivers/gpu/drm/i915/gt/shmem_utils.c
> index 0683b27a3890..402f085f3a02 100644
> --- a/drivers/gpu/drm/i915/gt/shmem_utils.c
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.c
> @@ -3,6 +3,7 @@
>   * Copyright © 2020 Intel Corporation
>   */
>  
> +#include <linux/iosys-map.h>
>  #include <linux/mm.h>
>  #include <linux/pagemap.h>
>  #include <linux/shmem_fs.h>
> @@ -123,6 +124,37 @@ static int __shmem_rw(struct file *file, loff_t off,
>   return 0;
>  }
>  
> +int shmem_read_to_iosys_map(struct file *file, loff_t off,
> + struct iosys_map *map, size_t map_off, size_t len)
> +{
> + unsigned long pfn;
> +
> + for (pfn = off >> PAGE_SHIFT; len; pfn++) {
> + unsigned int this =
> + min_t(size_t, PAGE_SIZE - offset_in_page(off), len);
> + struct page *page;
> + void *vaddr;
> +
> + page = shmem_read_mapping_page_gfp(file->f_mapping, pfn,
> +GFP_KERNEL);
> + if (IS_ERR(page))
> + return PTR_ERR(page);
> +
> + vaddr = kmap(page);
> + iosys_map_memcpy_to(map, map_off, vaddr + offset_in_page(off),
> + this);
> + mark_page_accessed(page);
> + kunmap(page);
> + put_page(page);
> +
> + len -= this;
> + map_off += this;
> + off = 0;
> + }
> +
> + return 0;
> +}
> +
>  int shmem_read(struct file *file, loff_t off, void *dst, size_t len)
>  {
>   return __shmem_rw(file, off, dst, len, false);
> diff --git a/drivers/gpu/drm/i915/gt/shmem_utils.h 
> b/drivers/gpu/drm/i915/gt/shmem_utils.h
> index c1669170c351..b2b04d88c6e5 100644
> --- a/drivers/gpu/drm/i915/gt/shmem_utils.h
> +++ b/drivers/gpu/drm/i915/gt/shmem_utils.h
> @@ -8,6 +8,7 @@
>  
>  #include 
>  
> +struct iosys_map;
>  struct drm_i915_gem_object;
>  struct file;
>  
> @@ -17,6 +18,8 @@ struct file *shmem_create_from_object(struct 
> drm_i915_gem_object *obj);
>  void *shmem_pin_map(struct file *file);
>  void shmem_unpin_map(struct file *file, void *ptr);
>  
> +int shmem_read_to_iosys_map(struct file *file, loff_t off,
> + struct iosys_map *map, size_t map_off, size_t len);
>  int shmem_read(struct file *file, loff_t off, void *dst, size_t len);
>  int shmem_write(struct file *file, loff_t off, void *src, size_t len);
>  
> -- 
> 2.35.1
> 


Re: [Intel-gfx] [PATCH v2 02/18] iosys-map: Add a few more helpers

2022-02-08 Thread Matt Atwood
On Tue, Feb 08, 2022 at 02:45:08AM -0800, Lucas De Marchi wrote:
> First the simplest ones:
> 
>   - iosys_map_memset(): when abstracting system and I/O memory,
> just like the memcpy() use case, memset() also has dedicated
> functions to be called for using IO memory.
>   - iosys_map_memcpy_from(): we may need to copy data from I/O
> memory, not only to.
> 
> In certain situations it's useful to be able to read or write to an
> offset that is calculated by having the memory layout given by a struct
> declaration. Usually we are going to read/write a u8, u16, u32 or u64.
> 
> As a pre-requisite for the implementation, add iosys_map_memcpy_from()
> to be the equivalent of iosys_map_memcpy_to(), but in the other
> direction. Then add 2 pairs of macros:
> 
>   - iosys_map_rd() / iosys_map_wr()
>   - iosys_map_rd_field() / iosys_map_wr_field()
> 
> The first pair takes the C-type and offset to read/write. The second
> pair uses a struct describing the layout of the mapping in order to
> calculate the offset and size being read/written.
> 
> We could use readb, readw, readl, readq and the write* counterparts,
> however due to alignment issues this may not work on all architectures.
> If alignment needs to be checked to call the right function, it's not
> possible to decide at compile-time which function to call: so just leave
> the decision to the memcpy function that will do exactly that.
> 
> Finally, in order to use the above macros with a map derived from
> another, add another initializer: IOSYS_MAP_INIT_OFFSET().
> 
> v2:
>   - Rework IOSYS_MAP_INIT_OFFSET() so it doesn't rely on aliasing rules
> within the union
>   - Add offset to both iosys_map_rd_field() and iosys_map_wr_field() to
> allow the struct itself to be at an offset from the mapping
>   - Add documentation to iosys_map_rd_field() with example and expected
> memory layout
> 
> Cc: Sumit Semwal 
> Cc: Christian König 
> Cc: Thomas Zimmermann 
> Cc: Mauro Carvalho Chehab 
> Cc: dri-devel@lists.freedesktop.org
> Cc: linux-ker...@vger.kernel.org
Reviewed-by: Matt Atwood 
> Signed-off-by: Lucas De Marchi 
> ---
>  include/linux/iosys-map.h | 202 ++
>  1 file changed, 202 insertions(+)
> 
> diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
> index edd730b1e899..c6b223534b21 100644
> --- a/include/linux/iosys-map.h
> +++ b/include/linux/iosys-map.h
> @@ -6,6 +6,7 @@
>  #ifndef __IOSYS_MAP_H__
>  #define __IOSYS_MAP_H__
>  
> +#include 
>  #include 
>  #include 
>  
> @@ -120,6 +121,45 @@ struct iosys_map {
>   .is_iomem = false,  \
>   }
>  
> +/**
> + * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another 
> iosys_map
> + * @map_:The dma-buf mapping structure to copy from
> + * @offset_: Offset to add to the other mapping
> + *
> + * Initializes a new iosys_map struct based on another passed as argument. It
> + * does a shallow copy of the struct so it's possible to update the back 
> storage
> + * without changing where the original map points to. It is the equivalent of
> + * doing:
> + *
> + * .. code-block:: c
> + *
> + *   iosys_map map = other_map;
> + *   iosys_map_incr(&map, &offset);
> + *
> + * Example usage:
> + *
> + * .. code-block:: c
> + *
> + *   void foo(struct device *dev, struct iosys_map *base_map)
> + *   {
> + *   ...
> + *   struct iosys_map map = IOSYS_MAP_INIT_OFFSET(base_map, 
> FIELD_OFFSET);
> + *   ...
> + *   }
> + *
> + * The advantage of using the initializer over just increasing the offset 
> with
> + * iosys_map_incr() like above is that the new map will always point to the
> + * right place of the buffer during its scope. It reduces the risk of 
> updating
> + * the wrong part of the buffer and having no compiler warning about that. If
> + * the assignment to IOSYS_MAP_INIT_OFFSET() is forgotten, the compiler can 
> warn
> + * about the use of uninitialized variable.
> + */
> +#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({  
> \
> + struct iosys_map copy = *map_;  \
> + iosys_map_incr(&copy, offset_); \
> + copy;   \
> +})
> +
>  /**
>   * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in 
> system memory
>   * @map: The iosys_map structure
> @@ -239,6 +279,26 @@ static inline void iosys_map_memcpy_to(struct iosys_map 
> *dst, size_t dst_offset,
>   memcpy(dst->vaddr + dst_offset, src, len);
>  }
>