On Thu, 16 Feb 2023 16:58:50 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> MTL introduces additional OA units dedicated to media use cases. Add
> support for programming these OA units by passing the media engine class
> and instance parameters.
>
> UMD specific changes for GPUvis support:
> https://patchwork.freedesktop.org/patch/522827/?series=114023
> https://patchwork.freedesktop.org/patch/522822/?series=114023
> https://patchwork.freedesktop.org/patch/522826/?series=114023
> https://patchwork.freedesktop.org/patch/522828/?series=114023
> https://patchwork.freedesktop.org/patch/522816/?series=114023
> https://patchwork.freedesktop.org/patch/522825/?series=114023

General comment about the patch in case I miss something out, as I've
mentioned previously in general let's try to replace INTEL_METEORLAKE and
IS_METEORLAKE checks in the patch with:

        if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))

So that we don't have to enumerate each platform individually later.

> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |   2 +
>  drivers/gpu/drm/i915/i915_pci.c          |   1 +
>  drivers/gpu/drm/i915/i915_perf.c         | 247 ++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_perf_oa_regs.h |  78 +++++++
>  drivers/gpu/drm/i915/i915_perf_types.h   |  40 ++++
>  drivers/gpu/drm/i915/intel_device_info.h |   1 +
>  include/uapi/drm/i915_drm.h              |   4 +
>  7 files changed, 347 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0393273faa09..f3cacbf41c86 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -856,6 +856,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
>       (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
>  #define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
>       (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
> +#define HAS_OAM(dev_priv) \
> +     (INTEL_INFO(dev_priv)->has_oam)
>
>  /*
>   * Set this flag, when platform requires 64K GTT page sizes or larger for
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index a8d942b16223..621730b6551c 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -1028,6 +1028,7 @@ static const struct intel_device_info adl_p_info = {
>       .has_mslice_steering = 1, \
>       .has_oa_bpc_reporting = 1, \
>       .has_oa_slice_contrib_limits = 1, \
> +     .has_oam = 1, \
>       .has_rc6 = 1, \
>       .has_reset_engine = 1, \
>       .has_rps = 1, \
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index f028df812067..a57690f4c531 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -192,6 +192,7 @@
>   */
>
>  #include <linux/anon_inodes.h>
> +#include <linux/nospec.h>
>  #include <linux/sizes.h>
>  #include <linux/uuid.h>
>
> @@ -326,6 +327,13 @@ static const struct i915_oa_format 
> oa_formats[I915_OA_FORMAT_MAX] = {
>       [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>       [I915_OAR_FORMAT_A32u40_A4u32_B8_C8]    = { 5, 256 },
>       [I915_OA_FORMAT_A24u40_A14u32_B8_C8]    = { 5, 256 },
> +     [I915_OAM_FORMAT_MPEC8u64_B8_C8]        = { 1, 192, TYPE_OAM, 
> HDR_64_BIT },
> +     [I915_OAM_FORMAT_MPEC8u32_B8_C8]        = { 2, 128, TYPE_OAM, 
> HDR_64_BIT },
> +};
> +
> +/* PERF_GROUP_OAG is unused for oa_base, drop it for mtl */

What does this comment mean?

> +static const u32 mtl_oa_base[] = {
> +     [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000,
>  };
>
>  #define SAMPLE_OA_REPORT      (1<<0)
> @@ -418,11 +426,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo 
> *oa_bo)
>       kfree(oa_bo);
>  }
>
> +static inline const
> +struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream)
> +{
> +     return &stream->oa_buffer.group->regs;

Should just use stream->engine->oa_group->regs, see near the bottom.

> +}
> +
>  static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
>  {
>       struct intel_uncore *uncore = stream->uncore;
>
> -     return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
> +     return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) &
>              GEN12_OAG_OATAILPTR_MASK;
>  }
>
> @@ -886,7 +900,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
> *stream,
>               i915_reg_t oaheadptr;
>
>               oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?

>= 12 ?

> -                         GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR;
> +                         __oa_regs(stream)->oa_head_ptr :
> +                         GEN8_OAHEADPTR;
>
>               spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -939,7 +954,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
>               return -EIO;
>
>       oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?

>= 12 ?

> -                    GEN12_OAG_OASTATUS : GEN8_OASTATUS;
> +                    __oa_regs(stream)->oa_status :
> +                    GEN8_OASTATUS;
>
>       oastatus = intel_uncore_read(uncore, oastatus_reg);
>
> @@ -1643,16 +1659,46 @@ free_noa_wait(struct i915_perf_stream *stream)
>       i915_vma_unpin_and_release(&stream->noa_wait, 0);
>  }
>
> +/*
> + * intel_engine_lookup_user ensures that most of engine specific checks are
> + * taken care of, however, we can run into a case where the OA unit catering 
> to
> + * the engine passed by the user is disabled for some reason. In such cases,
> + * ensure oa unit corresponding to an engine is functional. If there are no
> + * engines in the group, the unit is disabled.
> + */
> +static bool oa_unit_functional(const struct intel_engine_cs *engine)
> +{
> +     return engine->oa_group && engine->oa_group->num_engines;
> +}
> +
>  static bool engine_supports_oa(const struct intel_engine_cs *engine)
>  {
>       enum intel_platform platform = INTEL_INFO(engine->i915)->platform;
>
>       switch (platform) {
> +     case INTEL_METEORLAKE:
> +             return engine->class == RENDER_CLASS ||
> +                    ((engine->class == VIDEO_DECODE_CLASS ||
> +                      engine->class == VIDEO_ENHANCEMENT_CLASS) &&
> +                     engine->gt->type == GT_MEDIA);
>       default:
>               return engine->class == RENDER_CLASS;
>       }

As mentioned in a previous patch, this could just be:

        return engine->oa_group;

Because all these checks have already been done when the perf groups were
initialized so let's use that, as is done for oa_unit_functional.

Though, caution, to return engine->oa_group we'd have to remove
engine_supports_oa from __oa_engine_group, since engine->oa_group is not
yet assigned there. But I think the engine_supports_oa check in
__oa_engine_group is a duplication and should be removed.

>  }
>
> +static bool engine_class_supports_oa_format(struct intel_engine_cs *engine, 
> int type)
> +{
> +     switch (engine->class) {
> +     case RENDER_CLASS:
> +             return type == TYPE_OAG;
> +     case VIDEO_DECODE_CLASS:
> +     case VIDEO_ENHANCEMENT_CLASS:
> +             return type == TYPE_OAM;
> +     default:
> +             return false;
> +     }
> +}
> +

Again, how about:

        return engine->oa_group && engine->oa_group->type == type;

Otherwise as mentioned below oa_group->type is unused and also incorrectly
assigned at present. The format type and group types are the same
(TYPE_OAG/TYPE_OAM). Can name the function engine_supports_oa_format.

>  static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
>  {
>       struct i915_perf *perf = stream->perf;
> @@ -1680,7 +1726,7 @@ static void i915_oa_stream_destroy(struct 
> i915_perf_stream *stream)
>               drm_WARN_ON(&gt->i915->drm,
>                           intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));
>
> -     intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
> +     intel_uncore_forcewake_put(stream->uncore, g->fw_domains);
>       intel_engine_pm_put(stream->engine);
>
>       if (stream->ctx)
> @@ -1804,8 +1850,8 @@ static void gen12_init_oa_buffer(struct 
> i915_perf_stream *stream)
>
>       spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> -     intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0);
> -     intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR,
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0);
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr,
>                          gtt_offset & GEN12_OAG_OAHEADPTR_MASK);
>       stream->oa_buffer.head = gtt_offset;
>
> @@ -1817,9 +1863,9 @@ static void gen12_init_oa_buffer(struct 
> i915_perf_stream *stream)
>        *  to enable proper functionality of the overflow
>        *  bit."
>        */
> -     intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset |
>                          OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
> -     intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr,
>                          gtt_offset & GEN12_OAG_OATAILPTR_MASK);
>
>       /* Mark that we need updated tail pointers to read from... */
> @@ -2579,7 +2625,8 @@ gen8_modify_self(struct intel_context *ce,
>       return err;
>  }
>
> -static int gen8_configure_context(struct i915_gem_context *ctx,
> +static int gen8_configure_context(struct i915_perf_stream *stream,
> +                               struct i915_gem_context *ctx,
>                                 struct flex *flex, unsigned int count)
>  {
>       struct i915_gem_engines_iter it;
> @@ -2589,7 +2636,8 @@ static int gen8_configure_context(struct 
> i915_gem_context *ctx,
>       for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
>               GEM_BUG_ON(ce == ce->engine->kernel_context);
>
> -             if (!engine_supports_oa(ce->engine))
> +             if (!engine_supports_oa(ce->engine) ||
> +                 ce->engine->class != stream->engine->class)
>                       continue;
>
>               /* Otherwise OA settings will be set upon first use */
> @@ -2720,7 +2768,7 @@ oa_configure_all_contexts(struct i915_perf_stream 
> *stream,
>
>               spin_unlock(&i915->gem.contexts.lock);
>
> -             err = gen8_configure_context(ctx, regs, num_regs);
> +             err = gen8_configure_context(stream, ctx, regs, num_regs);
>               if (err) {
>                       i915_gem_context_put(ctx);
>                       return err;
> @@ -2740,7 +2788,8 @@ oa_configure_all_contexts(struct i915_perf_stream 
> *stream,
>       for_each_uabi_engine(engine, i915) {
>               struct intel_context *ce = engine->kernel_context;
>
> -             if (!engine_supports_oa(ce->engine))
> +             if (!engine_supports_oa(ce->engine) ||
> +                 ce->engine->class != stream->engine->class)
>                       continue;
>
>               regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu);
> @@ -2765,6 +2814,9 @@ gen12_configure_all_contexts(struct i915_perf_stream 
> *stream,
>               },
>       };
>
> +     if (stream->engine->class != RENDER_CLASS)
> +             return 0;

OK, this is for render, nothing equivalent needed for media?

> +
>       return oa_configure_all_contexts(stream,
>                                        regs, ARRAY_SIZE(regs),
>                                        active);
> @@ -2894,7 +2946,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>                                  
> _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
>       }
>
> -     intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_debug,
>                          /* Disable clk ratio reports, like previous Gens. */
>                          
> _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
>                                             
> GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) |
> @@ -2904,7 +2956,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
>                           */
>                          oag_report_ctx_switches(stream));
>
> -     intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ?
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ?
>                          (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME |
>                           GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE |
>                           (period_exponent << 
> GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
> @@ -3058,8 +3110,8 @@ static void gen8_oa_enable(struct i915_perf_stream 
> *stream)
>
>  static void gen12_oa_enable(struct i915_perf_stream *stream)
>  {
> -     struct intel_uncore *uncore = stream->uncore;
> -     u32 report_format = stream->oa_buffer.format->format;
> +     const struct i915_perf_regs *regs;
> +     u32 val;
>
>       /*
>        * If we don't want OA reports from the OA buffer, then we don't even
> @@ -3070,9 +3122,11 @@ static void gen12_oa_enable(struct i915_perf_stream 
> *stream)
>
>       gen12_init_oa_buffer(stream);
>
> -     intel_uncore_write(uncore, GEN12_OAG_OACONTROL,
> -                        (report_format << 
> GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) |
> -                        GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE);
> +     regs = __oa_regs(stream);
> +     val = (stream->oa_buffer.format->format << 
> regs->oa_ctrl_counter_format_shift) |
> +           GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE;
> +
> +     intel_uncore_write(stream->uncore, regs->oa_ctrl, val);
>  }
>
>  /**
> @@ -3124,9 +3178,9 @@ static void gen12_oa_disable(struct i915_perf_stream 
> *stream)
>  {
>       struct intel_uncore *uncore = stream->uncore;
>
> -     intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0);
> +     intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0);
>       if (intel_wait_for_register(uncore,
> -                                 GEN12_OAG_OACONTROL,
> +                                 __oa_regs(stream)->oa_ctrl,
>                                   GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0,
>                                   50))
>               drm_err(&stream->perf->i915->drm,
> @@ -3329,6 +3383,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
> *stream,
>
>       stream->sample_size = sizeof(struct drm_i915_perf_record_header);
>
> +     stream->oa_buffer.group = g;

Should just use stream->engine->oa_group, see near the bottom.

>       stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
>       if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
>               return -EINVAL;
> @@ -3379,7 +3434,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
> *stream,
>        *   references will effectively disable RC6.
>        */
>       intel_engine_pm_get(stream->engine);
> -     intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
> +     intel_uncore_forcewake_get(stream->uncore, g->fw_domains);
>
>       /*
>        * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
> @@ -3440,7 +3495,7 @@ static int i915_oa_stream_init(struct i915_perf_stream 
> *stream,
>               intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc);
>
>  err_gucrc:
> -     intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
> +     intel_uncore_forcewake_put(stream->uncore, g->fw_domains);
>       intel_engine_pm_put(stream->engine);
>
>       free_oa_configs(stream);
> @@ -4033,6 +4088,7 @@ static int read_properties_unlocked(struct i915_perf 
> *perf,
>                                   struct perf_open_properties *props)
>  {
>       struct drm_i915_gem_context_param_sseu user_sseu;
> +     const struct i915_oa_format *f;
>       u64 __user *uprop = uprops;
>       bool config_sseu = false;
>       u8 class, instance;
> @@ -4203,6 +4259,17 @@ static int read_properties_unlocked(struct i915_perf 
> *perf,
>       if (!engine_supports_oa(props->engine))
>               return -EINVAL;
>
> +     if (!oa_unit_functional(props->engine))
> +             return -ENODEV;
> +
> +     i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX);

Why do we need this (something to do with speculation)? Can just do
'&perf->oa_formats[props->oa_format]' below? The format passed in has
already been checked in the switch statement above.

> +     f = &perf->oa_formats[i];
> +     if (!engine_class_supports_oa_format(props->engine, f->type)) {
> +             DRM_DEBUG("Invalid OA format %d for class %d\n",
> +                       f->type, props->engine->class);
> +             return -EINVAL;
> +     }
> +
>       if (config_sseu) {
>               ret = get_sseu_config(&props->sseu, props->engine, &user_sseu);
>               if (ret) {
> @@ -4383,6 +4450,14 @@ static const struct i915_range gen12_oa_b_counters[] = 
> {
>       {}
>  };
>
> +static const struct i915_range mtl_oam_b_counters[] = {
> +     { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */
> +     { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */
> +     { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */
> +     { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */
> +     {}
> +};
> +
>  static const struct i915_range xehp_oa_b_counters[] = {
>       { .start = 0xdc48, .end = 0xdc48 },     /* OAA_ENABLE_REG */
>       { .start = 0xdd00, .end = 0xdd48 },     /* OAG_LCE0_0 - OAA_LENABLE_REG 
> */
> @@ -4429,13 +4504,16 @@ static const struct i915_range gen12_oa_mux_regs[] = {
>
>  /*
>   * Ref: 14010536224:
> - * 0x20cc is repurposed on MTL, so use a separate array for MTL.
> + * 0x20cc is repurposed on MTL, so use a separate array for MTL. Also add the
> + * MPES/MPEC registers.

MPES/MPEC registers are added above now, not here so maybe get rid of the
comment change above?

>   */
>  static const struct i915_range mtl_oa_mux_regs[] = {
>       { .start = 0x0d00, .end = 0x0d04 },     /* RPM_CONFIG[0-1] */
>       { .start = 0x0d0c, .end = 0x0d2c },     /* NOA_CONFIG[0-8] */
>       { .start = 0x9840, .end = 0x9840 },     /* GDT_CHICKEN_BITS */
>       { .start = 0x9884, .end = 0x9888 },     /* NOA_WRITE */
> +     { .start = 0x38d100, .end = 0x38d114},  /* VISACTL */
> +     {}
>  };
>
>  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
> @@ -4473,10 +4551,26 @@ static bool gen12_is_valid_b_counter_addr(struct 
> i915_perf *perf, u32 addr)
>       return reg_in_range_table(addr, gen12_oa_b_counters);
>  }
>
> +static bool xehp_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 
> addr)
> +{
> +     enum intel_platform platform = INTEL_INFO(perf->i915)->platform;
> +
> +     if (!HAS_OAM(perf->i915))
> +             return false;
> +
> +     switch (platform) {
> +     case INTEL_METEORLAKE:
> +             return reg_in_range_table(addr, mtl_oam_b_counters);
> +     default:
> +             return false;
> +     }

Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', registers
are identical in later platforms too.

Should the function prefix be xehp or mtl? Don't see xehp in bspec,
probably xehp is discontinued.

> +}
> +
>  static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>  {
>       return reg_in_range_table(addr, xehp_oa_b_counters) ||
> -             reg_in_range_table(addr, gen12_oa_b_counters);
> +             reg_in_range_table(addr, gen12_oa_b_counters) ||
> +             xehp_is_valid_oam_b_counter_addr(perf, addr);
>  }
>
>  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
> @@ -4846,11 +4940,39 @@ static u32 __num_perf_groups_per_gt(struct intel_gt 
> *gt)
>       enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
>
>       switch (platform) {
> +     case INTEL_METEORLAKE:
> +             return 1;

I don't think we need this, as proposed previously maybe the function
should just unconditionally return 1.

>       default:
>               return 1;
>       }
>  }
>
> +static u32 __oam_engine_group(struct intel_engine_cs *engine)
> +{
> +     enum intel_platform platform = INTEL_INFO(engine->i915)->platform;
> +     struct intel_gt *gt = engine->gt;
> +     u32 group = PERF_GROUP_INVALID;
> +
> +     switch (platform) {
> +     case INTEL_METEORLAKE:

Replace here with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))'.

> +             /*
> +              * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media 
> slices
> +              * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA.
> +              */
> +             drm_WARN_ON(&engine->i915->drm,
> +                         engine->gt->type != GT_MEDIA);
> +
> +             group = PERF_GROUP_OAM_SAMEDIA_0;
> +             break;
> +     default:
> +             break;
> +     }
> +
> +     drm_WARN_ON(&gt->i915->drm, group >= __num_perf_groups_per_gt(gt));
> +
> +     return group;
> +}
> +
>  static u32 __oa_engine_group(struct intel_engine_cs *engine)
>  {
>       if (!engine_supports_oa(engine))

As mentioned above for engine_supports_oa, this looks like a duplication of
the checks below and should probably be removed.

> @@ -4860,11 +4982,58 @@ static u32 __oa_engine_group(struct intel_engine_cs 
> *engine)
>       case RENDER_CLASS:
>               return PERF_GROUP_OAG;
>
> +     case VIDEO_DECODE_CLASS:
> +     case VIDEO_ENHANCEMENT_CLASS:
> +             return __oam_engine_group(engine);
> +
>       default:
>               return PERF_GROUP_INVALID;
>       }
>  }
>
> +static struct i915_perf_regs __oam_regs(u32 base)
> +{
> +     return (struct i915_perf_regs) {
> +             base,
> +             GEN12_OAM_HEAD_POINTER(base),
> +             GEN12_OAM_TAIL_POINTER(base),
> +             GEN12_OAM_BUFFER(base),
> +             GEN12_OAM_CONTEXT_CONTROL(base),
> +             GEN12_OAM_CONTROL(base),
> +             GEN12_OAM_DEBUG(base),
> +             GEN12_OAM_STATUS(base),
> +             GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT,
> +     };
> +}
> +
> +static struct i915_perf_regs __oag_regs(void)
> +{
> +     return (struct i915_perf_regs) {
> +             0,
> +             GEN12_OAG_OAHEADPTR,
> +             GEN12_OAG_OATAILPTR,
> +             GEN12_OAG_OABUFFER,
> +             GEN12_OAG_OAGLBCTXCTRL,
> +             GEN12_OAG_OACONTROL,
> +             GEN12_OAG_OA_DEBUG,
> +             GEN12_OAG_OASTATUS,
> +             GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT,
> +     };
> +}
> +
> +static void oa_init_regs(struct intel_gt *gt, u32 id)
> +{
> +     struct i915_perf_group *group = &gt->perf.group[id];
> +     struct i915_perf_regs *regs = &group->regs;
> +
> +     if (id == PERF_GROUP_OAG && gt->type != GT_MEDIA)
> +             *regs = __oag_regs();
> +     else if (IS_METEORLAKE(gt->i915))

Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', OAM
registers are identical in later platforms too. Maybe get rid of drm_WARN
below?

> +             *regs = __oam_regs(mtl_oa_base[id]);
> +     else
> +             drm_WARN(&gt->i915->drm, 1, "Unsupported platform for OA\n");
> +}
> +
>  static void oa_init_groups(struct intel_gt *gt)
>  {
>       int i, num_groups = gt->perf.num_perf_groups;
> @@ -4881,6 +5050,24 @@ static void oa_init_groups(struct intel_gt *gt)
>               g->oa_unit_id = perf->oa_unit_ids++;
>
>               g->gt = gt;
> +             oa_init_regs(gt, i);
> +             g->fw_domains = FORCEWAKE_ALL;
> +             if (i == PERF_GROUP_OAG) {
> +                     g->type = TYPE_OAG;
> +
> +                     /*
> +                      * Enabling all fw domains for OAG caps the max GT
> +                      * frequency to media FF max. This could be less than
> +                      * what the user sets through the sysfs and perf
> +                      * measurements could be skewed. Since some platforms
> +                      * have separate OAM units to measure media perf, do not
> +                      * enable media fw domains for OAG.
> +                      */
> +                     if (HAS_OAM(gt->i915))
> +                             g->fw_domains = FORCEWAKE_GT | FORCEWAKE_RENDER;

Is this needed even when media and render are separate tiles, which is the
only case we have in this code right now? For separate tiles setting
FORCEWAKE_ALL should not cap the freq, correct?

If not needed we can get rid of g->fw_domains.

> +             } else {
> +                     g->type = TYPE_OAM;

This is wrong, because num_perf_groups is 1. So the type should be assigned
not based on i (which is always 0) but maybe similar to what is done in
oa_init_regs above.

We are escaping because g->type is unused as mentioned below.

> +             }
>       }
>  }
>
> @@ -4970,9 +5157,15 @@ static void oa_init_supported_formats(struct i915_perf 
> *perf)
>               break;
>
>       case INTEL_DG2:
> +             oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
> +             oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
> +             break;
> +
>       case INTEL_METEORLAKE:
>               oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
>               oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
> +             oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
> +             oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
>               break;
>
>       default:
> @@ -5217,8 +5410,10 @@ int i915_perf_ioctl_version(void)
>        *
>        * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and
>        *    DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE
> +      *
> +      * 7: Add support for video decode and enhancement classes.
>        */
> -     return 6;
> +     return 7;

Let's figure out if we want to club all this into 6.

>  }
>
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
> b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index 381d94101610..ba103875e19f 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -138,4 +138,82 @@
>  #define   GEN12_SQCNT1_PMON_ENABLE           REG_BIT(30)
>  #define   GEN12_SQCNT1_OABPC                 REG_BIT(29)
>
> +/* Gen12 OAM unit */
> +#define GEN12_OAM_HEAD_POINTER_OFFSET   (0x1a0)
> +#define  GEN12_OAM_HEAD_POINTER_MASK    0xffffffc0
> +
> +#define GEN12_OAM_TAIL_POINTER_OFFSET   (0x1a4)
> +#define  GEN12_OAM_TAIL_POINTER_MASK    0xffffffc0
> +
> +#define GEN12_OAM_BUFFER_OFFSET         (0x1a8)
> +#define  GEN12_OAM_BUFFER_SIZE_MASK     (0x7)
> +#define  GEN12_OAM_BUFFER_SIZE_SHIFT    (3)
> +#define  GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */
> +
> +#define GEN12_OAM_CONTEXT_CONTROL_OFFSET              (0x1bc)
> +#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2
> +#define  GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE       REG_BIT(1)
> +#define  GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME     REG_BIT(0)
> +
> +#define GEN12_OAM_CONTROL_OFFSET                (0x194)
> +#define  GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1
> +#define  GEN12_OAM_CONTROL_COUNTER_ENABLE       REG_BIT(0)
> +
> +#define GEN12_OAM_DEBUG_OFFSET                      (0x198)
> +#define  GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT         REG_BIT(12)
> +#define  GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO          REG_BIT(6)
> +#define  GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS  REG_BIT(5)
> +#define  GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS     REG_BIT(2)
> +#define  GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1)
> +
> +#define GEN12_OAM_STATUS_OFFSET            (0x19c)
> +#define  GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2)
> +#define  GEN12_OAM_STATUS_BUFFER_OVERFLOW  REG_BIT(1)
> +#define  GEN12_OAM_STATUS_REPORT_LOST      REG_BIT(0)
> +
> +#define GEN12_OAM_MMIO_TRG_OFFSET    (0x1d0)
> +
> +#define GEN12_OAM_MMIO_TRG(base) \
> +     _MMIO((base) + GEN12_OAM_MMIO_TRG_OFFSET)
> +
> +#define GEN12_OAM_HEAD_POINTER(base) \
> +     _MMIO((base) + GEN12_OAM_HEAD_POINTER_OFFSET)
> +#define GEN12_OAM_TAIL_POINTER(base) \
> +     _MMIO((base) + GEN12_OAM_TAIL_POINTER_OFFSET)
> +#define GEN12_OAM_BUFFER(base) \
> +     _MMIO((base) + GEN12_OAM_BUFFER_OFFSET)
> +#define GEN12_OAM_CONTEXT_CONTROL(base) \
> +     _MMIO((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET)
> +#define GEN12_OAM_CONTROL(base) \
> +     _MMIO((base) + GEN12_OAM_CONTROL_OFFSET)
> +#define GEN12_OAM_DEBUG(base) \
> +     _MMIO((base) + GEN12_OAM_DEBUG_OFFSET)
> +#define GEN12_OAM_STATUS(base) \
> +     _MMIO((base) + GEN12_OAM_STATUS_OFFSET)
> +
> +#define GEN12_OAM_CEC0_0_OFFSET              (0x40)
> +#define GEN12_OAM_CEC7_1_OFFSET              (0x7c)
> +#define GEN12_OAM_CEC0_0(base) \
> +     _MMIO((base) + GEN12_OAM_CEC0_0_OFFSET)
> +#define GEN12_OAM_CEC7_1(base) \
> +     _MMIO((base) + GEN12_OAM_CEC7_1_OFFSET)
> +
> +#define GEN12_OAM_STARTTRIG1_OFFSET  (0x00)
> +#define GEN12_OAM_STARTTRIG8_OFFSET  (0x1c)
> +#define GEN12_OAM_STARTTRIG1(base) \
> +     _MMIO((base) + GEN12_OAM_STARTTRIG1_OFFSET)
> +#define GEN12_OAM_STARTTRIG8(base) \
> +     _MMIO((base) + GEN12_OAM_STARTTRIG8_OFFSET)
> +
> +#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20)
> +#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c)
> +#define GEN12_OAM_REPORTTRIG1(base) \
> +     _MMIO((base) + GEN12_OAM_REPORTTRIG1_OFFSET)
> +#define GEN12_OAM_REPORTTRIG8(base) \
> +     _MMIO((base) + GEN12_OAM_REPORTTRIG8_OFFSET)
> +
> +#define GEN12_OAM_PERF_COUNTER_B0_OFFSET     (0x84)
> +#define GEN12_OAM_PERF_COUNTER_B(base, idx) \
> +     _MMIO((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx))
> +
>  #endif /* __INTEL_PERF_OA_REGS__ */
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
> b/drivers/gpu/drm/i915/i915_perf_types.h
> index 8ccb0b89d019..5b2c3bab60f8 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -20,6 +20,7 @@
>  #include "gt/intel_engine_types.h"
>  #include "gt/intel_sseu.h"
>  #include "i915_reg_defs.h"
> +#include "intel_uncore.h"
>  #include "intel_wakeref.h"
>
>  struct drm_i915_private;
> @@ -33,6 +34,7 @@ struct intel_engine_cs;
>
>  enum {
>       PERF_GROUP_OAG = 0,
> +     PERF_GROUP_OAM_SAMEDIA_0 = 0,
>
>       PERF_GROUP_MAX,
>       PERF_GROUP_INVALID = U32_MAX,
> @@ -43,9 +45,27 @@ enum report_header {
>       HDR_64_BIT,
>  };
>
> +struct i915_perf_regs {
> +     u32 base;
> +     i915_reg_t oa_head_ptr;
> +     i915_reg_t oa_tail_ptr;
> +     i915_reg_t oa_buffer;
> +     i915_reg_t oa_ctx_ctrl;
> +     i915_reg_t oa_ctrl;
> +     i915_reg_t oa_debug;
> +     i915_reg_t oa_status;
> +     u32 oa_ctrl_counter_format_shift;
> +};
> +
> +enum {

enum oa_type?

> +     TYPE_OAG,
> +     TYPE_OAM,
> +};
> +
>  struct i915_oa_format {
>       u32 format;
>       int size;
> +     int type;
>       enum report_header header;
>  };
>
> @@ -317,6 +337,11 @@ struct i915_perf_stream {
>                * @tail: The last verified tail that can be read by userspace.
>                */
>               u32 tail;
> +
> +             /**
> +              * @group: The group object for this OA buffer.
> +              */
> +             struct i915_perf_group *group;

Isn't this just stream->engine->oa_group, so let's use that instead rather
than duplicating?

>       } oa_buffer;
>
>       /**
> @@ -431,6 +456,21 @@ struct i915_perf_group {
>        * @engine_mask: A mask of engines using a single OA buffer.
>        */
>       intel_engine_mask_t engine_mask;
> +
> +     /*
> +      * @regs: OA buffer register group for programming the OA unit.
> +      */
> +     struct i915_perf_regs regs;
> +
> +     /*
> +      * @type: Type of OA buffer, OAM, OAG etc.

s/OA buffer/OA unit/

> +      */
> +     int type;

Also (incorrectly) assigned but currently unused unless we make the change
to engine_class_supports_oa_format mentioned above. But I think we should
retain and use this.

> +
> +     /*
> +      * @fw_domains: forcewake domains required for this group.
> +      */
> +     enum forcewake_domains fw_domains;

Let's see if we need this.

>  };
>
>  struct i915_perf_gt {
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
> b/drivers/gpu/drm/i915/intel_device_info.h
> index 80bda653d61b..45e218327f44 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -166,6 +166,7 @@ enum intel_ppgtt_type {
>       func(has_mslice_steering); \
>       func(has_oa_bpc_reporting); \
>       func(has_oa_slice_contrib_limits); \
> +     func(has_oam); \
>       func(has_one_eu_per_fuse_bit); \
>       func(has_pxp); \
>       func(has_rc6); \
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index b6922b52d85c..70bfa6530dbc 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2676,6 +2676,10 @@ enum drm_i915_oa_format {
>       I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
>       I915_OA_FORMAT_A24u40_A14u32_B8_C8,
>
> +     /* MTL OAM */
> +     I915_OAM_FORMAT_MPEC8u64_B8_C8,
> +     I915_OAM_FORMAT_MPEC8u32_B8_C8,
> +
>       I915_OA_FORMAT_MAX          /* non-ABI */
>  };

Thanks.
--
Ashutosh

Reply via email to