Chris Wilson <ch...@chris-wilson.co.uk> writes:

> We repeatedly (and more so in future) use the same looping construct
> over the mocs definition table to setup the register state. Refactor the
> loop construct into a reusable macro.
>
> add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217)
> Function                                     old     new   delta
> intel_mocs_init_engine.cold                    -      71     +71
> offset                                         -      28     +28
> __func__                                   17273   17287     +14
> intel_mocs_init                              143     113     -30
> mocs_register.isra                            91       -     -91
> intel_mocs_init_engine                       503     294    -209

Effective diet.

>
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

Could not spot anything off at this round and even pretty
defines this time.

Reviewed-by: Mika Kuoppala <mika.kuopp...@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/gt/intel_mocs.c | 133 +++++++++++----------------
>  drivers/gpu/drm/i915/i915_reg.h      |  19 ++--
>  2 files changed, 64 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
> b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index e6f3f36a3988..47d16a242183 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -329,27 +329,6 @@ static bool get_mocs_settings(const struct 
> drm_i915_private *i915,
>       return true;
>  }
>  
> -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int 
> index)
> -{
> -     switch (engine->id) {
> -     case RCS0:
> -             return GEN9_GFX_MOCS(index);
> -     case VCS0:
> -             return GEN9_MFX0_MOCS(index);
> -     case BCS0:
> -             return GEN9_BLT_MOCS(index);
> -     case VECS0:
> -             return GEN9_VEBOX_MOCS(index);
> -     case VCS1:
> -             return GEN9_MFX1_MOCS(index);
> -     case VCS2:
> -             return GEN11_MFX2_MOCS(index);
> -     default:
> -             MISSING_CASE(engine->id);
> -             return INVALID_MMIO_REG;
> -     }
> -}
> -
>  /*
>   * Get control_value from MOCS entry taking into account when it's not used:
>   * I915_MOCS_PTE's value is returned in this case.
> @@ -357,29 +336,47 @@ static i915_reg_t mocs_register(const struct 
> intel_engine_cs *engine, int index)
>  static u32 get_entry_control(const struct drm_i915_mocs_table *table,
>                            unsigned int index)
>  {
> -     if (table->table[index].used)
> +     if (index < table->size && table->table[index].used)
>               return table->table[index].control_value;
>  
>       return table->table[I915_MOCS_PTE].control_value;
>  }
>  
> -static void init_mocs_table(struct intel_engine_cs *engine,
> -                         const struct drm_i915_mocs_table *table)
> +#define for_each_mocs(mocs, t, i) \
> +     for (i = 0; \
> +          i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
> +          i++)
> +
> +static void __init_mocs_table(struct intel_uncore *uncore,
> +                           const struct drm_i915_mocs_table *table,
> +                           u32 addr)
>  {
> -     struct intel_uncore *uncore = engine->uncore;
> -     u32 unused_value = table->table[I915_MOCS_PTE].control_value;
>       unsigned int i;
> +     u32 mocs;
>  
> -     for (i = 0; i < table->size; i++)
> -             intel_uncore_write_fw(uncore,
> -                                   mocs_register(engine, i),
> -                                   get_entry_control(table, i));
> +     for_each_mocs(mocs, table, i)
> +             intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
> +}
> +
> +static u32 mocs_offset(const struct intel_engine_cs *engine)
> +{
> +     static const u32 offset[] = {
> +             [RCS0]  =  __GEN9_RCS0_MOCS0,
> +             [VCS0]  =  __GEN9_VCS0_MOCS0,
> +             [VCS1]  =  __GEN9_VCS1_MOCS0,
> +             [VECS0] =  __GEN9_VECS0_MOCS0,
> +             [BCS0]  =  __GEN9_BCS0_MOCS0,
> +             [VCS2]  = __GEN11_VCS2_MOCS0,
> +     };
> +
> +     GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
> +     return offset[engine->id];
> +}
>  
> -     /* All remaining entries are unused */
> -     for (; i < table->n_entries; i++)
> -             intel_uncore_write_fw(uncore,
> -                                   mocs_register(engine, i),
> -                                   unused_value);
> +static void init_mocs_table(struct intel_engine_cs *engine,
> +                         const struct drm_i915_mocs_table *table)
> +{
> +     __init_mocs_table(engine->uncore, table, mocs_offset(engine));
>  }
>  
>  /*
> @@ -389,7 +386,7 @@ static void init_mocs_table(struct intel_engine_cs 
> *engine,
>  static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
>                         unsigned int index)
>  {
> -     if (table->table[index].used)
> +     if (index < table->size && table->table[index].used)
>               return table->table[index].l3cc_value;
>  
>       return table->table[I915_MOCS_PTE].l3cc_value;
> @@ -400,37 +397,23 @@ static inline u32 l3cc_combine(u16 low, u16 high)
>       return low | (u32)high << 16;
>  }
>  
> +#define for_each_l3cc(l3cc, t, i) \
> +     for (i = 0; \
> +          i < ((t)->n_entries + 1) / 2 ? \
> +          (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
> +                               get_entry_l3cc((t), 2 * i + 1))), 1 : \
> +          0; \
> +          i++)
> +
>  static void init_l3cc_table(struct intel_engine_cs *engine,
>                           const struct drm_i915_mocs_table *table)
>  {
>       struct intel_uncore *uncore = engine->uncore;
> -     u16 unused_value = table->table[I915_MOCS_PTE].l3cc_value;
>       unsigned int i;
> +     u32 l3cc;
>  
> -     for (i = 0; i < table->size / 2; i++) {
> -             u16 low = get_entry_l3cc(table, 2 * i);
> -             u16 high = get_entry_l3cc(table, 2 * i + 1);
> -
> -             intel_uncore_write(uncore,
> -                                GEN9_LNCFCMOCS(i),
> -                                l3cc_combine(low, high));
> -     }
> -
> -     /* Odd table size - 1 left over */
> -     if (table->size & 1) {
> -             u16 low = get_entry_l3cc(table, 2 * i);
> -
> -             intel_uncore_write(uncore,
> -                                GEN9_LNCFCMOCS(i),
> -                                l3cc_combine(low, unused_value));
> -             i++;
> -     }
> -
> -     /* All remaining entries are also unused */
> -     for (; i < table->n_entries / 2; i++)
> -             intel_uncore_write(uncore,
> -                                GEN9_LNCFCMOCS(i),
> -                                l3cc_combine(unused_value, unused_value));
> +     for_each_l3cc(l3cc, table, i)
> +             intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
>  }
>  
>  void intel_mocs_init_engine(struct intel_engine_cs *engine)
> @@ -451,11 +434,14 @@ void intel_mocs_init_engine(struct intel_engine_cs 
> *engine)
>               init_l3cc_table(engine, &table);
>  }
>  
> -static void intel_mocs_init_global(struct intel_gt *gt)
> +static u32 global_mocs_offset(void)
> +{
> +     return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
> +}
> +
> +static void init_global_mocs(struct intel_gt *gt)
>  {
> -     struct intel_uncore *uncore = gt->uncore;
>       struct drm_i915_mocs_table table;
> -     unsigned int index;
>  
>       /*
>        * LLC and eDRAM control values are not applicable to dgfx
> @@ -463,29 +449,14 @@ static void intel_mocs_init_global(struct intel_gt *gt)
>       if (IS_DGFX(gt->i915))
>               return;
>  
> -     GEM_BUG_ON(!HAS_GLOBAL_MOCS_REGISTERS(gt->i915));
> -
>       if (!get_mocs_settings(gt->i915, &table))
>               return;
>  
> -     for (index = 0; index < table.size; index++)
> -             intel_uncore_write(uncore,
> -                                GEN12_GLOBAL_MOCS(index),
> -                                table.table[index].control_value);
> -
> -     /*
> -      * Ok, now set the unused entries to the invalid entry (index 0). These
> -      * entries are officially undefined and no contract for the contents and
> -      * settings is given for these entries.
> -      */
> -     for (; index < table.n_entries; index++)
> -             intel_uncore_write(uncore,
> -                                GEN12_GLOBAL_MOCS(index),
> -                                table.table[I915_MOCS_PTE].control_value);
> +     __init_mocs_table(gt->uncore, &table, global_mocs_offset());
>  }
>  
>  void intel_mocs_init(struct intel_gt *gt)
>  {
>       if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
> -             intel_mocs_init_global(gt);
> +             init_global_mocs(gt);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 2ffcc21670b7..0960ea0b5a66 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -11688,13 +11688,18 @@ enum skl_power_gate {
>  /* MOCS (Memory Object Control State) registers */
>  #define GEN9_LNCFCMOCS(i)    _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
>  
> -#define GEN9_GFX_MOCS(i)     _MMIO(0xc800 + (i) * 4) /* Graphics MOCS 
> registers */
> -#define GEN9_MFX0_MOCS(i)    _MMIO(0xc900 + (i) * 4) /* Media 0 MOCS 
> registers */
> -#define GEN9_MFX1_MOCS(i)    _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS 
> registers */
> -#define GEN9_VEBOX_MOCS(i)   _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers 
> */
> -#define GEN9_BLT_MOCS(i)     _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS 
> registers */
> -/* Media decoder 2 MOCS registers */
> -#define GEN11_MFX2_MOCS(i)   _MMIO(0x10000 + (i) * 4)
> +#define __GEN9_RCS0_MOCS0    0xc800
> +#define GEN9_GFX_MOCS(i)     _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
> +#define __GEN9_VCS0_MOCS0    0xc900
> +#define GEN9_MFX0_MOCS(i)    _MMIO(__GEN9_VCS0_MOCS0 + (i) * 4)
> +#define __GEN9_VCS1_MOCS0    0xca00
> +#define GEN9_MFX1_MOCS(i)    _MMIO(__GEN9_VCS1_MOCS0 + (i) * 4)
> +#define __GEN9_VECS0_MOCS0   0xcb00
> +#define GEN9_VEBOX_MOCS(i)   _MMIO(__GEN9_VECS0_MOCS0 + (i) * 4)
> +#define __GEN9_BCS0_MOCS0    0xcc00
> +#define GEN9_BLT_MOCS(i)     _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
> +#define __GEN11_VCS2_MOCS0   0x10000
> +#define GEN11_MFX2_MOCS(i)   _MMIO(__GEN11_VCS2_MOCS0 + (i) * 4)
>  
>  #define GEN10_SCRATCH_LNCF2          _MMIO(0xb0a0)
>  #define   PMFLUSHDONE_LNICRSDROP     (1 << 20)
> -- 
> 2.24.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to