On Sun, 2019-09-22 at 19:48 +0300, Lionel Landwerlin wrote: > On 21/09/2019 03:39, Lucas De Marchi wrote: > > On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson < > > [email protected]> wrote: > > > From: Daniele Ceraolo Spurio <[email protected]> > > > > > > Gen12 has dual-subslices (DSS), which compared to gen11 subslices > > > have > > > some duplicated resources/paths. Although DSS behave similarly to > > > 2 > > > subslices, instead of splitting this and presenting userspace > > > with bits > > > not directly representative of hardware resources, present > > > userspace > > > with a subslice_mask made up of DSS bits instead. > > > > > > v2: GEM_BUG_ON on mask size (Lionel) > > > > > > Bspec: 29547 > > > Bspec: 12247 > > > Cc: Kelvin Gardiner <[email protected]> > > > Cc: Tvrtko Ursulin <[email protected]> > > > Cc: Lionel Landwerlin <[email protected]> > > > CC: Radhakrishna Sripada <[email protected]> > > > Cc: Michel Thierry <[email protected]> #v1 > > > Cc: Daniele Ceraolo Spurio <[email protected]> > > > Cc: José Roberto de Souza <[email protected]> > > > Signed-off-by: Daniele Ceraolo Spurio < > > > [email protected]> > > > Signed-off-by: James Ausmus <[email protected]> > > > Signed-off-by: Oscar Mateo <[email protected]> > > > Signed-off-by: Sudeep Dutt <[email protected]> > > > Signed-off-by: Stuart Summers <[email protected]> > > > Signed-off-by: Mika Kuoppala <[email protected]> > > > Acked-by: Lionel Landwerlin <[email protected]> > > > --- > > > > After this I get the correct values for TGL: > > - Available Subslice Total: 2 > > - Available Slice0 subslices: 2 > > - Available EU Total: 16 > > - Available EU Per Subslice: 8 > > + Available Subslice Total: 6 > > + Available Slice0 subslices: 6 > > + Available EU Total: 96 > > + Available EU Per Subslice: 16 > > > > Reviewed-by: Lucas De Marchi <[email protected]> > > > > Lucas De Marchi > > > Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for > TGL?
The idea here is that from the userspace perspective, there is still
only one unit which can be utilized in hardware.
Thanks,
Stuart
>
>
> -Lionel
>
>
> > > drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +--
> > > drivers/gpu/drm/i915/i915_debugfs.c | 3 +-
> > > drivers/gpu/drm/i915/i915_reg.h | 2 +
> > > drivers/gpu/drm/i915/intel_device_info.c | 83
> > > ++++++++++++++++++------
> > > include/uapi/drm/i915_drm.h | 6 +-
> > > 5 files changed, 72 insertions(+), 31 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > index 4070f6ff1db6..d1d225204f09 100644
> > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> > > @@ -18,12 +18,13 @@ struct drm_i915_private;
> > > #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
> > > #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries,
> > > BITS_PER_BYTE)
> > > #define GEN_MAX_SUBSLICE_STRIDE
> > > GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> > > -#define GEN_MAX_EUS (10) /* HSW upper bound */
> > > +#define GEN_MAX_EUS (16) /* TGL upper bound */
> > > #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
> > >
> > > struct sseu_dev_info {
> > > u8 slice_mask;
> > > u8 subslice_mask[GEN_MAX_SLICES *
> > > GEN_MAX_SUBSLICE_STRIDE];
> > > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES *
> > > GEN_MAX_EU_STRIDE];
> > > u16 eu_total;
> > > u8 eu_per_subslice;
> > > u8 min_eu_in_pool;
> > > @@ -40,12 +41,6 @@ struct sseu_dev_info {
> > >
> > > u8 ss_stride;
> > > u8 eu_stride;
> > > -
> > > - /* We don't have more than 8 eus per subslice at the
> > > moment and as we
> > > - * store eus enabled using bits, no need to multiply by
> > > eus per
> > > - * subslice.
> > > - */
> > > - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
> > > };
> > >
> > > /*
> > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> > > b/drivers/gpu/drm/i915/i915_debugfs.c
> > > index 43db50095257..b5b449a88cf1 100644
> > > --- a/drivers/gpu/drm/i915/i915_debugfs.c
> > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> > > @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct
> > > drm_i915_private *dev_priv,
> > > for (ss = 0; ss < info->sseu.max_subslices;
> > > ss++) {
> > > unsigned int eu_cnt;
> > >
> > > - if (!(s_reg[s] &
> > > (GEN9_PGCTL_SS_ACK(ss))))
> > > + if (info->sseu.has_subslice_pg &&
> > > + !(s_reg[s] &
> > > (GEN9_PGCTL_SS_ACK(ss))))
> > > /* skip disabled subslice */
> > > continue;
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > > b/drivers/gpu/drm/i915/i915_reg.h
> > > index bf37ecebc82f..47847135a11f 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -2956,6 +2956,8 @@ static inline bool
> > > i915_mmio_reg_valid(i915_reg_t reg)
> > >
> > > #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
> > >
> > > +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> > > +
> > > #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050)
> > > #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0)
> > > #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
> > > diff --git a/drivers/gpu/drm/i915/intel_device_info.c
> > > b/drivers/gpu/drm/i915/intel_device_info.c
> > > index 50b05a5de53b..b91a960b037f 100644
> > > --- a/drivers/gpu/drm/i915/intel_device_info.c
> > > +++ b/drivers/gpu/drm/i915/intel_device_info.c
> > > @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct
> > > sseu_dev_info *sseu)
> > > return total;
> > > }
> > >
> > > +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> > > + u8 s_en, u32 ss_en, u16
> > > eu_en)
> > > +{
> > > + int s, ss;
> > > +
> > > + /* ss_en represents entire subslice mask across all
> > > slices */
> > > + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
> > > + sizeof(ss_en) * BITS_PER_BYTE);
> > > +
> > > + for (s = 0; s < sseu->max_slices; s++) {
> > > + if ((s_en & BIT(s)) == 0)
> > > + continue;
> > > +
> > > + sseu->slice_mask |= BIT(s);
> > > +
> > > + intel_sseu_set_subslices(sseu, s, ss_en);
> > > +
> > > + for (ss = 0; ss < sseu->max_subslices; ss++)
> > > + if (intel_sseu_has_subslice(sseu, s, ss))
> > > + sseu_set_eus(sseu, s, ss, eu_en);
> > > + }
> > > + sseu->eu_per_subslice = hweight16(eu_en);
> > > + sseu->eu_total = compute_eu_total(sseu);
> > > +}
> > > +
> > > +static void gen12_sseu_info_init(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> > > + u8 s_en;
> > > + u32 dss_en;
> > > + u16 eu_en = 0;
> > > + u8 eu_en_fuse;
> > > + int eu;
> > > +
> > > + /*
> > > + * Gen12 has Dual-Subslices, which behave similarly to 2
> > > gen11 SS.
> > > + * Instead of splitting these, provide userspace with an
> > > array
> > > + * of DSS to more closely represent the hardware
> > > resource.
> > > + */
> > > + intel_sseu_set_info(sseu, 1, 6, 16);
> > > +
> > > + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) &
> > > GEN11_GT_S_ENA_MASK;
> > > +
> > > + dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> > > +
> > > + /* one bit per pair of EUs */
> > > + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) &
> > > GEN11_EU_DIS_MASK);
> > > + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> > > + if (eu_en_fuse & BIT(eu))
> > > + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> > > +
> > > + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> > > +
> > > + /* TGL only supports slice-level power gating */
> > > + sseu->has_slice_pg = 1;
> > > +}
> > > +
> > > static void gen11_sseu_info_init(struct drm_i915_private
> > > *dev_priv)
> > > {
> > > struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> > > u8 s_en;
> > > - u32 ss_en, ss_en_mask;
> > > + u32 ss_en;
> > > u8 eu_en;
> > > - int s;
> > >
> > > if (IS_ELKHARTLAKE(dev_priv))
> > > intel_sseu_set_info(sseu, 1, 4, 8);
> > > @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct
> > > drm_i915_private *dev_priv)
> > >
> > > s_en = I915_READ(GEN11_GT_SLICE_ENABLE) &
> > > GEN11_GT_S_ENA_MASK;
> > > ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> > > - ss_en_mask = BIT(sseu->max_subslices) - 1;
> > > eu_en = ~(I915_READ(GEN11_EU_DISABLE) &
> > > GEN11_EU_DIS_MASK);
> > >
> > > - for (s = 0; s < sseu->max_slices; s++) {
> > > - if (s_en & BIT(s)) {
> > > - int ss_idx = sseu->max_subslices * s;
> > > - int ss;
> > > -
> > > - sseu->slice_mask |= BIT(s);
> > > -
> > > - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> > > - ss_en_mask);
> > > -
> > > - for (ss = 0; ss < sseu->max_subslices;
> > > ss++)
> > > - if (intel_sseu_has_subslice(sseu,
> > > s, ss))
> > > - sseu_set_eus(sseu, s, ss,
> > > eu_en);
> > > - }
> > > - }
> > > - sseu->eu_per_subslice = hweight8(eu_en);
> > > - sseu->eu_total = compute_eu_total(sseu);
> > > + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
> > >
> > > /* ICL has no power gating restrictions. */
> > > sseu->has_slice_pg = 1;
> > > @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct
> > > drm_i915_private *dev_priv)
> > > gen9_sseu_info_init(dev_priv);
> > > else if (IS_GEN(dev_priv, 10))
> > > gen10_sseu_info_init(dev_priv);
> > > - else if (INTEL_GEN(dev_priv) >= 11)
> > > + else if (IS_GEN(dev_priv, 11))
> > > gen11_sseu_info_init(dev_priv);
> > > + else if (INTEL_GEN(dev_priv) >= 12)
> > > + gen12_sseu_info_init(dev_priv);
> > >
> > > if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
> > > DRM_INFO("Disabling ppGTT for VT-d support\n");
> > > diff --git a/include/uapi/drm/i915_drm.h
> > > b/include/uapi/drm/i915_drm.h
> > > index 469dc512cca3..30c542144016 100644
> > > --- a/include/uapi/drm/i915_drm.h
> > > +++ b/include/uapi/drm/i915_drm.h
> > > @@ -2033,8 +2033,10 @@ struct drm_i915_query {
> > > * (data[X / 8] >> (X % 8)) & 1
> > > *
> > > * - the subslice mask for each slice with one bit per subslice
> > > telling
> > > - * whether a subslice is available. The availability of
> > > subslice Y in slice
> > > - * X can be queried with the following formula :
> > > + * whether a subslice is available. Gen12 has dual-subslices,
> > > which are
> > > + * similar to two gen11 subslices. For gen12, this array
> > > represents dual-
> > > + * subslices. The availability of subslice Y in slice X can be
> > > queried
> > > + * with the following formula :
> > > *
> > > * (data[subslice_offset +
> > > * X * subslice_stride +
> > > --
> > > 2.23.0
> > >
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > [email protected]
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> >
>
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
smime.p7s
Description: S/MIME cryptographic signature
_______________________________________________ Intel-gfx mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/intel-gfx
